Diffstat (limited to 'drivers/infiniband')
96 files changed, 2957 insertions, 1584 deletions
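A note on the pattern that accounts for the bulk of these hunks: the diff converts the IB core and drivers to the reworked workqueue API. Handlers now take a struct work_struct * and recover their containing object with container_of() instead of receiving an opaque void *data argument, INIT_WORK() loses its third argument, and work that may be queued with a timeout becomes struct delayed_work (hence cm_work embedding a delayed_work and immediate submissions becoming queue_delayed_work(wq, work, 0)). A minimal sketch of the conversion, using a hypothetical struct foo rather than any structure from this patch:

#include <linux/kernel.h>
#include <linux/workqueue.h>

/* "foo" is a stand-in container, playing the role of cm_work,
 * ib_update_work, iwcm_work, etc. in the hunks below. */
struct foo {
	int port_num;
	struct work_struct work;         /* immediate work */
	struct delayed_work timed_work;  /* work queued with a delay */
};

/* New-style handler: it receives the work_struct pointer itself and
 * recovers its container with container_of(). */
static void foo_handler(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, work);

	pr_debug("work for port %d\n", f->port_num);
}

/* For delayed work the embedded work_struct sits at timed_work.work,
 * so container_of() names that member. */
static void foo_timed_handler(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, timed_work.work);

	pr_debug("timed work for port %d\n", f->port_num);
}

static void foo_queue(struct foo *f)
{
	INIT_WORK(&f->work, foo_handler);        /* no data argument */
	INIT_DELAYED_WORK(&f->timed_work, foo_timed_handler);

	schedule_work(&f->work);
	schedule_delayed_work(&f->timed_work, msecs_to_jiffies(100));
}

Because the handler can no longer be handed an arbitrary data pointer, any state it needs must live in (or be reachable from) the structure that embeds the work item; that is why several hunks below grow container fields or switch lookups to container_of().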
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9cbf09e2052f..af939796750d 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -47,6 +47,7 @@ struct addr_req { struct sockaddr src_addr; struct sockaddr dst_addr; struct rdma_dev_addr *addr; + struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); @@ -54,13 +55,33 @@ struct addr_req { int status; }; -static void process_req(void *data); +static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static LIST_HEAD(req_list); -static DECLARE_WORK(work, process_req, NULL); +static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +void rdma_addr_register_client(struct rdma_addr_client *client) +{ + atomic_set(&client->refcount, 1); + init_completion(&client->comp); +} +EXPORT_SYMBOL(rdma_addr_register_client); + +static inline void put_client(struct rdma_addr_client *client) +{ + if (atomic_dec_and_test(&client->refcount)) + complete(&client->comp); +} + +void rdma_addr_unregister_client(struct rdma_addr_client *client) +{ + put_client(client); + wait_for_completion(&client->comp); +} +EXPORT_SYMBOL(rdma_addr_unregister_client); + int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { @@ -86,7 +107,7 @@ EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; - u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; dev = ip_dev_find(ip); @@ -118,7 +139,7 @@ static void queue_req(struct addr_req *req) mutex_lock(&lock); list_for_each_entry_reverse(temp_req, &req_list, list) { - if (time_after(req->timeout, temp_req->timeout)) + if (time_after_eq(req->timeout, temp_req->timeout)) break; } @@ -194,7 +215,7 @@ out: return ret; } -static void process_req(void *data) +static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; struct sockaddr_in *src_in, *dst_in; @@ -204,19 +225,17 @@ static void process_req(void *data) mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { - if (req->status) { + if (req->status == -ENODATA) { src_in = (struct sockaddr_in *) &req->src_addr; dst_in = (struct sockaddr_in *) &req->dst_addr; req->status = addr_resolve_remote(src_in, dst_in, req->addr); + if (req->status && time_after_eq(jiffies, req->timeout)) + req->status = -ETIMEDOUT; + else if (req->status == -ENODATA) + continue; } - if (req->status && time_after(jiffies, req->timeout)) - req->status = -ETIMEDOUT; - else if (req->status == -ENODATA) - continue; - - list_del(&req->list); - list_add_tail(&req->list, &done_list); + list_move_tail(&req->list, &done_list); } if (!list_empty(&req_list)) { @@ -229,6 +248,7 @@ static void process_req(void *data) list_del(&req->list); req->callback(req->status, &req->src_addr, req->addr, req->context); + put_client(req->client); kfree(req); } } @@ -239,7 +259,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, { struct net_device *dev; u32 src_ip = src_in->sin_addr.s_addr; - u32 dst_ip = dst_in->sin_addr.s_addr; + __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; dev = ip_dev_find(dst_ip); @@ -264,7 +284,8 @@ static int addr_resolve_local(struct sockaddr_in *src_in, return ret; } -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int 
rdma_resolve_ip(struct rdma_addr_client *client, + struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), @@ -285,6 +306,8 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, req->addr = addr; req->callback = callback; req->context = context; + req->client = client; + atomic_inc(&client->refcount); src_in = (struct sockaddr_in *) &req->src_addr; dst_in = (struct sockaddr_in *) &req->dst_addr; @@ -305,6 +328,7 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, break; default: ret = req->status; + atomic_dec(&client->refcount); kfree(req); break; } @@ -321,8 +345,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) if (req->addr == addr) { req->status = -ECANCELED; req->timeout = jiffies; - list_del(&req->list); - list_add(&req->list, &req_list); + list_move(&req->list, &req_list); set_timeout(req->timeout); break; } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 20e9f64e67a6..98272fbbfb31 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -285,9 +285,10 @@ err: kfree(tprops); } -static void ib_cache_task(void *work_ptr) +static void ib_cache_task(struct work_struct *_work) { - struct ib_update_work *work = work_ptr; + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); ib_cache_update(work->device, work->port_num); kfree(work); @@ -306,7 +307,7 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_CLIENT_REREGISTER) { work = kmalloc(sizeof *work, GFP_ATOMIC); if (work) { - INIT_WORK(&work->work, ib_cache_task, work); + INIT_WORK(&work->work, ib_cache_task); work->device = event->device; work->port_num = event->element.port_num; schedule_work(&work->work); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f35fcc4c0638..79c937bf6962 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -75,6 +75,7 @@ static struct ib_cm { struct rb_root remote_sidr_table; struct idr local_id_table; __be32 random_id_operand; + struct list_head timewait_list; struct workqueue_struct *wq; } cm; @@ -100,7 +101,7 @@ struct cm_av { }; struct cm_work { - struct work_struct work; + struct delayed_work work; struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ @@ -112,6 +113,7 @@ struct cm_work { struct cm_timewait_info { struct cm_work work; /* Must be first. 
*/ + struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; @@ -145,12 +147,12 @@ struct cm_id_private { __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; + __be16 pkey; u8 private_data_len; u8 max_cm_retries; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; - u8 local_ack_timeout; u8 retry_count; u8 rnr_retry_count; u8 service_timeout; @@ -159,7 +161,7 @@ struct cm_id_private { atomic_t work_count; }; -static void cm_work_handler(void *data); +static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { @@ -238,11 +240,10 @@ static void * cm_copy_private_data(const void *private_data, if (!private_data || !private_data_len) return NULL; - data = kmalloc(private_data_len, GFP_KERNEL); + data = kmemdup(private_data, private_data_len, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - memcpy(data, private_data, private_data_len); return data; } @@ -647,13 +648,6 @@ static inline int cm_convert_to_ms(int iba_time) static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { - unsigned long flags; - - if (!timewait_info->inserted_remote_id && - !timewait_info->inserted_remote_qp) - return; - - spin_lock_irqsave(&cm.lock, flags); if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; @@ -663,7 +657,6 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } - spin_unlock_irqrestore(&cm.lock, flags); } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) @@ -675,8 +668,7 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) return ERR_PTR(-ENOMEM); timewait_info->work.local_id = local_id; - INIT_WORK(&timewait_info->work.work, cm_work_handler, - &timewait_info->work); + INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; return timewait_info; } @@ -684,8 +676,12 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + unsigned long flags; + spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); + list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); + spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. @@ -693,7 +689,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) * timewait before notifying the user that we've exited timewait. 
*/ cm_id_priv->id.state = IB_CM_TIMEWAIT; - wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); + wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); cm_id_priv->timewait_info = NULL; @@ -701,9 +697,13 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { + unsigned long flags; + cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { + spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } @@ -1008,6 +1008,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); @@ -1022,8 +1023,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->local_ack_timeout = - cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); @@ -1307,6 +1306,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); @@ -1315,7 +1315,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + listen_cm_id_priv = NULL; + goto out; } /* Find matching listen request. 
*/ @@ -1323,21 +1324,20 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, req_msg->service_id, req_msg->private_data); if (!listen_cm_id_priv) { + cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); - goto error; + goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irqrestore(&cm.lock, flags); +out: return listen_cm_id_priv; - -error: cm_cleanup_timewait(cm_id_priv->timewait_info); - return NULL; } static int cm_req_handler(struct cm_work *work) @@ -1407,9 +1407,8 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); + cm_id_priv->pkey = req_msg->pkey; cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->local_ack_timeout = - cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); @@ -1713,7 +1712,7 @@ static int cm_establish_handler(struct cm_work *work) unsigned long flags; int ret; - /* See comment in ib_cm_establish about lookup. */ + /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); if (!cm_id_priv) return -EINVAL; @@ -1899,6 +1898,32 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_drep); +static int cm_issue_drep(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_buf *msg = NULL; + struct cm_dreq_msg *dreq_msg; + struct cm_drep_msg *drep_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; + drep_msg = (struct cm_drep_msg *) msg->mad; + + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); + drep_msg->remote_comm_id = dreq_msg->local_comm_id; + drep_msg->local_comm_id = dreq_msg->remote_comm_id; + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + + return ret; +} + static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; @@ -1910,8 +1935,10 @@ static int cm_dreq_handler(struct cm_work *work) dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); - if (!cm_id_priv) + if (!cm_id_priv) { + cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; + } work->cm_event.private_data = &dreq_msg->private_data; @@ -2371,11 +2398,16 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || - cm_id->lap_state != IB_CM_LAP_IDLE) { + (cm_id->lap_state != IB_CM_LAP_UNINIT && + cm_id->lap_state != IB_CM_LAP_IDLE)) { ret = -EINVAL; goto out; } + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); + if (ret) + goto out; + ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; @@ -2400,7 +2432,8 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_lap); -static void cm_format_path_from_lap(struct 
ib_sa_path_rec *path, +static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); @@ -2412,10 +2445,10 @@ static void cm_format_path_from_lap(struct ib_sa_path_rec *path, path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->reversible = 1; - /* pkey is same as in REQ */ + path->pkey = cm_id_priv->pkey; path->sl = cm_lap_get_sl(lap_msg); path->mtu_selector = IB_SA_EQ; - /* mtu is same as in REQ */ + path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; path->rate = cm_lap_get_packet_rate(lap_msg); path->packet_life_time_selector = IB_SA_EQ; @@ -2441,7 +2474,7 @@ static int cm_lap_handler(struct cm_work *work) param = &work->cm_event.param.lap_rcvd; param->alternate_path = &work->path[0]; - cm_format_path_from_lap(param->alternate_path, lap_msg); + cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -2449,6 +2482,7 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; switch (cm_id_priv->id.lap_state) { + case IB_CM_LAP_UNINIT: case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: @@ -2471,6 +2505,10 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -2601,28 +2639,29 @@ static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - unsigned long flags; int ret; timewait_info = (struct cm_timewait_info *)work; - cm_cleanup_timewait(timewait_info); + spin_lock_irq(&cm.lock); + list_del(&timewait_info->list); + spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2955,9 +2994,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, } } -static void cm_work_handler(void *data) +static void cm_work_handler(struct work_struct *_work) { - struct cm_work *work = data; + struct cm_work *work = container_of(_work, struct cm_work, work.work); int ret; switch (work->cm_event.event) { @@ -3008,7 +3047,7 @@ static void cm_work_handler(void *data) cm_free_work(work); } -int ib_cm_establish(struct ib_cm_id *cm_id) +static int cm_establish(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_work *work; @@ -3047,16 +3086,53 @@ int ib_cm_establish(struct ib_cm_id *cm_id) * we need to find the cm_id once we're in the context of the * worker thread, rather than holding a reference on it. 
*/ - INIT_WORK(&work->work, cm_work_handler, work); + INIT_DELAYED_WORK(&work->work, cm_work_handler); work->local_id = cm_id->local_id; work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; - queue_work(cm.wq, &work->work); + queue_delayed_work(cm.wq, &work->work, 0); out: return ret; } -EXPORT_SYMBOL(ib_cm_establish); + +static int cm_migrate(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_ESTABLISHED && + (cm_id->lap_state == IB_CM_LAP_UNINIT || + cm_id->lap_state == IB_CM_LAP_IDLE)) { + cm_id->lap_state = IB_CM_LAP_IDLE; + cm_id_priv->av = cm_id_priv->alt_av; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) +{ + int ret; + + switch (event) { + case IB_EVENT_COMM_EST: + ret = cm_establish(cm_id); + break; + case IB_EVENT_PATH_MIG: + ret = cm_migrate(cm_id); + break; + default: + ret = -EINVAL; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) @@ -3114,11 +3190,11 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, return; } - INIT_WORK(&work->work, cm_work_handler, work); + INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = (struct cm_port *)mad_agent->context; - queue_work(cm.wq, &work->work); + queue_delayed_work(cm.wq, &work->work, 0); } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, @@ -3141,8 +3217,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; @@ -3190,6 +3265,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = + cm_id_priv->alt_av.packet_life_time + 1; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; @@ -3216,19 +3294,31 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; - qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; - } - if (cm_id_priv->alt_av.ah_attr.dlid) { - *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; + qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_TIMEOUT | 
IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = + cm_id_priv->av.packet_life_time + 1; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = + cm_id_priv->initiator_depth; + } + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + qp_attr->path_mig_state = IB_MIG_REARM; + } + } else { + *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; + qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = + cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } ret = 0; @@ -3374,6 +3464,7 @@ static int __init ib_cm_init(void) idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); + INIT_LIST_HEAD(&cm.timewait_list); cm.wq = create_workqueue("ib_cm"); if (!cm.wq) @@ -3391,7 +3482,20 @@ error: static void __exit ib_cm_cleanup(void) { + struct cm_timewait_info *timewait_info, *tmp; + + spin_lock_irq(&cm.lock); + list_for_each_entry(timewait_info, &cm.timewait_list, list) + cancel_delayed_work(&timewait_info->work.work); + spin_unlock_irq(&cm.lock); + destroy_workqueue(cm.wq); + + list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { + list_del(&timewait_info->list); + kfree(timewait_info); + } + ib_unregister_client(&cm_client); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1178bd434d1b..985a6b564d8f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -63,6 +63,7 @@ static struct ib_client cma_client = { }; static struct ib_sa_client sa_client; +static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -343,7 +344,7 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) return ret; qp_attr.qp_state = IB_QPS_INIT; - qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.qp_access_flags = 0; qp_attr.port_num = id_priv->id.port_num; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT); @@ -874,23 +875,25 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, __u16 port; u8 ip_ver; + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps); if (IS_ERR(id)) - return NULL; + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; - rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, + GFP_KERNEL); if (!rt->path_rec) - goto err; + goto destroy_id; - if (cma_get_net_info(ib_event->private_data, listen_id->ps, - &ip_ver, &port, &src, &dst)) - goto err; - - cma_save_net_info(&id->route.addr, &listen_id->route.addr, - ip_ver, port, src, dst); rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; @@ -903,8 +906,10 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; return id_priv; -err: + +destroy_id: rdma_destroy_id(id); +err: return NULL; } @@ -930,12 +935,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) mutex_lock(&lock); ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); - if (ret) { - ret = -ENODEV; - cma_release_remove(conn_id); - rdma_destroy_id(&conn_id->id); - goto out; - } + if (ret) + goto release_conn_id; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; @@ -945,13 +946,17 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, ib_event->private_data + offset, IB_CM_REQ_PRIVATE_DATA_SIZE - offset); - if (ret) { - /* Destroy the CM ID by returning a non-zero value. */ - conn_id->cm_id.ib = NULL; - cma_exch(conn_id, CMA_DESTROYING); - cma_release_remove(conn_id); - rdma_destroy_id(&conn_id->id); - } + if (!ret) + goto out; + + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + +release_conn_id: + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + out: cma_release_remove(listen_id); return ret; @@ -1307,6 +1312,7 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, work->old_state = CMA_ROUTE_QUERY; work->new_state = CMA_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; } queue_work(cma_wq, &work->work); @@ -1334,9 +1340,9 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; } -static void cma_work_handler(void *data) +static void cma_work_handler(struct work_struct *_work) { - struct cma_work *work = data; + struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; @@ -1367,7 +1373,7 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) return -ENOMEM; work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler, work); + INIT_WORK(&work->work, cma_work_handler); work->old_state = CMA_ROUTE_QUERY; work->new_state = CMA_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; @@ -1424,7 +1430,7 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) return -ENOMEM; work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler, work); + INIT_WORK(&work->work, cma_work_handler); work->old_state = CMA_ROUTE_QUERY; work->new_state = CMA_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; @@ -1474,19 +1480,18 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) u8 p; mutex_lock(&lock); + if (list_empty(&dev_list)) { + ret = -ENODEV; + goto out; + } list_for_each_entry(cma_dev, &dev_list, list) for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) - if (!ib_query_port (cma_dev->device, p, &port_attr) && + if (!ib_query_port(cma_dev->device, p, &port_attr) && port_attr.state == IB_PORT_ACTIVE) goto port_found; - if (!list_empty(&dev_list)) { - p = 1; - cma_dev = list_entry(dev_list.next, struct cma_device, list); - } else { - ret = -ENODEV; - goto out; - } + p = 1; + cma_dev = list_entry(dev_list.next, struct cma_device, list); port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); @@ -1578,7 +1583,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) } work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler, work); + INIT_WORK(&work->work, cma_work_handler); work->old_state = CMA_ADDR_QUERY; work->new_state = CMA_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; @@ -1619,8 +1624,8 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); else - ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr, - &id->route.addr.dev_addr, + ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr, + dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); if (ret) goto err; @@ -1756,22 +1761,29 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); - if (!ret) { - mutex_lock(&lock); - ret = cma_acquire_dev(id_priv); - mutex_unlock(&lock); - } if (ret) - goto err; + goto err1; + + mutex_lock(&lock); + ret = cma_acquire_dev(id_priv); + mutex_unlock(&lock); + if (ret) + goto err1; } memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); ret = cma_get_port(id_priv); if (ret) - goto err; + goto err2; return 0; -err: +err2: + if (!cma_any_addr(addr)) { + mutex_lock(&lock); + cma_detach_from_dev(id_priv); + mutex_unlock(&lock); + } +err1: cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); return ret; } @@ -1862,6 +1874,11 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: + if (ret && !IS_ERR(id_priv->cm_id.ib)) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + kfree(private_data); return ret; } @@ -1889,10 +1906,8 @@ static int cma_connect_iw(struct rdma_id_private 
*id_priv, cm_id->remote_addr = *sin; ret = cma_modify_qp_rtr(&id_priv->id); - if (ret) { - iw_destroy_cm_id(cm_id); - return ret; - } + if (ret) + goto out; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; @@ -1904,6 +1919,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, iw_param.qpn = conn_param->qp_num; ret = iw_cm_connect(cm_id, &iw_param); out: + if (ret && !IS_ERR(cm_id)) { + iw_destroy_cm_id(cm_id); + id_priv->cm_id.iw = NULL; + } return ret; } @@ -2102,8 +2121,6 @@ static void cma_add_one(struct ib_device *device) cma_dev->device = device; cma_dev->node_guid = device->node_guid; - if (!cma_dev->node_guid) - goto err; init_completion(&cma_dev->comp); atomic_set(&cma_dev->refcount, 1); @@ -2115,9 +2132,6 @@ static void cma_add_one(struct ib_device *device) list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); - return; -err: - kfree(cma_dev); } static int cma_remove_id_dev(struct rdma_id_private *id_priv) @@ -2142,12 +2156,9 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) static void cma_process_remove(struct cma_device *cma_dev) { - struct list_head remove_list; struct rdma_id_private *id_priv; int ret; - INIT_LIST_HEAD(&remove_list); - mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, @@ -2158,8 +2169,7 @@ static void cma_process_remove(struct cma_device *cma_dev) continue; } - list_del(&id_priv->list); - list_add_tail(&id_priv->list, &remove_list); + list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); @@ -2201,6 +2211,7 @@ static int cma_init(void) return -ENOMEM; ib_sa_register_client(&sa_client); + rdma_addr_register_client(&addr_client); ret = ib_register_client(&cma_client); if (ret) @@ -2208,6 +2219,7 @@ static int cma_init(void) return 0; err: + rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; @@ -2216,6 +2228,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index c3fb304a4e86..1039ad57d53b 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -80,7 +80,7 @@ struct iwcm_work { * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns -ENOMEM in this case. Its up - * to the provider to reject the connectino request. + * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since @@ -131,26 +131,25 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) } /* - * Save private data from incoming connection requests in the - * cm_id_priv so the low level driver doesn't have to. Adjust + * Save private data from incoming connection requests to + * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. 
*/ -static int copy_private_data(struct iwcm_id_private *cm_id_priv, - struct iw_cm_event *event) +static int copy_private_data(struct iw_cm_event *event) { void *p; - p = kmalloc(event->private_data_len, GFP_ATOMIC); + p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); if (!p) return -ENOMEM; - memcpy(p, event->private_data, event->private_data_len); event->private_data = p; return 0; } /* - * Release a reference on cm_id. If the last reference is being removed - * and iw_destroy_cm_id is waiting, wake up the waiting thread. + * Release a reference on cm_id. If the last reference is being + * released, enable the waiting thread (in iw_destroy_cm_id) to + * get woken up, and return 1 if a thread is already waiting. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { @@ -243,7 +242,7 @@ static int iwcm_modify_qp_sqd(struct ib_qp *qp) /* * CM_ID <-- CLOSING * - * Block if a passive or active connection is currenlty being processed. Then + * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag @@ -408,7 +407,7 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret = 0; + int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -535,7 +534,7 @@ EXPORT_SYMBOL(iw_cm_accept); int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; - int ret = 0; + int ret; unsigned long flags; struct ib_qp *qp; @@ -620,7 +619,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, spin_lock_irqsave(&listen_id_priv->lock, flags); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { spin_unlock_irqrestore(&listen_id_priv->lock, flags); - return; + goto out; } spin_unlock_irqrestore(&listen_id_priv->lock, flags); @@ -629,7 +628,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) - return; + goto out; cm_id->provider_data = iw_event->provider_data; cm_id->local_addr = iw_event->local_addr; @@ -642,7 +641,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); - return; + goto out; } /* Call the client CM handler */ @@ -654,6 +653,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, kfree(cm_id); } +out: if (iw_event->private_data_len) kfree(iw_event->private_data); } @@ -674,7 +674,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; - int ret = 0; + int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -704,7 +704,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; - int ret = 0; + int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* @@ -828,9 +828,10 @@ static int process_event(struct iwcm_id_private *cm_id_priv, * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. 
*/ -static void cm_work_handler(void *arg) +static void cm_work_handler(struct work_struct *_work) { - struct iwcm_work *work = arg, lwork; + struct iwcm_work *work = container_of(_work, struct iwcm_work, work); + struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; int empty; @@ -843,11 +844,11 @@ static void cm_work_handler(void *arg) struct iwcm_work, list); list_del_init(&work->list); empty = list_empty(&cm_id_priv->work_list); - lwork = *work; + levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = process_event(cm_id_priv, &work->event); + ret = process_event(cm_id_priv, &levent); if (ret) { set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); destroy_cm_id(&cm_id_priv->id); @@ -899,14 +900,14 @@ static int cm_event_handler(struct iw_cm_id *cm_id, goto out; } - INIT_WORK(&work->work, cm_work_handler, work); + INIT_WORK(&work->work, cm_work_handler); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { - ret = copy_private_data(cm_id_priv, &work->event); + ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 082f03c158f0..15f38d94b3a8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -46,7 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); -static kmem_cache_t *ib_mad_cache; +static struct kmem_cache *ib_mad_cache; static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -65,8 +65,8 @@ static struct ib_mad_agent_private *find_mad_agent( static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); -static void timeout_sends(void *data); -static void local_completions(void *data); +static void timeout_sends(struct work_struct *work); +static void local_completions(struct work_struct *work); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); @@ -356,10 +356,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_LIST_HEAD(&mad_agent_priv->wait_list); INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); - INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); - INIT_WORK(&mad_agent_priv->local_work, local_completions, - mad_agent_priv); + INIT_WORK(&mad_agent_priv->local_work, local_completions); atomic_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); @@ -1750,7 +1749,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, */ (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) - return wr; + return (wr->status == IB_WC_SUCCESS) ? 
wr : NULL; } /* @@ -2198,12 +2197,12 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, /* * IB MAD completion callback */ -static void ib_mad_completion_handler(void *data) +static void ib_mad_completion_handler(struct work_struct *work) { struct ib_mad_port_private *port_priv; struct ib_wc wc; - port_priv = (struct ib_mad_port_private *)data; + port_priv = container_of(work, struct ib_mad_port_private, work); ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) { @@ -2324,7 +2323,7 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_cancel_mad); -static void local_completions(void *data) +static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_local_private *local; @@ -2334,7 +2333,8 @@ static void local_completions(void *data) struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; - mad_agent_priv = (struct ib_mad_agent_private *)data; + mad_agent_priv = + container_of(work, struct ib_mad_agent_private, local_work); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { @@ -2434,14 +2434,15 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) return ret; } -static void timeout_sends(void *data) +static void timeout_sends(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags, delay; - mad_agent_priv = (struct ib_mad_agent_private *)data; + mad_agent_priv = container_of(work, struct ib_mad_agent_private, + timed_work.work); mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); @@ -2799,7 +2800,7 @@ static int ib_mad_port_open(struct ib_device *device, ret = -ENOMEM; goto error8; } - INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv); + INIT_WORK(&port_priv->work, ib_mad_completion_handler); spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); @@ -2987,10 +2988,7 @@ error1: static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - - if (kmem_cache_destroy(ib_mad_cache)) { - printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n"); - } + kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d06b59083f6e..d5548e73e068 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -102,7 +102,7 @@ struct ib_mad_agent_private { struct list_head send_list; struct list_head wait_list; struct list_head done_list; - struct work_struct timed_work; + struct delayed_work timed_work; unsigned long timeout; struct list_head local_list; struct work_struct local_work; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 1ef79d015a1e..3663fd7022be 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -45,8 +45,8 @@ enum rmpp_state { struct mad_rmpp_recv { struct ib_mad_agent_private *agent; struct list_head list; - struct work_struct timeout_work; - struct work_struct cleanup_work; + struct delayed_work timeout_work; + struct delayed_work cleanup_work; struct completion comp; enum rmpp_state state; spinlock_t lock; @@ -233,9 +233,10 @@ static void nack_recv(struct ib_mad_agent_private *agent, } } -static void recv_timeout_handler(void *data) +static void 
recv_timeout_handler(struct work_struct *work) { - struct mad_rmpp_recv *rmpp_recv = data; + struct mad_rmpp_recv *rmpp_recv = + container_of(work, struct mad_rmpp_recv, timeout_work.work); struct ib_mad_recv_wc *rmpp_wc; unsigned long flags; @@ -254,9 +255,10 @@ static void recv_timeout_handler(void *data) ib_free_recv_mad(rmpp_wc); } -static void recv_cleanup_handler(void *data) +static void recv_cleanup_handler(struct work_struct *work) { - struct mad_rmpp_recv *rmpp_recv = data; + struct mad_rmpp_recv *rmpp_recv = + container_of(work, struct mad_rmpp_recv, cleanup_work.work); unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); @@ -285,8 +287,8 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->agent = agent; init_completion(&rmpp_recv->comp); - INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv); - INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv); + INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler); + INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); spin_lock_init(&rmpp_recv->lock); rmpp_recv->state = RMPP_STATE_ACTIVE; atomic_set(&rmpp_recv->refcount, 1); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 1706d3c7e95e..e45afba75341 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -360,9 +360,10 @@ static void free_sm_ah(struct kref *kref) kfree(sm_ah); } -static void update_sm_ah(void *port_ptr) +static void update_sm_ah(struct work_struct *work) { - struct ib_sa_port *port = port_ptr; + struct ib_sa_port *port = + container_of(work, struct ib_sa_port, update_task); struct ib_sa_sm_ah *new_ah, *old_ah; struct ib_port_attr port_attr; struct ib_ah_attr ah_attr; @@ -992,8 +993,7 @@ static void ib_sa_add_one(struct ib_device *device) if (IS_ERR(sa_dev->port[i].agent)) goto err; - INIT_WORK(&sa_dev->port[i].update_task, - update_sm_ah, &sa_dev->port[i]); + INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); } ib_set_client_data(device, &sa_client, sa_dev); @@ -1010,7 +1010,7 @@ static void ib_sa_add_one(struct ib_device *device) goto err; for (i = 0; i <= e - s; ++i) - update_sm_ah(&sa_dev->port[i]); + update_sm_ah(&sa_dev->port[i].update_task); return; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index ad4f4d5c2924..f15220a0ee75 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -161,12 +161,14 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) struct ib_ucm_event, ctx_list); list_del(&uevent->file_list); list_del(&uevent->ctx_list); + mutex_unlock(&ctx->file->file_mutex); /* clear incoming connections. 
*/ if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); + mutex_lock(&ctx->file->file_mutex); } mutex_unlock(&ctx->file->file_mutex); } @@ -328,20 +330,18 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, } if (uvt->data_len) { - uvt->data = kmalloc(uvt->data_len, GFP_KERNEL); + uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); if (!uvt->data) goto err1; - memcpy(uvt->data, evt->private_data, uvt->data_len); uvt->resp.present |= IB_UCM_PRES_DATA; } if (uvt->info_len) { - uvt->info = kmalloc(uvt->info_len, GFP_KERNEL); + uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); if (!uvt->info) goto err2; - memcpy(uvt->info, info, uvt->info_len); uvt->resp.present |= IB_UCM_PRES_INFO; } return 0; @@ -685,11 +685,11 @@ out: return result; } -static ssize_t ib_ucm_establish(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) +static ssize_t ib_ucm_notify(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) { - struct ib_ucm_establish cmd; + struct ib_ucm_notify cmd; struct ib_ucm_context *ctx; int result; @@ -700,7 +700,7 @@ static ssize_t ib_ucm_establish(struct ib_ucm_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); - result = ib_cm_establish(ctx->cm_id); + result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ib_ucm_ctx_put(ctx); return result; } @@ -1107,7 +1107,7 @@ static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, - [IB_USER_CM_CMD_ESTABLISH] = ib_ucm_establish, + [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b72c7f69ca90..743247ec065e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1214,7 +1214,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.qp_access_flags = attr->qp_access_flags; resp.pkey_index = attr->pkey_index; resp.alt_pkey_index = attr->alt_pkey_index; - resp.en_sqd_async_notify = attr->en_sqd_async_notify; + resp.sq_draining = attr->sq_draining; resp.max_rd_atomic = attr->max_rd_atomic; resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; resp.min_rnr_timer = attr->min_rnr_timer; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4e16314e8e6d..a617ca7b6923 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -534,9 +534,9 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, * module reference. 
*/ filp->f_op = fops_get(&uverbs_event_fops); - filp->f_vfsmnt = mntget(uverbs_event_mnt); - filp->f_dentry = dget(uverbs_event_mnt->mnt_root); - filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_path.mnt = mntget(uverbs_event_mnt); + filp->f_path.dentry = dget(uverbs_event_mnt->mnt_root); + filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; filp->private_data = ev_file; diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c index efe147dbeb42..db12cc0841df 100644 --- a/drivers/infiniband/core/uverbs_mem.c +++ b/drivers/infiniband/core/uverbs_mem.c @@ -179,9 +179,10 @@ void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) up_write(¤t->mm->mmap_sem); } -static void ib_umem_account(void *work_ptr) +static void ib_umem_account(struct work_struct *_work) { - struct ib_umem_account_work *work = work_ptr; + struct ib_umem_account_work *work = + container_of(_work, struct ib_umem_account_work, work); down_write(&work->mm->mmap_sem); work->mm->locked_vm -= work->diff; @@ -216,7 +217,7 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) return; } - INIT_WORK(&work->work, ib_umem_account, work); + INIT_WORK(&work->work, ib_umem_account); work->mm = mm; work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 9e9120f36019..27fe242ed435 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -72,7 +72,7 @@ static int c2_down(struct net_device *netdev); static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev); static void c2_tx_interrupt(struct net_device *netdev); static void c2_rx_interrupt(struct net_device *netdev); -static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static irqreturn_t c2_interrupt(int irq, void *dev_id); static void c2_tx_timeout(struct net_device *netdev); static int c2_change_mtu(struct net_device *netdev, int new_mtu); static void c2_reset(struct c2_port *c2_port); @@ -544,7 +544,7 @@ static void c2_rx_interrupt(struct net_device *netdev) /* * Handle netisr0 TX & RX interrupts. 
*/ -static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t c2_interrupt(int irq, void *dev_id) { unsigned int netisr0, dmaisr; int handled = 0; @@ -1155,7 +1155,8 @@ static int __devinit c2_probe(struct pci_dev *pcidev, goto bail10; } - c2_register_device(c2dev); + if (c2_register_device(c2dev)) + goto bail10; return 0; @@ -1243,7 +1244,7 @@ static struct pci_driver c2_pci_driver = { static int __init c2_init_module(void) { - return pci_module_init(&c2_pci_driver); + return pci_register_driver(&c2_pci_driver); } static void __exit c2_exit_module(void) diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h index 1b17dcdd0505..04a9db5de881 100644 --- a/drivers/infiniband/hw/amso1100/c2.h +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -302,7 +302,7 @@ struct c2_dev { unsigned long pa; /* PA device memory */ void **qptr_array; - kmem_cache_t *host_msg_cache; + struct kmem_cache *host_msg_cache; struct list_head cca_link; /* adapter list */ struct list_head eh_wakeup_list; /* event wakeup list */ diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 08f46c83a3a4..a31439bd3b67 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -66,7 +66,6 @@ static int c2_convert_cm_status(u32 c2_status) } } -#ifdef DEBUG static const char* to_event_str(int event) { static const char* event_str[] = { @@ -144,7 +143,6 @@ static const char *to_qp_state_str(int state) return "<invalid QP state>"; }; } -#endif void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) { @@ -197,7 +195,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) "resource=%x, qp_state=%s\n", __FUNCTION__, to_event_str(event_id), - be64_to_cpu(wr->ae.ae_generic.user_context), + (unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context), be32_to_cpu(wr->ae.ae_generic.resource_type), be32_to_cpu(wr->ae.ae_generic.resource), to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c index 1d2529992c0c..0315f99e4191 100644 --- a/drivers/infiniband/hw/amso1100/c2_alloc.c +++ b/drivers/infiniband/hw/amso1100/c2_alloc.c @@ -42,13 +42,14 @@ static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask, { int i; struct sp_chunk *new_head; + dma_addr_t dma_addr; - new_head = (struct sp_chunk *) __get_free_page(gfp_mask); + new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE, + &dma_addr, gfp_mask); if (new_head == NULL) return -ENOMEM; - new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head, - PAGE_SIZE, DMA_FROM_DEVICE); + new_head->dma_addr = dma_addr; pci_unmap_addr_set(new_head, mapping, new_head->dma_addr); new_head->next = NULL; @@ -80,10 +81,8 @@ void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root) while (root) { next = root->next; - dma_unmap_single(c2dev->ibdev.dma_device, - pci_unmap_addr(root, mapping), PAGE_SIZE, - DMA_FROM_DEVICE); - __free_page((struct page *) root); + dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root, + pci_unmap_addr(root, mapping)); root = next; } } @@ -115,7 +114,7 @@ u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, ((unsigned long) &(head->shared_ptr[mqsp]) - (unsigned long) head); pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__, - &(head->shared_ptr[mqsp]), (u64)*dma_addr); + &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr); return &(head->shared_ptr[mqsp]); } return NULL; 
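The c2_alloc.c hunk above (and the matching c2_cq.c change below) replaces __get_free_page()/__get_free_pages() followed by dma_map_single() with a single dma_alloc_coherent() call, so the buffer is mapped for the device for its entire lifetime and needs no explicit sync or unmap. A sketch of the idiom with a hypothetical message-queue buffer; the names are illustrative, not the driver's:

#include <linux/dma-mapping.h>

struct example_mq {
	void       *host;     /* CPU (kernel virtual) address */
	dma_addr_t  host_dma; /* bus address handed to the adapter */
	size_t      size;
};

/* One coherent allocation replaces __get_free_pages() plus
 * dma_map_single(): both CPU and device see a consistent view of the
 * buffer for as long as it is allocated. */
static int example_mq_alloc(struct device *dev, struct example_mq *mq,
			    size_t q_size, size_t msg_size)
{
	mq->size = q_size * msg_size;
	mq->host = dma_alloc_coherent(dev, mq->size, &mq->host_dma,
				      GFP_KERNEL);
	return mq->host ? 0 : -ENOMEM;
}

static void example_mq_free(struct device *dev, struct example_mq *mq)
{
	dma_free_coherent(dev, mq->size, mq->host, mq->host_dma);
}

This also removes the latent bug in the old code of DMA-mapping memory that the CPU keeps writing to without the required dma_sync calls.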
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c index 485254efdd1e..75b93e9b8810 100644 --- a/drivers/infiniband/hw/amso1100/c2_cm.c +++ b/drivers/infiniband/hw/amso1100/c2_cm.c @@ -302,7 +302,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) vq_req = vq_req_alloc(c2dev); if (!vq_req) { err = -ENOMEM; - goto bail1; + goto bail0; } vq_req->qp = qp; vq_req->cm_id = cm_id; @@ -311,7 +311,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); if (!wr) { err = -ENOMEM; - goto bail2; + goto bail1; } /* Build the WR */ @@ -331,7 +331,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) /* Validate private_data length */ if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { err = -EINVAL; - goto bail2; + goto bail1; } if (iw_param->private_data) { @@ -348,19 +348,19 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) err = vq_send_wr(c2dev, (union c2wr *) wr); if (err) { vq_req_put(c2dev, vq_req); - goto bail2; + goto bail1; } /* Wait for reply from adapter */ err = vq_wait_for_reply(c2dev, vq_req); if (err) - goto bail2; + goto bail1; /* Check that reply is present */ reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; if (!reply) { err = -ENOMEM; - goto bail2; + goto bail1; } err = c2_errno(reply); @@ -368,9 +368,8 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) if (!err) c2_set_qp_state(qp, C2_QP_STATE_RTS); - bail2: - kfree(wr); bail1: + kfree(wr); vq_req_free(c2dev, vq_req); bail0: if (err) { diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 9d7bcc5ade93..05c9154d46f4 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -246,20 +246,17 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) { - - dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping), - mq->q_size * mq->msg_size, DMA_FROM_DEVICE); - free_pages((unsigned long) mq->msg_pool.host, - get_order(mq->q_size * mq->msg_size)); + dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size, + mq->msg_pool.host, pci_unmap_addr(mq, mapping)); } static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, int msg_size) { - unsigned long pool_start; + u8 *pool_start; - pool_start = __get_free_pages(GFP_KERNEL, - get_order(q_size * msg_size)); + pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, + &mq->host_dma, GFP_KERNEL); if (!pool_start) return -ENOMEM; @@ -267,13 +264,10 @@ static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, 0, /* index (currently unknown) */ q_size, msg_size, - (u8 *) pool_start, + pool_start, NULL, /* peer (currently unknown) */ C2_MQ_HOST_TARGET); - mq->host_dma = dma_map_single(c2dev->ibdev.dma_device, - (void *)pool_start, - q_size * msg_size, DMA_FROM_DEVICE); pci_unmap_addr_set(mq, mapping, mq->host_dma); return 0; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index dd6af551108b..fef972752912 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -390,14 +390,18 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, } mr = kmalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) + if (!mr) 
{ + vfree(page_list); return ERR_PTR(-ENOMEM); + } mr->pd = to_c2pd(ib_pd); pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " "*iova_start %llx, first pa %llx, last pa %llx\n", __FUNCTION__, page_shift, pbl_depth, total_len, - *iova_start, page_list[0], page_list[pbl_depth-1]); + (unsigned long long) *iova_start, + (unsigned long long) page_list[0], + (unsigned long long) page_list[pbl_depth-1]); err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, (1 << page_shift), pbl_depth, total_len, 0, iova_start, @@ -753,20 +757,17 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) int c2_register_device(struct c2_dev *dev) { - int ret; + int ret = -ENOMEM; int i; /* Register pseudo network device */ dev->pseudo_netdev = c2_pseudo_netdev_init(dev); - if (dev->pseudo_netdev) { - ret = register_netdev(dev->pseudo_netdev); - if (ret) { - printk(KERN_ERR PFX - "Unable to register netdev, ret = %d\n", ret); - free_netdev(dev->pseudo_netdev); - return ret; - } - } + if (!dev->pseudo_netdev) + goto out3; + + ret = register_netdev(dev->pseudo_netdev); + if (ret) + goto out2; pr_debug("%s:%u\n", __FUNCTION__, __LINE__); strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); @@ -844,21 +845,25 @@ int c2_register_device(struct c2_dev *dev) ret = ib_register_device(&dev->ibdev); if (ret) - return ret; + goto out1; for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) { ret = class_device_create_file(&dev->ibdev.class_dev, c2_class_attributes[i]); - if (ret) { - unregister_netdev(dev->pseudo_netdev); - free_netdev(dev->pseudo_netdev); - ib_unregister_device(&dev->ibdev); - return ret; - } + if (ret) + goto out0; } + goto out3; - pr_debug("%s:%u\n", __FUNCTION__, __LINE__); - return 0; +out0: + ib_unregister_device(&dev->ibdev); +out1: + unregister_netdev(dev->pseudo_netdev); +out2: + free_netdev(dev->pseudo_netdev); +out3: + pr_debug("%s:%u ret=%d\n", __FUNCTION__, __LINE__, ret); + return ret; } void c2_unregister_device(struct c2_dev *dev) diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 12261132b077..179d005ed4a5 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -35,6 +35,8 @@ * */ +#include <linux/delay.h> + #include "c2.h" #include "c2_vq.h" #include "c2_status.h" @@ -562,6 +564,32 @@ int c2_alloc_qp(struct c2_dev *c2dev, return err; } +static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_lock_irq(&send_cq->lock); + else if (send_cq > recv_cq) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) +{ + if (send_cq == recv_cq) + spin_unlock_irq(&send_cq->lock); + else if (send_cq > recv_cq) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) { struct c2_cq *send_cq; @@ -574,15 +602,9 @@ void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. 
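The c2_lock_cqs()/c2_unlock_cqs() helpers introduced above make the ordering rule explicit for the c2_free_qp() path continuing below: when the send and receive CQs differ, both lock and unlock paths treat the lock at the higher address as the outer one, so no two callers can deadlock on an AB-BA acquisition; spin_lock_nested(..., SINGLE_DEPTH_NESTING) only tells lockdep the second acquisition is intentional. A minimal userspace sketch of the same discipline, with pthread mutexes standing in for the CQ spinlocks (all names illustrative, not part of the driver):

#include <pthread.h>
#include <stdint.h>

/* Lock two mutexes that may be the same object: higher address first,
 * mirroring c2_lock_cqs(). The consistent order is what prevents
 * deadlock; the direction itself is arbitrary. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);
	} else if ((uintptr_t)a > (uintptr_t)b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);	/* release order is irrelevant */
}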
*/ - spin_lock_irq(&send_cq->lock); - if (send_cq != recv_cq) - spin_lock(&recv_cq->lock); - + c2_lock_cqs(send_cq, recv_cq); c2_free_qpn(c2dev, qp->qpn); - - if (send_cq != recv_cq) - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + c2_unlock_cqs(send_cq, recv_cq); /* * Destory qp in the rnic... @@ -705,10 +727,8 @@ static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) * cannot get on the bus and the card and system hang in a * deadlock -- thus the need for this code. [TOT] */ - while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(0); - } + while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) + udelay(10); __raw_writel(C2_HINT_MAKE(mq_index, shared), c2dev->regs + PCI_BAR0_ADAPTER_HINT); @@ -766,6 +786,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, struct c2_dev *c2dev = to_c2dev(ibqp->device); struct c2_qp *qp = to_c2qp(ibqp); union c2wr wr; + unsigned long lock_flags; int err = 0; u32 flags; @@ -881,8 +902,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, /* * Post the puppy! */ + spin_lock_irqsave(&qp->lock, lock_flags); err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); break; } @@ -890,6 +913,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, * Enqueue mq index to activity FIFO. */ c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); ib_wr = ib_wr->next; } @@ -905,6 +929,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, struct c2_dev *c2dev = to_c2dev(ibqp->device); struct c2_qp *qp = to_c2qp(ibqp); union c2wr wr; + unsigned long lock_flags; int err = 0; if (qp->state > IB_QPS_RTS) @@ -945,8 +970,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, break; } + spin_lock_irqsave(&qp->lock, lock_flags); err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); if (err) { + spin_unlock_irqrestore(&qp->lock, lock_flags); break; } @@ -954,6 +981,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, * Enqueue mq index to activity FIFO */ c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); + spin_unlock_irqrestore(&qp->lock, lock_flags); ib_wr = ib_wr->next; } diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index f49a32b7a8f6..1687c511cb2f 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -150,15 +150,15 @@ static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props) (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg); if (!reply) err = -ENOMEM; - - err = c2_errno(reply); + else + err = c2_errno(reply); if (err) goto bail2; props->fw_ver = ((u64)be32_to_cpu(reply->fw_ver_major) << 32) | - ((be32_to_cpu(reply->fw_ver_minor) && 0xFFFF) << 16) | - (be32_to_cpu(reply->fw_ver_patch) && 0xFFFF); + ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) | + (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF); memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6); props->max_mr_size = 0xFFFFFFFF; props->page_size_cap = ~(C2_MIN_PAGESIZE-1); @@ -441,7 +441,7 @@ static int c2_rnic_close(struct c2_dev *c2dev) * involves initalizing the various limits and resouce pools that * comprise the RNIC instance. 
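Two of the hunks above harden the c2 data path: c2_activity() now busy-waits with udelay() instead of bouncing through the scheduler (the wait is expected to be brief and the caller may hold a spinlock), and each qp_wr_post()/c2_activity() pair is serialized under qp->lock with interrupts saved. A hedged kernel-style sketch of the polling pattern; BUSY_BIT and the register name are illustrative, not the adapter's real layout:

#include <linux/io.h>
#include <linux/delay.h>

#define BUSY_BIT 0x80000000

/* Spin until the adapter clears its busy bit. udelay() keeps the wait
 * legal in atomic context, unlike scheduling away with timeout 0. */
static void wait_adapter_ready(void __iomem *hint_reg)
{
	while (readl(hint_reg) & BUSY_BIT)
		udelay(10);
}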
*/ -int c2_rnic_init(struct c2_dev *c2dev) +int __devinit c2_rnic_init(struct c2_dev *c2dev) { int err; u32 qsize, msgsize; @@ -517,17 +517,15 @@ int c2_rnic_init(struct c2_dev *c2dev) /* Initialize the Verbs Reply Queue */ qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE)); msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); - q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, + &c2dev->rep_vq.host_dma, GFP_KERNEL); if (!q1_pages) { err = -ENOMEM; goto bail1; } - c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device, - (void *)q1_pages, qsize * msgsize, - DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + (unsigned long long) c2dev->rep_vq.host_dma); c2_mq_rep_init(&c2dev->rep_vq, 1, qsize, @@ -540,17 +538,15 @@ int c2_rnic_init(struct c2_dev *c2dev) /* Initialize the Asynchronus Event Queue */ qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE)); msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); - q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, + &c2dev->aeq.host_dma, GFP_KERNEL); if (!q2_pages) { err = -ENOMEM; goto bail2; } - c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device, - (void *)q2_pages, qsize * msgsize, - DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); - pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages, + (unsigned long long) c2dev->aeq.host_dma); c2_mq_rep_init(&c2dev->aeq, 2, qsize, @@ -597,17 +593,13 @@ int c2_rnic_init(struct c2_dev *c2dev) bail4: vq_term(c2dev); bail3: - dma_unmap_single(c2dev->ibdev.dma_device, - pci_unmap_addr(&c2dev->aeq, mapping), - c2dev->aeq.q_size * c2dev->aeq.msg_size, - DMA_FROM_DEVICE); - kfree(q2_pages); + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->aeq.q_size * c2dev->aeq.msg_size, + q2_pages, pci_unmap_addr(&c2dev->aeq, mapping)); bail2: - dma_unmap_single(c2dev->ibdev.dma_device, - pci_unmap_addr(&c2dev->rep_vq, mapping), - c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, - DMA_FROM_DEVICE); - kfree(q1_pages); + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping)); bail1: c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); bail0: @@ -619,7 +611,7 @@ int c2_rnic_init(struct c2_dev *c2dev) /* * Called by c2_remove to cleanup the RNIC resources. 
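The queue setup above (and the matching dma_free_coherent() teardown that follows) converts a plain allocation plus dma_map_single() into a single dma_alloc_coherent() call, so the buffer is coherent for its whole lifetime and needs no explicit sync or unmap step. A minimal sketch of the alloc/free pairing this patch adopts; struct my_queue is illustrative, not a driver type:

#include <linux/dma-mapping.h>

struct my_queue {
	void *host;		/* CPU-visible buffer */
	dma_addr_t dma;		/* bus address for the device */
	size_t size;
};

static int my_queue_alloc(struct device *dev, struct my_queue *q, size_t size)
{
	q->host = dma_alloc_coherent(dev, size, &q->dma, GFP_KERNEL);
	if (!q->host)
		return -ENOMEM;
	q->size = size;
	return 0;
}

static void my_queue_free(struct device *dev, struct my_queue *q)
{
	/* must pass back the same size, CPU pointer, and bus address */
	dma_free_coherent(dev, q->size, q->host, q->dma);
	q->host = NULL;
}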
*/ -void c2_rnic_term(struct c2_dev *c2dev) +void __devexit c2_rnic_term(struct c2_dev *c2dev) { /* Close the open adapter instance */ @@ -640,19 +632,17 @@ void c2_rnic_term(struct c2_dev *c2dev) /* Free the verbs request allocator */ vq_term(c2dev); - /* Unmap and free the asynchronus event queue */ - dma_unmap_single(c2dev->ibdev.dma_device, - pci_unmap_addr(&c2dev->aeq, mapping), - c2dev->aeq.q_size * c2dev->aeq.msg_size, - DMA_FROM_DEVICE); - kfree(c2dev->aeq.msg_pool.host); - - /* Unmap and free the verbs reply queue */ - dma_unmap_single(c2dev->ibdev.dma_device, - pci_unmap_addr(&c2dev->rep_vq, mapping), - c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, - DMA_FROM_DEVICE); - kfree(c2dev->rep_vq.msg_pool.host); + /* Free the asynchronus event queue */ + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->aeq.q_size * c2dev->aeq.msg_size, + c2dev->aeq.msg_pool.host, + pci_unmap_addr(&c2dev->aeq, mapping)); + + /* Free the verbs reply queue */ + dma_free_coherent(&c2dev->pcidev->dev, + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + c2dev->rep_vq.msg_pool.host, + pci_unmap_addr(&c2dev->rep_vq, mapping)); /* Free the MQ shared pointer pool */ c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c index 40caeb5f41b4..36620a22413c 100644 --- a/drivers/infiniband/hw/amso1100/c2_vq.c +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -164,7 +164,7 @@ void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) */ void *vq_repbuf_alloc(struct c2_dev *c2dev) { - return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC); + return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC); } /* diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig index 922389b64394..727b10d89686 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -10,6 +10,7 @@ config INFINIBAND_EHCA config INFINIBAND_EHCA_SCALING bool "Scaling support (EXPERIMENTAL)" depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL + default y ---help--- eHCA scaling support schedules the CQ callbacks to different CPUs. 
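A recurring conversion in this patch, in vq_repbuf_alloc() above and in the ehca allocators below, replaces the legacy SLAB_ATOMIC/SLAB_KERNEL constants with their gfp_t equivalents. A short sketch of the distinction being encoded; cache and in_atomic_path are illustrative:

#include <linux/slab.h>

/* GFP_KERNEL may sleep and is only legal in process context;
 * GFP_ATOMIC never sleeps and is required under spinlocks or in
 * interrupt handlers. The removed SLAB_* names aliased these values. */
static void *alloc_reply_buf(struct kmem_cache *cache, int in_atomic_path)
{
	return kmem_cache_alloc(cache,
				in_atomic_path ? GFP_ATOMIC : GFP_KERNEL);
}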
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 3bac197f9014..0d6e2c4bb245 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - av = kmem_cache_alloc(av_cache, SLAB_KERNEL); + av = kmem_cache_alloc(av_cache, GFP_KERNEL); if (!av) { ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", pd, ah_attr); @@ -118,8 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); } - /* for the time being we use a hard coded PMTU of 2048 Bytes */ - av->av.pmtu = 4; + av->av.pmtu = EHCA_MAX_MTU; /* dgid comes in grh.word_3 */ memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, @@ -193,7 +192,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); } - new_ehca_av.pmtu = 4; /* see also comment in create_ah() */ + new_ehca_av.pmtu = EHCA_MAX_MTU; memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, sizeof(ah_attr->grh.dgid)); diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 458fe19648a1..93995b658d94 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -134,7 +134,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); - my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL); + my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 5eae6ac48425..e1b618c5f685 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -40,6 +40,7 @@ */ #include "ehca_tools.h" +#include "ehca_iverbs.h" #include "hcp_if.h" int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) @@ -49,7 +50,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) ib_device); struct hipz_query_hca *rblock; - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -96,7 +97,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); query_device1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } @@ -109,7 +110,7 @@ int ehca_query_port(struct ib_device *ibdev, ib_device); struct hipz_query_port *rblock; - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -162,7 +163,7 @@ int ehca_query_port(struct ib_device *ibdev, props->active_speed = 0x1; query_port1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } @@ -178,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -EINVAL; } - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -193,7 +194,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 
memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); query_pkey1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } @@ -211,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -EINVAL; } - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -227,7 +228,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); query_gid1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 2a65b5be1979..c3ea746e9045 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -45,6 +45,7 @@ #include "ehca_tools.h" #include "hcp_if.h" #include "hipz_fns.h" +#include "ipz_pt_fn.h" #define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) #define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) @@ -137,38 +138,36 @@ int ehca_error_data(struct ehca_shca *shca, void *data, u64 *rblock; unsigned long block_count; - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); ret = -ENOMEM; goto error_data1; } + /* rblock must be 4K aligned and should be 4K large */ ret = hipz_h_error_data(shca->ipz_hca_handle, resource, rblock, &block_count); - if (ret == H_R_STATE) { + if (ret == H_R_STATE) ehca_err(&shca->ib_device, "No error data is available: %lx.", resource); - } else if (ret == H_SUCCESS) { int length; length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); - if (length > PAGE_SIZE) - length = PAGE_SIZE; + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; print_error_data(shca, data, rblock, length); - } - else { + } else ehca_err(&shca->ib_device, "Error data could not be fetched: %lx", resource); - } - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); error_data1: return ret; @@ -360,7 +359,7 @@ static inline void reset_eq_pending(struct ehca_cq *cq) return; } -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) { struct ehca_shca *shca = (struct ehca_shca*)dev_id; @@ -393,7 +392,7 @@ void ehca_tasklet_neq(unsigned long data) return; } -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) { struct ehca_shca *shca = (struct ehca_shca*)dev_id; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 85bf1fe16fe4..be579cc0adf6 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -51,10 +51,10 @@ struct ehca_shca; int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs); +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); void ehca_tasklet_neq(unsigned long data); -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs); +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); void ehca_tasklet_eq(unsigned long data); struct ehca_cpu_comp_task { diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 319c39d47f3a..3720e3032cce 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -179,4 +179,12 @@ int 
ehca_mmap_register(u64 physical,void **mapped, int ehca_munmap(unsigned long addr, size_t len); +#ifdef CONFIG_PPC_64K_PAGES +void *ehca_alloc_fw_ctrlblock(void); +void ehca_free_fw_ctrlblock(void *ptr); +#else +#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL)) +#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) +#endif + #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 2380994418a5..cc47e4c13a18 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -40,6 +40,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifdef CONFIG_PPC_64K_PAGES +#include <linux/slab.h> +#endif #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -49,7 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0016"); +MODULE_VERSION("SVNEHCA_0019"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -94,11 +97,31 @@ spinlock_t ehca_cq_idr_lock; DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); + static struct list_head shca_list; /* list of all registered ehcas */ static spinlock_t shca_list_lock; static struct timer_list poll_eqs_timer; +#ifdef CONFIG_PPC_64K_PAGES +static struct kmem_cache *ctblk_cache = NULL; + +void *ehca_alloc_fw_ctrlblock(void) +{ + void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); + if (!ret) + ehca_gen_err("Out of memory for ctblk"); + return ret; +} + +void ehca_free_fw_ctrlblock(void *ptr) +{ + if (ptr) + kmem_cache_free(ctblk_cache, ptr); + +} +#endif + static int ehca_create_slab_caches(void) { int ret; @@ -133,6 +156,17 @@ static int ehca_create_slab_caches(void) goto create_slab_caches5; } +#ifdef CONFIG_PPC_64K_PAGES + ctblk_cache = kmem_cache_create("ehca_cache_ctblk", + EHCA_PAGESIZE, H_CB_ALIGNMENT, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ctblk_cache) { + ehca_gen_err("Cannot create ctblk SLAB cache."); + ehca_cleanup_mrmw_cache(); + goto create_slab_caches5; + } +#endif return 0; create_slab_caches5: @@ -157,6 +191,10 @@ static void ehca_destroy_slab_caches(void) ehca_cleanup_qp_cache(); ehca_cleanup_cq_cache(); ehca_cleanup_pd_cache(); +#ifdef CONFIG_PPC_64K_PAGES + if (ctblk_cache) + kmem_cache_destroy(ctblk_cache); +#endif } #define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) @@ -168,7 +206,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) u64 h_ret; struct hipz_query_hca *rblock; - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_gen_err("Cannot allocate rblock memory."); return -ENOMEM; @@ -211,7 +249,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) shca->sport[1].rate = IB_RATE_30_GBPS; num_ports1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } @@ -220,7 +258,7 @@ static int init_node_guid(struct ehca_shca *shca) int ret = 0; struct hipz_query_hca *rblock; - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + rblock = ehca_alloc_fw_ctrlblock(); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -235,11 +273,11 @@ static int init_node_guid(struct ehca_shca *shca) memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); init_node_guid1: - kfree(rblock); + ehca_free_fw_ctrlblock(rblock); return ret; } -int ehca_register_device(struct ehca_shca *shca) +int ehca_init_device(struct ehca_shca *shca) { int ret; @@ -317,11 +355,6 @@ int ehca_register_device(struct 
ehca_shca *shca) /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; - ret = ib_register_device(&shca->ib_device); - if (ret) - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%x", ret); - return ret; } @@ -436,7 +469,7 @@ static ssize_t ehca_show_##name(struct device *dev, \ \ shca = dev->driver_data; \ \ - rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \ + rblock = ehca_alloc_fw_ctrlblock(); \ if (!rblock) { \ dev_err(dev, "Can't allocate rblock memory."); \ return 0; \ @@ -444,12 +477,12 @@ static ssize_t ehca_show_##name(struct device *dev, \ \ if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ dev_err(dev, "Can't query device properties"); \ - kfree(rblock); \ + ehca_free_fw_ctrlblock(rblock); \ return 0; \ } \ \ data = rblock->name; \ - kfree(rblock); \ + ehca_free_fw_ctrlblock(rblock); \ \ if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ return snprintf(buf, 256, "1\n"); \ @@ -561,9 +594,9 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe1; } - ret = ehca_register_device(shca); + ret = ehca_init_device(shca); if (ret) { - ehca_gen_err("Cannot register Infiniband device"); + ehca_gen_err("Cannot init ehca device struct"); goto probe1; } @@ -571,7 +604,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe2; + goto probe1; } ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); @@ -600,6 +633,13 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, goto probe5; } + ret = ib_register_device(&shca->ib_device); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%x", ret); + goto probe6; + } + /* create AQP1 for port 1 */ if (ehca_open_aqp1 == 1) { shca->sport[0].port_state = IB_PORT_DOWN; @@ -607,7 +647,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, if (ret) { ehca_err(&shca->ib_device, "Cannot create AQP1 for port 1."); - goto probe6; + goto probe7; } } @@ -618,7 +658,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, if (ret) { ehca_err(&shca->ib_device, "Cannot create AQP1 for port 2."); - goto probe7; + goto probe8; } } @@ -630,12 +670,15 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, return 0; -probe7: +probe8: ret = ehca_destroy_aqp1(&shca->sport[0]); if (ret) ehca_err(&shca->ib_device, "Cannot destroy AQP1 for port 1. ret=%x", ret); +probe7: + ib_unregister_device(&shca->ib_device); + probe6: ret = ehca_dereg_internal_maxmr(shca); if (ret) @@ -660,9 +703,6 @@ probe3: ehca_err(&shca->ib_device, "Cannot destroy EQ. 
ret=%x", ret); -probe2: - ib_unregister_device(&shca->ib_device); - probe1: ib_dealloc_device(&shca->ib_device); @@ -750,7 +790,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0016)\n"); + "(Rel.: SVNEHCA_0019)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 5ca65441e1da..0a5e2214cc5f 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -53,7 +53,7 @@ static struct ehca_mr *ehca_mr_new(void) { struct ehca_mr *me; - me = kmem_cache_alloc(mr_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mr_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mr)); spin_lock_init(&me->mrlock); @@ -72,7 +72,7 @@ static struct ehca_mw *ehca_mw_new(void) { struct ehca_mw *me; - me = kmem_cache_alloc(mw_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mw_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mw)); spin_lock_init(&me->mwlock); @@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, u32 i; u64 *kpage; - kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + kpage = ehca_alloc_fw_ctrlblock(); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; @@ -1092,7 +1092,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ehca_reg_mr_rpages_exit1: - kfree(kpage); + ehca_free_fw_ctrlblock(kpage); ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " @@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); - kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + kpage = ehca_alloc_fw_ctrlblock(); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; @@ -1181,7 +1181,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, } ehca_rereg_mr_rereg1_exit1: - kfree(kpage); + ehca_free_fw_ctrlblock(kpage); ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 2c3cdc6f7b39..d5345e5b3cd6 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -50,7 +50,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, { struct ehca_pd *pd; - pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL); + pd = kmem_cache_alloc(pd_cache, GFP_KERNEL); if (!pd) { ehca_err(device, "device=%p context=%p out of memory", device, context); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 4394123cdbd7..c6c9cef203e3 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -450,7 +450,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, if (pd->uobject && udata) context = pd->uobject->context; - my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL); + my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); return ERR_PTR(-ENOMEM); @@ -732,8 +732,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, u64 h_ret; struct ipz_queue *squeue; void *bad_send_wqe_p, *bad_send_wqe_v; - void *squeue_start_p, *squeue_end_p; - void *squeue_start_v, *squeue_end_v; + u64 q_ofs; struct ehca_wqe *wqe; int qp_num = my_qp->ib_qp.qp_num; @@ -755,26 +754,23 @@ 
static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, if (ehca_debug_level) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; - squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L)); - squeue_end_p = squeue_start_p+squeue->queue_length; - squeue_start_v = abs_to_virt((u64)squeue_start_p); - squeue_end_v = abs_to_virt((u64)squeue_end_p); - ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p", - qp_num, squeue_start_v, squeue_end_v); + if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { + ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" + " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); + return -EFAULT; + } /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe*)bad_send_wqe_v; + wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { if (ehca_debug_level) ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ - wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size); + q_ofs = ipz_queue_advance_offset(squeue, q_ofs); + wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = (*bad_wqe_cnt)+1; - if ((void*)wqe >= squeue_end_v) { - wqe = squeue_start_v; - } } /* * bad wqe will be reprocessed and ignored when pol_cq() is called, @@ -811,8 +807,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, unsigned long spl_flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); - if (mqpcb == NULL) { + mqpcb = ehca_alloc_fw_ctrlblock(); + if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); return -ENOMEM; @@ -1225,7 +1221,7 @@ modify_qp_exit2: } modify_qp_exit1: - kfree(mqpcb); + ehca_free_fw_ctrlblock(mqpcb); return ret; } @@ -1277,7 +1273,7 @@ int ehca_query_qp(struct ib_qp *qp, return -EINVAL; } - qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL ); + qpcb = ehca_alloc_fw_ctrlblock(); if (!qpcb) { ehca_err(qp->device,"Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); @@ -1401,7 +1397,7 @@ int ehca_query_qp(struct ib_qp *qp, ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); query_qp_exit1: - kfree(qpcb); + ehca_free_fw_ctrlblock(qpcb); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 9f56bb846d93..973c4b591545 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -63,6 +63,7 @@ #include <asm/ibmebus.h> #include <asm/io.h> #include <asm/pgtable.h> +#include <asm/hvcall.h> extern int ehca_debug_level; @@ -117,7 +118,7 @@ extern int ehca_debug_level; unsigned int l = (unsigned int)(len); \ unsigned char *deb = (unsigned char*)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s" format \ + printk("EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index 3fc92b031c50..fad91368dc5a 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -45,6 +45,8 @@ #include "ehca_tools.h" +#define EHCA_MAX_MTU 4 + /* QP Table Entry Memory Map */ struct hipz_qptemm { u64 qpx_hcr; diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 
e028ff1588cc..bf7a40088f61 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -70,6 +70,19 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) return ret; } +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) +{ + int i; + for (i = 0; i < queue->queue_length / queue->pagesize; i++) { + u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + if (addr >= page && addr < page + queue->pagesize) { + *q_offset = addr - page + i * queue->pagesize; + return 0; + } + } + return -EINVAL; +} + int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 2f13509d5257..dc3bda2634b7 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -150,6 +150,21 @@ static inline void *ipz_qeit_reset(struct ipz_queue *queue) return ipz_qeit_get(queue); } +/* + * return the q_offset corresponding to an absolute address + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); + +/* + * return the next queue offset. don't modify the queue. + */ +static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) +{ + offset += queue->qe_size; + if (offset >= queue->queue_length) offset = 0; + return offset; +} + /* struct generic page table */ struct ipz_pt { u64 entries[EHCA_PT_ENTRIES]; diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 574a678e7fdd..90c14543677d 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPATH tristate "QLogic InfiniPath Driver" - depends on PCI_MSI && 64BIT && INFINIBAND + depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET ---help--- This is a driver for QLogic InfiniPath host channel adapters, including InfiniBand verbs support. This driver allows these diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index 5e29cb0095e5..7dc10551cf18 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -10,8 +10,6 @@ ib_ipath-y := \ ipath_eeprom.o \ ipath_file_ops.o \ ipath_fs.o \ - ipath_iba6110.o \ - ipath_iba6120.o \ ipath_init_chip.o \ ipath_intr.o \ ipath_keys.o \ @@ -31,5 +29,8 @@ ib_ipath-y := \ ipath_verbs_mcast.o \ ipath_verbs.o +ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o +ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o + ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index f577905e3aca..54139d398181 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -141,8 +141,9 @@ struct infinipath_stats { * packets if ipath not configured, etc.) 
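ipz_queue_abs_to_offset() and ipz_queue_advance_offset() above let prepare_sqe_rts() walk the send queue by logical byte offset instead of raw virtual pointers, which stays correct across the queue's page-chunked layout. A standalone C sketch of the same arithmetic (all names illustrative; queue_length equals npages * pagesize as in the driver):

#include <stdint.h>
#include <stddef.h>

struct toy_queue {
	uint64_t *pages;	/* absolute base address of each page */
	size_t npages;
	size_t pagesize;	/* bytes per page */
	size_t qe_size;		/* bytes per queue entry */
};

/* Map an absolute address to page index * pagesize + in-page offset. */
static int toy_abs_to_offset(const struct toy_queue *q, uint64_t addr,
			     uint64_t *q_offset)
{
	size_t i;

	for (i = 0; i < q->npages; i++) {
		uint64_t page = q->pages[i];

		if (addr >= page && addr < page + q->pagesize) {
			*q_offset = addr - page + i * q->pagesize;
			return 0;
		}
	}
	return -1;	/* address not inside the queue */
}

/* Advance by one entry, wrapping at the end of the queue. */
static uint64_t toy_advance_offset(const struct toy_queue *q, uint64_t off)
{
	off += q->qe_size;
	if (off >= q->npages * q->pagesize)
		off = 0;
	return off;
}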
*/ __u64 sps_krdrops; + __u64 sps_txeparity; /* PIO buffer parity error, recovered */ /* pad for future growth */ - __u64 __sps_pad[46]; + __u64 __sps_pad[45]; }; /* @@ -185,6 +186,9 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_PCIE 0x2 #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 +#define IPATH_RUNTIME_MASTER 0x10 +#define IPATH_RUNTIME_PBC_REWRITE 0x20 +#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40 /* * This structure is returned by ipath_userinit() immediately after @@ -202,7 +206,8 @@ struct ipath_base_info { /* version of software, for feature checking. */ __u32 spi_sw_version; /* InfiniPath port assigned, goes into sent packets */ - __u32 spi_port; + __u16 spi_port; + __u16 spi_subport; /* * IB MTU, packets IB data must be less than this. * The MTU is in bytes, and will be a multiple of 4 bytes. @@ -218,7 +223,7 @@ struct ipath_base_info { __u32 spi_tidcnt; /* size of the TID Eager list in infinipath, in entries */ __u32 spi_tidegrcnt; - /* size of a single receive header queue entry. */ + /* size of a single receive header queue entry in words. */ __u32 spi_rcvhdrent_size; /* * Count of receive header queue entries allocated. @@ -310,6 +315,12 @@ struct ipath_base_info { __u32 spi_filler_for_align; /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; + + /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + __u64 spi_subport_uregbase; + __u64 spi_subport_rcvegrbuf; + __u64 spi_subport_rcvhdr_base; + } __attribute__ ((aligned(8))); @@ -328,12 +339,12 @@ struct ipath_base_info { /* * Minor version differences are always compatible - * a within a major version, however if if user software is larger + * a within a major version, however if user software is larger * than driver software, some new features and/or structure fields * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference + * cares, or it must abort after initialization reports the difference. */ -#define IPATH_USER_SWMINOR 2 +#define IPATH_USER_SWMINOR 3 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -379,7 +390,16 @@ struct ipath_user_info { */ __u32 spu_rcvhdrsize; - __u64 spu_unused; /* kept for compatible layout */ + /* + * If two or more processes wish to share a port, each process + * must set the spu_subport_cnt and spu_subport_id to the same + * values. The only restriction on the spu_subport_id is that + * it be unique for a given node. 
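The subport fields this comment describes let several processes attach to one hardware port as one master plus slaves. A hedged sketch of how cooperating processes would fill the request; the toy struct mirrors only the two fields named here, values are made up, and this is not the full ABI:

#include <stdint.h>

struct toy_user_info {
	uint16_t spu_subport_cnt;	/* identical in every process */
	uint16_t spu_subport_id;	/* identical, unique per node */
};

/* Every process of an nprocs-way job asks for the same subport group;
 * per the patch, the first opener becomes the master. */
static void request_shared_port(struct toy_user_info *ui,
				uint16_t nprocs, uint16_t job_id)
{
	ui->spu_subport_cnt = nprocs;
	ui->spu_subport_id = job_id;
}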
+ */ + __u16 spu_subport_cnt; + __u16 spu_subport_id; + + __u32 spu_unused; /* kept for compatible layout */ /* * address of struct base_info to write to @@ -392,19 +412,25 @@ struct ipath_user_info { #define IPATH_CMD_MIN 16 -#define IPATH_CMD_USER_INIT 16 /* set up userspace */ +#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ #define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ #define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ +#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ +#define IPATH_CMD_USER_INIT 24 /* set up userspace */ -#define IPATH_CMD_MAX 21 +#define IPATH_CMD_MAX 24 struct ipath_port_info { __u32 num_active; /* number of active units */ __u32 unit; /* unit (chip) assigned to caller */ - __u32 port; /* port on unit assigned to caller */ + __u16 port; /* port on unit assigned to caller */ + __u16 subport; /* subport on unit assigned to caller */ + __u16 num_ports; /* number of ports available on unit */ + __u16 num_subports; /* number of subport slaves opened on port */ }; struct ipath_tid_info { @@ -435,6 +461,8 @@ struct ipath_cmd { __u32 recv_ctrl; /* partition key to set */ __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; } cmd; }; @@ -596,6 +624,10 @@ struct infinipath_counters { /* K_PktFlags bits */ #define INFINIPATH_KPF_INTR 0x1 +#define INFINIPATH_KPF_SUBPORT_MASK 0x3 +#define INFINIPATH_KPF_SUBPORT_SHIFT 1 + +#define INFINIPATH_MAX_SUBPORT 4 /* SendPIO per-buffer control */ #define INFINIPATH_SP_TEST 0x40 @@ -610,7 +642,7 @@ struct ipath_header { /* * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, - * Port 3, TID 11, offset 14. + * Port 4, TID 11, offset 13. */ __le32 ver_port_tid_offset; __le16 chksum; diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 049221bc590e..87462e0cb4d2 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -46,7 +46,7 @@ */ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { - struct ipath_cq_wc *wc = cq->queue; + struct ipath_cq_wc *wc; unsigned long flags; u32 head; u32 next; @@ -57,6 +57,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) * Note that the head pointer might be writable by user processes. * Take care to verify it is a sane value. 
*/ + wc = cq->queue; head = wc->head; if (head >= (unsigned) cq->ibcq.cqe) { head = cq->ibcq.cqe; @@ -109,21 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *wc = cq->queue; + struct ipath_cq_wc *wc; unsigned long flags; int npolled; + u32 tail; spin_lock_irqsave(&cq->lock, flags); + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (wc->tail == wc->head) + if (tail == wc->head) break; - *entry = wc->queue[wc->tail]; - if (wc->tail >= cq->ibcq.cqe) - wc->tail = 0; + *entry = wc->queue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; else - wc->tail++; + tail++; } + wc->tail = tail; spin_unlock_irqrestore(&cq->lock, flags); @@ -177,11 +184,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, goto done; } - if (dev->n_cqs_allocated == ib_ipath_max_cqs) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - /* Allocate the completion queue structure. */ cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { @@ -237,6 +239,16 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, } else cq->ip = NULL; + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_ipath_max_cqs) { + spin_unlock(&dev->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -253,7 +265,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, ret = &cq->ibcq; - dev->n_cqs_allocated++; goto done; bail_wc: @@ -280,7 +291,9 @@ int ipath_destroy_cq(struct ib_cq *ibcq) struct ipath_cq *cq = to_icq(ibcq); tasklet_kill(&cq->comptask); + spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; + spin_unlock(&dev->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, ipath_release_mmap_info); else @@ -316,10 +329,16 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) return 0; } +/** + * ipath_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *old_wc = cq->queue; + struct ipath_cq_wc *old_wc; struct ipath_cq_wc *wc; u32 head, tail, n; int ret; @@ -355,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) * Make sure head and tail are sane since they * might be user writable. 
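ipath_poll_cq() above and the ipath_resize_cq() path continuing below now read the user-mapped head/tail indices once into a local, clamp them to the queue bounds, and only ever index with the clamped copy, instead of trusting and re-reading shared memory. A standalone sketch of that sanitizing pattern (toy names, not driver types):

#include <stdint.h>

struct toy_cq {
	volatile uint32_t head;	/* user-writable via mmap */
	volatile uint32_t tail;	/* user-writable via mmap */
	uint32_t cqe;		/* number of entries, trusted */
};

static uint32_t sanitized_tail(const struct toy_cq *cq)
{
	uint32_t tail = cq->tail;	/* single read of the untrusted value */

	if (tail > cq->cqe)
		tail = cq->cqe;		/* clamp an out-of-range index */
	return tail;
}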
*/ + old_wc = cq->queue; head = old_wc->head; if (head > (u32) cq->ibcq.cqe) head = (u32) cq->ibcq.cqe; diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 29958b6e0214..28c087b824c2 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -67,19 +67,54 @@ static struct file_operations diag_file_ops = { .release = ipath_diag_release }; +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off); + +static struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diagpkt_write, +}; + +static atomic_t diagpkt_count = ATOMIC_INIT(0); +static struct cdev *diagpkt_cdev; +static struct class_device *diagpkt_class_dev; + int ipath_diag_add(struct ipath_devdata *dd) { char name[16]; + int ret = 0; + + if (atomic_inc_return(&diagpkt_count) == 1) { + ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR, + "ipath_diagpkt", &diagpkt_file_ops, + &diagpkt_cdev, &diagpkt_class_dev); + + if (ret) { + ipath_dev_err(dd, "Couldn't create ipath_diagpkt " + "device: %d", ret); + goto done; + } + } snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit); - return ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name, - &diag_file_ops, &dd->diag_cdev, - &dd->diag_class_dev); + ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name, + &diag_file_ops, &dd->diag_cdev, + &dd->diag_class_dev); + if (ret) + ipath_dev_err(dd, "Couldn't create %s device: %d", + name, ret); + +done: + return ret; } void ipath_diag_remove(struct ipath_devdata *dd) { + if (atomic_dec_and_test(&diagpkt_count)) + ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev); + ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_class_dev); } @@ -275,30 +310,6 @@ bail: return ret; } -static ssize_t ipath_diagpkt_write(struct file *fp, - const char __user *data, - size_t count, loff_t *off); - -static struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = ipath_diagpkt_write, -}; - -static struct cdev *diagpkt_cdev; -static struct class_device *diagpkt_class_dev; - -int __init ipath_diagpkt_add(void) -{ - return ipath_cdev_init(IPATH_DIAGPKT_MINOR, - "ipath_diagpkt", &diagpkt_file_ops, - &diagpkt_cdev, &diagpkt_class_dev); -} - -void __exit ipath_diagpkt_remove(void) -{ - ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev); -} - /** * ipath_diagpkt_write - write an IB packet * @fp: the diag data device file pointer diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 2108466c7e33..1aeddb48e355 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -95,16 +95,6 @@ const char *ipath_ibcstatus_str[] = { "RecovIdle", }; -/* - * These variables are initialized in the chip-specific files - * but are defined here. 
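The comment removed above, together with the globals deleted just below and the matching ipath_eeprom.c hunks further down, turns chip-wide file-scope variables like ipath_gpio_sda into fields of the per-device ipath_devdata, so two adapters in one host no longer share mutable state. A minimal sketch of the shape of that refactor (toy names):

#include <linux/types.h>

struct toy_devdata {
	u64 gpio_sda, gpio_scl;		/* formerly file-scope globals */
	u16 gpio_sda_num, gpio_scl_num;
};

/* Accessors now take the device, so each adapter reads its own state. */
static u64 toy_scl_mask(struct toy_devdata *dd)
{
	return dd->gpio_scl;
}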
- */ -u16 ipath_gpio_sda_num, ipath_gpio_scl_num; -u64 ipath_gpio_sda, ipath_gpio_scl; -u64 infinipath_i_bitsextant; -ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; -u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; - static void __devexit ipath_remove_one(struct pci_dev *); static int __devinit ipath_init_one(struct pci_dev *, const struct pci_device_id *); @@ -314,7 +304,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); - ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x " + ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x " "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, ent->device, ent->driver_data); @@ -400,12 +390,16 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { +#ifdef CONFIG_HT_IRQ case PCI_DEVICE_ID_INFINIPATH_HT: ipath_init_iba6110_funcs(dd); break; +#endif +#ifdef CONFIG_PCI_MSI case PCI_DEVICE_ID_INFINIPATH_PE800: ipath_init_iba6120_funcs(dd); break; +#endif default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " "failing\n", ent->device); @@ -477,15 +471,15 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, * check 0 irq after we return from chip-specific bus setup, since * that can affect this due to setup */ - if (!pdev->irq) + if (!dd->ipath_irq) ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " "work\n"); else { - ret = request_irq(pdev->irq, ipath_intr, IRQF_SHARED, + ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, IPATH_DRV_NAME, dd); if (ret) { ipath_dev_err(dd, "Couldn't setup irq handler, " - "irq=%u: %d\n", pdev->irq, ret); + "irq=%d: %d\n", dd->ipath_irq, ret); goto bail_iounmap; } } @@ -527,28 +521,145 @@ bail: return ret; } +static void __devexit cleanup_device(struct ipath_devdata *dd) +{ + int port; + + ipath_shutdown_device(dd); + + if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { + /* can't do anything more with chip; needs re-init */ + *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; + if (dd->ipath_kregbase) { + /* + * if we haven't already cleaned up before these are + * to ensure any register reads/writes "fail" until + * re-init + */ + dd->ipath_kregbase = NULL; + dd->ipath_uregbase = 0; + dd->ipath_sregbase = 0; + dd->ipath_cregbase = 0; + dd->ipath_kregsize = 0; + } + ipath_disable_wc(dd); + } + + if (dd->ipath_pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->ipath_pioavailregs_dma, + dd->ipath_pioavailregs_phys); + dd->ipath_pioavailregs_dma = NULL; + } + if (dd->ipath_dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + dd->ipath_pd[0]->port_rcvhdrq_size, + dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); + dd->ipath_dummy_hdrq = NULL; + } + + if (dd->ipath_pageshadow) { + struct page **tmpp = dd->ipath_pageshadow; + dma_addr_t *tmpd = dd->ipath_physshadow; + int i, cnt = 0; + + ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " + "locked\n"); + for (port = 0; port < dd->ipath_cfgports; port++) { + int port_tidbase = port * dd->ipath_rcvtidcnt; + int maxtid = port_tidbase + dd->ipath_rcvtidcnt; + for (i = port_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ipath_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + cnt++; + } + } + if (cnt) { + ipath_stats.sps_pageunlocks += cnt; + ipath_cdbg(VERBOSE, "There were still %u expTID " + 
"entries locked\n", cnt); + } + if (ipath_stats.sps_pagelocks || + ipath_stats.sps_pageunlocks) + ipath_cdbg(VERBOSE, "%llu pages locked, %llu " + "unlocked via ipath_m{un}lock\n", + (unsigned long long) + ipath_stats.sps_pagelocks, + (unsigned long long) + ipath_stats.sps_pageunlocks); + + ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", + dd->ipath_pageshadow); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + } + + /* + * free any resources still in use (usually just kernel ports) + * at unload; we do for portcnt, not cfgports, because cfgports + * could have changed while we were loaded. + */ + for (port = 0; port < dd->ipath_portcnt; port++) { + struct ipath_portdata *pd = dd->ipath_pd[port]; + dd->ipath_pd[port] = NULL; + ipath_free_pddata(dd, pd); + } + kfree(dd->ipath_pd); + /* + * debuggability, in case some cleanup path tries to use it + * after this + */ + dd->ipath_pd = NULL; +} + static void __devexit ipath_remove_one(struct pci_dev *pdev) { - struct ipath_devdata *dd; + struct ipath_devdata *dd = pci_get_drvdata(pdev); - ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev); - if (!pdev) - return; + ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + + if (dd->verbs_dev) + ipath_unregister_ib_device(dd->verbs_dev); - dd = pci_get_drvdata(pdev); - ipath_unregister_ib_device(dd->verbs_dev); ipath_diag_remove(dd); ipath_user_remove(dd); ipathfs_remove_device(dd); ipath_device_remove_group(&pdev->dev, dd); + ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " "unit %u\n", dd, (u32) dd->ipath_unit); - if (dd->ipath_kregbase) { - ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", - dd->ipath_kregbase); - iounmap((volatile void __iomem *) dd->ipath_kregbase); - dd->ipath_kregbase = NULL; - } + + cleanup_device(dd); + + /* + * turn off rcv, send, and interrupts for all ports, all drivers + * should also hard reset the chip here? + * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs + * for all versions of the driver, if they were allocated + */ + if (dd->ipath_irq) { + ipath_cdbg(VERBOSE, "unit %u free irq %d\n", + dd->ipath_unit, dd->ipath_irq); + dd->ipath_f_free_irq(dd); + } else + ipath_dbg("irq is 0, not doing free_irq " + "for unit %u\n", dd->ipath_unit); + /* + * we check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. + */ + if (dd->ipath_f_cleanup) + /* clean up chip-specific stuff */ + dd->ipath_f_cleanup(dd); + + ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); + iounmap((volatile void __iomem *) dd->ipath_kregbase); pci_release_regions(pdev); ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); pci_disable_device(pdev); @@ -760,8 +871,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len) static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, int err) { - return dd->ipath_port0_skbs ? - (void *)dd->ipath_port0_skbs[bufnum]->data : NULL; + return dd->ipath_port0_skbinfo ? + (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; } /** @@ -783,31 +894,34 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, */ /* - * We need 4 extra bytes for unaligned transfer copying + * We need 2 extra bytes for ipath_ether data sent in the + * key header. In order to keep everything dword aligned, + * we'll reserve 4 bytes. 
*/ + len = dd->ipath_ibmaxlen + 4; + if (dd->ipath_flags & IPATH_4BYTE_TID) { - /* we need a 4KB multiple alignment, and there is no way + /* We need a 2KB multiple alignment, and there is no way * to do it except to allocate extra and then skb_reserve * enough to bring it up to the right alignment. */ - len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1; + len += 2047; } - else - len = dd->ipath_ibmaxlen + 4; + skb = __dev_alloc_skb(len, gfp_mask); if (!skb) { ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", len); goto bail; } + + skb_reserve(skb, 4); + if (dd->ipath_flags & IPATH_4BYTE_TID) { - u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4); + u32 una = (unsigned long)skb->data & 2047; if (una) - skb_reserve(skb, 4 + (1 << 11) - una); - else - skb_reserve(skb, 4); - } else - skb_reserve(skb, 4); + skb_reserve(skb, 2048 - una); + } bail: return skb; @@ -1326,6 +1440,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, "for port %u rcvhdrqtailaddr failed\n", pd->port_port); ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; goto bail; } pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1347,12 +1464,13 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " "hdrtailaddr@%p %llx physical\n", pd->port_port, pd->port_rcvhdrq, - pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr, - (unsigned long long)pd->port_rcvhdrqtailaddr_phys); + (unsigned long long) pd->port_rcvhdrq_phys, + pd->port_rcvhdrtail_kvaddr, (unsigned long long) + pd->port_rcvhdrqtailaddr_phys); /* clear for security and sanity on each use */ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); + memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); /* * tell chip each time we init it, even if we are re-using previous @@ -1805,7 +1923,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) pd->port_rcvhdrq = NULL; if (pd->port_rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)pd->port_rcvhdrtail_kvaddr, + pd->port_rcvhdrtail_kvaddr, pd->port_rcvhdrqtailaddr_phys); pd->port_rcvhdrtail_kvaddr = NULL; } @@ -1824,24 +1942,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) dma_free_coherent(&dd->pcidev->dev, size, base, pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; pd->port_rcvegrbuf_chunks = 0; - } else if (pd->port_port == 0 && dd->ipath_port0_skbs) { + } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { unsigned e; - struct sk_buff **skbs = dd->ipath_port0_skbs; + struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; - dd->ipath_port0_skbs = NULL; - ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs " - "@ %p\n", pd->port_port, skbs); + dd->ipath_port0_skbinfo = NULL; + ipath_cdbg(VERBOSE, "free closed port %d " + "ipath_port0_skbinfo @ %p\n", pd->port_port, + skbinfo); for (e = 0; e < dd->ipath_rcvegrcnt; e++) - if (skbs[e]) - dev_kfree_skb(skbs[e]); - vfree(skbs); + if (skbinfo[e].skb) { + pci_unmap_single(dd->pcidev, skbinfo[e].phys, + dd->ipath_ibmaxlen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(skbinfo[e].skb); + } + vfree(skbinfo); } kfree(pd->port_tid_pg_list); + vfree(pd->subport_uregbase); + vfree(pd->subport_rcvegrbuf); + 
vfree(pd->subport_rcvhdr_base); kfree(pd); } @@ -1882,18 +2008,8 @@ static int __init infinipath_init(void) goto bail_group; } - ret = ipath_diagpkt_add(); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " - "diag data device: error %d\n", -ret); - goto bail_ipathfs; - } - goto bail; -bail_ipathfs: - ipath_exit_ipathfs(); - bail_group: ipath_driver_remove_group(&ipath_driver.driver); @@ -1907,150 +2023,12 @@ bail: return ret; } -static void cleanup_device(struct ipath_devdata *dd) -{ - int port; - - ipath_shutdown_device(dd); - - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { - /* can't do anything more with chip; needs re-init */ - *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; - if (dd->ipath_kregbase) { - /* - * if we haven't already cleaned up before these are - * to ensure any register reads/writes "fail" until - * re-init - */ - dd->ipath_kregbase = NULL; - dd->ipath_uregbase = 0; - dd->ipath_sregbase = 0; - dd->ipath_cregbase = 0; - dd->ipath_kregsize = 0; - } - ipath_disable_wc(dd); - } - - if (dd->ipath_pioavailregs_dma) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *) dd->ipath_pioavailregs_dma, - dd->ipath_pioavailregs_phys); - dd->ipath_pioavailregs_dma = NULL; - } - if (dd->ipath_dummy_hdrq) { - dma_free_coherent(&dd->pcidev->dev, - dd->ipath_pd[0]->port_rcvhdrq_size, - dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); - dd->ipath_dummy_hdrq = NULL; - } - - if (dd->ipath_pageshadow) { - struct page **tmpp = dd->ipath_pageshadow; - int i, cnt = 0; - - ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " - "locked\n"); - for (port = 0; port < dd->ipath_cfgports; port++) { - int port_tidbase = port * dd->ipath_rcvtidcnt; - int maxtid = port_tidbase + dd->ipath_rcvtidcnt; - for (i = port_tidbase; i < maxtid; i++) { - if (!tmpp[i]) - continue; - ipath_release_user_pages(&tmpp[i], 1); - tmpp[i] = NULL; - cnt++; - } - } - if (cnt) { - ipath_stats.sps_pageunlocks += cnt; - ipath_cdbg(VERBOSE, "There were still %u expTID " - "entries locked\n", cnt); - } - if (ipath_stats.sps_pagelocks || - ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu " - "unlocked via ipath_m{un}lock\n", - (unsigned long long) - ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); - - ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", - dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); - dd->ipath_pageshadow = NULL; - } - - /* - * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. - */ - for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; - ipath_free_pddata(dd, pd); - } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; -} - static void __exit infinipath_cleanup(void) { - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - ipath_diagpkt_remove(); - ipath_exit_ipathfs(); ipath_driver_remove_group(&ipath_driver.driver); - spin_lock_irqsave(&ipath_devs_lock, flags); - - /* - * turn off rcv, send, and interrupts for all ports, all drivers - * should also hard reset the chip here? 
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs - * for all versions of the driver, if they were allocated - */ - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - if (dd->ipath_kregbase) - cleanup_device(dd); - - if (dd->pcidev) { - if (dd->pcidev->irq) { - ipath_cdbg(VERBOSE, - "unit %u free_irq of irq %x\n", - dd->ipath_unit, dd->pcidev->irq); - free_irq(dd->pcidev->irq, dd); - } else - ipath_dbg("irq is 0, not doing free_irq " - "for unit %u\n", dd->ipath_unit); - - /* - * we check for NULL here, because it's outside - * the kregbase check, and we need to call it - * after the free_irq. Thus it's possible that - * the function pointers were never initialized. - */ - if (dd->ipath_f_cleanup) - /* clean up chip-specific stuff */ - dd->ipath_f_cleanup(dd); - - dd->pcidev = NULL; - } - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); pci_unregister_driver(&ipath_driver); diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 3313356ab93a..a4019a6b7560 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd, gpioval = &dd->ipath_gpio_out; read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; if (new_line_state == i2c_line_high) /* tri-state the output rather than force high */ @@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd, write_val = 0x0UL; if (line == i2c_line_scl) { - write_val <<= ipath_gpio_scl_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num); + write_val <<= dd->ipath_gpio_scl_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num); *gpioval |= write_val; } else { - write_val <<= ipath_gpio_sda_num; - *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num); + write_val <<= dd->ipath_gpio_sda_num; + *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num); *gpioval |= write_val; } ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); @@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd, read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); /* config line to be an input */ if (line == i2c_line_scl) - mask = ipath_gpio_scl; + mask = dd->ipath_gpio_scl; else - mask = ipath_gpio_sda; + mask = dd->ipath_gpio_sda; write_val = read_val & ~mask; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); @@ -187,6 +187,7 @@ bail: static void i2c_wait_for_writes(struct ipath_devdata *dd) { (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + rmb(); } static void scl_out(struct ipath_devdata *dd, u8 bit) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 29930e22318e..340f27e3ebff 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -41,6 +41,12 @@ #include "ipath_kernel.h" #include "ipath_common.h" +/* + * mmap64 doesn't allow all 64 bits for 32-bit applications + * so only use the low 43 bits. 
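The 43-bit mask defined next turns the kernel virtual address of a vmalloc()ed subport buffer into an mmap offset that even 32-bit users of mmap64 can pass back in; mmap_kvaddr() later recognizes such a request by recomputing the same masked value and comparing. A minimal userspace sketch of the cookie arithmetic (the address below is made up):

#include <stdint.h>
#include <stdio.h>

#define MMAP64_MASK 0x7FFFFFFFFFFULL    /* low 43 bits */

int main(void)
{
        /* stand-in for the vmalloc() address of a subport buffer */
        uint64_t kvaddr = 0xffffc20000123000ULL;
        uint64_t cookie = kvaddr & MMAP64_MASK; /* handed to userspace */

        /* the driver matches a request by recomputing the mask */
        printf("cookie %#llx, matches: %d\n",
               (unsigned long long) cookie,
               cookie == (kvaddr & MMAP64_MASK));
        return 0;
}

Since valid physical offsets for this hardware fit in 40 bits, ipath_mmap() routes anything at or above 1ULL<<40 to the kernel-virtual matching path.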
+ */ +#define MMAP64_MASK 0x7FFFFFFFFFFUL + static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -57,18 +63,35 @@ static struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; -static int ipath_get_base_info(struct ipath_portdata *pd, +static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { + struct ipath_portdata *pd = port_fp(fp); int ret = 0; struct ipath_base_info *kinfo = NULL; struct ipath_devdata *dd = pd->port_dd; + unsigned subport_cnt; + int shared, master; + size_t sz; + + subport_cnt = pd->port_subport_cnt; + if (!subport_cnt) { + shared = 0; + master = 0; + subport_cnt = 1; + } else { + shared = 1; + master = !subport_fp(fp); + } - if (ubase_size < sizeof(*kinfo)) { + sz = sizeof(*kinfo); + /* If port sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 3 * sizeof(u64); + if (ubase_size < sz) { ipath_cdbg(PROC, - "Base size %lu, need %lu (version mismatch?)\n", - (unsigned long) ubase_size, - (unsigned long) sizeof(*kinfo)); + "Base size %zu, need %zu (version mismatch?)\n", + ubase_size, sz); ret = -EINVAL; goto bail; } @@ -95,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd, kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt; + kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; + if (master) + kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; /* * for this use, may be ipath_cfgports summed over all chips that * are are configured and present @@ -118,31 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd, * page_address() macro worked, but in 2.6.11, even that returns the * full 64 bit address (upper bits all 1's). So far, using the * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will chang that, and we'll be - * on to yet another method of dealing with this + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. 
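The spi_tidcnt computation above gives each subport an equal share of the port's receive TIDs and lets the master absorb the remainder, so no TID is lost to rounding; the same split drives ipath_tid_update() and ipath_tid_free() later. A standalone sketch (counts are illustrative, not hardware values):

#include <stdio.h>

/* Master is subport 0 and also takes total % n, mirroring the
 * spi_tidcnt logic above; 64 and 3 are illustrative values only. */
static unsigned tids_for(unsigned total, unsigned n, unsigned subport)
{
        return total / n + (subport == 0 ? total % n : 0);
}

int main(void)
{
        unsigned total = 64, n = 3, s, sum = 0;

        for (s = 0; s < n; s++) {
                printf("subport %u: %u TIDs\n", s, tids_for(total, n, s));
                sum += tids_for(total, n, s);
        }
        printf("sum %u == total %u\n", sum, total);
        return 0;
}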
*/ kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = - dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + if (!shared) { + kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piobufbase = (u64) pd->port_piobufs; + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else if (master) { + kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + + (dd->ipath_pbufsport % subport_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * + (dd->ipath_pbufsport - kinfo->spi_piocnt); + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else { + unsigned slave = subport_fp(fp) - 1; + + kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * kinfo->spi_piocnt * slave; + kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + + PAGE_SIZE * slave) & MMAP64_MASK; - kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1); - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * slave) & MMAP64_MASK; + kinfo->spi_rcvhdr_tailaddr = + (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; + kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + + dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & + MMAP64_MASK; + } + + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / + dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; kinfo->spi_piosize = dd->ipath_ibmaxlen; kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ kinfo->spi_port = pd->port_port; + kinfo->spi_subport = subport_fp(fp); kinfo->spi_sw_version = IPATH_KERN_SWVERSION; kinfo->spi_hw_version = dd->ipath_revision; + if (master) { + kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; + kinfo->spi_subport_uregbase = + (u64) pd->subport_uregbase & MMAP64_MASK; + kinfo->spi_subport_rcvegrbuf = + (u64) pd->subport_rcvegrbuf & MMAP64_MASK; + kinfo->spi_subport_rcvhdr_base = + (u64) pd->subport_rcvhdr_base & MMAP64_MASK; + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); + } + if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) ret = -EFAULT; @@ -154,6 +223,7 @@ bail: /** * ipath_tid_update - update a port TID * @pd: the port + * @fp: the ipath device file * @ti: the TID information * * The new implementation as of Oct 2004 is that the driver assigns @@ -176,11 +246,11 @@ bail: * virtually contiguous pages, that should change to improve * performance. 
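The piobufbase arms above lay the port's PIO buffers out as slave 1, slave 2, ..., with the master's larger run after all of them, and spi_pioindex is then recovered uniformly as (piobufbase - chip base) / palign. A compilable sketch of the layout (all sizes invented):

#include <stdio.h>

int main(void)
{
        unsigned pbufsport = 32, subports = 3, palign = 2048;
        unsigned piobufs = 0x100000;    /* port's first PIO buffer */
        unsigned slave_cnt = pbufsport / subports;
        unsigned master_cnt = slave_cnt + pbufsport % subports;
        unsigned s;

        for (s = 1; s < subports; s++)
                printf("slave %u: base %#x, %u bufs\n", s,
                       piobufs + palign * slave_cnt * (s - 1),
                       slave_cnt);
        printf("master: base %#x, %u bufs\n",
               piobufs + palign * (pbufsport - master_cnt),
               master_cnt);
        return 0;
}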
*/ -static int ipath_tid_update(struct ipath_portdata *pd, +static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, const struct ipath_tid_info *ti) { int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt; + u32 tid, porttid, cnt, i, tidcnt, tidoff; u16 *tidlist; struct ipath_devdata *dd = pd->port_dd; u64 physaddr; @@ -188,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd, u64 __iomem *tidbase; unsigned long tidmap[8]; struct page **pagep = NULL; + unsigned subport = subport_fp(fp); if (!dd->ipath_pageshadow) { ret = -ENOMEM; @@ -204,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -EFAULT; goto done; } - tidcnt = dd->ipath_rcvtidcnt; - if (cnt >= tidcnt) { + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) { + tidcnt = dd->ipath_rcvtidcnt; + tid = pd->port_tidcursor; + tidoff = 0; + } else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + tidoff = dd->ipath_rcvtidcnt - tidcnt; + porttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + tidoff = tidcnt * (subport - 1); + porttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { /* make sure it all fits in port_tid_pg_list */ dev_info(&dd->pcidev->dev, "Process tried to allocate %u " "TIDs, only trying max (%u)\n", cnt, tidcnt); cnt = tidcnt; } - pagep = (struct page **)pd->port_tid_pg_list; - tidlist = (u16 *) (&pagep[cnt]); + pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; + tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; memset(tidmap, 0, sizeof(tidmap)); - tid = pd->port_tidcursor; /* before decrement; chip actual # */ - porttid = pd->port_port * tidcnt; ntids = tidcnt; tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + dd->ipath_rcvtidbase + @@ -274,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -ENOMEM; break; } - tidlist[i] = tid; + tidlist[i] = tid + tidoff; ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid, vaddr); + "vaddr %lx\n", i, tid + tidoff, vaddr); /* we "know" system pages and TID pages are same size */ dd->ipath_pageshadow[porttid + tid] = pagep[i]; + dd->ipath_physshadow[porttid + tid] = ipath_map_page( + dd->pcidev, pagep[i], 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); /* * don't need atomic or it's overhead */ __set_bit(tid, tidmap); - physaddr = page_to_phys(pagep[i]); + physaddr = dd->ipath_physshadow[porttid + tid]; ipath_stats.sps_pagelocks++; ipath_cdbg(VERBOSE, "TID %u, vaddr %lx, physaddr %llx pgp %p\n", @@ -317,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd, tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); dd->ipath_pageshadow[porttid + tid] = NULL; ipath_stats.sps_pageunlocks++; } @@ -341,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd, } if (tid == tidcnt) tid = 0; - pd->port_tidcursor = tid; + if (!pd->port_subport_cnt) + pd->port_tidcursor = tid; + else + tidcursor_fp(fp) = tid; } done: @@ -354,6 +448,7 @@ done: /** * ipath_tid_free - free a port TID * @pd: the port + * @subport: the subport * @ti: the TID info * * right now we are unlocking one page at a time, but since @@ -367,7 +462,7 @@ done: * they pass in to us. 
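The page_to_phys() use above is gone: the TID now receives a bus address from ipath_map_page(), remembered in ipath_physshadow[] so every teardown path can unmap before releasing the page. ipath_map_page() itself is not in this hunk; assuming it is essentially a pci_map_page() wrapper, the pairing is:

#include <linux/mm.h>
#include <linux/pci.h>

/* Kernel-style sketch only.  Map a user page for device->memory DMA;
 * the eventual unmap must use the same size and direction. */
static dma_addr_t tid_page_map(struct pci_dev *pdev, struct page *page)
{
        return pci_map_page(pdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
}

static void tid_page_unmap(struct pci_dev *pdev, dma_addr_t busaddr)
{
        pci_unmap_page(pdev, busaddr, PAGE_SIZE, PCI_DMA_FROMDEVICE);
}

With an IOMMU the bus and physical addresses differ, which is presumably the point of the conversion.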
*/ -static int ipath_tid_free(struct ipath_portdata *pd, +static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, const struct ipath_tid_info *ti) { int ret = 0; @@ -388,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd, } porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) + tidcnt = dd->ipath_rcvtidcnt; + else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + porttid += dd->ipath_rcvtidcnt - tidcnt; + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + porttid += tidcnt * (subport - 1); + } tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvtidbase + porttid * sizeof(*tidbase)); - tidcnt = dd->ipath_rcvtidcnt; limit = sizeof(tidmap) * BITS_PER_BYTE; if (limit > tidcnt) /* just in case size changes in future */ @@ -417,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd, pd->port_pid, tid); dd->ipath_f_put_tid(dd, &tidbase[tid], 1, dd->ipath_tidinvalid); + pci_unmap_page(dd->pcidev, + dd->ipath_physshadow[porttid + tid], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages( &dd->ipath_pageshadow[porttid + tid], 1); dd->ipath_pageshadow[porttid + tid] = NULL; @@ -581,20 +688,24 @@ bail: /** * ipath_manage_rcvq - manage a port's receive queue * @pd: the port + * @subport: the subport * @start_stop: action to carry out * * start_stop == 0 disables receive on the port, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) +static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, + int start_stop) { struct ipath_devdata *dd = pd->port_dd; u64 tval; - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n", + ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port); + pd->port_port, subport); + if (subport) + goto bail; /* atomically clear receive enable port. */ if (start_stop) { /* @@ -609,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) * updated and correct itself, even in the face of software * bugs. */ - *pd->port_rcvhdrtail_kvaddr = 0; + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0; set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, &dd->ipath_rcvctrl); } else @@ -630,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); } /* always; new head should be equal to new tail; see above */ +bail: return 0; } @@ -687,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, } } +/* + * Initialize the port data with the receive buffer sizes + * so this can be done while the master port is locked. + * Otherwise, there is a race with a slave opening the port + * and seeing these fields uninitialized. + */ +static void init_user_egr_sizes(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned egrperchunk, egrcnt, size; + + /* + * to avoid wasting a lot of memory, we allocate 32KB chunks of + * physically contiguous memory, advance through it until used up + * and then allocate more. Of course, we need memory to store those + * extra pointers, now. 
Started out with 256KB, but under heavy + * memory pressure (creating large files and then copying them over + * NFS while doing lots of MPI jobs), we hit some allocation + * failures, even though we can sleep... (2.6.10) Still get + * failures at 64K. 32K is the lowest we can go without wasting + * additional memory. + */ + size = 0x8000; + egrperchunk = size / dd->ipath_rcvegrbufsize; + egrcnt = dd->ipath_rcvegrcnt; + pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; + pd->port_rcvegrbufs_perchunk = egrperchunk; + pd->port_rcvegrbuf_size = size; +} + /** * ipath_create_user_egr - allocate eager TID buffers * @pd: the port to allocate TID buffers for @@ -702,7 +844,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, static int ipath_create_user_egr(struct ipath_portdata *pd) { struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; int ret; gfp_t gfp_flags; @@ -722,31 +864,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. - */ - size = 0x8000; - alloced = ALIGN(egrsize * egrcnt, size); - egrperchunk = size / egrsize; - chunk = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbuf_chunks = chunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; - pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0])); + chunk = pd->port_rcvegrbuf_chunks; + egrperchunk = pd->port_rcvegrbufs_perchunk; + size = pd->port_rcvegrbuf_size; + pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf) { ret = -ENOMEM; goto bail; } pd->port_rcvegrbuf_phys = - vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0])); + kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf_phys) { ret = -ENOMEM; goto bail_rcvegrbuf; @@ -791,105 +920,23 @@ bail_rcvegrbuf_phys: pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; bail_rcvegrbuf: - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; bail: return ret; } -static int ipath_do_user_init(struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - struct ipath_devdata *dd = pd->port_dd; - u32 head32; - - /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { - dev_info(&dd->pcidev->dev, - "User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", uinfo->spu_userversion & 0xffff, - IPATH_USER_SWMINOR); - - if 
(uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* for right now, kernel piobufs are at end, so port 1 is at 0 */ - pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. - */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - /* - * now enable the port; the tail registers will be written to memory - * by the chip as soon as it sees the write to - * dd->ipath_kregs->kr_rcvctrl. The update only happens on - * transition from 0 to 1, so clear it first, then set it as part of - * enabling the port. This will (very briefly) affect any other - * open ports, but it shouldn't be long enough to be an issue. - * We explictly set the in-memory copy to 0 beforehand, so we don't - * have to wait to be sure the DMA update has happened. 
- */ - *pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); -done: - return ret; -} - /* common code for the mappings on dma_alloc_coherent mem */ static int ipath_mmap_mem(struct vm_area_struct *vma, - struct ipath_portdata *pd, unsigned len, - int write_ok, dma_addr_t addr, char *what) + struct ipath_portdata *pd, unsigned len, int write_ok, + void *kvaddr, char *what) { struct ipath_devdata *dd = pd->port_dd; - unsigned pfn = (unsigned long)addr >> PAGE_SHIFT; + unsigned long pfn; int ret; if ((vma->vm_end - vma->vm_start) > len) { @@ -912,17 +959,17 @@ static int ipath_mmap_mem(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYWRITE; } + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - dev_info(&dd->pcidev->dev, - "%s port%u mmap of %lx, %x bytes r%c failed: %d\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o', ret); + dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " + "bytes r%c failed: %d\n", what, pd->port_port, + pfn, len, write_ok?'w':'o', ret); else - ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n", - what, pd->port_port, (unsigned long)addr, len, - write_ok?'w':'o'); + ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " + "r%c\n", what, pd->port_port, pfn, len, + write_ok?'w':'o'); bail: return ret; } @@ -957,7 +1004,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, static int mmap_piobufs(struct vm_area_struct *vma, struct ipath_devdata *dd, - struct ipath_portdata *pd) + struct ipath_portdata *pd, + unsigned piobufs, unsigned piocnt) { unsigned long phys; int ret; @@ -968,16 +1016,15 @@ static int mmap_piobufs(struct vm_area_struct *vma, * process data, and catches users who might try to read the i/o * space due to a bug. */ - if ((vma->vm_end - vma->vm_start) > - (dd->ipath_pbufsport * dd->ipath_palign)) { + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " "reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; + ret = -EINVAL; goto bail; } - phys = dd->ipath_physaddr + pd->port_piobufs; + phys = dd->ipath_physaddr + piobufs; /* * Don't mark this as non-cached, or we don't get the @@ -1011,7 +1058,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, struct ipath_devdata *dd = pd->port_dd; unsigned long start, size; size_t total_size, i; - dma_addr_t *phys; + unsigned long pfn; int ret; size = pd->port_rcvegrbuf_size; @@ -1021,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, "reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); - ret = -EFAULT; + ret = -EINVAL; goto bail; } @@ -1035,11 +1082,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYWRITE; start = vma->vm_start; - phys = pd->port_rcvegrbuf_phys; for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { - ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, - size, vma->vm_page_prot); + pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); if (ret < 0) goto bail; } @@ -1049,6 +1096,122 @@ bail: return ret; } +/* + * ipath_file_vma_nopage - handle a VMA page fault. 
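ipath_mmap_mem() now takes the kernel virtual address and computes the pfn itself, which is only valid because its callers pass linear-mapped, physically contiguous buffers (dma_alloc_coherent() memory and the rcvhdrq); vmalloc()ed subport memory goes through the nopage handler introduced just below. The core, as a kernel-style sketch:

#include <linux/mm.h>
#include <asm/io.h>

/* Sketch: map a linear-mapped, physically contiguous kernel buffer
 * into a user VMA.  Not valid for vmalloc() memory. */
static int map_coherent_buf(struct vm_area_struct *vma, void *kvaddr,
                            unsigned long len)
{
        unsigned long pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;

        return remap_pfn_range(vma, vma->vm_start, pfn, len,
                               vma->vm_page_prot);
}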
+ */ +static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *page = NOPAGE_SIGBUS; + void *pageptr; + + /* + * Convert the vmalloc address into a struct page. + */ + pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); + page = vmalloc_to_page(pageptr); + if (!page) + goto out; + + /* Increment the reference count. */ + get_page(page); + if (type) + *type = VM_FAULT_MINOR; +out: + return page; +} + +static struct vm_operations_struct ipath_file_vm_ops = { + .nopage = ipath_file_vma_nopage, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct ipath_portdata *pd, unsigned subport) +{ + unsigned long len; + struct ipath_devdata *dd; + void *addr; + size_t size; + int ret; + + /* If the port is not shared, all addresses should be physical */ + if (!pd->port_subport_cnt) { + ret = -EINVAL; + goto bail; + } + + dd = pd->port_dd; + size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + + /* + * Master has all the slave uregbase, rcvhdrq, and + * rcvegrbufs mmapped. + */ + if (subport == 0) { + unsigned num_slaves = pd->port_subport_cnt - 1; + + if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & + MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf; + size *= num_slaves; + } else { + ret = -EINVAL; + goto bail; + } + } else if (pgaddr == (((u64) pd->subport_uregbase + + PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); + size = PAGE_SIZE; + } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1)) & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1); + size = pd->port_rcvhdrq_size; + } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + + size * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf + size * (subport - 1); + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else { + ret = -EINVAL; + goto bail; + } + len = vma->vm_end - vma->vm_start; + if (len > size) { + ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &ipath_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 0; + +bail: + return ret; +} + /** * ipath_mmap - mmap various structures into user space * @fp: the file pointer @@ -1064,73 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) struct ipath_portdata *pd; struct ipath_devdata *dd; u64 pgaddr, ureg; + unsigned piobufs, piocnt; int ret; pd = port_fp(fp); + if (!pd) { + ret = -EINVAL; + goto bail; + } dd = pd->port_dd; /* * This is the ipath_do_user_init() code, mapping the shared buffers * into the user process. The address referred to by vm_pgoff is the - * virtual, not physical, address; we only do one mmap for each - * space mapped. + * file offset passed via mmap(). 
For shared ports, this is the + * kernel vmalloc() address of the pages to share with the master. + * For non-shared or master ports, this is a physical address. + * We only do one mmap for each space mapped. */ pgaddr = vma->vm_pgoff << PAGE_SHIFT; /* - * Must fit in 40 bits for our hardware; some checked elsewhere, - * but we'll be paranoid. Check for 0 is mostly in case one of the - * allocations failed, but user called mmap anyway. We want to catch - * that before it can match. + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. */ - if (!pgaddr || pgaddr >= (1ULL<<40)) { - ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n", - (unsigned long long)pgaddr, vma->vm_start, vma->vm_end); - return -EINVAL; + if (!pgaddr) { + ret = -EINVAL; + goto bail; } - /* just the offset of the port user registers, not physical addr */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; - - ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n", + ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start); + vma->vm_end - vma->vm_start, dd->ipath_unit, + pd->port_port, subport_fp(fp)); - if (vma->vm_start & (PAGE_SIZE-1)) { - ipath_dev_err(dd, - "vm_start not aligned: %lx, end=%lx phys %lx\n", - vma->vm_start, vma->vm_end, (unsigned long)pgaddr); - ret = -EINVAL; + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. + */ + if (pgaddr >= (1ULL<<40)) { + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + goto bail; } - else if (pgaddr == ureg) + + if (!pd->port_subport_cnt) { + /* port is not shared */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = dd->ipath_pbufsport; + piobufs = pd->port_piobufs; + } else if (!subport_fp(fp)) { + /* caller is the master */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + + (dd->ipath_pbufsport % pd->port_subport_cnt); + piobufs = pd->port_piobufs + + dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + /* caller is a slave */ + ureg = 0; + piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; + } + + if (pgaddr == ureg) ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == pd->port_piobufs) - ret = mmap_piobufs(vma, dd, pd); - else if (pgaddr == (u64) pd->port_rcvegr_phys) + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); + else if (pgaddr == dd->ipath_pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + (void *) dd->ipath_pioavailregs_dma, + "pioavail registers"); + else if (subport_fp(fp)) + /* Subports don't mmap the physical receive buffers */ + ret = -EINVAL; + else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { + else if (pgaddr == (u64) pd->port_rcvhdrq_phys) /* * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. 
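Because the shared-port regions matched by mmap_kvaddr() above are vmalloc()ed, they cannot be handed to remap_pfn_range(); instead vm_pgoff is overwritten with the kernel virtual address so ipath_file_vma_nopage() can fault pages in one at a time. Its moving parts, isolated (the 2.6.18-era ->nopage signature is assumed):

#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Sketch of the fault path: recover the kernel virtual address from
 * vm_pgoff plus the fault offset, look up the backing page, and take
 * a reference before handing it to the VM. */
static struct page *vmalloc_nopage(struct vm_area_struct *vma,
                                   unsigned long address, int *type)
{
        void *kvaddr = (void *) ((vma->vm_pgoff << PAGE_SHIFT) +
                                 (address - vma->vm_start));
        struct page *page = vmalloc_to_page(kvaddr);

        if (!page)
                return NOPAGE_SIGBUS;
        get_page(page);
        if (type)
                *type = VM_FAULT_MINOR;
        return page;
}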
*/ - unsigned total_size = - ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize - * sizeof(u32), PAGE_SIZE); - ret = ipath_mmap_mem(vma, pd, total_size, 1, - pd->port_rcvhdrq_phys, + ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, + pd->port_rcvhdrq, "rcvhdrq"); - } - else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys) + else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) /* in-memory copy of rcvhdrq tail register */ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - pd->port_rcvhdrqtailaddr_phys, + pd->port_rcvhdrtail_kvaddr, "rcvhdrq tail"); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - dd->ipath_pioavailregs_phys, - "pioavail registers"); else ret = -EINVAL; @@ -1138,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) if (ret < 0) dev_info(&dd->pcidev->dev, - "Failure %d on addr %lx, off %lx\n", - -ret, vma->vm_start, vma->vm_pgoff); - + "Failure %d on off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: return ret; } @@ -1154,6 +1344,8 @@ static unsigned int ipath_poll(struct file *fp, struct ipath_devdata *dd; pd = port_fp(fp); + if (!pd) + goto bail; dd = pd->port_dd; bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; @@ -1176,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp, if (tail == head) { set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | head, pd->port_port); @@ -1200,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); +bail: return pollflag; } +static int init_subports(struct ipath_devdata *dd, + struct ipath_portdata *pd, + const struct ipath_user_info *uinfo) +{ + int ret = 0; + unsigned num_slaves; + size_t size; + + /* Old user binaries don't know about subports */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) + goto bail; + /* + * If the user is requesting zero or one port, + * skip the subport allocation. + */ + if (uinfo->spu_subport_cnt <= 1) + goto bail; + if (uinfo->spu_subport_cnt > 4) { + ret = -EINVAL; + goto bail; + } + + num_slaves = uinfo->spu_subport_cnt - 1; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + if (!pd->subport_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: pd->port_rcvhdrq_size isn't initialized yet. 
*/ + size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_slaves; + pd->subport_rcvhdr_base = vmalloc(size); + if (!pd->subport_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_slaves); + if (!pd->subport_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + pd->port_subport_cnt = uinfo->spu_subport_cnt; + pd->port_subport_id = uinfo->spu_subport_id; + pd->active_slaves = 1; + goto bail; + +bail_rhdr: + vfree(pd->subport_rcvhdr_base); +bail_ureg: + vfree(pd->subport_uregbase); + pd->subport_uregbase = NULL; +bail: + return ret; +} + static int try_alloc_port(struct ipath_devdata *dd, int port, - struct file *fp) + struct file *fp, + const struct ipath_user_info *uinfo) { + struct ipath_portdata *pd; int ret; - if (!dd->ipath_pd[port]) { - void *p, *ptmp; + if (!(pd = dd->ipath_pd[port])) { + void *ptmp; - p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); + pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); /* * Allocate memory for use in ipath_tid_update() just once @@ -1221,34 +1475,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + dd->ipath_rcvtidcnt * sizeof(struct page **), GFP_KERNEL); - if (!p || !ptmp) { + if (!pd || !ptmp) { ipath_dev_err(dd, "Unable to allocate portdata " "memory, failing open\n"); ret = -ENOMEM; - kfree(p); + kfree(pd); kfree(ptmp); goto bail; } - dd->ipath_pd[port] = p; + dd->ipath_pd[port] = pd; dd->ipath_pd[port]->port_port = port; dd->ipath_pd[port]->port_dd = dd; dd->ipath_pd[port]->port_tid_pg_list = ptmp; init_waitqueue_head(&dd->ipath_pd[port]->port_wait); } - if (!dd->ipath_pd[port]->port_cnt) { - dd->ipath_pd[port]->port_cnt = 1; - fp->private_data = (void *) dd->ipath_pd[port]; + if (!pd->port_cnt) { + pd->userversion = uinfo->spu_userversion; + init_user_egr_sizes(pd); + if ((ret = init_subports(dd, pd, uinfo)) != 0) + goto bail; ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", current->comm, current->pid, dd->ipath_unit, port); - dd->ipath_pd[port]->port_pid = current->pid; - strncpy(dd->ipath_pd[port]->port_comm, current->comm, - sizeof(dd->ipath_pd[port]->port_comm)); + pd->port_cnt = 1; + port_fp(fp) = pd; + pd->port_pid = current->pid; + strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; - goto bail; - } - ret = -EBUSY; + } else + ret = -EBUSY; bail: return ret; @@ -1264,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd) | IPATH_LINKUNK)); } -static int find_free_port(int unit, struct file *fp) +static int find_free_port(int unit, struct file *fp, + const struct ipath_user_info *uinfo) { struct ipath_devdata *dd = ipath_lookup(unit); int ret, i; @@ -1279,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp) goto bail; } - for (i = 0; i < dd->ipath_cfgports; i++) { - ret = try_alloc_port(dd, i, fp); + for (i = 1; i < dd->ipath_cfgports; i++) { + ret = try_alloc_port(dd, i, fp, uinfo); if (ret != -EBUSY) goto bail; } @@ -1290,13 +1547,14 @@ bail: return ret; } -static int find_best_unit(struct file *fp) +static int find_best_unit(struct file *fp, + const struct ipath_user_info *uinfo) { int ret = 0, i, prefunit = -1, devmax; int maxofallports, npresent, nup; int ndev; - (void) ipath_count_units(&npresent, &nup, &maxofallports); + devmax = ipath_count_units(&npresent, &nup, &maxofallports); /* * This code is present to allow a knowledgeable person to @@ 
-1343,8 +1601,6 @@ static int find_best_unit(struct file *fp) if (prefunit != -1) devmax = prefunit + 1; - else - devmax = ipath_count_units(NULL, NULL, NULL); recheck: for (i = 1; i < maxofallports; i++) { for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; @@ -1359,7 +1615,7 @@ recheck: * next. */ continue; - ret = try_alloc_port(dd, i, fp); + ret = try_alloc_port(dd, i, fp, uinfo); if (!ret) goto done; } @@ -1395,22 +1651,183 @@ done: return ret; } +static int find_shared_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = ipath_count_units(NULL, NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct ipath_devdata *dd = ipath_lookup(ndev); + + if (!dd) + continue; + for (i = 1; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + /* Skip ports which are not yet open */ + if (!pd || !pd->port_cnt) + continue; + /* Skip port if it doesn't match the requested one */ + if (pd->port_subport_id != uinfo->spu_subport_id) + continue; + /* Verify the sharing process matches the master */ + if (pd->port_subport_cnt != uinfo->spu_subport_cnt || + pd->userversion != uinfo->spu_userversion || + pd->port_cnt >= pd->port_subport_cnt) { + ret = -EINVAL; + goto done; + } + port_fp(fp) = pd; + subport_fp(fp) = pd->port_cnt++; + tidcursor_fp(fp) = 0; + pd->active_slaves |= 1 << subport_fp(fp); + ipath_cdbg(PROC, + "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", + current->comm, current->pid, + subport_fp(fp), + pd->port_comm, pd->port_pid, + dd->ipath_unit, pd->port_port); + ret = 1; + goto done; + } + } + +done: + return ret; +} + static int ipath_open(struct inode *in, struct file *fp) { - int ret, user_minor; + /* The real work is performed later in ipath_assign_port() */ + fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); + return fp->private_data ? 
0 : -ENOMEM; +} + + +/* Get port early, so can set affinity prior to memory allocation */ +static int ipath_assign_port(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + int i_minor; + unsigned swminor; + + /* Check to be sure we haven't already initialized this file */ + if (port_fp(fp)) { + ret = -EINVAL; + goto done; + } + + /* for now, if major version is different, bail */ + if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { + ipath_dbg("User major version %d not same as driver " + "major %d\n", uinfo->spu_userversion >> 16, + IPATH_USER_SWMAJOR); + ret = -ENODEV; + goto done; + } + + swminor = uinfo->spu_userversion & 0xffff; + if (swminor != IPATH_USER_SWMINOR) + ipath_dbg("User minor version %d not same as driver " + "minor %d\n", swminor, IPATH_USER_SWMINOR); mutex_lock(&ipath_mutex); - user_minor = iminor(in) - IPATH_USER_MINOR_BASE; + if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && + (ret = find_shared_port(fp, uinfo))) { + mutex_unlock(&ipath_mutex); + if (ret > 0) + ret = 0; + goto done; + } + + i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)in->i_rdev, user_minor); + (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); - if (user_minor) - ret = find_free_port(user_minor - 1, fp); + if (i_minor) + ret = find_free_port(i_minor - 1, fp, uinfo); else - ret = find_best_unit(fp); + ret = find_best_unit(fp, uinfo); mutex_unlock(&ipath_mutex); + +done: + return ret; +} + + +static int ipath_do_user_init(struct file *fp, + const struct ipath_user_info *uinfo) +{ + int ret; + struct ipath_portdata *pd; + struct ipath_devdata *dd; + u32 head32; + + pd = port_fp(fp); + dd = pd->port_dd; + + if (uinfo->spu_rcvhdrsize) { + ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); + if (ret) + goto done; + } + + /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + pd->port_piobufs = dd->ipath_piobufbase + + dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; + ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", + pd->port_port, pd->port_piobufs); + + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If pd->port_port > chip-supported, + * we need to do extra stuff here to handle by handling overflow + * through port 0, someday + */ + ret = ipath_create_rcvhdrq(dd, pd); + if (!ret) + ret = ipath_create_user_egr(pd); + if (ret) + goto done; + + /* + * set the eager head register for this port to the current values + * of the tail pointers, since we don't know if they were + * updated on last use of the port. + */ + head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); + ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); + dd->ipath_lastegrheads[pd->port_port] = -1; + dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; + ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", + pd->port_port, head32); + pd->port_tidcursor = 0; /* start at beginning after open */ + /* + * now enable the port; the tail registers will be written to memory + * by the chip as soon as it sees the write to + * dd->ipath_kregs->kr_rcvctrl. The update only happens on + * transition from 0 to 1, so clear it first, then set it as part of + * enabling the port. This will (very briefly) affect any other + * open ports, but it shouldn't be long enough to be an issue. 
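ipath_open() is now trivial and the real assignment happens in ipath_assign_port(), driven by a separate command write (see the dispatch further below), so a process can learn its unit:port and set CPU affinity before IPATH_CMD_USER_INIT allocates memory; __IPATH_CMD_USER_INIT preserves the old single-step behavior. A hypothetical userspace sequence (command marshalling elided; the struct layouts live in ipath_common.h):

#include <fcntl.h>
#include <unistd.h>

/* Hypothetical caller showing only the ordering. */
int ipath_open_port(const char *dev)
{
        int fd = open(dev, O_RDWR);

        if (fd < 0)
                return -1;
        /* 1. write(fd, &cmd_assign_port, ...)  reserve unit:port  */
        /* 2. sched_setaffinity(...)            bind near the unit */
        /* 3. write(fd, &cmd_user_init, ...)    allocate + enable  */
        return fd;
}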
+ * We explicitly set the in-memory copy to 0 beforehand, so we don't + have to wait to be sure the DMA update has happened. + */ + *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL; + set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, + &dd->ipath_rcvctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); +done: return ret; } @@ -1433,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd) if (!dd->ipath_pageshadow[i]) continue; + pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i], 1); dd->ipath_pageshadow[i] = NULL; @@ -1453,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd) static int ipath_close(struct inode *in, struct file *fp) { int ret = 0; + struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; unsigned port; @@ -1462,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_lock(&ipath_mutex); - pd = port_fp(fp); - port = pd->port_port; + fd = (struct ipath_filedata *) fp->private_data; fp->private_data = NULL; + pd = fd->pd; + if (!pd) { + mutex_unlock(&ipath_mutex); + goto bail; + } + if (--pd->port_cnt) { + /* + * XXX If the master closes the port before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + pd->active_slaves &= ~(1 << fd->subport); + mutex_unlock(&ipath_mutex); + goto bail; + } + port = pd->port_port; dd = pd->port_dd; if (pd->port_hdrqfull) { @@ -1503,8 +1938,6 @@ static int ipath_close(struct inode *in, struct file *fp) /* clean up the pkeys for this port user */ ipath_clean_part_key(pd, dd); - - /* * be paranoid, and never write 0's to these, just use an * unused part of the port 0 tail page. Of course, @@ -1523,39 +1956,49 @@ static int ipath_close(struct inode *in, struct file *fp) i = dd->ipath_pbufsport * (port - 1); ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); + dd->ipath_f_clear_tids(dd, pd->port_port); + if (dd->ipath_pageshadow) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", pd->port_comm, pd->port_pid, dd->ipath_unit, port); - - dd->ipath_f_clear_tids(dd, pd->port_port); } - pd->port_cnt = 0; pd->port_pid = 0; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ +bail: + kfree(fd); return ret; } -static int ipath_port_info(struct ipath_portdata *pd, +static int ipath_port_info(struct ipath_portdata *pd, u16 subport, struct ipath_port_info __user *uinfo) { struct ipath_port_info info; int nup; int ret; + size_t sz; (void) ipath_count_units(NULL, &nup, NULL); info.num_active = nup; info.unit = pd->port_dd->ipath_unit; info.port = pd->port_port; + info.subport = subport; + /* Don't return new fields if old library opened the port. */ + if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { + /* Number of user ports available for this device.
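ipath_port_info() (completed in the next hunk) appends its new fields at the tail and trims the reply when an older library opened the port, so old binaries never see bytes they did not allocate. A standalone sketch of that negotiation (struct layout illustrative, not the real ipath_port_info):

#include <stdint.h>
#include <stdio.h>

struct port_info {
        uint32_t num_active;
        uint32_t unit;
        uint16_t port;
        uint16_t subport;
        uint16_t num_ports;     /* new field, tail only */
        uint16_t num_subports;  /* new field, tail only */
};

int main(void)
{
        struct port_info info = { 1, 0, 1, 0, 8, 4 };
        int old_library = 1;    /* negotiated an older minor version */
        size_t sz = old_library ? sizeof(info) - 2 * sizeof(uint16_t)
                                : sizeof(info);

        /* the driver then does copy_to_user(uinfo, &info, sz) */
        printf("returning %zu of %zu bytes\n", sz, sizeof(info));
        return 0;
}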
*/ + info.num_ports = pd->port_dd->ipath_cfgports - 1; + info.num_subports = pd->port_subport_cnt; + sz = sizeof(info); + } else + sz = sizeof(info) - 2 * sizeof(u16); - if (copy_to_user(uinfo, &info, sizeof(info))) { + if (copy_to_user(uinfo, &info, sz)) { ret = -EFAULT; goto bail; } @@ -1565,6 +2008,16 @@ bail: return ret; } +static int ipath_get_slave_info(struct ipath_portdata *pd, + void __user *slave_mask_addr) +{ + int ret = 0; + + if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) + ret = -EFAULT; + return ret; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -1591,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, consumed = sizeof(cmd.type); switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + case __IPATH_CMD_USER_INIT: case IPATH_CMD_USER_INIT: copy = sizeof(cmd.cmd.user_info); dest = &cmd.cmd.user_info; @@ -1617,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; + case IPATH_CMD_SLAVE_INFO: + copy = sizeof(cmd.cmd.slave_mask_addr); + dest = &cmd.cmd.slave_mask_addr; + src = &ucmd->cmd.slave_mask_addr; + break; default: ret = -EINVAL; goto bail; @@ -1634,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, consumed += copy; pd = port_fp(fp); + if (!pd && cmd.type != __IPATH_CMD_USER_INIT && + cmd.type != IPATH_CMD_ASSIGN_PORT) { + ret = -EINVAL; + goto bail; + } switch (cmd.type) { + case IPATH_CMD_ASSIGN_PORT: + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + case __IPATH_CMD_USER_INIT: + /* backwards compatibility, get port first */ + ret = ipath_assign_port(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + /* and fall through to current version. 
*/ case IPATH_CMD_USER_INIT: - ret = ipath_do_user_init(pd, &cmd.cmd.user_info); - if (ret < 0) + ret = ipath_do_user_init(fp, &cmd.cmd.user_info); + if (ret) goto bail; ret = ipath_get_base_info( - pd, (void __user *) (unsigned long) + fp, (void __user *) (unsigned long) cmd.cmd.user_info.spu_base_info, cmd.cmd.user_info.spu_base_info_size); break; case IPATH_CMD_RECV_CTRL: - ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl); + ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); break; case IPATH_CMD_PORT_INFO: - ret = ipath_port_info(pd, + ret = ipath_port_info(pd, subport_fp(fp), (struct ipath_port_info __user *) (unsigned long) cmd.cmd.port_info); break; case IPATH_CMD_TID_UPDATE: - ret = ipath_tid_update(pd, &cmd.cmd.tid_info); + ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); break; case IPATH_CMD_TID_FREE: - ret = ipath_tid_free(pd, &cmd.cmd.tid_info); + ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); break; case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; + case IPATH_CMD_SLAVE_INFO: + ret = ipath_get_slave_info(pd, + (void __user *) (unsigned long) + cmd.cmd.slave_mask_addr); + break; } if (ret >= 0) @@ -1858,4 +2339,3 @@ void ipath_user_remove(struct ipath_devdata *dd) bail: return; } - diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a5eb30a06a5c..79a60f020a21 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -61,14 +61,13 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; - inode->i_nlink++; - dir->i_nlink++; + inc_nlink(inode); + inc_nlink(dir); } inode->i_fop = fops; @@ -119,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_path.dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -139,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_path.dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -178,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_path.dentry->d_inode->i_private; /* so we only initialize non-zero fields. 
*/ memset(portinfo, 0, sizeof portinfo); @@ -325,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_path.dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -357,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if ( pos < 0) { + if (pos != 0) { ret = -EINVAL; goto bail; } - if (pos >= sizeof(struct ipath_flash)) { - ret = 0; + if (count != sizeof(struct ipath_flash)) { + ret = -EINVAL; goto bail; } - if (count > sizeof(struct ipath_flash) - pos) - count = sizeof(struct ipath_flash) - pos; - tmp = kmalloc(count, GFP_KERNEL); if (!tmp) { ret = -ENOMEM; @@ -381,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_path.dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index bf2455a6d562..e57c7a351cb5 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -38,6 +38,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/htirq.h> #include "ipath_kernel.h" #include "ipath_registers.h" @@ -252,8 +253,8 @@ static const struct ipath_cregs ipath_ht_cregs = { }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1FF -#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF +#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 @@ -338,7 +339,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, if (crcbits) { u16 ctrl0, ctrl1; snprintf(bitsmsg, sizeof bitsmsg, - "[HT%s lane %s CRC (%llx); ignore till reload]", + "[HT%s lane %s CRC (%llx); powercycle to completely clear]", !(crcbits & _IPATH_HTLINK1_CRCBITS) ? "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) ? "1 (B)" : "0+1 (A+B)"), @@ -389,17 +390,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, _IPATH_HTLINK1_CRCBITS))); } +/* 6110 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), + INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), + INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), + INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), + INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** - * ipath_ht_handle_hwerrors - display hardware errors + * ipath_ht_handle_hwerrors - display hardware errors. * @dd: the infinipath device * @msg: the output buffer * @msgl: the size of the output buffer * - * Use same msg buffer as regular errors to avoid - * excessive stack use. Most hardware errors are catastrophic, but for - * right now, we'll print them and continue. - * We reuse the same message buffer as ipath_handle_errors() to avoid - * excessive stack usage. + * Use same msg buffer as regular errors to avoid excessive stack + * use. 
Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * ipath_handle_errors() to avoid excessive stack usage. */ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, size_t msgl) @@ -440,19 +452,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring; only @@ -499,44 +541,16 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR) - strlcat(msg, "[HTC Ireq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR) - strlcat(msg, "[HTC Treq Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR) - strlcat(msg, "[HTC Tresp Parity]", msgl); + + 
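The removed strlcat() chains are replaced by the per-chip ipath_6110_hwerror_msgs[] table (defined earlier in this file) and the shared formatter invoked just below. A userspace rendition of the pattern (masks and messages invented):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hwerror_msg {
        uint64_t mask;
        const char *msg;
};

static const struct hwerror_msg msgs[] = {
        { 1ULL << 0, "HTC Ireq Parity" },
        { 1ULL << 1, "HTC Treq Parity" },
        { 1ULL << 2, "Rx Dsync" },
};

static void format_hwerrors(uint64_t hwerrs, char *buf, size_t len)
{
        size_t i;

        /* one pass over the table instead of a cascade of ifs */
        for (i = 0; i < sizeof(msgs) / sizeof(msgs[0]); i++)
                if (hwerrs & msgs[i].mask) {
                        strncat(buf, "[", len - strlen(buf) - 1);
                        strncat(buf, msgs[i].msg, len - strlen(buf) - 1);
                        strncat(buf, "]", len - strlen(buf) - 1);
                }
}

int main(void)
{
        char buf[128] = "";

        format_hwerrors((1ULL << 0) | (1ULL << 2), buf, sizeof(buf));
        printf("%s\n", buf);    /* [HTC Ireq Parity][Rx Dsync] */
        return 0;
}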
ipath_format_hwerrors(hwerrs, + ipath_6110_hwerror_msgs, + sizeof(ipath_6110_hwerror_msgs) / + sizeof(ipath_6110_hwerror_msgs[0]), + msg, msgl); if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) hwerr_crcbits(dd, hwerrs, msg, msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR5) - strlcat(msg, "[HT core Misc5]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR6) - strlcat(msg, "[HT core Misc6]", msgl); - if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) - strlcat(msg, "[HT core Misc7]", msgl); if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); @@ -573,11 +587,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* @@ -742,7 +751,6 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd) return 0; } -#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */ #define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ #define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ @@ -906,49 +914,40 @@ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, } } -static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, - int pos) +static int ipath_ht_intconfig(struct ipath_devdata *dd) { - u32 int_handler_addr_lower; - u32 int_handler_addr_upper; - u64 ihandler; - u32 intvec; + int ret; - /* use indirection register to get the intr handler */ - pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10); - pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower); - pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11); - pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper); + if (dd->ipath_intconfig) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, + dd->ipath_intconfig); /* interrupt address */ + ret = 0; + } else { + ipath_dev_err(dd, "No interrupts enabled, couldn't setup " + "interrupt address\n"); + ret = -EINVAL; + } - ihandler = (u64) int_handler_addr_lower | - ((u64) int_handler_addr_upper << 32); + return ret; +} + +static void ipath_ht_irq_update(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg) +{ + struct ipath_devdata *dd = pci_get_drvdata(dev); + u64 prev_intconfig = dd->ipath_intconfig; + + dd->ipath_intconfig = msg->address_lo; + dd->ipath_intconfig |= ((u64) msg->address_hi) << 32; /* - * kernels with CONFIG_PCI_MSI set the vector in the irq field of - * struct pci_device, so we use that to program the internal - * interrupt register (not config space) with that value. The BIOS - * must still have done the basic MSI setup. - */ - intvec = pdev->irq; - /* - * clear any vector bits there; normally not set but we'll overload - * this for some debug purposes (setting the HTC debug register - * value from software, rather than GPIOs), so it might be set on a - * driver reload. + * If the previous value of dd->ipath_intconfig is zero, we're + * getting configured for the first time, and must not program the + * intconfig register here (it will be programmed later, when the + * hardware is ready). Otherwise, we should. 
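
The ipath_ht_irq_update() callback above is the subtle piece of the new htirq scheme: the first invocation arrives before the chip is ready, so it may only record the interrupt address; any later invocation reprograms the register immediately. A compact model of that defer-until-configured pattern (struct dev and program_intconfig() are illustrative names, not driver symbols):

#include <stdint.h>

struct dev { uint64_t intconfig; };

/* Stand-in for writing kr_interruptconfig on real hardware. */
void program_intconfig(struct dev *d) { (void) d; }

void irq_update(struct dev *d, uint32_t addr_lo, uint32_t addr_hi)
{
	uint64_t prev = d->intconfig;

	d->intconfig = ((uint64_t) addr_hi << 32) | addr_lo;
	/* A first call only records the address; the register is
	 * programmed later, once the hardware is brought up. */
	if (prev)
		program_intconfig(d);
}
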
*/ - ihandler &= ~0xff0000; - /* x86 vector goes in intrinfo[23:16] */ - ihandler |= intvec << 16; - ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, " - "interruptconfig %llx\n", int_handler_addr_lower, - int_handler_addr_upper, intvec, - (unsigned long long) ihandler); - - /* can't program yet, so save for interrupt setup */ - dd->ipath_intconfig = ihandler; - /* keep going, so we find link control stuff also */ - - return ihandler != 0; + if (prev_intconfig) + ipath_ht_intconfig(dd); } /** @@ -964,16 +963,23 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, static int ipath_setup_ht_config(struct ipath_devdata *dd, struct pci_dev *pdev) { - int pos, ret = 0; - int ihandler = 0; + int pos, ret; + + ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update); + if (ret < 0) { + ipath_dev_err(dd, "Couldn't create interrupt handler: " + "err %d\n", ret); + goto bail; + } + dd->ipath_irq = ret; + ret = 0; /* - * Read the capability info to find the interrupt info, and also - * handle clearing CRC errors in linkctrl register if necessary. We + * Handle clearing CRC errors in linkctrl register if necessary. We * do this early, before we ever enable errors or hardware errors, * mostly to avoid causing the chip to enter freeze mode. */ - pos = pci_find_capability(pdev, HT_CAPABILITY_ID); + pos = pci_find_capability(pdev, PCI_CAP_ID_HT); if (!pos) { ipath_dev_err(dd, "Couldn't find HyperTransport " "capability; no interrupts\n"); @@ -993,16 +999,8 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, } if (!(cap_type & 0xE0)) slave_or_pri_blk(dd, pdev, pos, cap_type); - else if (cap_type == HT_INTR_DISC_CONFIG) - ihandler = set_int_handler(dd, pdev, pos); } while ((pos = pci_find_next_capability(pdev, pos, - HT_CAPABILITY_ID))); - - if (!ihandler) { - ipath_dev_err(dd, "Couldn't find interrupt handler in " - "config space\n"); - ret = -ENODEV; - } + PCI_CAP_ID_HT))); bail: return ret; @@ -1081,21 +1079,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); } -static void ipath_init_ht_variables(void) +static void ipath_init_ht_variables(struct ipath_devdata *dd) { - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | @@ -1113,7 +1111,7 @@ static void ipath_init_ht_variables(void) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << @@ -1142,8 +1140,8 @@ static void ipath_init_ht_variables(void) INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = 
INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /** @@ -1353,25 +1351,6 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -static int ipath_ht_intconfig(struct ipath_devdata *dd) -{ - int ret; - - if (!dd->ipath_intconfig) { - ipath_dev_err(dd, "No interrupts enabled, couldn't setup " - "interrupt address\n"); - ret = 1; - goto bail; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, - dd->ipath_intconfig); /* interrupt address */ - ret = 0; - -bail: - return ret; -} - /** * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device @@ -1568,6 +1547,14 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) return 0; } +static void ipath_ht_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + ht_destroy_irq(dd->ipath_irq); + dd->ipath_irq = 0; + dd->ipath_intconfig = 0; +} + /** * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1591,6 +1578,7 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd) dd->ipath_f_cleanup = ipath_setup_ht_cleanup; dd->ipath_f_setextled = ipath_setup_ht_setextled; dd->ipath_f_get_base_info = ipath_ht_get_base_info; + dd->ipath_f_free_irq = ipath_ht_free_irq; /* * initialize chip-specific variables @@ -1608,5 +1596,5 @@ * do very early init that is needed before ipath_f_bus is * called */ - ipath_init_ht_variables(); + ipath_init_ht_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index d86516d23df6..6af89683f710 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -263,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = { }; /* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK 0x1F -#define INFINIPATH_I_RCVAVAIL_MASK 0x1F +#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1) +#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1) /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL @@ -294,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) +/* + * Rev2 silicon allows suppressing check for ArmLaunch errors. + * This can speed up short packet sends on systems that do + * not guarantee write-order. + */ +#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63) + +/* 6120 specific hardware errors... */ +static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), + INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"), + /* + * In practice, it's unlikely that we'll see PCIe PLL, or bus + * parity or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. 
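
A recurring theme of this patch is pushing chip differences behind the dd->ipath_f_* hooks; the new ipath_f_free_irq shows the payoff, since HT teardown also needs ht_destroy_irq() while the PCIe chip just frees the vector. A reduced model of the dispatch (struct devdata and the two stub bodies here are illustrative, not the driver's code):

#include <stdio.h>

struct devdata {
	int irq;
	void (*f_free_irq)(struct devdata *dd);
};

void ht_free_irq(struct devdata *dd)
{
	printf("free_irq(%d); ht_destroy_irq(%d)\n", dd->irq, dd->irq);
	dd->irq = 0;
}

void pe_free_irq(struct devdata *dd)
{
	printf("free_irq(%d)\n", dd->irq);
	dd->irq = 0;
}

/* Common teardown never needs to know which chip it is driving. */
void shutdown_irq(struct devdata *dd)
{
	dd->f_free_irq(dd);
}
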
+ */ + INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"), + INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"), + INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"), + INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), + INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), +}; + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -343,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; - if (hwerrs & ~infinipath_hwe_bitsextant) + if (hwerrs & ~dd->ipath_hwe_bitsextant) ipath_dev_err(dd, "hwerror interrupt with unknown errors " "%llx set\n", (unsigned long long) - (hwerrs & ~infinipath_hwe_bitsextant)); + (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. 
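
The recovery block that follows (a twin of the one added to the 6110 handler earlier) implements a recover-or-stay-frozen policy: TXE PIO parity errors are counted and cleared, and the chip is unfrozen only when no other hardware error remains pending. The decision logic in isolation, with illustrative bit positions rather than the real register layout:

#include <stdint.h>

#define TXE_PIO_PARITY (0x3ULL << 40)	/* illustrative bit positions */

/* Returns nonzero when it is safe to unfreeze the chip. */
int recover_txe_parity(uint64_t hwerrs, unsigned long *nparity)
{
	if (hwerrs & TXE_PIO_PARITY) {
		(*nparity)++;		/* sps_txeparity in the driver */
		/* ...cancel the affected sends here... */
		hwerrs &= ~TXE_PIO_PARITY;
		if (!hwerrs)
			return 1;	/* parity was the only error */
	}
	return 0;			/* other errors: stay frozen */
}
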
+ */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring only make the @@ -379,9 +436,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, } else { ipath_dbg("Clearing freezemode on ignored hardware " "error\n"); - ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - ctrl); + dd->ipath_control); } } @@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, dd->ipath_hwerrmask); } - if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_RXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_TXEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } + + ipath_format_hwerrors(hwerrs, + ipath_6120_hwerror_msgs, + sizeof(ipath_6120_hwerror_msgs)/ + sizeof(ipath_6120_hwerror_msgs[0]), + msg, msgl); + if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { bits = (u32) ((hwerrs >> @@ -423,10 +468,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, "[PCIe Mem Parity Errs %x] ", bits); strlcat(msg, bitsmsg, msgl); } - if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) - strlcat(msg, "[IB2IPATH Parity]", msgl); - if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) - strlcat(msg, "[IPATH2IB Parity]", msgl); #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ INFINIPATH_HWE_COREPLL_RFSLIP ) @@ -452,34 +493,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP) - strlcat(msg, "[PCIe Poisoned TLP]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT) - strlcat(msg, "[PCIe completion timeout]", msgl); - - /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus - * parity or memory parity error failures, because most likely we - * won't be able to talk to the core of the chip. Nonetheless, we - * might see them, if they are in parts of the PCIe core that aren't - * essential. 
- */ - if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED) - strlcat(msg, "[PCIePLL1]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED) - strlcat(msg, "[PCIePLL0]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH) - strlcat(msg, "[PCIe XTLH core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM) - strlcat(msg, "[PCIe ADM TX core parity]", msgl); - if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM) - strlcat(msg, "[PCIe ADM RX core parity]", msgl); - - if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) - strlcat(msg, "[Rx Dsync]", msgl); - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) - strlcat(msg, "[SerDes PLL]", msgl); - ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* @@ -525,6 +538,9 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, case 5: n = "InfiniPath_QMH7140"; break; + case 6: + n = "InfiniPath_QLE7142"; + break; default: ipath_dev_err(dd, "Don't yet know about board with ID %u\n", @@ -571,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) if (!dd->ipath_boardrev) // no PLL for Emulator val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - /* workaround bug 9460 in internal interface bus parity checking */ - val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; - + if (dd->ipath_minrev < 2) { + /* workaround bug 9460 in internal interface bus parity + * checking. Fixed (HW bug 9490) in Rev2. + */ + val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; + } dd->ipath_hwerrmask = val; } @@ -583,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) */ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) { - u64 val, tmp, config1; - int ret = 0, change = 0; + u64 val, tmp, config1, prev_val; + int ret = 0; ipath_dbg("Trying to bringup serdes\n"); @@ -641,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + prev_val = val; if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { val &= @@ -648,11 +668,9 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_XGXS_MDIOADDR_SHIFT); /* MDIO address 3 */ val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; - change = 1; } if (val & INFINIPATH_XGXS_RESET) { val &= ~INFINIPATH_XGXS_RESET; - change = 1; } if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { @@ -661,9 +679,19 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_XGXS_RX_POL_SHIFT); val |= dd->ipath_rx_pol_inv << INFINIPATH_XGXS_RX_POL_SHIFT; - change = 1; } - if (change) + if (dd->ipath_minrev >= 2) { + /* Rev 2. can tolerate multiple writes to PBC, and + * allowing them can provide lower latency on some + * CPUs, but this feature is off by default, only + * turned on by setting D63 of XGXSconfig reg. + * May want to make this conditional more + * fine-grained in future. This is not exactly + * related to XGXS, but where the bit ended up. + */ + val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR; + } + if (val != prev_val) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); @@ -717,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -/* this is not yet needed on this chip, so just return 0. 
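
The bringup_serdes() rewrite above replaces the boolean "change" flag with a saved prev_val and a single compare at the end, which also cleanly absorbs the unconditional rev2 ARMLAUNCH-suppress bit. The idiom reduced to userspace (xgxs_shadow and the accessors stand in for the kr_xgxsconfig register):

#include <stdint.h>

#define SUPPRESS_ARMLAUNCH_ERR (1ULL << 63)	/* D63, as in the hunk above */

static uint64_t xgxs_shadow;	/* stands in for the hardware register */

uint64_t reg_read(void) { return xgxs_shadow; }
void reg_write(uint64_t v) { xgxs_shadow = v; }

void update_xgxs(int minrev)
{
	uint64_t val = reg_read(), prev = val;

	if (minrev >= 2)	/* rev2 tolerates multiple PBC writes */
		val |= SUPPRESS_ARMLAUNCH_ERR;
	if (val != prev)	/* skip the write when nothing changed */
		reg_write(val);
}
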
*/ static int ipath_pe_intconfig(struct ipath_devdata *dd) { + u64 val; + u32 chiprev; + + /* + * If the chip supports added error indication via GPIO pins, + * enable interrupts on those bits so the interrupt routine + * can count the events. Also set flag so interrupt routine + * can know they are expected. + */ + chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT; + if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { + /* Rev2+ reports extra errors via internal GPIO pins */ + dd->ipath_flags |= IPATH_GPIO_ERRINTRS; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= IPATH_GPIO_ERRINTR_MASK; + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + } return 0; } @@ -807,6 +851,7 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, int pos, ret; dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ + dd->ipath_irq = pdev->irq; ret = pci_enable_msi(dd->pcidev); if (ret) ipath_dev_err(dd, "pci_enable_msi failed: %d, " @@ -853,21 +898,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, return 0; } -static void ipath_init_pe_variables(void) +static void ipath_init_pe_variables(struct ipath_devdata *dd) { /* * bits for selecting i2c direction and values, * used for I2C serial flash */ - ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - ipath_gpio_sda = IPATH_GPIO_SDA; - ipath_gpio_scl = IPATH_GPIO_SCL; + dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; + dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; + dd->ipath_gpio_sda = IPATH_GPIO_SDA; + dd->ipath_gpio_scl = IPATH_GPIO_SCL; /* variables for sanity checking interrupt and errors */ - infinipath_hwe_bitsextant = + dd->ipath_hwe_bitsextant = (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | + (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | INFINIPATH_HWE_PCIE1PLLFAILED | @@ -883,13 +930,13 @@ static void ipath_init_pe_variables(void) INFINIPATH_HWE_SERDESPLLFAILED | INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - infinipath_i_bitsextant = + dd->ipath_i_bitsextant = (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | (INFINIPATH_I_RCVAVAIL_MASK << INFINIPATH_I_RCVAVAIL_SHIFT) | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - infinipath_e_bitsextant = + dd->ipath_e_bitsextant = INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | @@ -907,8 +954,8 @@ static void ipath_init_pe_variables(void) INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE; - infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; + dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; } /* setup the MSI stuff again after a reset. 
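
ipath_pe_intconfig() above turns a chip revision field into a feature gate: rev2 and later report three extra error classes through GPIO pins 3..5, so those bits are added to the GPIO interrupt mask and the IPATH_GPIO_ERRINTRS flag is set. A sketch of the gate; the shift and field width are illustrative, though 0x38 matches the IPATH_GPIO_ERRINTR_MASK defined later in ipath_kernel.h:

#include <stdint.h>

#define CHIPREVMINOR_SHIFT 8		/* illustrative field position */
#define CHIPREVMINOR_MASK  0xff
#define GPIO_ERRINTR_MASK  0x38		/* GPIO bits 3..5 */

uint64_t enable_rev2_errintrs(uint64_t revision, uint64_t gpio_mask,
			      int *use_gpio_errintrs)
{
	uint32_t minrev = (revision >> CHIPREVMINOR_SHIFT) & CHIPREVMINOR_MASK;

	if (minrev > 1) {		/* rev2+ reports extra errors */
		*use_gpio_errintrs = 1;	/* IPATH_GPIO_ERRINTRS flag */
		gpio_mask |= GPIO_ERRINTR_MASK;
	}
	return gpio_mask;
}
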
I'd like to just call @@ -1082,6 +1129,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, mmiowb(); spin_unlock_irqrestore(&dd->ipath_tid_lock, flags); } +/** + * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher + * @dd: the infinipath device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing + * + * This exists as a separate routine to allow for selection of the + * appropriate "flavor". The static calls in cleanup just use the + * revision-agnostic form, as they are not performance critical. + */ +static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + + if (pa != dd->ipath_tidinvalid) { + if (pa & ((1U << 11) - 1)) { + dev_info(&dd->pcidev->dev, "BUG: physaddr %lx " + "not 2KB aligned!\n", pa); + return; + } + pa >>= 11; + /* paranoia check */ + if (pa & (7<<29)) + ipath_dev_err(dd, + "BUG: Physical page address 0x%lx " + "has bits set in 31-29\n", pa); + + if (type == 0) + pa |= dd->ipath_tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + if (dd->ipath_kregbase) + writel(pa, tidp32); + mmiowb(); +} + /** * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager @@ -1203,7 +1289,7 @@ int __attribute__((weak)) ipath_unordered_wc(void) /** * ipath_init_pe_get_base_info - set chip-specific flags for user code - * @dd: the infinipath device + * @pd: the infinipath port * @kbase: ipath_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs @@ -1212,6 +1298,7 @@ int __attribute__((weak)) ipath_unordered_wc(void) static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) { struct ipath_base_info *kinfo = kbase; + struct ipath_devdata *dd; if (ipath_unordered_wc()) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER; @@ -1220,11 +1307,29 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) else ipath_cdbg(PROC, "Not Intel processor, WC ordered\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; + if (pd == NULL) + goto done; + + dd = pd->port_dd; + + if (dd != NULL && dd->ipath_minrev >= 2) { + ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE; + ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n"); + kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN; + } +done: + kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; return 0; } +static void ipath_pe_free_irq(struct ipath_devdata *dd) +{ + free_irq(dd->ipath_irq, dd); + dd->ipath_irq = 0; +} + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device @@ -1244,10 +1349,14 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes; dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; dd->ipath_f_clear_tids = ipath_pe_clear_tids; - dd->ipath_f_put_tid = ipath_pe_put_tid; + if (dd->ipath_minrev >= 2) + dd->ipath_f_put_tid = ipath_pe_put_tid_2; + else + dd->ipath_f_put_tid = ipath_pe_put_tid; dd->ipath_f_cleanup = ipath_setup_pe_cleanup; dd->ipath_f_setextled = ipath_setup_pe_setextled; dd->ipath_f_get_base_info = ipath_pe_get_base_info; + dd->ipath_f_free_irq = ipath_pe_free_irq; /* initialize chip-specific variables */ dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; @@ -1259,6 +1368,6 
@@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd) dd->ipath_kregs = &ipath_pe_kregs; dd->ipath_cregs = &ipath_pe_cregs; - ipath_init_pe_variables(); + ipath_init_pe_variables(dd); } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 44669dc2e22d..d819cca524cd 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); static int create_port0_egr(struct ipath_devdata *dd) { unsigned e, egrcnt; - struct sk_buff **skbs; + struct ipath_skbinfo *skbinfo; int ret; egrcnt = dd->ipath_rcvegrcnt; - skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt); - if (skbs == NULL) { + skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); + if (skbinfo == NULL) { ipath_dev_err(dd, "allocation error for eager TID " "skb array\n"); ret = -ENOMEM; @@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd) * 4 bytes so that the data buffer stays word aligned. * See ipath_kreceive() for more details. */ - skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL); - if (!skbs[e]) { + skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); + if (!skbinfo[e].skb) { ipath_dev_err(dd, "SKB allocation error for " "eager TID %u\n", e); while (e != 0) - dev_kfree_skb(skbs[--e]); - vfree(skbs); + dev_kfree_skb(skbinfo[--e].skb); + vfree(skbinfo); ret = -ENOMEM; goto bail; } @@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd) * After loop above, so we can test non-NULL to see if ready * to use at receive, etc. */ - dd->ipath_port0_skbs = skbs; + dd->ipath_port0_skbinfo = skbinfo; for (e = 0; e < egrcnt; e++) { - unsigned long phys = - virt_to_phys(dd->ipath_port0_skbs[e]->data); + dd->ipath_port0_skbinfo[e].phys = + ipath_map_single(dd->pcidev, + dd->ipath_port0_skbinfo[e].skb->data, + dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, phys); + dd->ipath_rcvegrbase), 0, + dd->ipath_port0_skbinfo[e].phys); } ret = 0; @@ -432,16 +435,33 @@ done: */ static void init_shadow_tids(struct ipath_devdata *dd) { - dd->ipath_pageshadow = (struct page **) - vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + struct page **pages; + dma_addr_t *addrs; + + pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * sizeof(struct page *)); - if (!dd->ipath_pageshadow) + if (!pages) { ipath_dev_err(dd, "failed to allocate shadow page * " "array, no expected sends!\n"); - else - memset(dd->ipath_pageshadow, 0, - dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); + dd->ipath_pageshadow = NULL; + return; + } + + addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(dma_addr_t)); + if (!addrs) { + ipath_dev_err(dd, "failed to allocate shadow dma handle " + "array, no expected sends!\n"); + vfree(dd->ipath_pageshadow); + dd->ipath_pageshadow = NULL; + return; + } + + memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt * + sizeof(struct page *)); + + dd->ipath_pageshadow = pages; + dd->ipath_physshadow = addrs; } static void enable_chip(struct ipath_devdata *dd, diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 49bf7bb15b04..5652a550d442 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -37,6 +37,50 @@ #include "ipath_verbs.h" #include "ipath_common.h" +/* + * Called 
when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + */ +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +{ + u32 piobcnt; + unsigned long sbuf[4]; + /* + * it's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling + */ + piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + /* read these before writing errorclear */ + sbuf[0] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror); + sbuf[1] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 1); + if (piobcnt > 128) { + sbuf[2] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 2); + sbuf[3] = ipath_read_kreg64( + dd, dd->ipath_kregs->kr_sendbuffererror + 3); + } + + if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { + int i; + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { + __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, + "SendbufErrs %lx %lx", sbuf[0], + sbuf[1]); + if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) + printk(" %lx %lx ", sbuf[2], sbuf[3]); + printk("\n"); + } + + for (i = 0; i < piobcnt; i++) + if (test_bit(i, sbuf)) + ipath_disarm_piobufs(dd, i, 1); + dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ + } +} + + /* These are all rcv-related errors which we want to count for stats */ #define E_SUM_PKTERRS \ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ @@ -68,53 +112,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { - unsigned long sbuf[4]; u64 ignore_this_time = 0; - u32 piobcnt; - /* if possible that sendbuffererror could be valid */ - piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* read these before writing errorclear */ - sbuf[0] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror); - sbuf[1] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 1); - if (piobcnt > 128) { - sbuf[2] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 2); - sbuf[3] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 3); - } - - if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { - int i; - - ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]); - if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) - printk("%lx %lx ", sbuf[2], sbuf[3]); - for (i = 0; i < piobcnt; i++) { - if (test_bit(i, sbuf)) { - u32 __iomem *piobuf; - if (i < dd->ipath_piobcnt2k) - piobuf = (u32 __iomem *) - (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - piobuf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * - dd->ipath_4kalign); - - ipath_cdbg(PKT, - "PIObuf[%u] @%p pbc is %x; ", - i, piobuf, readl(piobuf)); - - ipath_disarm_piobufs(dd, i, 1); - } - } - if (ipath_debug & __IPATH_PKTDBG) - printk("\n"); - } + ipath_disarm_senderrbufs(dd); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) return ignore_this_time; } +/* generic hw error messages... 
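
ipath_disarm_senderrbufs() above treats the four kr_sendbuffererror registers as one long bitmap, one bit per PIO buffer, and disarms every buffer whose bit is set. The scan in plain C (disarm_buf() stands in for ipath_disarm_piobufs()):

#include <stdint.h>
#include <stdio.h>

void disarm_buf(unsigned i)
{
	printf("disarming PIO buffer %u\n", i);
}

/* sbuf[] mirrors up to 4 x 64-bit send-buffer-error registers. */
void disarm_senderrbufs(const uint64_t sbuf[4], unsigned piobcnt)
{
	unsigned i;

	for (i = 0; i < piobcnt; i++)
		if (sbuf[i / 64] & (1ULL << (i % 64)))
			disarm_buf(i);
}
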
*/ +#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ + .msg = "TXE " #a " Memory Parity" \ + } +#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ + { \ + .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ + .msg = "RXE " #a " Memory Parity" \ + } + +static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { + INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), + INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), + + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), + INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), + + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), + INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), +}; + +/** + * ipath_format_hwmsg - format a single hwerror message + * @msg message buffer + * @msgl length of message buffer + * @hwmsg message to add to message buffer + */ +static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * ipath_format_hwerrors - format hardware error messages for display + * @hwerrs hardware errors bit vector + * @hwerrmsgs hardware error descriptions + * @nhwerrmsgs number of hwerrmsgs + * @msg message buffer + * @msgl message buffer length + */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t msgl) +{ + int i; + const int glen = + sizeof(ipath_generic_hwerror_msgs) / + sizeof(ipath_generic_hwerror_msgs[0]); + + for (i=0; i<glen; i++) { + if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { + ipath_format_hwmsg(msg, msgl, + ipath_generic_hwerror_msgs[i].msg); + } + } + + for (i=0; i<nhwerrmsgs; i++) { + if (hwerrs & hwerrmsgs[i].mask) { + ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); + } + } +} + /* return the strings for the most common link states */ static char *ib_linkstate(u32 linkstate) { @@ -404,10 +480,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); } - if (!noprint && (errs & ~infinipath_e_bitsextant)) + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) ipath_dev_err(dd, "error interrupt with unknown errors " "%llx set\n", (unsigned long long) - (errs & ~infinipath_e_bitsextant)); + (errs & ~dd->ipath_e_bitsextant)); if (errs & E_SUM_ERRS) ignore_this_time = handle_e_sum_errs(dd, errs); @@ -478,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ~(INFINIPATH_E_HARDWARE | INFINIPATH_E_IBSTATUSCHANGED); } + + /* likely due to cancel, so suppress */ + if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && + dd->ipath_lastcancel > jiffies) { + ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n"); + errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); + } + if (!errs) return 0; @@ -529,7 +613,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * don't report same point multiple times, * except kernel */ - tl = (u32) * pd->port_rcvhdrtail_kvaddr; + tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; if (tl == dd->ipath_lastrcvhdrqtails[i]) continue; hd = 
ipath_read_ureg32(dd, ur_rcvhdrhead, @@ -626,14 +710,14 @@ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) * linuxbios development work, and it may happen in * the future again. */ - if (dd->pcidev && dd->pcidev->irq) { + if (dd->pcidev && dd->ipath_irq) { ipath_dev_err(dd, "Now %u unexpected " "interrupts, unregistering " "interrupt handler\n", *unexpectp); - ipath_dbg("free_irq of irq %x\n", - dd->pcidev->irq); - free_irq(dd->pcidev->irq, dd); + ipath_dbg("free_irq of irq %d\n", + dd->ipath_irq); + dd->ipath_f_free_irq(dd); } } if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) { @@ -669,7 +753,7 @@ static void ipath_bad_regread(struct ipath_devdata *dd) if (allbits == 2) { ipath_dev_err(dd, "Still bad interrupt status, " "unregistering interrupt\n"); - free_irq(dd->pcidev->irq, dd); + dd->ipath_f_free_irq(dd); } else if (allbits > 2) { if ((allbits % 10000) == 0) printk("."); @@ -729,9 +813,9 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) int rcvdint = 0; portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & - infinipath_i_rcvavail_mask) + dd->ipath_i_rcvavail_mask) | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & - infinipath_i_rcvurg_mask); + dd->ipath_i_rcvurg_mask); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && @@ -755,7 +839,7 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) } } -irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) +irqreturn_t ipath_intr(int irq, void *data) { struct ipath_devdata *dd = data; u32 istat, chk0rcv = 0; @@ -808,7 +892,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (oldhead != curtail) { if (dd->ipath_flags & IPATH_GPIO_INTR) { ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + (u64) (1 << IPATH_GPIO_PORT0_BIT)); istat = port0rbits | INFINIPATH_I_GPIO; } else @@ -838,10 +922,10 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (unexpected) unexpected = 0; - if (unlikely(istat & ~infinipath_i_bitsextant)) + if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %x set\n", - istat & (u32) ~ infinipath_i_bitsextant); + istat & (u32) ~ dd->ipath_i_bitsextant); else ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); @@ -867,26 +951,80 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) if (istat & INFINIPATH_I_GPIO) { /* - * Packets are available in the port 0 rcv queue. - * Eventually this needs to be generalized to check - * IPATH_GPIO_INTR, and the specific GPIO bit, if - * GPIO interrupts are used for anything else. + * GPIO interrupts fall in two broad classes: + * GPIO_2 indicates (on some HT4xx boards) that a packet + * has arrived for Port 0. Checking for this + * is controlled by flag IPATH_GPIO_INTR. + * GPIO_3..5 on IBA6120 Rev2 chips indicate errors + * that we need to count. Checking for this + * is controlled by flag IPATH_GPIO_ERRINTRS. */ - if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) { - u32 gpiostatus; - gpiostatus = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_status); - ipath_dbg("Unexpected GPIO interrupt bits %x\n", - gpiostatus); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - gpiostatus); + u32 gpiostatus; + u32 to_clear = 0; + + gpiostatus = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_status); + /* First the error-counter case. 
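
The GPIO branch that follows replaces the old single-purpose "clear bit 2" code with a general decode: handled bits are accumulated into to_clear, counted, and cleared with one register write, and anything left over is reported as unexpected. A simplified standalone model of that accumulation (flag checks and counters reduced; the bit numbers match the IPATH_GPIO_*_BIT values defined in ipath_kernel.h later in this patch):

#include <stdint.h>

#define PORT0_BIT	2
#define RXUVL_BIT	3
#define OVRUN_BIT	4
#define LLI_BIT		5
#define ERRINTR_MASK	0x38	/* bits 3..5 */

struct gpio_counts { unsigned rxuvl, ovrun, lli; };

/* Returns the set of handled bits to clear in one register write. */
uint32_t decode_gpio(uint32_t status, struct gpio_counts *c, int *port0_rcv)
{
	uint32_t to_clear = 0;

	if (status & ERRINTR_MASK) {
		if (status & (1u << RXUVL_BIT))
			c->rxuvl++;
		if (status & (1u << OVRUN_BIT))
			c->ovrun++;
		if (status & (1u << LLI_BIT))
			c->lli++;
		to_clear |= status & ERRINTR_MASK;
		status &= ~ERRINTR_MASK;
	}
	if (status & (1u << PORT0_BIT)) {	/* port 0 packet raced in */
		*port0_rcv = 1;			/* chk0rcv in the driver */
		to_clear |= 1u << PORT0_BIT;
		status &= ~(1u << PORT0_BIT);
	}
	/* bits still set in status are unexpected; the driver logs the
	 * masked ones and clears those as well */
	return to_clear;
}
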
+ */ + if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && + (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". + */ + if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { + ipath_dbg("FlowCtl on UnsupVL\n"); + dd->ipath_rxfc_unsupvl_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { + ipath_dbg("Overrun Threshold exceeded\n"); + dd->ipath_overrun_thresh_errs++; + } + if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { + ipath_dbg("Local Link Integrity error\n"); + dd->ipath_lli_errs++; + } + gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; } - else { - /* Clear GPIO status bit 2 */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); + /* Now the Port0 Receive case */ + if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && + (dd->ipath_flags & IPATH_GPIO_INTR)) { + /* + * GPIO status bit 2 is set, and we expected it. + * clear it and indicate in p0bits. + * This probably only happens if a Port0 pkt + * arrives at _just_ the wrong time, and we + * handle that by seting chk0rcv; + */ + to_clear |= (1 << IPATH_GPIO_PORT0_BIT); + gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); chk0rcv = 1; } + if (unlikely(gpiostatus)) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * MEA: this is almost certainly non-ideal. + * we should look into auto-disable of unexpected + * GPIO interrupts, possibly on a "three strikes" + * basis. + */ + u32 mask; + mask = ipath_read_kreg32( + dd, dd->ipath_kregs->kr_gpio_mask); + if (mask & gpiostatus) { + ipath_dbg("Unexpected GPIO IRQ bits %x\n", + gpiostatus & mask); + to_clear |= (gpiostatus & mask); + } + } + if (to_clear) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, + (u64) to_clear); + } } chk0rcv |= istat & port0rbits; @@ -911,9 +1049,9 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) istat &= ~port0rbits; } - if (istat & ((infinipath_i_rcvavail_mask << + if (istat & ((dd->ipath_i_rcvavail_mask << INFINIPATH_I_RCVAVAIL_SHIFT) - | (infinipath_i_rcvurg_mask << + | (dd->ipath_i_rcvurg_mask << INFINIPATH_I_RCVURG_SHIFT))) handle_urcv(dd, istat); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index a8a56276ff1d..986b2125b8f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -39,6 +39,8 @@ */ #include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> #include <asm/io.h> #include "ipath_common.h" @@ -62,7 +64,7 @@ struct ipath_portdata { /* rcvhdrq base, needs mmap before useful */ void *port_rcvhdrq; /* kernel virtual address where hdrqtail is updated */ - volatile __le64 *port_rcvhdrtail_kvaddr; + void *port_rcvhdrtail_kvaddr; /* * temp buffer for expected send setup, allocated at open, instead * of each setup call @@ -79,8 +81,8 @@ struct ipath_portdata { dma_addr_t port_rcvhdrq_phys; dma_addr_t port_rcvhdrqtailaddr_phys; /* - * number of opens on this instance (0 or 1; ignoring forks, dup, - * etc. for now) + * number of opens (including slave subports) on this instance + * (ignoring forks, dup, etc. for now) */ int port_cnt; /* @@ -89,6 +91,10 @@ struct ipath_portdata { */ /* instead of calculating it */ unsigned port_port; + /* non-zero if port is being shared. */ + u16 port_subport_cnt; + /* non-zero if port is being shared. 
*/ + u16 port_subport_id; /* chip offset of PIO buffers for this port */ u32 port_piobufs; /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ @@ -121,6 +127,16 @@ struct ipath_portdata { u16 port_pkeys[4]; /* so file ops can get at unit */ struct ipath_devdata *port_dd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subport_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subport_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subport_rcvhdr_base; + /* The version of the library which opened this port */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; }; struct sk_buff; @@ -132,6 +148,11 @@ struct _ipath_layer { void *l_arg; }; +struct ipath_skbinfo { + struct sk_buff *skb; + dma_addr_t phys; +}; + struct ipath_devdata { struct list_head ipath_list; @@ -154,7 +175,7 @@ struct ipath_devdata { /* ipath_cfgports pointers */ struct ipath_portdata **ipath_pd; /* sk_buffs used by port 0 eager receive queue */ - struct sk_buff **ipath_port0_skbs; + struct ipath_skbinfo *ipath_port0_skbinfo; /* kvirt address of 1st 2k pio buffer */ void __iomem *ipath_pio2kbase; /* kvirt address of 1st 4k pio buffer */ @@ -192,6 +213,8 @@ struct ipath_devdata { void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); /* fill out chip-specific fields */ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); + /* free irq */ + void (*ipath_f_free_irq)(struct ipath_devdata *); struct ipath_ibdev *verbs_dev; struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ @@ -307,6 +330,8 @@ struct ipath_devdata { /* so we can rewrite it after a chip reset */ u32 ipath_pcibar1; + /* interrupt number */ + int ipath_irq; /* HT/PCI Vendor ID (here for NodeInfo) */ u16 ipath_vendorid; /* HT/PCI Device ID (here for NodeInfo) */ @@ -315,12 +340,16 @@ struct ipath_devdata { u8 ipath_ht_slave_off; /* for write combining settings */ unsigned long ipath_wc_cookie; + unsigned long ipath_wc_base; + unsigned long ipath_wc_len; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; /* shadow copy of all exptids physaddr; used only by funcsim */ u64 *ipath_tidsimshadow; /* shadow copy of struct page *'s for exp tid pages */ struct page **ipath_pageshadow; + /* shadow copy of dma handles for exp tid pages */ + dma_addr_t *ipath_physshadow; /* lock to workaround chip bug 9437 */ spinlock_t ipath_tid_lock; @@ -402,6 +431,9 @@ struct ipath_devdata { unsigned long ipath_rcvctrl; /* shadow kr_sendctrl */ unsigned long ipath_sendctrl; + /* ports waiting for PIOavail intr */ + unsigned long ipath_portpiowait; + unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */ /* value we put in kr_rcvhdrcnt */ u32 ipath_rcvhdrcnt; @@ -465,8 +497,6 @@ struct ipath_devdata { u32 ipath_htwidth; /* HT speed (200,400,800,1000) from HT config */ u32 ipath_htspeed; - /* ports waiting for PIOavail intr */ - unsigned long ipath_portpiowait; /* * number of sequential ibcstatus change for polling active/quiet * (i.e., link not coming up). @@ -510,8 +540,47 @@ struct ipath_devdata { u32 ipath_lli_counter; /* local link integrity errors */ u32 ipath_lli_errors; + /* + * Above counts only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of kern-packets. + * Below are the three (monotonically increasing) counters + * maintained via GPIO interrupts on iba6120-rev2. 
+ */ + u32 ipath_rxfc_unsupvl_errs; + u32 ipath_overrun_thresh_errs; + u32 ipath_lli_errs; + + /* + * Not all devices managed by a driver instance are the same + * type, so these fields must be per-device. + */ + u64 ipath_i_bitsextant; + ipath_err_t ipath_e_bitsextant; + ipath_err_t ipath_hwe_bitsextant; + + /* + * Below should be computable from number of ports, + * since they are never modified. + */ + u32 ipath_i_rcvavail_mask; + u32 ipath_i_rcvurg_mask; + + /* + * Register bits for selecting i2c direction and values, used for + * I2C serial flash. + */ + u16 ipath_gpio_sda_num; + u16 ipath_gpio_scl_num; + u64 ipath_gpio_sda; + u64 ipath_gpio_scl; }; +/* Private data for file operations */ +struct ipath_filedata { + struct ipath_portdata *pd; + unsigned subport; + unsigned tidcursor; +}; extern struct list_head ipath_dev_list; extern spinlock_t ipath_devs_lock; extern struct ipath_devdata *ipath_lookup(int unit); @@ -521,6 +590,7 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); +void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, struct file_operations *fops, @@ -540,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; -irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs); +irqreturn_t ipath_intr(int irq, void *devid); void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; @@ -572,7 +642,11 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* for use in system calls, where we want to know device type, etc. 
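
The struct ipath_filedata above, together with the port_fp()/subport_fp() macros that immediately follow, is what enables subports: file->private_data now points at a small per-open struct instead of directly at the port, and the accessors keep existing call sites one token away from the same data. In miniature, with struct file reduced to the one field used here:

struct portdata;			/* opaque for this sketch */

struct filedata {
	struct portdata *pd;
	unsigned subport;
	unsigned tidcursor;
};

struct file { void *private_data; };

#define port_fp(fp)	 (((struct filedata *)(fp)->private_data)->pd)
#define subport_fp(fp)	 (((struct filedata *)(fp)->private_data)->subport)
#define tidcursor_fp(fp) (((struct filedata *)(fp)->private_data)->tidcursor)
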
*/ -#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) +#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd +#define subport_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->subport +#define tidcursor_fp(fp) \ + ((struct ipath_filedata *)(fp)->private_data)->tidcursor /* * values for ipath_flags @@ -612,6 +686,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* can miss port0 rx interrupts */ #define IPATH_POLL_RX_INTR 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ + /* Use GPIO interrupts for new counters */ +#define IPATH_GPIO_ERRINTRS 0x100000 + +/* Bits in GPIO for the added interrupts */ +#define IPATH_GPIO_PORT0_BIT 2 +#define IPATH_GPIO_RXUVL_BIT 3 +#define IPATH_GPIO_OVRUN_BIT 4 +#define IPATH_GPIO_LLI_BIT 5 +#define IPATH_GPIO_ERRINTR_MASK 0x38 /* portdata flag bit offsets */ /* waiting for a packet to arrive */ @@ -790,15 +873,19 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); int ipath_expose_reset(struct device *); -int ipath_diagpkt_add(void); -void ipath_diagpkt_remove(void); - int ipath_init_ipathfs(void); void ipath_exit_ipathfs(void); int ipathfs_add_device(struct ipath_devdata *); int ipathfs_remove_device(struct ipath_devdata *); /* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); + +/* * Flush write combining store buffers (if present) and perform a write * barrier. */ @@ -855,4 +942,20 @@ extern struct mutex ipath_mutex; #endif /* _IPATH_DEBUGGING */ +/* + * this is used for formatting hw error messages... + */ +struct ipath_hwerror_msgs { + u64 mask; + const char *msg; +}; + +#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } + +/* in ipath_intr.c... */ +void ipath_format_hwerrors(u64 hwerrs, + const struct ipath_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, + char *msg, size_t lmsg); + #endif /* _IPATH_KERNEL_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index ba1b93226caa..9a6cbd05adcd 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -118,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) * Check the IB SGE for validity and initialize our internal version * of it. */ -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc) { + struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct ipath_mregion *mr; unsigned n, m; size_t off; @@ -140,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, goto bail; } mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey)) { + if (unlikely(mr == NULL || mr->lkey != sge->lkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } @@ -188,9 +190,10 @@ bail: * * Return 1 if successful, otherwise 0. 
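
The extra clause added to ipath_lkey_ok() above (and to ipath_rkey_ok() just below) closes a protection hole: a key is now rejected unless the memory region it names was registered under the same protection domain as the QP presenting it. The test reduced to its core:

#include <stddef.h>

struct pd;				/* protection domain, opaque here */
struct mregion { unsigned lkey; struct pd *pd; };

int key_ok(const struct mregion *mr, unsigned lkey, const struct pd *qp_pd)
{
	return mr != NULL && mr->lkey == lkey && mr->pd == qp_pd;
}
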
*/ -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc) { + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_lkey_table *rkt = &dev->lk_table; struct ipath_sge *sge = &ss->sge; struct ipath_mregion *mr; @@ -214,7 +217,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, } mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey)) { + if (unlikely(mr == NULL || mr->lkey != rkey || + qp->ibqp.pd != mr->pd)) { ret = 0; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 72d1db89db8f..25908b02fbe5 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, struct ipath_devdata *dd = to_idev(ibdev)->dd; u32 vendor, majrev, minrev; - if (smp->attr_mod) + /* GUID 0 is illegal */ + if (smp->attr_mod || (dd->ipath_guid == 0)) smp->status |= IB_SMP_INVALID_FIELD; nip->base_version = 1; @@ -131,10 +132,15 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, * We only support one GUID for now. If this changes, the * portinfo.guid_cap field needs to be updated too. */ - if (startgx == 0) - /* The first is a copy of the read-only HW GUID. */ - *p = to_idev(ibdev)->dd->ipath_guid; - else + if (startgx == 0) { + __be64 g = to_idev(ibdev)->dd->ipath_guid; + if (g == 0) + /* GUID 0 is illegal */ + smp->status |= IB_SMP_INVALID_FIELD; + else + /* The first is a copy of the read-only HW GUID. */ + *p = g; + } else smp->status |= IB_SMP_INVALID_FIELD; return reply(smp); diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index 11b7378ff214..a82157db4689 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index b36f6fb3e37a..a0673c1eef71 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -138,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, goto bail; } + mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; mr->mr.length = 0; @@ -197,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, goto bail; } + mr->mr.pd = pd; mr->mr.user_base = region->user_base; mr->mr.iova = region->virt_base; mr->mr.length = region->length; @@ -289,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, * Resources are allocated but no valid mapping (RKEY can't be * used). 
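
The ipath_mad.c hunks above treat a GUID of zero as "EEPROM never programmed": both the NodeInfo and GUIDInfo queries flag the field invalid rather than hand out a bogus identifier. The guard by itself (the status value here is illustrative, not the wire encoding):

#include <stdint.h>

#define SMP_INVALID_FIELD 0x1cu		/* illustrative status code */

uint16_t check_guid(uint64_t guid, uint16_t status)
{
	if (guid == 0)			/* GUID 0 is illegal */
		status |= SMP_INVALID_FIELD;
	return status;
}
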
*/ + fmr->mr.pd = pd; fmr->mr.user_base = 0; fmr->mr.iova = 0; fmr->mr.length = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 224b0f40767f..46c1c89bf6ae 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -335,6 +335,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; + qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -342,6 +343,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; + qp->s_wait_credit = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -352,12 +354,13 @@ static void ipath_reset_qp(struct ipath_qp *qp) /** * ipath_error_qp - put a QP into an error state * @qp: the QP to put into an error state + * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * QP s_lock should be held and interrupts disabled. */ -void ipath_error_qp(struct ipath_qp *qp) +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; @@ -373,7 +376,6 @@ void ipath_error_qp(struct ipath_qp *qp) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.status = IB_WC_WR_FLUSH_ERR; wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; @@ -385,6 +387,12 @@ void ipath_error_qp(struct ipath_qp *qp) wc.sl = 0; wc.dlid_path_bits = 0; wc.port_num = 0; + if (qp->r_wrid_valid) { + qp->r_wrid_valid = 0; + wc.status = err; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); @@ -501,7 +509,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; case IB_QPS_ERR: - ipath_error_qp(qp); + ipath_error_qp(qp, IB_WC_GENERAL_ERR); break; default: @@ -516,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn; + qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index a08654042c03..ce6038743c5c 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_rnr_timeout) goto done; + /* Limit the number of packets sent without an ACK. */ + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { + qp->s_wait_credit = 1; + dev->n_rc_stalls++; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + goto done; + } + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; bth0 = 0; @@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) goto done; - qp->s_psn = wqe->psn = qp->s_next_psn; + wqe->psn = qp->s_next_psn; newreq = 1; } /* @@ -229,10 +241,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, * original work request since we may need to resend * it. 
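
The throttle added to ipath_make_rc_req() above keeps at most IPATH_PSN_CREDIT packets in flight past the last acked PSN; since PSNs are 24-bit serial numbers, the comparison must sign-extend a 24-bit difference, which is what the driver's ipath_cmp24() helper does. A self-contained version (the PSN_CREDIT value here is illustrative):

#include <stdint.h>

#define PSN_CREDIT 2048			/* illustrative window size */

/* 24-bit serial-number compare, mirroring ipath_cmp24(). */
int cmp24(uint32_t a, uint32_t b)
{
	return (int32_t)((a - b) << 8) >> 8;
}

/* Nonzero when the sender must stall until more ACKs arrive. */
int psn_window_full(uint32_t s_psn, uint32_t s_last_psn)
{
	return cmp24(s_psn, s_last_psn + PSN_CREDIT) > 0;
}
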
*/ - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = len = wqe->length; + len = wqe->length; ss = &qp->s_sge; bth2 = 0; switch (wqe->wr.opcode) { @@ -356,14 +365,23 @@ int ipath_make_rc_req(struct ipath_qp *qp, default: goto done; } + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_len = wqe->length; if (newreq) { qp->s_tail++; if (qp->s_tail >= qp->s_size) qp->s_tail = 0; } - bth2 |= qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; + bth2 |= qp->s_psn & IPATH_PSN_MASK; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_psn = wqe->lpsn + 1; + else { + qp->s_psn++; + if ((int)(qp->s_psn - qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + } /* * Put the QP on the pending list so lost ACKs will cause * a retry. More than one request can be pending so the @@ -393,12 +411,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -435,12 +447,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -498,6 +504,8 @@ int ipath_make_rc_req(struct ipath_qp *qp, */ goto done; } + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + bth2 |= 1 << 31; /* Request ACK. */ qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = ss; @@ -688,13 +696,6 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; - /* - * If there are no requests pending, we are done. - */ - if (ipath_cmp24(psn, qp->s_next_psn) >= 0 || - qp->s_last == qp->s_tail) - goto done; - if (qp->s_retry == 0) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_RETRY_EXC_ERR; @@ -729,14 +730,21 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) dev->n_rc_resends += (int)qp->s_psn - (int)psn; reset_psn(qp, psn); - -done: tasklet_hi_schedule(&qp->s_task); bail: return; } +static inline void update_last_psn(struct ipath_qp *qp, u32 psn) +{ + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } + qp->s_last_psn = psn; +} + /** * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on @@ -754,6 +762,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) struct ib_wc wc; struct ipath_swqe *wqe; int ret = 0; + u32 ack_psn; /* * Remove the QP from the timeout queue (or RNR timeout queue). @@ -766,26 +775,26 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); + /* Nothing is pending to ACK/NAK. */ + if (unlikely(qp->s_last == qp->s_tail)) + goto bail; + /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued * before the NAK'ed request. The MSN won't include the NAK'ed * request but will include an ACK'ed request(s). */ + ack_psn = psn; + if (aeth >> 29) + ack_psn--; wqe = get_swqe_ptr(qp, qp->s_last); - /* Nothing is pending to ACK/NAK. 
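
The credit logic above caps how far qp->s_psn may run ahead of qp->s_last_psn before the sender stalls (s_wait_credit), and update_last_psn() restarts the send tasklet once ACKs catch up. The test rests on 24-bit circular PSN arithmetic; the sketch below models it in standalone C. psn_cmp24() mirrors what ipath_cmp24() is assumed to do (that helper is not shown in this patch), and all names here are illustrative rather than the driver's.

#include <stdint.h>

/*
 * Minimal model of the ACK-window check in ipath_make_rc_req() above.
 * PSNs are 24 bits and wrap, so the difference is shifted into the top
 * of a 32-bit int and compared by sign.
 */
static int psn_cmp24(uint32_t a, uint32_t b)
{
	return (int32_t)((a - b) << 8);	/* sign gives circular order */
}

static int psn_window_full(uint32_t s_psn, uint32_t s_last_psn,
			   uint32_t credit)
{
	/* true once more than `credit` packets await an ACK */
	return psn_cmp24(s_psn, s_last_psn + credit) > 0;
}

This is also why the same patch sets the BTH ACK-request bit once s_psn comes within one packet of the credit limit, replacing the old fixed every-half-megabyte heuristic that was deleted above.
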
*/ - if (qp->s_last == qp->s_tail) - goto bail; - /* * The MSN might be for a later WQE than the PSN indicates so * only complete WQEs that the PSN finishes. */ - while (ipath_cmp24(psn, wqe->lpsn) >= 0) { - /* If we are ACKing a WQE, the MSN should be >= the SSN. */ - if (ipath_cmp24(aeth, wqe->ssn) < 0) - break; + while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) { /* * If this request is a RDMA read or atomic, and the ACK is * for a later operation, this ACK NAKs the RDMA read or @@ -796,7 +805,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * is sent but before the response is received. */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && - opcode != OP(RDMA_READ_RESPONSE_LAST)) || + (opcode != OP(RDMA_READ_RESPONSE_LAST) || + ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || @@ -805,7 +815,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * The last valid PSN seen is the previous * request's. */ - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ ipath_restart_rc(qp, wqe->psn, &wc); /* @@ -814,6 +824,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } + if (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + tasklet_hi_schedule(&qp->s_task); /* Post a send completion queue entry if requested. */ if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { @@ -864,7 +878,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; - qp->s_last_psn = psn; + update_last_psn(qp, psn); ret = 1; goto bail; @@ -883,7 +897,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; /* The last valid PSN is the previous PSN. */ - qp->s_last_psn = psn - 1; + update_last_psn(qp, psn - 1); dev->n_rc_resends += (int)qp->s_psn - (int)psn; @@ -898,7 +912,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 3: /* NAK */ /* The last valid PSN seen is the previous request's. */ if (qp->s_last != qp->s_tail) - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ @@ -1044,7 +1058,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->s_last != qp->s_tail) + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } rdma_read: @@ -1071,7 +1086,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * since we don't want s_sge modified. */ qp->s_len -= pmtu; - qp->s_last_psn = psn; + update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); ipath_copy_sge(&qp->s_sge, data, pmtu); goto bail; @@ -1080,7 +1095,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. 
*/ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->s_last != qp->s_tail) + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } /* FALLTHROUGH */ @@ -1223,7 +1239,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * Address range must be a subset of the original * request and start on pmtu boundaries. */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1282,6 +1298,14 @@ done: return 1; } +static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +{ + spin_lock_irq(&qp->s_lock); + qp->state = IB_QPS_ERR; + ipath_error_qp(qp, err); + spin_unlock_irq(&qp->s_lock); +} + /** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on @@ -1309,6 +1333,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -1370,8 +1398,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ if (qp->r_ack_state >= OP(COMPARE_SWAP)) goto send_ack; - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; @@ -1477,9 +1504,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_ONLY)) + if (!qp->r_wrid_valid) break; + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; @@ -1517,7 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->r_sge, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) @@ -1559,7 +1586,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, + ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, qp->s_rdma_len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) { @@ -1618,7 +1645,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; @@ -1670,8 +1697,7 @@ nack_acc: * is pending though. 
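
Both the RC receive path above and the UC path later in this patch now begin by screening the source LID, per the spec section the comment cites: on a connected QP, a packet whose LRH source LID does not match the LID cached in the QP's address handle is silently dropped. A standalone sketch of the check with hypothetical names; the driver reads the same field as the big-endian word hdr->lrh[3]:

#include <stdint.h>

/*
 * Illustrative model of the new SLID screen.  The LRH is four
 * big-endian 16-bit words; word 3 is the source LID.
 */
static int slid_is_expected(const uint8_t lrh[8], uint16_t expected_lid)
{
	uint16_t slid = (uint16_t)((lrh[6] << 8) | lrh[7]);

	return slid == expected_lid;	/* else drop the packet */
}
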
*/ if (qp->r_ack_state < OP(COMPARE_SWAP)) { - /* XXX Flush WQEs */ - qp->state = IB_QPS_ERR; + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); qp->r_ack_state = OP(RDMA_WRITE_ONLY); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 6e23b3d632b8..dffc76016d3c 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -134,10 +134,24 @@ #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL -#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL +/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL +#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL +/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ +#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL +/* waldo specific -- find the rest in ipath_6110.c */ +#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL +/* monty specific -- find the rest in ipath_6120.c */ +#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL /* kr_hwdiagctrl bits */ #define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL @@ -209,9 +223,9 @@ /* combination link status states that we use with some frequency */ #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \ - << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ + << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \ (INFINIPATH_IBCS_LINKSTATE_MASK \ - <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) + <<INFINIPATH_IBCS_LINKSTATE_SHIFT)) #define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \ << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ (INFINIPATH_IBCS_LT_STATE_LINKUP \ @@ -302,6 +316,17 @@ typedef u64 ipath_err_t; +/* The following change with the type of device, so + * need to be part of the ipath_devdata struct, or + * we could have problems plugging in devices of + * different types (e.g. one HT, one PCIE) + * in one system, to be managed by one driver. + * On the other hand, this file may also be included + * by other code, so leave the declarations here + * temporarily. Minor footprint issue if common-model + * linker used, none if C89+ linker used. + */ + /* mask of defined bits for various registers */ extern u64 infinipath_i_bitsextant; extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; @@ -310,13 +335,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; /* - * register bits for selecting i2c direction and values, used for I2C serial - * flash - */ -extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num; -extern u64 ipath_gpio_sda, ipath_gpio_scl; - -/* * These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel * registers are calculated from one of the base registers. The use of diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 5c1da2d25e03..f7530512045d 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -108,7 +108,6 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) { - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int user = to_ipd(qp->ibqp.pd)->user; int i, j, ret; struct ib_wc wc; @@ -119,8 +118,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) continue; /* Check LKEY */ if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(&dev->lk_table, - &qp->r_sg_list[j], &wqe->sg_list[i], + !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; @@ -231,6 +229,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } } spin_unlock_irqrestore(&rq->lock, flags); + qp->r_wrid_valid = 1; bail: return ret; @@ -326,7 +325,7 @@ again: case IB_WR_RDMA_WRITE: if (wqe->length == 0) break; - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_WRITE))) { @@ -350,7 +349,7 @@ again: break; case IB_WR_RDMA_READ: - if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length, + if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) @@ -365,7 +364,7 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), + if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_ATOMIC))) @@ -575,8 +574,7 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) } if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], + if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i], acc)) { spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EINVAL; diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 941e866d9517..94033503400c 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -104,11 +104,6 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; - if (dev->n_srqs_allocated == ib_ipath_max_srqs) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); goto done; @@ -180,10 +175,17 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, spin_lock_init(&srq->rq.lock); srq->rq.wq->head = 0; srq->rq.wq->tail = 0; - srq->rq.max_sge = srq_init_attr->attr.max_sge; srq->limit = srq_init_attr->attr.srq_limit; - dev->n_srqs_allocated++; + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_ipath_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); ret = &srq->ibsrq; goto done; @@ -351,8 +353,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq) struct ipath_srq *srq = to_isrq(ibsrq); struct ipath_ibdev *dev = to_idev(ibsrq->device); + spin_lock(&dev->n_srqs_lock); 
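
The SRQ accounting rewrite above closes a race: the old code compared n_srqs_allocated against ib_ipath_max_srqs at function entry and only incremented it much later, so two concurrent creators could both pass the check. The new sequence makes the test and the increment a single critical section under n_srqs_lock. A kernel-style sketch of the idiom with hypothetical names (reserve on create, release on destroy); this is a condensed model, not driver code:

/* Check-and-count under one lock, as n_srqs_lock is used above. */
static int reserve_slot(spinlock_t *lock, u32 *allocated, u32 max)
{
	int ret = 0;

	spin_lock(lock);
	if (*allocated == max)
		ret = -ENOMEM;	/* caller unwinds any partial setup */
	else
		(*allocated)++;
	spin_unlock(lock);
	return ret;
}

static void release_slot(spinlock_t *lock, u32 *allocated)
{
	spin_lock(lock);
	(*allocated)--;
	spin_unlock(lock);
}
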
dev->n_srqs_allocated--; - vfree(srq->rq.wq); + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, ipath_release_mmap_info); + else + vfree(srq->rq.wq); kfree(srq); return 0; diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index e299148c4b68..182de34f9f47 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -257,7 +257,7 @@ static ssize_t store_guid(struct device *dev, struct ipath_devdata *dd = dev_get_drvdata(dev); ssize_t ret; unsigned short guid[8]; - __be64 nguid; + __be64 new_guid; u8 *ng; int i; @@ -266,7 +266,7 @@ static ssize_t store_guid(struct device *dev, &guid[4], &guid[5], &guid[6], &guid[7]) != 8) goto invalid; - ng = (u8 *) &nguid; + ng = (u8 *) &new_guid; for (i = 0; i < 8; i++) { if (guid[i] > 0xff) @@ -274,7 +274,10 @@ static ssize_t store_guid(struct device *dev, ng[i] = guid[i]; } - dd->ipath_guid = nguid; + if (new_guid == 0) + goto invalid; + + dd->ipath_guid = new_guid; dd->ipath_nguid = 1; ret = strlen(buf); @@ -297,6 +300,16 @@ static ssize_t show_nguid(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); } +static ssize_t show_nports(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + + /* Return the number of user ports available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); +} + static ssize_t show_serial(struct device *dev, struct device_attribute *attr, char *buf) @@ -608,6 +621,7 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); +static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); @@ -623,6 +637,7 @@ static struct attribute *dev_attributes[] = { &dev_attr_mlid.attr, &dev_attr_mtu.attr, &dev_attr_nguid.attr, + &dev_attr_nports.attr, &dev_attr_serial.attr, &dev_attr_status.attr, &dev_attr_status_str.attr, diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 0fd3cded16ba..e636cfd67a82 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, struct ib_reth *reth; int header_in_data; + /* Validate the SLID. See Ch. 
9.6.1.5 */ + if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) + goto done; + /* Check for GRH */ if (!has_grh) { ohdr = &hdr->u.oth; @@ -440,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int ok; /* Check rkey */ - ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len, + ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) { diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 6991d1d74e3c..49f1102af8b3 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -39,7 +39,6 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, u32 *lengthp, struct ipath_sge_state *ss) { - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int user = to_ipd(qp->ibqp.pd)->user; int i, j, ret; struct ib_wc wc; @@ -50,8 +49,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, continue; /* Check LKEY */ if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(&dev->lk_table, - j ? &ss->sg_list[j - 1] : &ss->sge, + !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; *lengthp += wqe->sg_list[i].length; @@ -343,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) if (wr->sg_list[i].length == 0) continue; - if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ? + if (!ipath_lkey_ok(qp, ss.num_sge ? sg_list + ss.num_sge - 1 : &ss.sge, &wr->sg_list[i], 0)) { ret = -EINVAL; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index e32fca9faf80..8536aeb96af8 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -90,6 +90,62 @@ bail: } /** + * ipath_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. + * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * ipath_map_single - a safety wrapper around pci_map_single() + * + * Same idea as ipath_map_page(). + */ +dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, + int direction) +{ + dma_addr_t phys; + + phys = pci_map_single(hwdev, ptr, size, direction); + + if (phys == 0) { + pci_unmap_single(hwdev, phys, size, direction); + phys = pci_map_single(hwdev, ptr, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. 
+ */ + } + + return phys; +} + +/** * ipath_get_user_pages - lock user pages into memory * @start_page: the start page * @num_pages: the number of pages @@ -158,9 +214,10 @@ struct ipath_user_pages_work { unsigned long num_pages; }; -static void user_pages_account(void *ptr) +static void user_pages_account(struct work_struct *_work) { - struct ipath_user_pages_work *work = ptr; + struct ipath_user_pages_work *work = + container_of(_work, struct ipath_user_pages_work, work); down_write(&work->mm->mmap_sem); work->mm->locked_vm -= work->num_pages; @@ -186,7 +243,7 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) goto bail; - INIT_WORK(&work->work, user_pages_account, work); + INIT_WORK(&work->work, user_pages_account); work->mm = mm; work->num_pages = num_pages; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index b8381c5e72bd..acdee33ee1f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -898,7 +898,8 @@ int ipath_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) + + dd->ipath_rxfc_unsupvl_errs; cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); cntrs->port_xmit_discards = @@ -911,8 +912,10 @@ int ipath_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ + cntrs->local_link_integrity_errors = + (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs; ret = 0; @@ -1199,6 +1202,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, struct ipath_ah *ah; struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); + unsigned long flags; /* A multicast address requires a GRH (see ch. 8.4.1). */ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && @@ -1225,16 +1229,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, goto bail; } - spin_lock(&dev->n_ahs_lock); + spin_lock_irqsave(&dev->n_ahs_lock, flags); if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); kfree(ah); ret = ERR_PTR(-ENOMEM); goto bail; } dev->n_ahs_allocated++; - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; @@ -1255,10 +1259,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah) { struct ipath_ibdev *dev = to_idev(ibah->device); struct ipath_ah *ah = to_iah(ibah); + unsigned long flags; - spin_lock(&dev->n_ahs_lock); + spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; - spin_unlock(&dev->n_ahs_lock); + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); kfree(ah); @@ -1380,11 +1385,13 @@ static int enable_timer(struct ipath_devdata *dd) * processing. 
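
user_pages_account() above follows the new workqueue calling convention also used by the mthca and ipoib changes later in this patch: handlers receive the struct work_struct itself, INIT_WORK() loses its third argument, and per-job state must live in a structure that embeds the work item and is recovered with container_of(). A minimal sketch of the idiom; my_ctx, my_handler and queue_my_ctx are illustrative names, not part of the patch:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_ctx {
	struct work_struct work;	/* embedded; replaces the old void *data */
	int value;
};

static void my_handler(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, work);

	printk(KERN_INFO "handled value=%d\n", ctx->value);
	kfree(ctx);	/* the job owns its context */
}

static int queue_my_ctx(struct workqueue_struct *wq, int value)
{
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return -ENOMEM;
	ctx->value = value;
	INIT_WORK(&ctx->work, my_handler);
	queue_work(wq, &ctx->work);
	return 0;
}
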
*/ if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 0x2074076542310ULL); /* Enable GPIO bit 2 interrupt */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - (u64) (1 << 2)); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); } init_timer(&dd->verbs_timer); @@ -1399,8 +1406,17 @@ static int enable_timer(struct ipath_devdata *dd) static int disable_timer(struct ipath_devdata *dd) { /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); + if (dd->ipath_flags & IPATH_GPIO_INTR) { + u64 val; + /* Disable GPIO bit 2 interrupt */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); + val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); + ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + /* + * We might want to undo changes to debugportselect, + * but how? + */ + } del_timer_sync(&dd->verbs_timer); @@ -1471,7 +1487,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; - idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT; + idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; idev->link_width_enabled = 3; /* 1x or 4x */ /* Snapshot current HW counters to "clear" them. */ @@ -1585,7 +1601,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->mmap = ipath_mmap; snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s", system_utsname.nodename); + IPATH_IDSTR " %s", init_utsname()->nodename); ret = ib_register_device(dev); if (ret) @@ -1683,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" + "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" "PKT drops %d\n" @@ -1690,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, + dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 09bbb3f9a217..8039f6e5f0c8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -220,6 +220,7 @@ struct ipath_segarray { }; struct ipath_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ u64 user_base; /* User's address for this region */ u64 iova; /* IB start address of this region */ size_t length; @@ -364,12 +365,14 @@ struct ipath_qp { u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ + u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 
timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; @@ -393,6 +396,8 @@ struct ipath_qp { #define IPATH_S_BUSY 0 #define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_PSN_CREDIT 2048 + /* * Since struct ipath_swqe is not a fixed size, we can't simply index into * struct ipath_qp.s_wq. This function does the array index computation. @@ -521,6 +526,7 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; + u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; @@ -634,6 +640,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, int ipath_destroy_qp(struct ib_qp *ibqp); +void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); + int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); @@ -653,12 +661,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc); - -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, - struct ib_sge *sge, int acc); - void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); @@ -683,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); -int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, +int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, struct ib_sge *sge, int acc); -int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, +int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, u32 len, u64 vaddr, u32 rkey, int acc); int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c index 036fde662aa9..0095bb70f34e 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -38,13 +38,23 @@ #include "ipath_kernel.h" /** - * ipath_unordered_wc - indicate whether write combining is ordered + * ipath_enable_wc - enable write combining for MMIO writes to the device + * @dd: infinipath device * - * PowerPC systems (at least those in the 970 processor family) - * write partially filled store buffers in address order, but will write - * completely filled store buffers in "random" order, and therefore must - * have serialization for correctness with current InfiniPath chips. + * Nothing to do on PowerPC, so just return without error. + */ +int ipath_enable_wc(struct ipath_devdata *dd) +{ + return 0; +} + +/** + * ipath_unordered_wc - indicate whether write combining is unordered * + * Because our performance depends on our ability to do write + * combining mmio writes in the most efficient way, we need to + * know if we are on a processor that may reorder stores when + * write combining. 
*/ int ipath_unordered_wc(void) { diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index f8f9e2e8cbdd..04696e62da87 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " "cookie is %d\n", cookie); dd->ipath_wc_cookie = cookie; + dd->ipath_wc_base = (unsigned long) pioaddr; + dd->ipath_wc_len = (unsigned long) piolen; } } @@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd) void ipath_disable_wc(struct ipath_devdata *dd) { if (dd->ipath_wc_cookie) { + int r; ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); - mtrr_del(dd->ipath_wc_cookie, 0, 0); - dd->ipath_wc_cookie = 0; + r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len); + if (r < 0) + dev_info(&dd->pcidev->dev, + "mtrr_del(%lx, %lx, %lx) failed: %d\n", + dd->ipath_wc_cookie, dd->ipath_wc_base, + dd->ipath_wc_len, r); + dd->ipath_wc_cookie = 0; /* even on failure */ } } diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 69599455aca2..27caf3b0648a 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -33,7 +33,6 @@ * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/init.h> #include <linux/string.h> #include <linux/slab.h> @@ -190,7 +189,7 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { ah->av = pci_pool_alloc(dev->av_table.pool, - SLAB_ATOMIC, &ah->avdma); + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -323,7 +322,7 @@ int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr) return 0; } -int __devinit mthca_init_av_table(struct mthca_dev *dev) +int mthca_init_av_table(struct mthca_dev *dev) { int err; diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index cd044ea2dfa4..e948158a28d9 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -57,7 +57,7 @@ static int catas_reset_disable; module_param_named(catas_reset_disable, catas_reset_disable, int, 0644); MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); -static void catas_reset(void *work_ptr) +static void catas_reset(struct work_struct *work) { struct mthca_dev *dev, *tmpdev; LIST_HEAD(tlist); @@ -203,7 +203,7 @@ void mthca_stop_catas_poll(struct mthca_dev *dev) int __init mthca_catas_init(void) { - INIT_WORK(&catas_work, catas_reset, NULL); + INIT_WORK(&catas_work, catas_reset); catas_wq = create_singlethread_workqueue("mthca_catas"); if (!catas_wq) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 99a94d710935..768df7265b81 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1820,11 +1820,11 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, #define MAD_IFC_BOX_SIZE 0x400 #define MAD_IFC_MY_QPN_OFFSET 0x100 -#define MAD_IFC_RQPN_OFFSET 0x104 -#define MAD_IFC_SL_OFFSET 0x108 -#define MAD_IFC_G_PATH_OFFSET 0x109 -#define MAD_IFC_RLID_OFFSET 0x10a -#define MAD_IFC_PKEY_OFFSET 0x10e +#define MAD_IFC_RQPN_OFFSET 0x108 +#define MAD_IFC_SL_OFFSET 0x10c +#define MAD_IFC_G_PATH_OFFSET 0x10d +#define MAD_IFC_RLID_OFFSET 0x10e +#define MAD_IFC_PKEY_OFFSET 0x112 #define MAD_IFC_GRH_OFFSET 
0x140 inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); @@ -1862,7 +1862,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, val = in_wc->dlid_path_bits | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); - MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); @@ -1870,7 +1870,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, if (in_grh) memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); - op_modifier |= 0x10; + op_modifier |= 0x4; in_modifier |= in_wc->slid << 16; } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index e393681ba7d4..283d50b76c3d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -36,9 +36,10 @@ * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $ */ -#include <linux/init.h> #include <linux/hardirq.h> +#include <asm/io.h> + #include <rdma/ib_pack.h> #include "mthca_dev.h" @@ -210,6 +211,11 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, mthca_write64(doorbell, dev->kar + MTHCA_CQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + /* + * Make sure doorbells don't leak out of CQ spinlock + * and reach the HCA out of order: + */ + mmiowb(); } } @@ -963,7 +969,7 @@ void mthca_free_cq(struct mthca_dev *dev, mthca_free_mailbox(dev, mailbox); } -int __devinit mthca_init_cq_table(struct mthca_dev *dev) +int mthca_init_cq_table(struct mthca_dev *dev) { int err; diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index a29b1b6d82b1..8ec9fa1ff9ea 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -33,7 +33,6 @@ * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $ */ -#include <linux/init.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/pci.h> @@ -405,7 +404,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) return eqes_found; } -static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs) +static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr) { struct mthca_dev *dev = dev_ptr; u32 ecr; @@ -432,8 +431,7 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs return IRQ_HANDLED; } -static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, - struct pt_regs *regs) +static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr) { struct mthca_eq *eq = eq_ptr; struct mthca_dev *dev = eq->dev; @@ -446,7 +444,7 @@ static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, return IRQ_HANDLED; } -static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs) +static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr) { struct mthca_dev *dev = dev_ptr; int work = 0; @@ -467,8 +465,7 @@ static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs return IRQ_RETVAL(work); } -static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, - struct pt_regs *regs) +static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr) { struct mthca_eq *eq = eq_ptr; struct mthca_dev *dev = eq->dev; @@ -481,10 +478,10 @@ static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, return IRQ_HANDLED; } -static int __devinit mthca_create_eq(struct mthca_dev *dev, - int nent, - u8 intr, - 
struct mthca_eq *eq) +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq) { int npages; u64 *dma_list = NULL; @@ -666,9 +663,9 @@ static void mthca_free_irqs(struct mthca_dev *dev) dev->eq_table.eq + i); } -static int __devinit mthca_map_reg(struct mthca_dev *dev, - unsigned long offset, unsigned long size, - void __iomem **map) +static int mthca_map_reg(struct mthca_dev *dev, + unsigned long offset, unsigned long size, + void __iomem **map) { unsigned long base = pci_resource_start(dev->pdev, 0); @@ -693,7 +690,7 @@ static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset, iounmap(map); } -static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) +static int mthca_map_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { /* @@ -783,7 +780,7 @@ static void mthca_unmap_eq_regs(struct mthca_dev *dev) } } -int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) { int ret; u8 status; @@ -827,7 +824,7 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev) __free_page(dev->eq_table.icm_page); } -int __devinit mthca_init_eq_table(struct mthca_dev *dev) +int mthca_init_eq_table(struct mthca_dev *dev) { int err; u8 status; diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 45e106f14807..acfa41d968ee 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -317,7 +317,7 @@ err: return ret; } -void __devexit mthca_free_agents(struct mthca_dev *dev) +void mthca_free_agents(struct mthca_dev *dev) { struct ib_mad_agent *agent; int p, q; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 47ea02148368..0491ec7a7c0a 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -98,7 +98,7 @@ static struct mthca_profile default_profile = { .uarc_size = 1 << 18, /* Arbel only */ }; -static int __devinit mthca_tune_pci(struct mthca_dev *mdev) +static int mthca_tune_pci(struct mthca_dev *mdev) { int cap; u16 val; @@ -143,7 +143,7 @@ static int __devinit mthca_tune_pci(struct mthca_dev *mdev) return 0; } -static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) +static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) { int err; u8 status; @@ -255,7 +255,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim return 0; } -static int __devinit mthca_init_tavor(struct mthca_dev *mdev) +static int mthca_init_tavor(struct mthca_dev *mdev) { u8 status; int err; @@ -333,7 +333,7 @@ err_disable: return err; } -static int __devinit mthca_load_fw(struct mthca_dev *mdev) +static int mthca_load_fw(struct mthca_dev *mdev) { u8 status; int err; @@ -379,10 +379,10 @@ err_free: return err; } -static int __devinit mthca_init_icm(struct mthca_dev *mdev, - struct mthca_dev_lim *dev_lim, - struct mthca_init_hca_param *init_hca, - u64 icm_size) +static int mthca_init_icm(struct mthca_dev *mdev, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca, + u64 icm_size) { u64 aux_pages; u8 status; @@ -575,7 +575,7 @@ static void mthca_free_icms(struct mthca_dev *mdev) mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); } -static int __devinit mthca_init_arbel(struct mthca_dev *mdev) +static int mthca_init_arbel(struct mthca_dev *mdev) { struct mthca_dev_lim dev_lim; struct mthca_profile profile; @@ -683,7 +683,7 @@ 
static void mthca_close_hca(struct mthca_dev *mdev) mthca_SYS_DIS(mdev, &status); } -static int __devinit mthca_init_hca(struct mthca_dev *mdev) +static int mthca_init_hca(struct mthca_dev *mdev) { u8 status; int err; @@ -720,7 +720,7 @@ err_close: return err; } -static int __devinit mthca_setup_hca(struct mthca_dev *dev) +static int mthca_setup_hca(struct mthca_dev *dev) { int err; u8 status; @@ -875,8 +875,7 @@ err_uar_table_free: return err; } -static int __devinit mthca_request_regions(struct pci_dev *pdev, - int ddr_hidden) +static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden) { int err; @@ -928,7 +927,7 @@ static void mthca_release_regions(struct pci_dev *pdev, MTHCA_HCR_SIZE); } -static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev) +static int mthca_enable_msi_x(struct mthca_dev *mdev) { struct msix_entry entries[3]; int err; @@ -1213,7 +1212,7 @@ int __mthca_restart_one(struct pci_dev *pdev) } static int __devinit mthca_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) + const struct pci_device_id *id) { static int mthca_version_printed = 0; int ret; diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 47ca8a9b7247..a8ad072be074 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -32,7 +32,6 @@ * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/init.h> #include <linux/string.h> #include <linux/slab.h> @@ -371,7 +370,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return err; } -int __devinit mthca_init_mcg_table(struct mthca_dev *dev) +int mthca_init_mcg_table(struct mthca_dev *dev) { int err; int table_size = dev->limits.num_mgms + dev->limits.num_amgms; diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index a486dec1707e..f71ffa88db3a 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -34,7 +34,6 @@ */ #include <linux/slab.h> -#include <linux/init.h> #include <linux/errno.h> #include "mthca_dev.h" @@ -135,7 +134,7 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) spin_unlock(&buddy->lock); } -static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order) +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) { int i, s; @@ -759,7 +758,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; } -int __devinit mthca_init_mr_table(struct mthca_dev *dev) +int mthca_init_mr_table(struct mthca_dev *dev) { unsigned long addr; int err, i; diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index 59df51614c85..c1e950764bd8 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -34,7 +34,6 @@ * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/init.h> #include <linux/errno.h> #include "mthca_dev.h" @@ -69,7 +68,7 @@ void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) mthca_free(&dev->pd_table.alloc, pd->pd_num); } -int __devinit mthca_init_pd_table(struct mthca_dev *dev) +int mthca_init_pd_table(struct mthca_dev *dev) { return mthca_alloc_init(&dev->pd_table.alloc, dev->limits.num_pds, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 981fe2eebdfa..7ec7c4b937f9 100644 --- 
a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -124,7 +124,7 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_map_per_fmr = 255; else props->max_map_per_fmr = - (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1; + (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; err = 0; out: @@ -179,6 +179,8 @@ static int mthca_query_port(struct ib_device *ibdev, props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; out: kfree(in_mad); @@ -814,7 +816,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda lkey = ucmd.lkey; } - ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status); + ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status); if (status) ret = -EINVAL; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 5e5c58b9920b..d844a2569b47 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -35,10 +35,11 @@ * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $ */ -#include <linux/init.h> #include <linux/string.h> #include <linux/slab.h> +#include <asm/io.h> + #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> @@ -635,11 +636,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (mthca_is_memfree(dev)) { if (qp->rq.max) - qp_context->rq_size_stride = long_log2(qp->rq.max) << 3; + qp_context->rq_size_stride = ilog2(qp->rq.max) << 3; qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; if (qp->sq.max) - qp_context->sq_size_stride = long_log2(qp->sq.max) << 3; + qp_context->sq_size_stride = ilog2(qp->sq.max) << 3; qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; } @@ -1732,6 +1733,11 @@ out: mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + /* + * Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order: + */ + mmiowb(); } qp->sq.next_ind = ind; @@ -1851,6 +1857,12 @@ out: qp->rq.next_ind = ind; qp->rq.head += nreq; + /* + * Make sure doorbells don't leak out of RQ spinlock and reach + * the HCA out of order: + */ + mmiowb(); + spin_unlock_irqrestore(&qp->rq.lock, flags); return err; } @@ -2112,6 +2124,12 @@ out: MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } + /* + * Make sure doorbells don't leak out of SQ spinlock and reach + * the HCA out of order: + */ + mmiowb(); + spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } @@ -2222,7 +2240,7 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, *new_wqe = 0; } -int __devinit mthca_init_qp_table(struct mthca_dev *dev) +int mthca_init_qp_table(struct mthca_dev *dev) { int err; u8 status; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 0f316c87bf64..10684da33d58 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++
b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -35,6 +35,8 @@ #include <linux/slab.h> #include <linux/string.h> +#include <asm/io.h> + #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" @@ -118,7 +120,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, memset(context, 0, sizeof *context); - logsize = long_log2(srq->max) + srq->wqe_shift; + logsize = ilog2(srq->max); context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); @@ -201,6 +203,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); + else + srq->max = srq->max + 1; ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + @@ -209,7 +213,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz)) return -EINVAL; - srq->wqe_shift = long_log2(ds); + srq->wqe_shift = ilog2(ds); srq->srqn = mthca_alloc(&dev->srq_table.alloc); if (srq->srqn == -1) @@ -277,7 +281,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->first_free = 0; srq->last_free = srq->max - 1; - attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + attr->max_wr = srq->max - 1; attr->max_sge = srq->max_gs; return 0; @@ -413,7 +417,7 @@ int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark); } - srq_attr->max_wr = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max; + srq_attr->max_wr = srq->max - 1; srq_attr->max_sge = srq->max_gs; out: @@ -593,6 +597,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } + /* + * Make sure doorbells don't leak out of SRQ spinlock and + * reach the HCA out of order: + */ + mmiowb(); + spin_unlock_irqrestore(&srq->lock, flags); return err; } @@ -705,7 +715,7 @@ int mthca_max_srq_sge(struct mthca_dev *dev) sizeof (struct mthca_data_seg)); } -int __devinit mthca_init_srq_table(struct mthca_dev *dev) +int mthca_init_srq_table(struct mthca_dev *dev) { int err; diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index d74653d7de1c..c75322d820d4 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -26,7 +26,7 @@ config INFINIBAND_IPOIB_DEBUG_DATA bool "IP-over-InfiniBand data path debugging" depends on INFINIBAND_IPOIB_DEBUG ---help--- - This option compiles debugging code into the the data path + This option compiles debugging code into the data path of the IPoIB driver. 
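
The mmiowb() calls added to the mthca post-send, post-receive and SRQ paths above (and to the CQ consumer-index update earlier) all enforce one rule: a doorbell written under a spinlock must reach the HCA before a doorbell written by the next CPU to take that lock, which spin_unlock() alone does not guarantee on platforms with write-combining I/O bridges (the case mmiowb() exists for, e.g. SGI Altix). A condensed fragment of the pattern; the identifiers stand in for the real mthca state and the surrounding work-request code is elided:

	spin_lock_irqsave(&qp->sq.lock, flags);

	/* ... post work requests, build the doorbell record ... */
	mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));

	/*
	 * Order the doorbell ahead of the unlock; otherwise another CPU
	 * can acquire the lock and get its doorbell to the HCA first.
	 */
	mmiowb();
	spin_unlock_irqrestore(&qp->sq.lock, flags);
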
The output can be turned on via the data_debug_level module parameter; however, even with output turned off, this debugging code will have some performance diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0b8a79d53a00..99547996aba2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -136,11 +136,11 @@ struct ipoib_dev_priv { struct list_head multicast_list; struct rb_root multicast_tree; - struct work_struct pkey_task; - struct work_struct mcast_task; + struct delayed_work pkey_task; + struct delayed_work mcast_task; struct work_struct flush_task; struct work_struct restart_task; - struct work_struct ah_reap_task; + struct delayed_work ah_reap_task; struct ib_device *ca; u8 port; @@ -233,7 +233,7 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) } struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); -void ipoib_neigh_free(struct ipoib_neigh *neigh); +void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); extern struct workqueue_struct *ipoib_workqueue; @@ -254,13 +254,13 @@ int ipoib_add_pkey_attr(struct net_device *dev); void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); -void ipoib_reap_ah(void *dev_ptr); +void ipoib_reap_ah(struct work_struct *work); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(void *dev); +void ipoib_ib_dev_flush(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); @@ -271,10 +271,10 @@ int ipoib_ib_dev_stop(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); -void ipoib_mcast_join_task(void *dev_ptr); +void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); -void ipoib_mcast_restart_task(void *dev_ptr); +void ipoib_mcast_restart_task(struct work_struct *work); int ipoib_mcast_start_thread(struct net_device *dev); int ipoib_mcast_stop_thread(struct net_device *dev, int flush); @@ -312,7 +312,7 @@ void ipoib_event(struct ib_event_handler *handler, int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); -void ipoib_pkey_poll(void *dev); +void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 5dde380e8dbe..f1cb83688b31 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -141,7 +141,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } @@ -247,7 +247,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f426a69d9a43..f10fba5d3265 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ 
b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -355,6 +355,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, tx_req->skb = skb; addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(addr))) { + ++priv->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } pci_unmap_addr_set(tx_req, mapping, addr); if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), @@ -395,10 +400,11 @@ static void __ipoib_reap_ah(struct net_device *dev) spin_unlock_irq(&priv->tx_lock); } -void ipoib_reap_ah(void *dev_ptr) +void ipoib_reap_ah(struct work_struct *work) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, ah_reap_task.work); + struct net_device *dev = priv->dev; __ipoib_reap_ah(dev); @@ -608,10 +614,11 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -void ipoib_ib_dev_flush(void *_dev) +void ipoib_ib_dev_flush(struct work_struct *work) { - struct net_device *dev = (struct net_device *)_dev; - struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv; + struct ipoib_dev_priv *cpriv, *priv = + container_of(work, struct ipoib_dev_priv, flush_task); + struct net_device *dev = priv->dev; if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) { ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); @@ -633,14 +640,14 @@ void ipoib_ib_dev_flush(void *_dev) */ if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { ipoib_ib_dev_up(dev); - ipoib_mcast_restart_task(dev); + ipoib_mcast_restart_task(&priv->restart_task); } mutex_lock(&priv->vlan_mutex); /* Flush any child interfaces too */ list_for_each_entry(cpriv, &priv->child_intfs, list) - ipoib_ib_dev_flush(cpriv->dev); + ipoib_ib_dev_flush(&cpriv->flush_task); mutex_unlock(&priv->vlan_mutex); } @@ -667,10 +674,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * change async notification is available. 
*/ -void ipoib_pkey_poll(void *dev_ptr) +void ipoib_pkey_poll(struct work_struct *work) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, pkey_task.work); + struct net_device *dev = priv->dev; ipoib_pkey_dev_check_presence(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 1eaf00e9862c..c09280243726 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,6 +49,8 @@ #include <net/dst.h> +#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -262,7 +264,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) if (neigh->ah) ipoib_put_ah(neigh->ah); - ipoib_neigh_free(neigh); + ipoib_neigh_free(dev, neigh); } spin_unlock_irqrestore(&priv->lock, flags); @@ -520,14 +522,14 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, sizeof(union ib_gid)); - ipoib_send(dev, skb, path->ah, - be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); } else { neigh->ah = NULL; - __skb_queue_tail(&neigh->queue, skb); if (!path->query && path_rec_start(dev, path)) goto err_list; + + __skb_queue_tail(&neigh->queue, skb); } spin_unlock(&priv->lock); @@ -537,7 +539,7 @@ err_list: list_del(&neigh->list); err_path: - ipoib_neigh_free(neigh); + ipoib_neigh_free(dev, neigh); ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -599,8 +601,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, ipoib_dbg(priv, "Send unicast ARP to %04x\n", be16_to_cpu(path->pathrec.dlid)); - ipoib_send(dev, skb, path->ah, - be32_to_cpup((__be32 *) phdr->hwaddr)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { /* put pseudoheader back on for next time */ @@ -655,14 +656,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) */ ipoib_put_ah(neigh->ah); list_del(&neigh->list); - ipoib_neigh_free(neigh); + ipoib_neigh_free(dev, neigh); spin_unlock(&priv->lock); ipoib_path_lookup(skb, dev); goto out; } - ipoib_send(dev, skb, neigh->ah, - be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); goto out; } @@ -694,7 +694,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) IPOIB_GID_FMT "\n", skb->dst ? 
"neigh" : "dst", be16_to_cpup((__be16 *) skb->data), - be32_to_cpup((__be32 *) phdr->hwaddr), + IPOIB_QPN(phdr->hwaddr), IPOIB_GID_RAW_ARG(phdr->hwaddr + 4)); dev_kfree_skb_any(skb); ++priv->stats.tx_dropped; @@ -777,7 +777,7 @@ static void ipoib_neigh_destructor(struct neighbour *n) ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n", - be32_to_cpup((__be32 *) n->ha), + IPOIB_QPN(n->ha), IPOIB_GID_RAW_ARG(n->ha + 4)); spin_lock_irqsave(&priv->lock, flags); @@ -787,7 +787,7 @@ static void ipoib_neigh_destructor(struct neighbour *n) if (neigh->ah) ah = neigh->ah; list_del(&neigh->list); - ipoib_neigh_free(neigh); + ipoib_neigh_free(n->dev, neigh); } spin_unlock_irqrestore(&priv->lock, flags); @@ -810,9 +810,15 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) return neigh; } -void ipoib_neigh_free(struct ipoib_neigh *neigh) +void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) { + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; *to_ipoib_neigh(neigh->neighbour) = NULL; + while ((skb = __skb_dequeue(&neigh->queue))) { + ++priv->stats.tx_dropped; + dev_kfree_skb_any(skb); + } kfree(neigh); } @@ -934,11 +940,11 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); - INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev); - INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev); - INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev); - INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev); - INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev); + INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); + INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); + INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); + INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); + INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); } struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3faa1820f0e9..b04b72ca32ed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) */ if (neigh->ah) ipoib_put_ah(neigh->ah); - ipoib_neigh_free(neigh); + ipoib_neigh_free(dev, neigh); } spin_unlock_irqrestore(&priv->lock, flags); @@ -399,7 +399,8 @@ static void ipoib_mcast_join_complete(int status, mcast->backoff = 1; mutex_lock(&mcast_mutex); if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) - queue_work(ipoib_workqueue, &priv->mcast_task); + queue_delayed_work(ipoib_workqueue, + &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); complete(&mcast->done); return; @@ -435,7 +436,8 @@ static void ipoib_mcast_join_complete(int status, if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { if (status == -ETIMEDOUT) - queue_work(ipoib_workqueue, &priv->mcast_task); + queue_delayed_work(ipoib_workqueue, &priv->mcast_task, + 0); else queue_delayed_work(ipoib_workqueue, &priv->mcast_task, mcast->backoff * HZ); @@ -517,10 +519,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, mcast->query_id = ret; } -void ipoib_mcast_join_task(void *dev_ptr) +void ipoib_mcast_join_task(struct work_struct *work) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, mcast_task.work); + 
struct net_device *dev = priv->dev; if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) return; @@ -610,7 +613,7 @@ int ipoib_mcast_start_thread(struct net_device *dev) mutex_lock(&mcast_mutex); if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) - queue_work(ipoib_workqueue, &priv->mcast_task); + queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); spin_lock_irq(&priv->lock); @@ -818,10 +821,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev) } } -void ipoib_mcast_restart_task(void *dev_ptr) +void ipoib_mcast_restart_task(struct work_struct *work) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, restart_task); + struct net_device *dev = priv->dev; struct dev_mc_list *mclist; struct ipoib_mcast *mcast, *tmcast; LIST_HEAD(remove_list); diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index 365a1b5f19e0..aecbb9083f0c 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,11 +1,12 @@ config INFINIBAND_ISER - tristate "ISCSI RDMA Protocol" + tristate "iSCSI Extensions for RDMA (iSER)" depends on INFINIBAND && SCSI && INET select SCSI_ISCSI_ATTRS ---help--- - Support for the ISCSI RDMA Protocol over InfiniBand. This - allows you to access storage devices that speak ISER/ISCSI - over InfiniBand. + Support for the iSCSI Extensions for RDMA (iSER) Protocol + over InfiniBand. This allows you to access storage devices + that speak iSCSI over iSER over InfiniBand. - The ISER protocol is defined by IETF. - See <http://www.ietf.org/>. + The iSER protocol is defined by IETF. + See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt> + and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2a14fe2e3226..9b2041e25d59 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -317,6 +317,8 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) struct iscsi_iser_conn *iser_conn = conn->dd_data; iscsi_conn_teardown(cls_conn); + if (iser_conn->ib_conn) + iser_conn->ib_conn->iser_conn = NULL; kfree(iser_conn); } @@ -361,11 +363,11 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) struct iscsi_conn *conn = cls_conn->dd_data; int err; - err = iscsi_conn_start(cls_conn); + err = iser_conn_set_full_featured_mode(conn); if (err) return err; - return iser_conn_set_full_featured_mode(conn); + return iscsi_conn_start(cls_conn); } static struct iscsi_transport iscsi_iser_transport; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2cf9ae0def1c..234e5b061a75 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -192,7 +192,7 @@ struct iser_regd_buf { struct iser_dto { struct iscsi_iser_cmd_task *ctask; - struct iscsi_iser_conn *conn; + struct iser_conn *ib_conn; int notify_enable; /* vector of registered buffers */ @@ -283,7 +283,7 @@ struct iser_global { struct mutex connlist_mutex; struct list_head connlist; /* all iSER IB connections */ - kmem_cache_t *desc_cache; + struct kmem_cache *desc_cache; }; extern struct iser_global ig; @@ -355,4 +355,11 @@ int iser_post_send(struct iser_desc *tx_desc); int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); + +int 
iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ccf56f6f7236..9b3d79c796c8 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -66,42 +66,6 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto, dto->regd_vector_len++; } -static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) -{ - struct device *dma_device; - - iser_ctask->dir[iser_dir] = 1; - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); - if (data->dma_nents == 0) { - iser_err("dma_map_sg failed!!!\n"); - return -EINVAL; - } - return 0; -} - -static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) -{ - struct device *dma_device; - struct iser_data_buf *data; - - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); - } - - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); - } -} - /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in * iser_ctask->data[ISER_DIR_IN].data_len @@ -249,7 +213,7 @@ static int iser_post_receive_control(struct iscsi_conn *conn) } recv_dto = &rx_desc->dto; - recv_dto->conn = iser_conn; + recv_dto->ib_conn = iser_conn->ib_conn; recv_dto->regd_vector_len = 0; regd_hdr = &rx_desc->hdr_regd_buf; @@ -296,7 +260,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - send_dto->conn = iser_conn; + send_dto->ib_conn = iser_conn->ib_conn; send_dto->notify_enable = 1; send_dto->regd_vector_len = 0; @@ -588,7 +552,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, unsigned long dto_xfer_len) { struct iser_dto *dto = &rx_desc->dto; - struct iscsi_iser_conn *conn = dto->conn; + struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; struct iscsi_session *session = conn->iscsi_conn->session; struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; @@ -641,7 +605,8 @@ void iser_rcv_completion(struct iser_desc *rx_desc, void iser_snd_completion(struct iser_desc *tx_desc) { struct iser_dto *dto = &tx_desc->dto; - struct iscsi_iser_conn *iser_conn = dto->conn; + struct iser_conn *ib_conn = dto->ib_conn; + struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; @@ -652,7 +617,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); - atomic_dec(&iser_conn->ib_conn->post_send_buf_count); + atomic_dec(&ib_conn->post_send_buf_count); write_lock(conn->recv_lock); if (conn->suspend_tx) { @@ -698,14 +663,19 @@ void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) void iser_ctask_rdma_finalize(struct 
iscsi_iser_cmd_task *iser_ctask) { int deferred; + int is_rdma_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) + if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) + } + if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + } if (iser_ctask->dir[ISER_DIR_IN]) { deferred = iser_regd_buff_release @@ -725,7 +695,9 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) } } - iser_dma_unmap_task_data(iser_ctask); + /* if the data was unaligned, it was already unmapped and then copied */ + if (is_rdma_aligned) + iser_dma_unmap_task_data(iser_ctask); } void iser_dto_buffs_release(struct iser_dto *dto) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d0b03f426581..3aedd59b8a84 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -35,6 +35,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/highmem.h> #include <asm/io.h> #include <asm/scatterlist.h> #include <linux/scatterlist.h> @@ -113,7 +114,7 @@ int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, if (cmd_data_len > ISER_KMALLOC_THRESHOLD) mem = (void *)__get_free_pages(GFP_NOIO, - long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else mem = kmalloc(cmd_data_len, GFP_NOIO); @@ -210,7 +211,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, if (cmd_data_len > ISER_KMALLOC_THRESHOLD) free_pages((unsigned long)mem_copy->copy_buf, - long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else kfree(mem_copy->copy_buf); @@ -369,6 +370,44 @@ static void iser_page_vec_build(struct iser_data_buf *data, } } +int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) +{ + struct device *dma_device; + + iser_ctask->dir[iser_dir] = 1; + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); + if (data->dma_nents == 0) { + iser_err("dma_map_sg failed!!!\n"); + return -EINVAL; + } + return 0; +} + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +{ + struct device *dma_device; + struct iser_data_buf *data; + + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + if (iser_ctask->dir[ISER_DIR_IN]) { + data = &iser_ctask->data[ISER_DIR_IN]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); + } + + if (iser_ctask->dir[ISER_DIR_OUT]) { + data = &iser_ctask->data[ISER_DIR_OUT]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); + } +} + /** * iser_reg_rdma_mem - Registers memory intended for RDMA, * obtaining rkey and va @@ -394,6 +433,10 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem); + + /* unmap the command data before accessing it */ + 
iser_dma_unmap_task_data(iser_ctask); + /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ecdca7fc1e4c..693b77002897 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -48,7 +48,7 @@ static void iser_cq_tasklet_fn(unsigned long data); static void iser_cq_callback(struct ib_cq *cq, void *cq_context); -static void iser_comp_error_worker(void *data); +static void iser_comp_error_worker(struct work_struct *work); static void iser_cq_event_callback(struct ib_event *cause, void *context) { @@ -480,8 +480,7 @@ int iser_conn_init(struct iser_conn **ibconn) init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); - INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker, - ib_conn); + INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); @@ -571,6 +570,8 @@ void iser_conn_release(struct iser_conn *ib_conn) /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; kfree(ib_conn); } @@ -694,7 +695,7 @@ int iser_post_recv(struct iser_desc *rx_desc) struct iser_dto *recv_dto = &rx_desc->dto; /* Retrieve conn */ - ib_conn = recv_dto->conn->ib_conn; + ib_conn = recv_dto->ib_conn; iser_dto_to_iov(recv_dto, iov, 2); @@ -727,7 +728,7 @@ int iser_post_send(struct iser_desc *tx_desc) struct iser_conn *ib_conn; struct iser_dto *dto = &tx_desc->dto; - ib_conn = dto->conn->ib_conn; + ib_conn = dto->ib_conn; iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); @@ -752,9 +753,10 @@ int iser_post_send(struct iser_desc *tx_desc) return ret_val; } -static void iser_comp_error_worker(void *data) +static void iser_comp_error_worker(struct work_struct *work) { - struct iser_conn *ib_conn = data; + struct iser_conn *ib_conn = + container_of(work, struct iser_conn, comperror_work); /* getting here when the state is UP means that the conn is being * * terminated asynchronously from the iSCSI layer's perspective. 
*/ @@ -774,7 +776,7 @@ static void iser_comp_error_worker(void *data) static void iser_handle_comp_error(struct iser_desc *desc) { struct iser_dto *dto = &desc->dto; - struct iser_conn *ib_conn = dto->conn->ib_conn; + struct iser_conn *ib_conn = dto->ib_conn; iser_dto_buffs_release(dto); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 44b9e5be6687..a6289595557b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -343,29 +343,32 @@ static int srp_send_req(struct srp_target_port *target) */ if (target->io_class == SRP_REV10_IB_IO_CLASS) { memcpy(req->priv.initiator_port_id, - target->srp_host->initiator_port_id + 8, 8); + &target->path.sgid.global.interface_id, 8); memcpy(req->priv.initiator_port_id + 8, - target->srp_host->initiator_port_id, 8); + &target->initiator_ext, 8); memcpy(req->priv.target_port_id, &target->ioc_guid, 8); memcpy(req->priv.target_port_id + 8, &target->id_ext, 8); } else { memcpy(req->priv.initiator_port_id, - target->srp_host->initiator_port_id, 16); + &target->initiator_ext, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->path.sgid.global.interface_id, 8); memcpy(req->priv.target_port_id, &target->id_ext, 8); memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); } /* * Topspin/Cisco SRP targets will reject our login unless we - * zero out the first 8 bytes of our initiator port ID. The - * second 8 bytes must be our local node GUID, but we always - * use that anyway. + * zero out the first 8 bytes of our initiator port ID and set + * the second 8 bytes to the local node GUID. */ if (topspin_workarounds && !memcmp(&target->ioc_guid, topspin_oui, 3)) { printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " "activated for target GUID %016llx\n", (unsigned long long) be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->srp_host->dev->dev->node_guid, 8); } status = ib_send_cm_req(target->cm_id, &req->param); @@ -387,9 +390,10 @@ static void srp_disconnect_target(struct srp_target_port *target) wait_for_completion(&target->done); } -static void srp_remove_work(void *target_ptr) +static void srp_remove_work(struct work_struct *work) { - struct srp_target_port *target = target_ptr; + struct srp_target_port *target = + container_of(work, struct srp_target_port, work); spin_lock_irq(target->scsi_host->host_lock); if (target->state != SRP_TARGET_DEAD) { @@ -572,7 +576,7 @@ err: spin_lock_irq(target->scsi_host->host_lock); if (target->state == SRP_TARGET_CONNECTING) { target->state = SRP_TARGET_DEAD; - INIT_WORK(&target->work, srp_remove_work, target); + INIT_WORK(&target->work, srp_remove_work); schedule_work(&target->work); } spin_unlock_irq(target->scsi_host->host_lock); @@ -1173,9 +1177,11 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) break; } - target->status = srp_alloc_iu_bufs(target); - if (target->status) - break; + if (!target->rx_ring[0]) { + target->status = srp_alloc_iu_bufs(target); + if (target->status) + break; + } qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); if (!qp_attr) { @@ -1553,6 +1559,7 @@ enum { SRP_OPT_MAX_SECT = 1 << 5, SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, SRP_OPT_IO_CLASS = 1 << 7, + SRP_OPT_INITIATOR_EXT = 1 << 8, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -1569,6 +1576,7 @@ static match_table_t srp_opt_tokens = { { SRP_OPT_MAX_SECT, "max_sect=%d" }, { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, { 
SRP_OPT_IO_CLASS, "io_class=%x" }, + { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, { SRP_OPT_ERR, NULL } }; @@ -1668,6 +1676,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) target->io_class = token; break; + case SRP_OPT_INITIATOR_EXT: + p = match_strdup(args); + target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + default: printk(KERN_WARNING PFX "unknown parameter or missing value " "'%s' in target creation request\n", p); @@ -1705,10 +1719,10 @@ static ssize_t srp_create_target(struct class_device *class_dev, if (!target_host) return -ENOMEM; - target_host->max_lun = SRP_MAX_LUN; + target_host->max_lun = SRP_MAX_LUN; + target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); - memset(target, 0, sizeof *target); target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; @@ -1815,9 +1829,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) host->dev = device; host->port = port; - host->initiator_port_id[7] = port; - memcpy(host->initiator_port_id + 8, &device->dev->node_guid, 8); - host->class_dev.class = &srp_class; host->class_dev.dev = device->dev->dma_device; snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d", diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 5b581fb8eb0d..d4e35ef51374 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -91,7 +91,6 @@ struct srp_device { }; struct srp_host { - u8 initiator_port_id[16]; struct srp_device *dev; u8 port; struct class_device class_dev; @@ -122,6 +121,7 @@ struct srp_target_port { __be64 id_ext; __be64 ioc_guid; __be64 service_id; + __be64 initiator_ext; u16 io_class; struct srp_host *srp_host; struct Scsi_Host *scsi_host;
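Editorial note on the recurring pattern above: most of the IPoIB and iSER hunks are mechanical conversions to the reworked kernel workqueue API, where handlers take a struct work_struct pointer instead of a void *, periodic tasks become struct delayed_work, and the owning object is recovered with container_of(). A minimal sketch of the resulting shape, using hypothetical names (foo_priv, foo_task, foo_task_fn) rather than anything from the patch:

#include <linux/workqueue.h>
#include <linux/netdevice.h>

struct foo_priv {
	struct net_device *dev;
	struct delayed_work foo_task;	/* was a plain work_struct */
};

static void foo_task_fn(struct work_struct *work)
{
	/* the handler gets only the work_struct; recover the container --
	 * for delayed_work the embedded work_struct is the .work member */
	struct foo_priv *priv =
		container_of(work, struct foo_priv, foo_task.work);

	/* ... operate on priv->dev here ... */
}

static void foo_start(struct foo_priv *priv, struct workqueue_struct *wq)
{
	/* INIT_*WORK() no longer takes a data argument */
	INIT_DELAYED_WORK(&priv->foo_task, foo_task_fn);
	/* a zero delay replaces the old queue_work() call */
	queue_delayed_work(wq, &priv->foo_task, 0);
}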
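The ipoib_send() hunk also adds a check that the DMA mapping succeeded before the address is posted to the hardware, dropping the packet and counting a tx error otherwise. A sketch of that pattern with a hypothetical helper name; the single-argument dma_mapping_error() matches this kernel generation, whereas current kernels also pass the device:

#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

/* hypothetical helper mirroring the error path ipoib_send() gains */
static int foo_map_tx(struct device *dma_dev, struct sk_buff *skb,
		      dma_addr_t *addr)
{
	*addr = dma_map_single(dma_dev, skb->data, skb->len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(*addr))) {
		/* drop the packet instead of posting a bogus address */
		dev_kfree_skb_any(skb);
		return -EIO;
	}
	return 0;
}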
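The new IPOIB_QPN() macro replaces the open-coded be32_to_cpup() calls: an IPoIB hardware address carries the 24-bit queue pair number in the low bits of its first four bytes, with the 16-byte port GID following (hence the hwaddr + 4 in the GID debug arguments). A worked example under that layout:

#include <linux/types.h>
#include <asm/byteorder.h>

/* e.g. an address starting 0x00 0x0a 0x0b 0x0c gives
 * be32_to_cpup() == 0x000a0b0c, and the mask keeps QPN 0x0a0b0c;
 * the flags in the top byte are discarded */
static u32 foo_qpn(const u8 *ha)
{
	return be32_to_cpup((const __be32 *) ha) & 0xffffff;
}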
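On the SRP side, the new initiator_ext= option is parsed from hex with simple_strtoull() and stored big-endian in the target. As shown, the hunk does not check match_strdup() for allocation failure; a slightly defensive variant (hypothetical helper name) might look like:

#include <linux/kernel.h>
#include <linux/parser.h>
#include <linux/slab.h>

/* hypothetical stand-alone version of the SRP_OPT_INITIATOR_EXT case */
static int foo_parse_initiator_ext(substring_t *args, __be64 *out)
{
	char *p = match_strdup(args);

	if (!p)
		return -ENOMEM;
	*out = cpu_to_be64(simple_strtoull(p, NULL, 16));
	kfree(p);
	return 0;
}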