www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Manual merge of for-linus to upstream (fix conflicts in drivers/infiniband/core/ucm.c)
author Roland Dreier <rolandd@cisco.com>
Mon, 24 Oct 2005 17:55:29 +0000 (10:55 -0700)
committer Roland Dreier <rolandd@cisco.com>
Mon, 24 Oct 2005 17:55:29 +0000 (10:55 -0700)
32 files changed:
drivers/infiniband/core/cm.c
drivers/infiniband/core/cm_msgs.h
drivers/infiniband/core/device.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/ucm.h [deleted file]
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_mcg.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/mthca/mthca_memfree.h
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/hw/mthca/mthca_user.h
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
include/rdma/ib_cm.h
include/rdma/ib_user_cm.h
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h

index 151ef83cc14f32bcddede4f9708ebc3d42eb5ee8..3fe6f4754fa8fe81adda9ab85b477ed0bb071816 100644 (file)
@@ -366,9 +366,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
-                   (service_mask & cur_cm_id_priv->id.service_id))
-                       return cm_id_priv;
-               if (service_id < cur_cm_id_priv->id.service_id)
+                   (service_mask & cur_cm_id_priv->id.service_id) &&
+                   (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+                       return cur_cm_id_priv;
+
+               if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+                       link = &(*link)->rb_left;
+               else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+                       link = &(*link)->rb_right;
+               else if (service_id < cur_cm_id_priv->id.service_id)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
@@ -378,7 +384,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
        return NULL;
 }
 
-static struct cm_id_private * cm_find_listen(__be64 service_id)
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+                                            __be64 service_id)
 {
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;
@@ -386,9 +393,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id)
        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
-                   (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+                    cm_id_priv->id.service_id &&
+                   (cm_id_priv->id.device == device))
                        return cm_id_priv;
-               if (service_id < cm_id_priv->id.service_id)
+
+               if (device < cm_id_priv->id.device)
+                       node = node->rb_left;
+               else if (device > cm_id_priv->id.device)
+                       node = node->rb_right;
+               else if (service_id < cm_id_priv->id.service_id)
                        node = node->rb_left;
                else
                        node = node->rb_right;
@@ -523,7 +536,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
 }
 
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+                                ib_cm_handler cm_handler,
                                 void *context)
 {
        struct cm_id_private *cm_id_priv;
@@ -535,6 +549,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
 
        memset(cm_id_priv, 0, sizeof *cm_id_priv);
        cm_id_priv->id.state = IB_CM_IDLE;
+       cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
@@ -1047,7 +1062,6 @@ static void cm_format_req_event(struct cm_work *work,
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
        param = &work->cm_event.param.req_rcvd;
        param->listen_id = listen_id;
-       param->device = cm_id_priv->av.port->mad_agent->device;
        param->port = cm_id_priv->av.port->port_num;
        param->primary_path = &work->path[0];
        if (req_msg->alt_local_lid)
@@ -1226,7 +1240,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
        }
 
        /* Find matching listen request. */
-       listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+       listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+                                          req_msg->service_id);
        if (!listen_cm_id_priv) {
                spin_unlock_irqrestore(&cm.lock, flags);
                cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1269,7 @@ static int cm_req_handler(struct cm_work *work)
 
        req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
-       cm_id = ib_create_cm_id(NULL, NULL);
+       cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
 
@@ -2629,7 +2644,6 @@ static void cm_format_sidr_req_event(struct cm_work *work,
        param = &work->cm_event.param.sidr_req_rcvd;
        param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
        param->listen_id = listen_id;
-       param->device = work->port->mad_agent->device;
        param->port = work->port->port_num;
        work->cm_event.private_data = &sidr_req_msg->private_data;
 }
@@ -2642,7 +2656,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
        struct ib_wc *wc;
        unsigned long flags;
 
-       cm_id = ib_create_cm_id(NULL, NULL);
+       cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2680,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
                spin_unlock_irqrestore(&cm.lock, flags);
                goto out; /* Duplicate message. */
        }
-       cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
+       cur_cm_id_priv = cm_find_listen(cm_id->device,
+                                       sidr_req_msg->service_id);
        if (!cur_cm_id_priv) {
                rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
                spin_unlock_irqrestore(&cm.lock, flags);
index 813ab70bf6d5ebd5642bde4d499ce0cb911f81be..4d3aee90c249783bee765b3286b944e6c3abc426 100644 (file)
@@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
                req_msg->offset40 = cpu_to_be32((be32_to_cpu(
                                                  req_msg->offset40) &
                                                   0xFFFFFFF9) | 0x2);
+               break;
        default:
                req_msg->offset40 = cpu_to_be32(be32_to_cpu(
                                                 req_msg->offset40) &
index d3cf84e01587176bdfaa26926abbc0c8c6c63ae1..5a6e44976405bd06a37ffa0204668ce33cbafaf8 100644 (file)
@@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device,
                  u8 port_num,
                  struct ib_port_attr *port_attr)
 {
+       if (device->node_type == IB_NODE_SWITCH) {
+               if (port_num)
+                       return -EINVAL;
+       } else if (port_num < 1 || port_num > device->phys_port_cnt)
+               return -EINVAL;
+
        return device->query_port(device, port_num, port_attr);
 }
 EXPORT_SYMBOL(ib_query_port);
@@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device,
                   u8 port_num, int port_modify_mask,
                   struct ib_port_modify *port_modify)
 {
+       if (device->node_type == IB_NODE_SWITCH) {
+               if (port_num)
+                       return -EINVAL;
+       } else if (port_num < 1 || port_num > device->phys_port_cnt)
+               return -EINVAL;
+
        return device->modify_port(device, port_num, port_modify_mask,
                                   port_modify);
 }
index a14ca87fda188566a98d3627a9d36061dffb0bd2..af302e830561c5e4f9fdb605a29ae7c2de96f7df 100644 (file)
@@ -2683,40 +2683,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 
 static void ib_mad_init_device(struct ib_device *device)
 {
-       int num_ports, cur_port, i;
+       int start, end, i;
 
        if (device->node_type == IB_NODE_SWITCH) {
-               num_ports = 1;
-               cur_port = 0;
+               start = 0;
+               end   = 0;
        } else {
-               num_ports = device->phys_port_cnt;
-               cur_port = 1;
+               start = 1;
+               end   = device->phys_port_cnt;
        }
-       for (i = 0; i < num_ports; i++, cur_port++) {
-               if (ib_mad_port_open(device, cur_port)) {
+
+       for (i = start; i <= end; i++) {
+               if (ib_mad_port_open(device, i)) {
                        printk(KERN_ERR PFX "Couldn't open %s port %d\n",
-                              device->name, cur_port);
-                       goto error_device_open;
+                              device->name, i);
+                       goto error;
                }
-               if (ib_agent_port_open(device, cur_port)) {
+               if (ib_agent_port_open(device, i)) {
                        printk(KERN_ERR PFX "Couldn't open %s port %d "
                               "for agents\n",
-                              device->name, cur_port);
-                       goto error_device_open;
+                              device->name, i);
+                       goto error_agent;
                }
        }
        return;
 
-error_device_open:
-       while (i > 0) {
-               cur_port--;
-               if (ib_agent_port_close(device, cur_port))
+error_agent:
+       if (ib_mad_port_close(device, i))
+               printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+                      device->name, i);
+
+error:
+       i--;
+
+       while (i >= start) {
+               if (ib_agent_port_close(device, i))
                        printk(KERN_ERR PFX "Couldn't close %s port %d "
                               "for agents\n",
-                              device->name, cur_port);
-               if (ib_mad_port_close(device, cur_port))
+                              device->name, i);
+               if (ib_mad_port_close(device, i))
                        printk(KERN_ERR PFX "Couldn't close %s port %d\n",
-                              device->name, cur_port);
+                              device->name, i);
                i--;
        }
 }
index e215cf0478d666d66881f1145d9989d65076f3e2..0e5ef97f7637edc19df101b4afe21829f7f2f095 100644 (file)
@@ -583,10 +583,16 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 {
        struct ib_sa_path_query *query;
        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-       struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-       struct ib_mad_agent *agent  = port->agent;
+       struct ib_sa_port   *port;
+       struct ib_mad_agent *agent;
        int ret;
 
+       if (!sa_dev)
+               return -ENODEV;
+
+       port  = &sa_dev->port[port_num - sa_dev->start_port];
+       agent = port->agent;
+
        query = kmalloc(sizeof *query, gfp_mask);
        if (!query)
                return -ENOMEM;
@@ -685,10 +691,16 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 {
        struct ib_sa_service_query *query;
        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-       struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-       struct ib_mad_agent *agent  = port->agent;
+       struct ib_sa_port   *port;
+       struct ib_mad_agent *agent;
        int ret;
 
+       if (!sa_dev)
+               return -ENODEV;
+
+       port  = &sa_dev->port[port_num - sa_dev->start_port];
+       agent = port->agent;
+
        if (method != IB_MGMT_METHOD_GET &&
            method != IB_MGMT_METHOD_SET &&
            method != IB_SA_METHOD_DELETE)
@@ -768,10 +780,16 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 {
        struct ib_sa_mcmember_query *query;
        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-       struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-       struct ib_mad_agent *agent  = port->agent;
+       struct ib_sa_port   *port;
+       struct ib_mad_agent *agent;
        int ret;
 
+       if (!sa_dev)
+               return -ENODEV;
+
+       port  = &sa_dev->port[port_num - sa_dev->start_port];
+       agent = port->agent;
+
        query = kmalloc(sizeof *query, gfp_mask);
        if (!query)
                return -ENOMEM;
index 211ba3223f65cc7c2d78f77d3cc84ab50d3639cd..7ce7a6c782fa3a15b0c5685212fef5bdb3d20607 100644 (file)
@@ -65,6 +65,11 @@ struct port_table_attribute {
        int                     index;
 };
 
+static inline int ibdev_is_alive(const struct ib_device *dev) 
+{
+       return dev->reg_state == IB_DEV_REGISTERED;
+}
+
 static ssize_t port_attr_show(struct kobject *kobj,
                              struct attribute *attr, char *buf)
 {
@@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj,
 
        if (!port_attr->show)
                return -EIO;
+       if (!ibdev_is_alive(p->ibdev))
+               return -ENODEV;
 
        return port_attr->show(p, port_attr, buf);
 }
@@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
 {
        struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
 
+       if (!ibdev_is_alive(dev))
+               return -ENODEV;
+
        switch (dev->node_type) {
        case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
        case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
@@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
        struct ib_device_attr attr;
        ssize_t ret;
 
+       if (!ibdev_is_alive(dev))
+               return -ENODEV;
+
        ret = ib_query_device(dev, &attr);
        if (ret)
                return ret;
@@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
        struct ib_device_attr attr;
        ssize_t ret;
 
+       if (!ibdev_is_alive(dev))
+               return -ENODEV;
+
        ret = ib_query_device(dev, &attr);
        if (ret)
                return ret;
index 5a6ba4030d44f60869b9789a47ab2e61a9d0629d..28477565ecba8df54012a0562cee7a0eec78b6ef 100644 (file)
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/cdev.h>
+#include <linux/idr.h>
 
 #include <asm/uaccess.h>
 
-#include "ucm.h"
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
 
 MODULE_AUTHOR("Libor Michalek");
 MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
 MODULE_LICENSE("Dual BSD/GPL");
 
-static int ucm_debug_level;
+struct ib_ucm_device {
+       int                     devnum;
+       struct cdev             dev;
+       struct class_device     class_dev;
+       struct ib_device        *ib_dev;
+};
+
+struct ib_ucm_file {
+       struct semaphore mutex;
+       struct file *filp;
+       struct ib_ucm_device *device;
+
+       struct list_head  ctxs;
+       struct list_head  events;
+       wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+       int                 id;
+       wait_queue_head_t   wait;
+       atomic_t            ref;
+       int                 events_reported;
+
+       struct ib_ucm_file *file;
+       struct ib_cm_id    *cm_id;
+       __u64              uid;
+
+       struct list_head    events;    /* list of pending events. */
+       struct list_head    file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+       struct ib_ucm_context *ctx;
+       struct list_head file_list; /* member in file event list */
+       struct list_head ctx_list;  /* member in ctx event list */
 
-module_param_named(debug_level, ucm_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+       struct ib_cm_id *cm_id;
+       struct ib_ucm_event_resp resp;
+       void *data;
+       void *info;
+       int data_len;
+       int info_len;
+};
 
 enum {
        IB_UCM_MAJOR = 231,
-       IB_UCM_MINOR = 255
+       IB_UCM_BASE_MINOR = 224,
+       IB_UCM_MAX_DEVICES = 32
 };
 
-#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
 
-#define PFX "UCM: "
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
 
-#define ucm_dbg(format, arg...)                        \
-       do {                                    \
-               if (ucm_debug_level > 0)        \
-                       printk(KERN_DEBUG PFX format, ## arg); \
-       } while (0)
+static struct ib_client ucm_client = {
+       .name   = "ucm",
+       .add    = ib_ucm_add_one,
+       .remove = ib_ucm_remove_one
+};
 
-static struct semaphore ctx_id_mutex;
-static struct idr       ctx_id_table;
+static DECLARE_MUTEX(ctx_id_mutex);
+static DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
 
 static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
 {
@@ -152,17 +196,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
                goto error;
 
        list_add_tail(&ctx->file_list, &file->ctxs);
-       ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
        return ctx;
 
 error:
        kfree(ctx);
        return NULL;
 }
-/*
- * Event portion of the API, handle CM events
- * and allow event polling.
- */
+
 static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
                                  struct ib_sa_path_rec  *kpath)
 {
@@ -209,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
        ureq->retry_count                = kreq->retry_count;
        ureq->rnr_retry_count            = kreq->rnr_retry_count;
        ureq->srq                        = kreq->srq;
+       ureq->port                       = kreq->port;
 
        ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
        ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -295,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
        case IB_CM_SIDR_REQ_RECEIVED:
                uvt->resp.u.sidr_req_resp.pkey = 
                                        evt->param.sidr_req_rcvd.pkey;
+               uvt->resp.u.sidr_req_resp.port = 
+                                       evt->param.sidr_req_rcvd.port;
                uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
                break;
        case IB_CM_SIDR_REP_RECEIVED:
@@ -387,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
 
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
-       /*
-        * wait
-        */
+
        down(&file->mutex);
        while (list_empty(&file->events)) {
 
@@ -471,7 +512,6 @@ done:
        return result;
 }
 
-
 static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
                                const char __user *inbuf,
                                int in_len, int out_len)
@@ -494,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
                return -ENOMEM;
 
        ctx->uid = cmd.uid;
-       ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+       ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+                                    ib_ucm_event_handler, ctx);
        if (IS_ERR(ctx->cm_id)) {
                result = PTR_ERR(ctx->cm_id);
-               goto err;
+               goto err1;
        }
 
        resp.id = ctx->id;
        if (copy_to_user((void __user *)(unsigned long)cmd.response,
                         &resp, sizeof(resp))) {
                result = -EFAULT;
-               goto err;
+               goto err2;
        }
-
        return 0;
 
-err:
+err2:
+       ib_destroy_cm_id(ctx->cm_id);
+err1:
        down(&ctx_id_mutex);
        idr_remove(&ctx_id_table, ctx->id);
        up(&ctx_id_mutex);
-
-       if (!IS_ERR(ctx->cm_id))
-               ib_destroy_cm_id(ctx->cm_id);
-
        kfree(ctx);
        return result;
 }
@@ -1184,9 +1222,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;
 
-       ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
-               hdr.cmd, hdr.in, hdr.out, len);
-
        if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
                return -EINVAL;
 
@@ -1231,8 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
 
        filp->private_data = file;
        file->filp = filp;
-
-       ucm_dbg("Created struct\n");
+       file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
 
        return 0;
 }
@@ -1263,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
        return 0;
 }
 
-static struct file_operations ib_ucm_fops = {
+static void ib_ucm_release_class_dev(struct class_device *class_dev)
+{
+       struct ib_ucm_device *dev;
+
+       dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+       cdev_del(&dev->dev);
+       clear_bit(dev->devnum, dev_map);
+       kfree(dev);
+}
+
+static struct file_operations ucm_fops = {
        .owner   = THIS_MODULE,
        .open    = ib_ucm_open,
        .release = ib_ucm_close,
@@ -1271,55 +1315,141 @@ static struct file_operations ib_ucm_fops = {
        .poll    = ib_ucm_poll,
 };
 
+static struct class ucm_class = {
+       .name    = "infiniband_cm",
+       .release = ib_ucm_release_class_dev
+};
 
-static struct class *ib_ucm_class;
-static struct cdev       ib_ucm_cdev;
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+       struct ib_ucm_device *dev;
+       
+       dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+       return print_dev_t(buf, dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
 
-static int __init ib_ucm_init(void)
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
-       int result;
+       struct ib_ucm_device *dev;
+       
+       dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+       return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
-       result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
-       if (result) {
-               ucm_dbg("Error <%d> registering dev\n", result);
-               goto err_chr;
-       }
+static void ib_ucm_add_one(struct ib_device *device)
+{
+       struct ib_ucm_device *ucm_dev;
+
+       if (!device->alloc_ucontext)
+               return;
+
+       ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL);
+       if (!ucm_dev)
+               return;
 
-       cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
+       memset(ucm_dev, 0, sizeof *ucm_dev);
+       ucm_dev->ib_dev = device;
+
+       ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+       if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+               goto err;
+
+       set_bit(ucm_dev->devnum, dev_map);
+
+       cdev_init(&ucm_dev->dev, &ucm_fops);
+       ucm_dev->dev.owner = THIS_MODULE;
+       kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
+       if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+               goto err;
 
-       result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
-       if (result) {
-               ucm_dbg("Error <%d> adding cdev\n", result);
+       ucm_dev->class_dev.class = &ucm_class;
+       ucm_dev->class_dev.dev = device->dma_device;
+       snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
+                ucm_dev->devnum);
+       if (class_device_register(&ucm_dev->class_dev))
                goto err_cdev;
-       }
 
-       ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
-       if (IS_ERR(ib_ucm_class)) {
-               result = PTR_ERR(ib_ucm_class);
-               ucm_dbg("Error <%d> creating class\n", result);
+       if (class_device_create_file(&ucm_dev->class_dev,
+                                    &class_device_attr_dev))
+               goto err_class;
+       if (class_device_create_file(&ucm_dev->class_dev,
+                                    &class_device_attr_ibdev))
                goto err_class;
+
+       ib_set_client_data(device, &ucm_client, ucm_dev);
+       return;
+
+err_class:
+       class_device_unregister(&ucm_dev->class_dev);
+err_cdev:
+       cdev_del(&ucm_dev->dev);
+       clear_bit(ucm_dev->devnum, dev_map);
+err:
+       kfree(ucm_dev);
+       return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+       struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+       if (!ucm_dev)
+               return;
+
+       class_device_unregister(&ucm_dev->class_dev);
+}
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+       return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ib_ucm_init(void)
+{
+       int ret;
+
+       ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+                                    "infiniband_cm");
+       if (ret) {
+               printk(KERN_ERR "ucm: couldn't register device number\n");
+               goto err;
        }
 
-       class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm");
+       ret = class_register(&ucm_class);
+       if (ret) {
+               printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
+               goto err_chrdev;
+       }
 
-       idr_init(&ctx_id_table);
-       init_MUTEX(&ctx_id_mutex);
+       ret = class_create_file(&ucm_class, &class_attr_abi_version);
+       if (ret) {
+               printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+               goto err_class;
+       }
 
+       ret = ib_register_client(&ucm_client);
+       if (ret) {
+               printk(KERN_ERR "ucm: couldn't register client\n");
+               goto err_class;
+       }
        return 0;
+
 err_class:
-       cdev_del(&ib_ucm_cdev);
-err_cdev:
-       unregister_chrdev_region(IB_UCM_DEV, 1);
-err_chr:
-       return result;
+       class_unregister(&ucm_class);
+err_chrdev:
+       unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+err:
+       return ret;
 }
 
 static void __exit ib_ucm_cleanup(void)
 {
-       class_device_destroy(ib_ucm_class, IB_UCM_DEV);
-       class_destroy(ib_ucm_class);
-       cdev_del(&ib_ucm_cdev);
-       unregister_chrdev_region(IB_UCM_DEV, 1);
+       ib_unregister_client(&ucm_client);
+       class_unregister(&ucm_class);
+       unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
        idr_destroy(&ctx_id_table);
 }
 
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
deleted file mode 100644 (file)
index f46f37b..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
- */
-
-#ifndef UCM_H
-#define UCM_H
-
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
-
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-
-struct ib_ucm_file {
-       struct semaphore mutex;
-       struct file *filp;
-
-       struct list_head  ctxs;   /* list of active connections */
-       struct list_head  events; /* list of pending events */
-       wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
-       int                 id;
-       wait_queue_head_t   wait;
-       atomic_t            ref;
-       int                 events_reported;
-
-       struct ib_ucm_file *file;
-       struct ib_cm_id    *cm_id;
-       __u64              uid;
-
-       struct list_head    events;    /* list of pending events. */
-       struct list_head    file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
-       struct ib_ucm_context *ctx;
-       struct list_head file_list; /* member in file event list */
-       struct list_head ctx_list;  /* member in ctx event list */
-
-       struct ib_cm_id *cm_id;
-       struct ib_ucm_event_resp resp;
-       void *data;
-       void *info;
-       int data_len;
-       int info_len;
-};
-
-#endif /* UCM_H */
index a64d6b4dcc165851f41631d0938ab9d57eceaeac..fd200c064a2e5f9808935acc16823507724c734f 100644 (file)
@@ -280,14 +280,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 
        length = count - sizeof (struct ib_user_mad);
        packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
-                        sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
+                        sizeof (struct ib_rmpp_hdr), GFP_KERNEL);
        if (!packet)
                return -ENOMEM;
 
        if (copy_from_user(&packet->mad, buf,
                            sizeof (struct ib_user_mad) +
-                           sizeof(struct ib_mad_hdr) +
-                           sizeof(struct ib_rmpp_hdr))) {
+                           sizeof (struct ib_mad_hdr) +
+                           sizeof (struct ib_rmpp_hdr))) {
                ret = -EFAULT;
                goto err;
        }
@@ -349,7 +349,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
                }
                rmpp_active = 1;
        } else {
-               if (length > sizeof(struct ib_mad)) {
+               if (length > sizeof (struct ib_mad)) {
                        ret = -EINVAL;
                        goto err_ah;
                }
@@ -376,17 +376,17 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
        if (!rmpp_active) {
                /* Copy message from user into send buffer */
                if (copy_from_user(packet->msg->mad,
-                                  buf + sizeof(struct ib_user_mad), length)) {
+                                  buf + sizeof (struct ib_user_mad), length)) {
                        ret = -EFAULT;
                        goto err_msg;
                }
        } else {
-               rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
-                               sizeof(struct ib_rmpp_hdr);
+               rmpp_hdr_size = sizeof (struct ib_mad_hdr) +
+                               sizeof (struct ib_rmpp_hdr);
 
                /* Only copy MAD headers (RMPP header in place) */
                memcpy(packet->msg->mad, packet->mad.data,
-                      sizeof(struct ib_mad_hdr));
+                      sizeof (struct ib_mad_hdr));
 
                /* Now, copy rest of message from user into send buffer */
                if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
@@ -671,17 +671,6 @@ static struct ib_client umad_client = {
        .remove = ib_umad_remove_one
 };
 
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
-       struct ib_umad_port *port = class_get_devdata(class_dev);
-
-       if (class_dev == &port->class_dev)
-               return print_dev_t(buf, port->dev.dev);
-       else
-               return print_dev_t(buf, port->sm_dev.dev);
-}
-static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
        struct ib_umad_port *port = class_get_devdata(class_dev);
@@ -762,6 +751,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 
        port->class_dev.class = &umad_class;
        port->class_dev.dev   = device->dma_device;
+       port->class_dev.devt  = port->dev.dev;
 
        snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
 
@@ -771,8 +761,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
        class_set_devdata(&port->class_dev, port);
        kref_get(&port->umad_dev->ref);
 
-       if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
-               goto err_class;
        if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
                goto err_class;
        if (class_device_create_file(&port->class_dev, &class_device_attr_port))
@@ -786,6 +774,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 
        port->sm_class_dev.class = &umad_class;
        port->sm_class_dev.dev   = device->dma_device;
+       port->sm_class_dev.devt  = port->sm_dev.dev;
 
        snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
 
@@ -795,8 +784,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
        class_set_devdata(&port->sm_class_dev, port);
        kref_get(&port->umad_dev->ref);
 
-       if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
-               goto err_sm_class;
        if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
                goto err_sm_class;
        if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
index cc124344dd2c72335cc8ebcb476cc66e4802e75d..63c8085c0c98940376f06c18033f05fd102779f8 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -53,14 +54,14 @@ struct ib_uverbs_device {
        struct cdev                             dev;
        struct class_device                     class_dev;
        struct ib_device                       *ib_dev;
-       int                                     num_comp;
+       int                                     num_comp_vectors;
 };
 
 struct ib_uverbs_event_file {
        struct kref                             ref;
+       struct file                            *file;
        struct ib_uverbs_file                  *uverbs_file;
        spinlock_t                              lock;
-       int                                     fd;
        int                                     is_async;
        wait_queue_head_t                       poll_wait;
        struct fasync_struct                   *async_queue;
@@ -73,8 +74,7 @@ struct ib_uverbs_file {
        struct ib_uverbs_device                *device;
        struct ib_ucontext                     *ucontext;
        struct ib_event_handler                 event_handler;
-       struct ib_uverbs_event_file             async_file;
-       struct ib_uverbs_event_file             comp_file[1];
+       struct ib_uverbs_event_file            *async_file;
 };
 
 struct ib_uverbs_event {
@@ -110,10 +110,17 @@ extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
 
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+                                       int is_async, int *fd);
+void ib_uverbs_release_event_file(struct kref *ref);
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
+
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+                            struct ib_event *event);
 
 int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
                void *addr, size_t size, int write);
@@ -125,21 +132,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
                                 const char __user *buf, int in_len,    \
                                 int out_len)
 
-IB_UVERBS_DECLARE_CMD(query_params);
 IB_UVERBS_DECLARE_CMD(get_context);
 IB_UVERBS_DECLARE_CMD(query_device);
 IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(query_gid);
-IB_UVERBS_DECLARE_CMD(query_pkey);
 IB_UVERBS_DECLARE_CMD(alloc_pd);
 IB_UVERBS_DECLARE_CMD(dealloc_pd);
 IB_UVERBS_DECLARE_CMD(reg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_comp_channel);
 IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
 IB_UVERBS_DECLARE_CMD(attach_mcast);
 IB_UVERBS_DECLARE_CMD(detach_mcast);
 IB_UVERBS_DECLARE_CMD(create_srq);
index 562445165d2bee6efb2b75d506db723fdb997e8a..14583bb6e2c0c0301105e490cd454d4eb1009240 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,8 @@
  * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
  */
 
+#include <linux/file.h>
+
 #include <asm/uaccess.h>
 
 #include "uverbs.h"
                (udata)->outlen = (olen);                               \
        } while (0)
 
-ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
-                              const char __user *buf,
-                              int in_len, int out_len)
-{
-       struct ib_uverbs_query_params      cmd;
-       struct ib_uverbs_query_params_resp resp;
-
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       memset(&resp, 0, sizeof resp);
-
-       resp.num_cq_events = file->device->num_comp;
-
-       if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
-           return -EFAULT;
-
-       return in_len;
-}
-
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
                              const char __user *buf,
                              int in_len, int out_len)
@@ -77,7 +57,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        struct ib_udata                   udata;
        struct ib_device                 *ibdev = file->device->ib_dev;
        struct ib_ucontext               *ucontext;
-       int i;
+       struct file                      *filp;
        int ret;
 
        if (out_len < sizeof resp)
@@ -110,26 +90,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&ucontext->srq_list);
        INIT_LIST_HEAD(&ucontext->ah_list);
 
-       resp.async_fd = file->async_file.fd;
-       for (i = 0; i < file->device->num_comp; ++i)
-               if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
-                                i * sizeof (__u32),
-                                &file->comp_file[i].fd, sizeof (__u32))) {
-                       ret = -EFAULT;
-                       goto err_free;
-               }
+       resp.num_comp_vectors = file->device->num_comp_vectors;
+
+       filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+       if (IS_ERR(filp)) {
+               ret = PTR_ERR(filp);
+               goto err_free;
+       }
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_free;
+               goto err_file;
        }
 
-       file->ucontext = ucontext;
+       file->async_file = filp->private_data;
+
+       INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
+                             ib_uverbs_event_handler);
+       ret = ib_register_event_handler(&file->event_handler);
+       if (ret)
+               goto err_file;
+
+       kref_get(&file->async_file->ref);
+       kref_get(&file->ref);
+       file->ucontext   = ucontext;
+
+       fd_install(resp.async_fd, filp);
+
        up(&file->mutex);
 
        return in_len;
 
+err_file:
+       put_unused_fd(resp.async_fd);
+       fput(filp);
+
 err_free:
        ibdev->dealloc_ucontext(ucontext);
 
@@ -255,62 +251,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
        return in_len;
 }
 
-ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
-                           const char __user *buf,
-                           int in_len, int out_len)
-{
-       struct ib_uverbs_query_gid      cmd;
-       struct ib_uverbs_query_gid_resp resp;
-       int                             ret;
-
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       memset(&resp, 0, sizeof resp);
-
-       ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
-                          (union ib_gid *) resp.gid);
-       if (ret)
-               return ret;
-
-       if (copy_to_user((void __user *) (unsigned long) cmd.response,
-                        &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
-}
-
-ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
-                            const char __user *buf,
-                            int in_len, int out_len)
-{
-       struct ib_uverbs_query_pkey      cmd;
-       struct ib_uverbs_query_pkey_resp resp;
-       int                              ret;
-
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
-
-       memset(&resp, 0, sizeof resp);
-
-       ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
-                           &resp.pkey);
-       if (ret)
-               return ret;
-
-       if (copy_to_user((void __user *) (unsigned long) cmd.response,
-                        &resp, sizeof resp))
-               return -EFAULT;
-
-       return in_len;
-}
-
 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                           const char __user *buf,
                           int in_len, int out_len)
@@ -349,24 +289,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
        pd->uobject = uobj;
        atomic_set(&pd->usecnt, 0);
 
+       down(&ib_uverbs_idr_mutex);
+
 retry:
        if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
                ret = -ENOMEM;
-               goto err_pd;
+               goto err_up;
        }
 
-       down(&ib_uverbs_idr_mutex);
        ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
-       up(&ib_uverbs_idr_mutex);
 
        if (ret == -EAGAIN)
                goto retry;
        if (ret)
-               goto err_pd;
-
-       down(&file->mutex);
-       list_add_tail(&uobj->list, &file->ucontext->pd_list);
-       up(&file->mutex);
+               goto err_up;
 
        memset(&resp, 0, sizeof resp);
        resp.pd_handle = uobj->id;
@@ -374,21 +310,22 @@ retry:
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_list;
+               goto err_idr;
        }
 
-       return in_len;
-
-err_list:
-       down(&file->mutex);
-       list_del(&uobj->list);
+       down(&file->mutex);
+       list_add_tail(&uobj->list, &file->ucontext->pd_list);
        up(&file->mutex);
 
-       down(&ib_uverbs_idr_mutex);
-       idr_remove(&ib_uverbs_pd_idr, uobj->id);
        up(&ib_uverbs_idr_mutex);
 
-err_pd:
+       return in_len;
+
+err_idr:
+       idr_remove(&ib_uverbs_pd_idr, uobj->id);
+
+err_up:
+       up(&ib_uverbs_idr_mutex);
        ib_dealloc_pd(pd);
 
 err:
@@ -459,6 +396,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
                return -EINVAL;
 
+       /*
+        * Local write permission is required if remote write or
+        * remote atomic permission is also requested.
+        */
+       if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+           !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+               return -EINVAL;
+
        obj = kmalloc(sizeof *obj, GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
@@ -524,24 +469,22 @@ retry:
 
        resp.mr_handle = obj->uobject.id;
 
-       down(&file->mutex);
-       list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
-       up(&file->mutex);
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_list;
+               goto err_idr;
        }
 
+       down(&file->mutex);
+       list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+       up(&file->mutex);
+
        up(&ib_uverbs_idr_mutex);
 
        return in_len;
 
-err_list:
-       down(&file->mutex);
-       list_del(&obj->uobject.list);
-       up(&file->mutex);
+err_idr:
+       idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
 
 err_unreg:
        ib_dereg_mr(mr);
@@ -595,6 +538,35 @@ out:
        return ret ? ret : in_len;
 }
 
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+                                     const char __user *buf, int in_len,
+                                     int out_len)
+{
+       struct ib_uverbs_create_comp_channel       cmd;
+       struct ib_uverbs_create_comp_channel_resp  resp;
+       struct file                               *filp;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
+       if (IS_ERR(filp))
+               return PTR_ERR(filp);
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp)) {
+               put_unused_fd(resp.fd);
+               fput(filp);
+               return -EFAULT;
+       }
+
+       fd_install(resp.fd, filp);
+       return in_len;
+}
+
 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
                            const char __user *buf, int in_len,
                            int out_len)
@@ -603,6 +575,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
        struct ib_uverbs_create_cq_resp resp;
        struct ib_udata                 udata;
        struct ib_ucq_object           *uobj;
+       struct ib_uverbs_event_file    *ev_file = NULL;
        struct ib_cq                   *cq;
        int                             ret;
 
@@ -616,9 +589,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
-       if (cmd.event_handler >= file->device->num_comp)
+       if (cmd.comp_vector >= file->device->num_comp_vectors)
                return -EINVAL;
 
+       if (cmd.comp_channel >= 0)
+               ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+
        uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
        if (!uobj)
                return -ENOMEM;
@@ -641,27 +617,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
        cq->uobject       = &uobj->uobject;
        cq->comp_handler  = ib_uverbs_comp_handler;
        cq->event_handler = ib_uverbs_cq_event_handler;
-       cq->cq_context    = file;
+       cq->cq_context    = ev_file;
        atomic_set(&cq->usecnt, 0);
 
+       down(&ib_uverbs_idr_mutex);
+
 retry:
        if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
                ret = -ENOMEM;
-               goto err_cq;
+               goto err_up;
        }
 
-       down(&ib_uverbs_idr_mutex);
        ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
-       up(&ib_uverbs_idr_mutex);
 
        if (ret == -EAGAIN)
                goto retry;
        if (ret)
-               goto err_cq;
-
-       down(&file->mutex);
-       list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
-       up(&file->mutex);
+               goto err_up;
 
        memset(&resp, 0, sizeof resp);
        resp.cq_handle = uobj->uobject.id;
@@ -670,21 +642,22 @@ retry:
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_list;
+               goto err_idr;
        }
 
-       return in_len;
-
-err_list:
-       down(&file->mutex);
-       list_del(&uobj->uobject.list);
+       down(&file->mutex);
+       list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
        up(&file->mutex);
 
-       down(&ib_uverbs_idr_mutex);
-       idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
        up(&ib_uverbs_idr_mutex);
 
-err_cq:
+       return in_len;
+
+err_idr:
+       idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
+
+err_up:
+       up(&ib_uverbs_idr_mutex);
        ib_destroy_cq(cq);
 
 err:
@@ -692,6 +665,93 @@ err:
        return ret;
 }
 
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+                         const char __user *buf, int in_len,
+                         int out_len)
+{
+       struct ib_uverbs_poll_cq       cmd;
+       struct ib_uverbs_poll_cq_resp *resp;
+       struct ib_cq                  *cq;
+       struct ib_wc                  *wc;
+       int                            ret = 0;
+       int                            i;
+       int                            rsize;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+       if (!wc)
+               return -ENOMEM;
+
+       rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+       resp = kmalloc(rsize, GFP_KERNEL);
+       if (!resp) {
+               ret = -ENOMEM;
+               goto out_wc;
+       }
+
+       down(&ib_uverbs_idr_mutex);
+       cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+       if (!cq || cq->uobject->context != file->ucontext) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+       for (i = 0; i < resp->count; i++) {
+               resp->wc[i].wr_id          = wc[i].wr_id;
+               resp->wc[i].status         = wc[i].status;
+               resp->wc[i].opcode         = wc[i].opcode;
+               resp->wc[i].vendor_err     = wc[i].vendor_err;
+               resp->wc[i].byte_len       = wc[i].byte_len;
+               resp->wc[i].imm_data       = wc[i].imm_data;
+               resp->wc[i].qp_num         = wc[i].qp_num;
+               resp->wc[i].src_qp         = wc[i].src_qp;
+               resp->wc[i].wc_flags       = wc[i].wc_flags;
+               resp->wc[i].pkey_index     = wc[i].pkey_index;
+               resp->wc[i].slid           = wc[i].slid;
+               resp->wc[i].sl             = wc[i].sl;
+               resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+               resp->wc[i].port_num       = wc[i].port_num;
+       }
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+               ret = -EFAULT;
+
+out:
+       up(&ib_uverbs_idr_mutex);
+       kfree(resp);
+
+out_wc:
+       kfree(wc);
+       return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+                               const char __user *buf, int in_len,
+                               int out_len)
+{
+       struct ib_uverbs_req_notify_cq cmd;
+       struct ib_cq                  *cq;
+       int                            ret = -EINVAL;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       down(&ib_uverbs_idr_mutex);
+       cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+       if (cq && cq->uobject->context == file->ucontext) {
+               ib_req_notify_cq(cq, cmd.solicited_only ?
+                                       IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+               ret = in_len;
+       }
+       up(&ib_uverbs_idr_mutex);
+
+       return ret;
+}
+
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
                             const char __user *buf, int in_len,
                             int out_len)
@@ -700,6 +760,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
        struct ib_uverbs_destroy_cq_resp resp;
        struct ib_cq                    *cq;
        struct ib_ucq_object            *uobj;
+       struct ib_uverbs_event_file     *ev_file;
        struct ib_uverbs_event          *evt, *tmp;
        u64                              user_handle;
        int                              ret = -EINVAL;
@@ -716,7 +777,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
                goto out;
 
        user_handle = cq->uobject->user_handle;
-       uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+       uobj        = container_of(cq->uobject, struct ib_ucq_object, uobject);
+       ev_file     = cq->cq_context;
 
        ret = ib_destroy_cq(cq);
        if (ret)
@@ -728,19 +790,23 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
        list_del(&uobj->uobject.list);
        up(&file->mutex);
 
-       spin_lock_irq(&file->comp_file[0].lock);
-       list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
-               list_del(&evt->list);
-               kfree(evt);
+       if (ev_file) {
+               spin_lock_irq(&ev_file->lock);
+               list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+                       list_del(&evt->list);
+                       kfree(evt);
+               }
+               spin_unlock_irq(&ev_file->lock);
+
+               kref_put(&ev_file->ref, ib_uverbs_release_event_file);
        }
-       spin_unlock_irq(&file->comp_file[0].lock);
 
-       spin_lock_irq(&file->async_file.lock);
+       spin_lock_irq(&file->async_file->lock);
        list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
                list_del(&evt->list);
                kfree(evt);
        }
-       spin_unlock_irq(&file->async_file.lock);
+       spin_unlock_irq(&file->async_file->lock);
 
        resp.comp_events_reported  = uobj->comp_events_reported;
        resp.async_events_reported = uobj->async_events_reported;
@@ -859,24 +925,22 @@ retry:
 
        resp.qp_handle = uobj->uobject.id;
 
-       down(&file->mutex);
-       list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
-       up(&file->mutex);
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_list;
+               goto err_idr;
        }
 
+       down(&file->mutex);
+       list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
+       up(&file->mutex);
+
        up(&ib_uverbs_idr_mutex);
 
        return in_len;
 
-err_list:
-       down(&file->mutex);
-       list_del(&uobj->uobject.list);
-       up(&file->mutex);
+err_idr:
+       idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id);
 
 err_destroy:
        ib_destroy_qp(qp);
@@ -1005,12 +1069,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
        list_del(&uobj->uobject.list);
        up(&file->mutex);
 
-       spin_lock_irq(&file->async_file.lock);
+       spin_lock_irq(&file->async_file->lock);
        list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
                list_del(&evt->list);
                kfree(evt);
        }
-       spin_unlock_irq(&file->async_file.lock);
+       spin_unlock_irq(&file->async_file->lock);
 
        resp.events_reported = uobj->events_reported;
 
@@ -1026,6 +1090,468 @@ out:
        return ret ? ret : in_len;
 }
 
+/*
+ * Handle the POST_SEND command.
+ *
+ * The command buffer holds struct ib_uverbs_post_send, followed by
+ * cmd.wr_count work requests of cmd.wqe_size bytes each, followed by
+ * cmd.sge_count scatter/gather entries that the work requests consume
+ * in order.
+ *
+ * Every user work request is copied into a freshly allocated
+ * struct ib_send_wr whose scatter/gather list lives in the same
+ * allocation.  The resulting chain is handed to the device's
+ * post_send method and freed again before returning.
+ *
+ * resp.bad_wr is written to cmd.response: 0 if the post succeeded,
+ * otherwise the number of work requests walked up to and including
+ * the one the driver reported through bad_wr.
+ *
+ * Returns in_len on success or a negative errno.
+ */
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_post_send      cmd;
+       struct ib_uverbs_post_send_resp resp;
+       struct ib_uverbs_send_wr       *user_wr;
+       struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
+       struct ib_qp                   *qp;
+       size_t                          avail;
+       int                             i, sg_ind;
+       ssize_t                         ret = -EINVAL;
+
+       if (in_len < 0 || (size_t) in_len < sizeof cmd)
+               return -EINVAL;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       /* Also guarantees the divisor below is non-zero. */
+       if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+               return -EINVAL;
+
+       /*
+        * wqe_size, wr_count and sge_count all come from userspace.
+        * Validate them by division so that a product that would wrap
+        * around cannot make an undersized buffer look big enough.
+        */
+       avail = in_len - sizeof cmd;
+       if (cmd.wr_count > avail / cmd.wqe_size)
+               return -EINVAL;
+
+       avail -= (size_t) cmd.wr_count * cmd.wqe_size;
+       if (cmd.sge_count > avail / sizeof (struct ib_uverbs_sge))
+               return -EINVAL;
+
+       user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+       if (!user_wr)
+               return -ENOMEM;
+
+       down(&ib_uverbs_idr_mutex);
+
+       qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+       if (!qp || qp->uobject->context != file->ucontext)
+               goto out;
+
+       sg_ind = 0;
+       last = NULL;
+       for (i = 0; i < cmd.wr_count; ++i) {
+               if (copy_from_user(user_wr,
+                                  buf + sizeof cmd + i * cmd.wqe_size,
+                                  cmd.wqe_size)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+
+               /*
+                * sg_ind never exceeds cmd.sge_count, so the subtraction
+                * cannot underflow; written this way the test cannot be
+                * defeated by num_sge + sg_ind wrapping around.
+                */
+               if (user_wr->num_sge > cmd.sge_count - sg_ind) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               /* One allocation: the WR followed by its aligned SG list. */
+               next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+                              user_wr->num_sge * sizeof (struct ib_sge),
+                              GFP_KERNEL);
+               if (!next) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               /* Link first so the cleanup at "out" frees it on any error. */
+               if (!last)
+                       wr = next;
+               else
+                       last->next = next;
+               last = next;
+
+               next->next       = NULL;
+               next->wr_id      = user_wr->wr_id;
+               next->num_sge    = user_wr->num_sge;
+               next->opcode     = user_wr->opcode;
+               next->send_flags = user_wr->send_flags;
+               next->imm_data   = user_wr->imm_data;
+
+               if (qp->qp_type == IB_QPT_UD) {
+                       next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+                                                 user_wr->wr.ud.ah);
+                       /*
+                        * As with every other handle lookup, only accept
+                        * an address handle owned by the calling context.
+                        */
+                       if (!next->wr.ud.ah ||
+                           next->wr.ud.ah->uobject->context !=
+                           file->ucontext) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
+                       next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+               } else {
+                       switch (next->opcode) {
+                       case IB_WR_RDMA_WRITE:
+                       case IB_WR_RDMA_WRITE_WITH_IMM:
+                       case IB_WR_RDMA_READ:
+                               next->wr.rdma.remote_addr =
+                                       user_wr->wr.rdma.remote_addr;
+                               next->wr.rdma.rkey        =
+                                       user_wr->wr.rdma.rkey;
+                               break;
+                       case IB_WR_ATOMIC_CMP_AND_SWP:
+                       case IB_WR_ATOMIC_FETCH_AND_ADD:
+                               next->wr.atomic.remote_addr =
+                                       user_wr->wr.atomic.remote_addr;
+                               next->wr.atomic.compare_add =
+                                       user_wr->wr.atomic.compare_add;
+                               next->wr.atomic.swap = user_wr->wr.atomic.swap;
+                               next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+
+               if (next->num_sge) {
+                       next->sg_list = (void *) next +
+                               ALIGN(sizeof *next, sizeof (struct ib_sge));
+                       /* SG entries start after all wr_count work requests. */
+                       if (copy_from_user(next->sg_list,
+                                          buf + sizeof cmd +
+                                          cmd.wr_count * cmd.wqe_size +
+                                          sg_ind * sizeof (struct ib_sge),
+                                          next->num_sge * sizeof (struct ib_sge))) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+                       sg_ind += next->num_sge;
+               } else
+                       next->sg_list = NULL;
+       }
+
+       resp.bad_wr = 0;
+       ret = qp->device->post_send(qp, wr, &bad_wr);
+       if (ret)
+               for (next = wr; next; next = next->next) {
+                       ++resp.bad_wr;
+                       if (next == bad_wr)
+                               break;
+               }
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp))
+               ret = -EFAULT;
+
+out:
+       up(&ib_uverbs_idr_mutex);
+
+       while (wr) {
+               next = wr->next;
+               kfree(wr);
+               wr = next;
+       }
+
+       kfree(user_wr);
+
+       return ret ? ret : in_len;
+}
+
+/*
+ * Build a chain of struct ib_recv_wr from a user buffer.
+ *
+ * buf points just past the command header and holds wr_count work
+ * requests of wqe_size bytes each, followed by sge_count scatter/gather
+ * entries that the work requests consume in order.  in_len is the
+ * number of bytes the caller says are available at buf.
+ *
+ * Each work request is allocated together with its scatter/gather
+ * list.  On success the head of the chain is returned (NULL when
+ * wr_count is 0) and the caller must kfree() every element.  On
+ * failure everything allocated here is freed and an ERR_PTR() is
+ * returned.
+ */
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+                                                   int in_len,
+                                                   u32 wr_count,
+                                                   u32 sge_count,
+                                                   u32 wqe_size)
+{
+       struct ib_uverbs_recv_wr *user_wr;
+       struct ib_recv_wr        *wr = NULL, *last, *next;
+       size_t                    avail;
+       int                       sg_ind;
+       int                       i;
+       int                       ret;
+
+       /*
+        * Callers pass "in_len - sizeof cmd", which is negative when the
+        * command was shorter than its own header; compared against an
+        * unsigned size that would look like a huge buffer.
+        */
+       if (in_len < 0)
+               return ERR_PTR(-EINVAL);
+
+       /* Also guarantees the divisor below is non-zero. */
+       if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * All three counts come from userspace.  Validate by division
+        * so that a wrapping product cannot pass the length check.
+        */
+       avail = in_len;
+       if (wr_count > avail / wqe_size)
+               return ERR_PTR(-EINVAL);
+
+       avail -= (size_t) wr_count * wqe_size;
+       if (sge_count > avail / sizeof (struct ib_uverbs_sge))
+               return ERR_PTR(-EINVAL);
+
+       user_wr = kmalloc(wqe_size, GFP_KERNEL);
+       if (!user_wr)
+               return ERR_PTR(-ENOMEM);
+
+       sg_ind = 0;
+       last = NULL;
+       for (i = 0; i < wr_count; ++i) {
+               if (copy_from_user(user_wr, buf + i * wqe_size,
+                                  wqe_size)) {
+                       ret = -EFAULT;
+                       goto err;
+               }
+
+               /*
+                * sg_ind never exceeds sge_count, so the subtraction
+                * cannot underflow, and num_sge + sg_ind wrapping
+                * around can no longer defeat the test.
+                */
+               if (user_wr->num_sge > sge_count - sg_ind) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               /* One allocation: the WR followed by its aligned SG list. */
+               next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+                              user_wr->num_sge * sizeof (struct ib_sge),
+                              GFP_KERNEL);
+               if (!next) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               /* Link first so the cleanup at "err" frees it on any error. */
+               if (!last)
+                       wr = next;
+               else
+                       last->next = next;
+               last = next;
+
+               next->next       = NULL;
+               next->wr_id      = user_wr->wr_id;
+               next->num_sge    = user_wr->num_sge;
+
+               if (next->num_sge) {
+                       next->sg_list = (void *) next +
+                               ALIGN(sizeof *next, sizeof (struct ib_sge));
+                       /* SG entries start after all wr_count work requests. */
+                       if (copy_from_user(next->sg_list,
+                                          buf + wr_count * wqe_size +
+                                          sg_ind * sizeof (struct ib_sge),
+                                          next->num_sge * sizeof (struct ib_sge))) {
+                               ret = -EFAULT;
+                               goto err;
+                       }
+                       sg_ind += next->num_sge;
+               } else
+                       next->sg_list = NULL;
+       }
+
+       kfree(user_wr);
+       return wr;
+
+err:
+       kfree(user_wr);
+
+       while (wr) {
+               next = wr->next;
+               kfree(wr);
+               wr = next;
+       }
+
+       return ERR_PTR(ret);
+}
+
+/*
+ * Handle the POST_RECV command: unmarshall the receive work requests
+ * that follow the command header, post them to the QP named by
+ * cmd.qp_handle (which must belong to the caller's context), and write
+ * resp.bad_wr to cmd.response.  The work request chain is always freed
+ * before returning.
+ *
+ * Returns in_len on success or a negative errno.
+ */
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_post_recv      cmd;
+       struct ib_uverbs_post_recv_resp resp;
+       struct ib_recv_wr              *wr_list, *cur, *failed;
+       struct ib_qp                   *qp;
+       ssize_t                         err;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       wr_list = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+                                           in_len - sizeof cmd, cmd.wr_count,
+                                           cmd.sge_count, cmd.wqe_size);
+       if (IS_ERR(wr_list))
+               return PTR_ERR(wr_list);
+
+       down(&ib_uverbs_idr_mutex);
+
+       qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+       if (!qp || qp->uobject->context != file->ucontext) {
+               err = -EINVAL;
+               goto unlock;
+       }
+
+       resp.bad_wr = 0;
+       err = qp->device->post_recv(qp, wr_list, &failed);
+       if (err) {
+               /* Count requests up to and including the rejected one. */
+               cur = wr_list;
+               while (cur) {
+                       ++resp.bad_wr;
+                       if (cur == failed)
+                               break;
+                       cur = cur->next;
+               }
+       }
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp))
+               err = -EFAULT;
+
+unlock:
+       up(&ib_uverbs_idr_mutex);
+
+       while (wr_list) {
+               cur = wr_list;
+               wr_list = wr_list->next;
+               kfree(cur);
+       }
+
+       return err ? err : in_len;
+}
+
+/*
+ * Handle the POST_SRQ_RECV command: unmarshall the receive work
+ * requests that follow the command header, post them to the SRQ named
+ * by cmd.srq_handle (which must belong to the caller's context), and
+ * write resp.bad_wr to cmd.response.  The work request chain is always
+ * freed before returning.
+ *
+ * Returns in_len on success or a negative errno.
+ */
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+       struct ib_uverbs_post_srq_recv      cmd;
+       struct ib_uverbs_post_srq_recv_resp resp;
+       struct ib_recv_wr                  *head, *pos, *rejected;
+       struct ib_srq                      *srq;
+       ssize_t                             status;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       head = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+                                        in_len - sizeof cmd, cmd.wr_count,
+                                        cmd.sge_count, cmd.wqe_size);
+       if (IS_ERR(head))
+               return PTR_ERR(head);
+
+       down(&ib_uverbs_idr_mutex);
+
+       srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+       if (srq && srq->uobject->context == file->ucontext) {
+               resp.bad_wr = 0;
+               status = srq->device->post_srq_recv(srq, head, &rejected);
+               if (status) {
+                       /* Count requests up to and including the rejected one. */
+                       for (pos = head; pos; pos = pos->next) {
+                               ++resp.bad_wr;
+                               if (pos == rejected)
+                                       break;
+                       }
+               }
+
+               if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                                &resp, sizeof resp))
+                       status = -EFAULT;
+       } else {
+               status = -EINVAL;
+       }
+
+       up(&ib_uverbs_idr_mutex);
+
+       while (head) {
+               pos = head->next;
+               kfree(head);
+               head = pos;
+       }
+
+       return status ? status : in_len;
+}
+
+/*
+ * Handle the CREATE_AH command: create an address handle on the
+ * protection domain named by cmd.pd_handle (which must belong to the
+ * caller's context), register it in ib_uverbs_ah_idr, report the new
+ * handle through cmd.response and add the tracking uobject to the
+ * context's ah_list.
+ *
+ * Returns in_len on success, -ENOSPC if the response buffer is too
+ * small, or another negative errno.  On failure nothing created here
+ * is left behind.
+ */
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+                           const char __user *buf, int in_len,
+                           int out_len)
+{
+       struct ib_uverbs_create_ah       cmd;
+       struct ib_uverbs_create_ah_resp  resp;
+       struct ib_uobject               *uobj;
+       struct ib_pd                    *pd;
+       struct ib_ah                    *ah;
+       struct ib_ah_attr               attr;
+       int ret;
+
+       if (out_len < sizeof resp)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+       if (!uobj)
+               return -ENOMEM;
+
+       down(&ib_uverbs_idr_mutex);
+
+       pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+       if (!pd || pd->uobject->context != file->ucontext) {
+               ret = -EINVAL;
+               goto err_up;
+       }
+
+       uobj->user_handle = cmd.user_handle;
+       uobj->context     = file->ucontext;
+
+       attr.dlid              = cmd.attr.dlid;
+       attr.sl                = cmd.attr.sl;
+       attr.src_path_bits     = cmd.attr.src_path_bits;
+       attr.static_rate       = cmd.attr.static_rate;
+       /*
+        * attr lives on the stack, so ah_flags must be set explicitly;
+        * otherwise ib_create_ah() would see whatever happened to be
+        * there when deciding whether the GRH fields below apply.
+        */
+       attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
+       attr.port_num          = cmd.attr.port_num;
+       attr.grh.flow_label    = cmd.attr.grh.flow_label;
+       attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
+       attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
+       attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+       memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+       ah = ib_create_ah(pd, &attr);
+       if (IS_ERR(ah)) {
+               ret = PTR_ERR(ah);
+               goto err_up;
+       }
+
+       ah->uobject = uobj;
+
+retry:
+       /* idr_get_new() may ask for more preallocated memory via -EAGAIN. */
+       if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
+               ret = -ENOMEM;
+               goto err_destroy;
+       }
+
+       ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+
+       if (ret == -EAGAIN)
+               goto retry;
+       if (ret)
+               goto err_destroy;
+
+       resp.ah_handle = uobj->id;
+
+       if (copy_to_user((void __user *) (unsigned long) cmd.response,
+                        &resp, sizeof resp)) {
+               ret = -EFAULT;
+               goto err_idr;
+       }
+
+       /*
+        * Only put the object on the context's list once nothing can
+        * fail any more, so the error paths never have to unlink it.
+        */
+       down(&file->mutex);
+       list_add_tail(&uobj->list, &file->ucontext->ah_list);
+       up(&file->mutex);
+
+       up(&ib_uverbs_idr_mutex);
+
+       return in_len;
+
+err_idr:
+       idr_remove(&ib_uverbs_ah_idr, uobj->id);
+
+err_destroy:
+       ib_destroy_ah(ah);
+
+err_up:
+       up(&ib_uverbs_idr_mutex);
+
+       kfree(uobj);
+       return ret;
+}
+
+/*
+ * Handle the DESTROY_AH command: look up cmd.ah_handle, make sure the
+ * address handle belongs to the caller's context, destroy it, and then
+ * drop its idr entry, unlink its uobject from the context list and free
+ * the uobject.  If ib_destroy_ah() fails, the handle is left registered.
+ *
+ * Returns in_len on success or a negative errno.
+ */
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len, int out_len)
+{
+       struct ib_uverbs_destroy_ah cmd;
+       struct ib_uobject          *owner;
+       struct ib_ah               *ah;
+       int                         err;
+
+       if (copy_from_user(&cmd, buf, sizeof cmd))
+               return -EFAULT;
+
+       down(&ib_uverbs_idr_mutex);
+
+       ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
+       if (ah && ah->uobject->context == file->ucontext) {
+               /* Save the uobject pointer before the AH goes away. */
+               owner = ah->uobject;
+
+               err = ib_destroy_ah(ah);
+               if (!err) {
+                       idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+
+                       down(&file->mutex);
+                       list_del(&owner->list);
+                       up(&file->mutex);
+
+                       kfree(owner);
+               }
+       } else {
+               err = -EINVAL;
+       }
+
+       up(&ib_uverbs_idr_mutex);
+
+       return err ? err : in_len;
+}
+
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
                               const char __user *buf, int in_len,
                               int out_len)
@@ -1148,24 +1674,22 @@ retry:
 
        resp.srq_handle = uobj->uobject.id;
 
-       down(&file->mutex);
-       list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
-       up(&file->mutex);
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
-               goto err_list;
+               goto err_idr;
        }
 
+       down(&file->mutex);
+       list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
+       up(&file->mutex);
+
        up(&ib_uverbs_idr_mutex);
 
        return in_len;
 
-err_list:
-       down(&file->mutex);
-       list_del(&uobj->uobject.list);
-       up(&file->mutex);
+err_idr:
+       idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
 
 err_destroy:
        ib_destroy_srq(srq);
@@ -1243,12 +1767,12 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
        list_del(&uobj->uobject.list);
        up(&file->mutex);
 
-       spin_lock_irq(&file->async_file.lock);
+       spin_lock_irq(&file->async_file->lock);
        list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
                list_del(&evt->list);
                kfree(evt);
        }
-       spin_unlock_irq(&file->async_file.lock);
+       spin_unlock_irq(&file->async_file->lock);
 
        resp.events_reported = uobj->events_reported;
 
index add45f7faa5b6d3267eee920bf9a8d2e02e935e0..251c752a7ae6ec1a815d644826d654121795284a 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -77,26 +78,31 @@ static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
                                     const char __user *buf, int in_len,
                                     int out_len) = {
-       [IB_USER_VERBS_CMD_QUERY_PARAMS]  = ib_uverbs_query_params,
-       [IB_USER_VERBS_CMD_GET_CONTEXT]   = ib_uverbs_get_context,
-       [IB_USER_VERBS_CMD_QUERY_DEVICE]  = ib_uverbs_query_device,
-       [IB_USER_VERBS_CMD_QUERY_PORT]    = ib_uverbs_query_port,
-       [IB_USER_VERBS_CMD_QUERY_GID]     = ib_uverbs_query_gid,
-       [IB_USER_VERBS_CMD_QUERY_PKEY]    = ib_uverbs_query_pkey,
-       [IB_USER_VERBS_CMD_ALLOC_PD]      = ib_uverbs_alloc_pd,
-       [IB_USER_VERBS_CMD_DEALLOC_PD]    = ib_uverbs_dealloc_pd,
-       [IB_USER_VERBS_CMD_REG_MR]        = ib_uverbs_reg_mr,
-       [IB_USER_VERBS_CMD_DEREG_MR]      = ib_uverbs_dereg_mr,
-       [IB_USER_VERBS_CMD_CREATE_CQ]     = ib_uverbs_create_cq,
-       [IB_USER_VERBS_CMD_DESTROY_CQ]    = ib_uverbs_destroy_cq,
-       [IB_USER_VERBS_CMD_CREATE_QP]     = ib_uverbs_create_qp,
-       [IB_USER_VERBS_CMD_MODIFY_QP]     = ib_uverbs_modify_qp,
-       [IB_USER_VERBS_CMD_DESTROY_QP]    = ib_uverbs_destroy_qp,
-       [IB_USER_VERBS_CMD_ATTACH_MCAST]  = ib_uverbs_attach_mcast,
-       [IB_USER_VERBS_CMD_DETACH_MCAST]  = ib_uverbs_detach_mcast,
-       [IB_USER_VERBS_CMD_CREATE_SRQ]    = ib_uverbs_create_srq,
-       [IB_USER_VERBS_CMD_MODIFY_SRQ]    = ib_uverbs_modify_srq,
-       [IB_USER_VERBS_CMD_DESTROY_SRQ]   = ib_uverbs_destroy_srq,
+       [IB_USER_VERBS_CMD_GET_CONTEXT]         = ib_uverbs_get_context,
+       [IB_USER_VERBS_CMD_QUERY_DEVICE]        = ib_uverbs_query_device,
+       [IB_USER_VERBS_CMD_QUERY_PORT]          = ib_uverbs_query_port,
+       [IB_USER_VERBS_CMD_ALLOC_PD]            = ib_uverbs_alloc_pd,
+       [IB_USER_VERBS_CMD_DEALLOC_PD]          = ib_uverbs_dealloc_pd,
+       [IB_USER_VERBS_CMD_REG_MR]              = ib_uverbs_reg_mr,
+       [IB_USER_VERBS_CMD_DEREG_MR]            = ib_uverbs_dereg_mr,
+       [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
+       [IB_USER_VERBS_CMD_CREATE_CQ]           = ib_uverbs_create_cq,
+       [IB_USER_VERBS_CMD_POLL_CQ]             = ib_uverbs_poll_cq,
+       [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]       = ib_uverbs_req_notify_cq,
+       [IB_USER_VERBS_CMD_DESTROY_CQ]          = ib_uverbs_destroy_cq,
+       [IB_USER_VERBS_CMD_CREATE_QP]           = ib_uverbs_create_qp,
+       [IB_USER_VERBS_CMD_MODIFY_QP]           = ib_uverbs_modify_qp,
+       [IB_USER_VERBS_CMD_DESTROY_QP]          = ib_uverbs_destroy_qp,
+       [IB_USER_VERBS_CMD_POST_SEND]           = ib_uverbs_post_send,
+       [IB_USER_VERBS_CMD_POST_RECV]           = ib_uverbs_post_recv,
+       [IB_USER_VERBS_CMD_POST_SRQ_RECV]       = ib_uverbs_post_srq_recv,
+       [IB_USER_VERBS_CMD_CREATE_AH]           = ib_uverbs_create_ah,
+       [IB_USER_VERBS_CMD_DESTROY_AH]          = ib_uverbs_destroy_ah,
+       [IB_USER_VERBS_CMD_ATTACH_MCAST]        = ib_uverbs_attach_mcast,
+       [IB_USER_VERBS_CMD_DETACH_MCAST]        = ib_uverbs_detach_mcast,
+       [IB_USER_VERBS_CMD_CREATE_SRQ]          = ib_uverbs_create_srq,
+       [IB_USER_VERBS_CMD_MODIFY_SRQ]          = ib_uverbs_modify_srq,
+       [IB_USER_VERBS_CMD_DESTROY_SRQ]         = ib_uverbs_destroy_srq,
 };
 
 static struct vfsmount *uverbs_event_mnt;
@@ -113,7 +119,13 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
 
        down(&ib_uverbs_idr_mutex);
 
-       /* XXX Free AHs */
+       list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+               struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
+               idr_remove(&ib_uverbs_ah_idr, uobj->id);
+               ib_destroy_ah(ah);
+               list_del(&uobj->list);
+               kfree(uobj);
+       }
 
        list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
                struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
@@ -188,25 +200,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
 
        spin_lock_irq(&file->lock);
 
-       while (list_empty(&file->event_list) && file->fd >= 0) {
+       while (list_empty(&file->event_list)) {
                spin_unlock_irq(&file->lock);
 
                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;
 
                if (wait_event_interruptible(file->poll_wait,
-                                            !list_empty(&file->event_list) ||
-                                            file->fd < 0))
+                                            !list_empty(&file->event_list)))
                        return -ERESTARTSYS;
 
                spin_lock_irq(&file->lock);
        }
 
-       if (file->fd < 0) {
-               spin_unlock_irq(&file->lock);
-               return -ENODEV;
-       }
-
        event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
 
        if (file->is_async)
@@ -248,26 +254,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
        poll_wait(filp, &file->poll_wait, wait);
 
        spin_lock_irq(&file->lock);
-       if (file->fd < 0)
-               pollflags = POLLERR;
-       else if (!list_empty(&file->event_list))
+       if (!list_empty(&file->event_list))
                pollflags = POLLIN | POLLRDNORM;
        spin_unlock_irq(&file->lock);
 
        return pollflags;
 }
 
-static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+void ib_uverbs_release_event_file(struct kref *ref)
 {
-       struct ib_uverbs_event *entry, *tmp;
+       struct ib_uverbs_event_file *file =
+               container_of(ref, struct ib_uverbs_event_file, ref);
 
-       spin_lock_irq(&file->lock);
-       if (file->fd != -1) {
-               file->fd = -1;
-               list_for_each_entry_safe(entry, tmp, &file->event_list, list)
-                       kfree(entry);
-       }
-       spin_unlock_irq(&file->lock);
+       kfree(file);
 }
 
 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
@@ -280,21 +279,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
 static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_event_file *file = filp->private_data;
+       struct ib_uverbs_event *entry, *tmp;
+
+       spin_lock_irq(&file->lock);
+       file->file = NULL;
+       list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+               if (entry->counter)
+                       list_del(&entry->obj_list);
+               kfree(entry);
+       }
+       spin_unlock_irq(&file->lock);
 
-       ib_uverbs_event_release(file);
        ib_uverbs_event_fasync(-1, filp, 0);
-       kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+       if (file->is_async) {
+               ib_unregister_event_handler(&file->uverbs_file->event_handler);
+               kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+       }
+       kref_put(&file->ref, ib_uverbs_release_event_file);
 
        return 0;
 }
 
 static struct file_operations uverbs_event_fops = {
-       /*
-        * No .owner field since we artificially create event files,
-        * so there is no increment to the module reference count in
-        * the open path.  All event files come from a uverbs command
-        * file, which already takes a module reference, so this is OK.
-        */
+       .owner   = THIS_MODULE,
        .read    = ib_uverbs_event_read,
        .poll    = ib_uverbs_event_poll,
        .release = ib_uverbs_event_close,
@@ -303,27 +311,37 @@ static struct file_operations uverbs_event_fops = {
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 {
-       struct ib_uverbs_file  *file = cq_context;
-       struct ib_ucq_object   *uobj;
-       struct ib_uverbs_event *entry;
-       unsigned long           flags;
+       struct ib_uverbs_event_file    *file = cq_context;
+       struct ib_ucq_object           *uobj;
+       struct ib_uverbs_event         *entry;
+       unsigned long                   flags;
+
+       if (!file)
+               return;
+
+       spin_lock_irqsave(&file->lock, flags);
+       if (!file->file) {
+               spin_unlock_irqrestore(&file->lock, flags);
+               return;
+       }
 
        entry = kmalloc(sizeof *entry, GFP_ATOMIC);
-       if (!entry)
+       if (!entry) {
+               spin_unlock_irqrestore(&file->lock, flags);
                return;
+       }
 
        uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
 
        entry->desc.comp.cq_handle = cq->uobject->user_handle;
        entry->counter             = &uobj->comp_events_reported;
 
-       spin_lock_irqsave(&file->comp_file[0].lock, flags);
-       list_add_tail(&entry->list, &file->comp_file[0].event_list);
+       list_add_tail(&entry->list, &file->event_list);
        list_add_tail(&entry->obj_list, &uobj->comp_list);
-       spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+       spin_unlock_irqrestore(&file->lock, flags);
 
-       wake_up_interruptible(&file->comp_file[0].poll_wait);
-       kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
+       wake_up_interruptible(&file->poll_wait);
+       kill_fasync(&file->async_queue, SIGIO, POLL_IN);
 }
 
 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -334,32 +352,40 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
        struct ib_uverbs_event *entry;
        unsigned long flags;
 
+       spin_lock_irqsave(&file->async_file->lock, flags);
+       if (!file->async_file->file) {
+               spin_unlock_irqrestore(&file->async_file->lock, flags);
+               return;
+       }
+
        entry = kmalloc(sizeof *entry, GFP_ATOMIC);
-       if (!entry)
+       if (!entry) {
+               spin_unlock_irqrestore(&file->async_file->lock, flags);
                return;
+       }
 
        entry->desc.async.element    = element;
        entry->desc.async.event_type = event;
        entry->counter               = counter;
 
-       spin_lock_irqsave(&file->async_file.lock, flags);
-       list_add_tail(&entry->list, &file->async_file.event_list);
+       list_add_tail(&entry->list, &file->async_file->event_list);
        if (obj_list)
                list_add_tail(&entry->obj_list, obj_list);
-       spin_unlock_irqrestore(&file->async_file.lock, flags);
+       spin_unlock_irqrestore(&file->async_file->lock, flags);
 
-       wake_up_interruptible(&file->async_file.poll_wait);
-       kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN);
+       wake_up_interruptible(&file->async_file->poll_wait);
+       kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
 }
 
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 {
+       struct ib_uverbs_event_file *ev_file = context_ptr;
        struct ib_ucq_object *uobj;
 
        uobj = container_of(event->element.cq->uobject,
                            struct ib_ucq_object, uobject);
 
-       ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+       ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle,
                                event->event, &uobj->async_list,
                                &uobj->async_events_reported);
                                
@@ -389,8 +415,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
                                &uobj->events_reported);
 }
 
-static void ib_uverbs_event_handler(struct ib_event_handler *handler,
-                                   struct ib_event *event)
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+                            struct ib_event *event)
 {
        struct ib_uverbs_file *file =
                container_of(handler, struct ib_uverbs_file, event_handler);
@@ -399,38 +425,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler,
                                NULL, NULL);
 }
 
-static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
-                               struct ib_uverbs_file *uverbs_file)
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+                                       int is_async, int *fd)
 {
+       struct ib_uverbs_event_file *ev_file;
        struct file *filp;
+       int ret;
 
-       spin_lock_init(&file->lock);
-       INIT_LIST_HEAD(&file->event_list);
-       init_waitqueue_head(&file->poll_wait);
-       file->uverbs_file = uverbs_file;
-       file->async_queue = NULL;
-
-       file->fd = get_unused_fd();
-       if (file->fd < 0)
-               return file->fd;
+       ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+       if (!ev_file)
+               return ERR_PTR(-ENOMEM);
+
+       kref_init(&ev_file->ref);
+       spin_lock_init(&ev_file->lock);
+       INIT_LIST_HEAD(&ev_file->event_list);
+       init_waitqueue_head(&ev_file->poll_wait);
+       ev_file->uverbs_file = uverbs_file;
+       ev_file->async_queue = NULL;
+       ev_file->is_async    = is_async;
+
+       *fd = get_unused_fd();
+       if (*fd < 0) {
+               ret = *fd;
+               goto err;
+       }
 
        filp = get_empty_filp();
        if (!filp) {
-               put_unused_fd(file->fd);
-               return -ENFILE;
+               ret = -ENFILE;
+               goto err_fd;
        }
 
-       filp->f_op         = &uverbs_event_fops;
+       ev_file->file      = filp;
+
+       /*
+        * fops_get() can't fail here, because we're coming from a
+        * system call on a uverbs file, which will already have a
+        * module reference.
+        */
+       filp->f_op         = fops_get(&uverbs_event_fops);
        filp->f_vfsmnt     = mntget(uverbs_event_mnt);
        filp->f_dentry     = dget(uverbs_event_mnt->mnt_root);
        filp->f_mapping    = filp->f_dentry->d_inode->i_mapping;
        filp->f_flags      = O_RDONLY;
        filp->f_mode       = FMODE_READ;
-       filp->private_data = file;
+       filp->private_data = ev_file;
 
-       fd_install(file->fd, filp);
+       return filp;
 
-       return 0;
+err_fd:
+       put_unused_fd(*fd);
+
+err:
+       kfree(ev_file);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Resolve a file descriptor to a completion event file.
+ *
+ * The descriptor must refer to a file using uverbs_event_fops whose
+ * event file is not the async one.  On a match, a reference is taken
+ * on the event file struct with kref_get() and the struct is returned;
+ * in every other case NULL is returned.  The struct file reference
+ * obtained from fget() is dropped again before returning.
+ */
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+{
+       struct ib_uverbs_event_file *found = NULL;
+       struct ib_uverbs_event_file *candidate;
+       struct file *filp = fget(fd);
+
+       if (!filp)
+               return NULL;
+
+       if (filp->f_op == &uverbs_event_fops) {
+               candidate = filp->private_data;
+               if (!candidate->is_async) {
+                       kref_get(&candidate->ref);
+                       found = candidate;
+               }
+       }
+
+       fput(filp);
+       return found;
 }
 
 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
@@ -450,11 +528,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 
        if (hdr.command < 0                             ||
            hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-           !uverbs_cmd_table[hdr.command])
+           !uverbs_cmd_table[hdr.command]              ||
+           !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
                return -EINVAL;
 
-       if (!file->ucontext                               &&
-           hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+       if (!file->ucontext &&
            hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
                return -EINVAL;
 
@@ -477,82 +555,33 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
        struct ib_uverbs_device *dev =
                container_of(inode->i_cdev, struct ib_uverbs_device, dev);
        struct ib_uverbs_file *file;
-       int i = 0;
-       int ret;
 
        if (!try_module_get(dev->ib_dev->owner))
                return -ENODEV;
 
-       file = kmalloc(sizeof *file +
-                      (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
-                      GFP_KERNEL);
+       file = kmalloc(sizeof *file, GFP_KERNEL);
        if (!file) {
-               ret = -ENOMEM;
-               goto err;
+               module_put(dev->ib_dev->owner);
+               return -ENOMEM;
        }
 
-       file->device = dev;
+       file->device   = dev;
+       file->ucontext = NULL;
        kref_init(&file->ref);
        init_MUTEX(&file->mutex);
 
-       file->ucontext = NULL;
-
-       kref_get(&file->ref);
-       ret = ib_uverbs_event_init(&file->async_file, file);
-       if (ret)
-               goto err_kref;
-
-       file->async_file.is_async = 1;
-
-       for (i = 0; i < dev->num_comp; ++i) {
-               kref_get(&file->ref);
-               ret = ib_uverbs_event_init(&file->comp_file[i], file);
-               if (ret)
-                       goto err_async;
-               file->comp_file[i].is_async = 0;
-       }
-
-
        filp->private_data = file;
 
-       INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
-                             ib_uverbs_event_handler);
-       if (ib_register_event_handler(&file->event_handler))
-               goto err_async;
-
        return 0;
-
-err_async:
-       while (i--)
-               ib_uverbs_event_release(&file->comp_file[i]);
-
-       ib_uverbs_event_release(&file->async_file);
-
-err_kref:
-       /*
-        * One extra kref_put() because we took a reference before the
-        * event file creation that failed and got us here.
-        */
-       kref_put(&file->ref, ib_uverbs_release_file);
-       kref_put(&file->ref, ib_uverbs_release_file);
-
-err:
-       module_put(dev->ib_dev->owner);
-       return ret;
 }
 
 static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_file *file = filp->private_data;
-       int i;
 
-       ib_unregister_event_handler(&file->event_handler);
-       ib_uverbs_event_release(&file->async_file);
        ib_dealloc_ucontext(file->ucontext);
 
-       for (i = 0; i < file->device->num_comp; ++i)
-               ib_uverbs_event_release(&file->comp_file[i]);
-
+       kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
        kref_put(&file->ref, ib_uverbs_release_file);
 
        return 0;
@@ -588,6 +617,15 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 }
 static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
+static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
+{
+       struct ib_uverbs_device *dev =
+               container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+       return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+}
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+
 static void ib_uverbs_release_class_dev(struct class_device *class_dev)
 {
        struct ib_uverbs_device *dev =
@@ -631,8 +669,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
        set_bit(uverbs_dev->devnum, dev_map);
        spin_unlock(&map_lock);
 
-       uverbs_dev->ib_dev   = device;
-       uverbs_dev->num_comp = 1;
+       uverbs_dev->ib_dev           = device;
+       uverbs_dev->num_comp_vectors = 1;
 
        if (device->mmap)
                cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
@@ -652,6 +690,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
 
        if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
                goto err_class;
+       if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_abi_version))
+               goto err_class;
 
        ib_set_client_data(device, &uverbs_client, uverbs_dev);
 
index 5081d903e5617d00e602da1a89731a555984598a..72d3ef786db50acf3194612b43bbe3910b5cc955 100644 (file)
@@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
 
 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
-       return qp->device->attach_mcast ?
-               qp->device->attach_mcast(qp, gid, lid) :
-               -ENOSYS;
+       if (!qp->device->attach_mcast)
+               return -ENOSYS;
+       if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+               return -EINVAL;
+
+       return qp->device->attach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_attach_mcast);
 
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
-       return qp->device->detach_mcast ?
-               qp->device->detach_mcast(qp, gid, lid) :
-               -ENOSYS;
+       if (!qp->device->detach_mcast)
+               return -ENOSYS;
+       if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+               return -EINVAL;
+
+       return qp->device->detach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_detach_mcast);
index f6a8ac026557db5639d446bf1188cf9d73b22739..1bd7dc8f778c47c0372946cc547e90e391b352e5 100644 (file)
@@ -933,9 +933,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
                goto out;
 
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
-       dev_lim->max_srq_sz = 1 << field;
+       dev_lim->max_srq_sz = (1 << field) - 1;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
-       dev_lim->max_qp_sz = 1 << field;
+       dev_lim->max_qp_sz = (1 << field) - 1;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
        dev_lim->reserved_qps = 1 << (field & 0xf);
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1045,6 +1045,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
                  dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
        mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
                  dev_lim->max_pds, dev_lim->reserved_mgms);
+       mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+                 dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
 
        mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
 
index 7bff5a8425f4e5c5fbd7eb634a78b13eb8a3b88f..f106bac0f92555b38bb3043d9fcb773a124a2ae2 100644 (file)
@@ -83,6 +83,8 @@ enum {
        /* Arbel FW gives us these, but we need them for Tavor */
        MTHCA_MPT_ENTRY_SIZE  =  0x40,
        MTHCA_MTT_SEG_SIZE    =  0x40,
+
+       MTHCA_QP_PER_MGM      = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
 };
 
 enum {
@@ -128,12 +130,16 @@ struct mthca_limits {
        int      num_uars;
        int      max_sg;
        int      num_qps;
+       int      max_wqes;
+       int      max_qp_init_rdma;
        int      reserved_qps;
        int      num_srqs;
+       int      max_srq_wqes;
        int      reserved_srqs;
        int      num_eecs;
        int      reserved_eecs;
        int      num_cqs;
+       int      max_cqes;
        int      reserved_cqs;
        int      num_eqs;
        int      reserved_eqs;
@@ -148,6 +154,7 @@ struct mthca_limits {
        int      reserved_mcgs;
        int      num_pds;
        int      reserved_pds;
+       u32      flags;
        u8       port_width_cap;
 };
 
@@ -447,6 +454,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
                    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+                    enum ib_srq_attr_mask attr_mask);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
                     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
index 8dfafda5ed241c9c4269877704981f21f020673d..e5a047a6dbeb85fdd0a9c65dd37aad69ae04030c 100644 (file)
@@ -83,7 +83,8 @@ enum {
        MTHCA_EVENT_TYPE_PATH_MIG           = 0x01,
        MTHCA_EVENT_TYPE_COMM_EST           = 0x02,
        MTHCA_EVENT_TYPE_SQ_DRAINED         = 0x03,
-       MTHCA_EVENT_TYPE_SRQ_LAST_WQE       = 0x13,
+       MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE    = 0x13,
+       MTHCA_EVENT_TYPE_SRQ_LIMIT          = 0x14,
        MTHCA_EVENT_TYPE_CQ_ERROR           = 0x04,
        MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
        MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
@@ -110,8 +111,9 @@ enum {
                                (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
                                (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
                                (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
-#define MTHCA_SRQ_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
-                               (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
+                               (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
+                               (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
 #define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
 
 #define MTHCA_EQ_DB_INC_CI     (1 << 24)
@@ -141,6 +143,9 @@ struct mthca_eqe {
                struct {
                        __be32 qpn;
                } __attribute__((packed)) qp;
+               struct {
+                       __be32 srqn;
+               } __attribute__((packed)) srq;
                struct {
                        __be32 cqn;
                        u32    reserved1;
@@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
                                       IB_EVENT_SQ_DRAINED);
                        break;
 
+               case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+                       mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+                                      IB_EVENT_QP_LAST_WQE_REACHED);
+                       break;
+
+               case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+                       mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+                                       IB_EVENT_SRQ_LIMIT_REACHED);
+                       break;
+
                case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
                        mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
                                       IB_EVENT_QP_FATAL);
index 23a3f56c78995ee8a21078544da3ab9b1bbda22a..883d1e5a79bc4270892b9a4cfdb5a83c3d841cd8 100644 (file)
@@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
        mdev->limits.pkey_table_len     = dev_lim->max_pkeys;
        mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
        mdev->limits.max_sg             = dev_lim->max_sg;
+       mdev->limits.max_wqes           = dev_lim->max_qp_sz;
+       mdev->limits.max_qp_init_rdma   = dev_lim->max_requester_per_qp;
        mdev->limits.reserved_qps       = dev_lim->reserved_qps;
+       mdev->limits.max_srq_wqes       = dev_lim->max_srq_sz;
        mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
        mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
+       /*
+        * Subtract 1 from the limit because we need to allocate a
+        * spare CQE so the HCA HW can tell the difference between an
+        * empty CQ and a full CQ.
+        */
+       mdev->limits.max_cqes           = dev_lim->max_cq_sz - 1;
        mdev->limits.reserved_cqs       = dev_lim->reserved_cqs;
        mdev->limits.reserved_eqs       = dev_lim->reserved_eqs;
        mdev->limits.reserved_mtts      = dev_lim->reserved_mtts;
@@ -172,6 +181,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
        mdev->limits.reserved_uars      = dev_lim->reserved_uars;
        mdev->limits.reserved_pds       = dev_lim->reserved_pds;
        mdev->limits.port_width_cap     = dev_lim->max_port_width;
+       mdev->limits.flags              = dev_lim->flags;
 
        /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
           May be doable since hardware supports it for SRQ.
@@ -1186,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table);
 
 static struct pci_driver mthca_driver = {
        .name           = DRV_NAME,
+       .owner          = THIS_MODULE,
        .id_table       = mthca_pci_table,
        .probe          = mthca_init_one,
        .remove         = __devexit_p(mthca_remove_one)
index a2707605f4c8ba3319644452f2deb48004e001e9..b47ea7daf0886cdf2eeb41e0d3c9824ef3b5e1b6 100644 (file)
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
 
-enum {
-       MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
-};
-
 struct mthca_mgm {
        __be32 next_gid_index;
        u32    reserved[3];
@@ -189,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
        for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
-               if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+               if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+                       mthca_dbg(dev, "QP %06x already a member of MGM\n", 
+                                 ibqp->qp_num);
+                       err = 0;
+                       goto out;
+               } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
                        mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
                        break;
                }
index 7bd7a4bec7b433f2e533602681a75d7e06553940..d63b1a14710060527031077b516e2d4377ff86c0 100644 (file)
@@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
        }
 }
 
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+                  u32 qn, __be32 **db)
 {
        int group;
        int start, end, dir;
index bafa51544aa39db8db19cf2626361550fec7ea89..29c01a4b26568a46b912f39bcb9d55ba35ef29c7 100644 (file)
@@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
 
 int mthca_init_db_tab(struct mthca_dev *dev);
 void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+                  u32 qn, __be32 **db);
 void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
 
 #endif /* MTHCA_MEMFREE_H */
index 3f5319a46577022378e1142ff2dbaa13bd42a71a..9e911a1ea4157310645e011fc514be8fd433b657 100644 (file)
@@ -37,6 +37,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
 #include "mthca_dev.h"
@@ -90,15 +91,26 @@ static int mthca_query_device(struct ib_device *ibdev,
 
        props->max_mr_size         = ~0ull;
        props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
-       props->max_qp_wr           = 0xffff;
+       props->max_qp_wr           = mdev->limits.max_wqes;
        props->max_sge             = mdev->limits.max_sg;
        props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
-       props->max_cqe             = 0xffff;
+       props->max_cqe             = mdev->limits.max_cqes;
        props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
        props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
        props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
-       props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+       props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+       props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
+       props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+       props->max_srq_wr          = mdev->limits.max_srq_wqes;
+       props->max_srq_sge         = mdev->limits.max_sg;
        props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
+       props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 
+                                       IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+       props->max_pkeys           = mdev->limits.pkey_table_len;
+       props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
+       props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+       props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 
+                                          props->max_mcast_grp;
 
        err = 0;
  out:
@@ -150,9 +162,13 @@ static int mthca_query_port(struct ib_device *ibdev,
        props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
        props->max_msg_sz        = 0x80000000;
        props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
+       props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width      = out_mad->data[31] & 0xf;
        props->active_speed      = out_mad->data[35] >> 4;
+       props->max_mtu           = out_mad->data[41] & 0xf;
+       props->active_mtu        = out_mad->data[36] >> 4;
+       props->subnet_timeout    = out_mad->data[51] & 0x1f;
 
  out:
        kfree(in_mad);
@@ -634,6 +650,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
        int nent;
        int err;
 
+       if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+               return ERR_PTR(-EINVAL);
+
        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                        return ERR_PTR(-EFAULT);
@@ -1058,6 +1077,26 @@ int mthca_register_device(struct mthca_dev *dev)
        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner                = THIS_MODULE;
 
+       dev->ib_dev.uverbs_abi_ver       = MTHCA_UVERBS_ABI_VERSION;
+       dev->ib_dev.uverbs_cmd_mask      =
+               (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+               (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+               (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+               (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+               (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+               (1ull << IB_USER_VERBS_CMD_REG_MR)              |
+               (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+               (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+               (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+               (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+               (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+               (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+               (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
+               (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
+               (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+               (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
        dev->ib_dev.node_type            = IB_NODE_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
@@ -1077,6 +1116,7 @@ int mthca_register_device(struct mthca_dev *dev)
 
        if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
                dev->ib_dev.create_srq           = mthca_create_srq;
+               dev->ib_dev.modify_srq           = mthca_modify_srq;
                dev->ib_dev.destroy_srq          = mthca_destroy_srq;
 
                if (mthca_is_memfree(dev))
index 5fa00669f9b8c43d59e9dd70fe1469d707ceb600..2ee0a2b0fd4bea99c603ad3fb9871e0d090e787f 100644 (file)
@@ -1112,8 +1112,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
                             struct mthca_qp *qp)
 {
        /* Sanity check QP size before proceeding */
-       if (cap->max_send_wr  > 65536 || cap->max_recv_wr  > 65536 ||
-           cap->max_send_sge > 64    || cap->max_recv_sge > 64)
+       if (cap->max_send_wr  > dev->limits.max_wqes ||
+           cap->max_recv_wr  > dev->limits.max_wqes ||
+           cap->max_send_sge > dev->limits.max_sg   ||
+           cap->max_recv_sge > dev->limits.max_sg)
                return -EINVAL;
 
        if (mthca_is_memfree(dev)) {
index 18998d48c53ec39fc3c21fba93a9903b71853f79..64f70aa1b3c09a704cc8885ca622445ed099f1c1 100644 (file)
@@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
        int err;
 
        /* Sanity check SRQ size before proceeding */
-       if (attr->max_wr > 16 << 20 || attr->max_sge > 64)
+       if (attr->max_wr  > dev->limits.max_srq_wqes ||
+           attr->max_sge > dev->limits.max_sg)
                return -EINVAL;
 
        srq->max      = attr->max_wr;
@@ -332,6 +333,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
        mthca_free_mailbox(dev, mailbox);
 }
 
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+                    enum ib_srq_attr_mask attr_mask)
+{      
+       struct mthca_dev *dev = to_mdev(ibsrq->device);
+       struct mthca_srq *srq = to_msrq(ibsrq);
+       int ret;
+       u8 status;
+
+       /* We don't support resizing SRQs (yet?) */
+       if (attr_mask & IB_SRQ_MAX_WR)
+               return -EINVAL;
+
+       if (attr_mask & IB_SRQ_LIMIT) {
+               ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+               if (ret)
+                       return ret;
+               if (status)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
                     enum ib_event_type event_type)
 {
@@ -354,7 +378,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 
        event.device      = &dev->ib_dev;
        event.event       = event_type;
-       event.element.srq  = &srq->ibsrq;
+       event.element.srq = &srq->ibsrq;
        srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
 
 out:
@@ -415,6 +439,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 
                wqe       = get_wqe(srq, ind);
                next_ind  = *wqe_to_link(wqe);
+
+               if (next_ind < 0) {
+                       mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
                prev_wqe  = srq->last;
                srq->last = wqe;
 
@@ -506,6 +538,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                wqe       = get_wqe(srq, ind);
                next_ind  = *wqe_to_link(wqe);
 
+               if (next_ind < 0) {
+                       mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+                       err = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+
                ((struct mthca_next_seg *) wqe)->nda_op =
                        cpu_to_be32((next_ind << srq->wqe_shift) | 1);
                ((struct mthca_next_seg *) wqe)->ee_nds = 0;
index 41613ec8a04e63c92fdfdbfe5d82e897c63c6a8b..bb015c6494c4e0f37d1bf40f963c3172ca3bac49 100644 (file)
 
 #include <linux/types.h>
 
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION       1
+
 /*
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
index 4ea1c1ca85bc965fa330d6b1a9e24161457fa817..6b14bd1c60a034f6172a06ef3cfe0f81703f7994 100644 (file)
@@ -277,7 +277,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
 int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
                       union ib_gid *mgid);
 
-int ipoib_qp_create(struct net_device *dev);
+int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
 void ipoib_transport_dev_cleanup(struct net_device *dev);
 
index f7440096b5ed0355a41252c50c93f5290b1810df..02d0e000657804da8ca276d30c0a1f8a23c3e7ac 100644 (file)
@@ -387,9 +387,9 @@ int ipoib_ib_dev_open(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
 
-       ret = ipoib_qp_create(dev);
+       ret = ipoib_init_qp(dev);
        if (ret) {
-               ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret);
+               ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
                return -1;
        }
 
index 6c5bf07489f4a47bb4dab5ee527ad09c9e50c87c..ee303859b044716dbec66813356377ba674a041c 100644 (file)
@@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       ipoib_warn(priv, "transmit timeout: latency %ld\n",
-                  jiffies - dev->trans_start);
+       ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+                  jiffies_to_msecs(jiffies - dev->trans_start));
+       ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+                  netif_queue_stopped(dev),
+                  priv->tx_head, priv->tx_tail);
        /* XXX reset QP, etc. */
 }
 
index 79f59d0563edc04b04a1e9d211ce6f6bbfa4c538..b5902a7ec240a59f61d897bd9980511a58a4b417 100644 (file)
@@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
        return ret;
 }
 
-int ipoib_qp_create(struct net_device *dev)
+int ipoib_init_qp(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
@@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev)
        return 0;
 
 out_fail:
-       ib_destroy_qp(priv->qp);
-       priv->qp = NULL;
+       qp_attr.qp_state = IB_QPS_RESET;
+       if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+               ipoib_warn(priv, "Failed to modify QP to RESET state\n");
 
-       return -EINVAL;
+       return ret;
 }
 
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
index 5308683c8c41a792f4e02c353056f2b69fab974d..0a9fcd59eb430966031b7d2aa4bde4b0a94ffb1f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -109,7 +109,6 @@ struct ib_cm_id;
 
 struct ib_cm_req_event_param {
        struct ib_cm_id         *listen_id;
-       struct ib_device        *device;
        u8                      port;
 
        struct ib_sa_path_rec   *primary_path;
@@ -220,7 +219,6 @@ struct ib_cm_apr_event_param {
 
 struct ib_cm_sidr_req_event_param {
        struct ib_cm_id         *listen_id;
-       struct ib_device        *device;
        u8                      port;
        u16                     pkey;
 };
@@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
 struct ib_cm_id {
        ib_cm_handler           cm_handler;
        void                    *context;
+       struct ib_device        *device;
        __be64                  service_id;
        __be64                  service_mask;
        enum ib_cm_state        state;          /* internal CM/debug use */
@@ -295,6 +294,8 @@ struct ib_cm_id {
 
 /**
  * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
  * @cm_handler: Callback invoked to notify the user of CM events.
  * @context: User specified context associated with the communication
  *   identifier.
@@ -302,7 +303,8 @@ struct ib_cm_id {
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+                                ib_cm_handler cm_handler,
                                 void *context);
 
 /**
index e4d1654276ad95b59d6a92f6db365b2836f18117..3037588b84644489b94846f0a85270b5ff16f766 100644 (file)
@@ -38,7 +38,7 @@
 
 #include <linux/types.h>
 
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_ABI_VERSION 3
 
 enum {
        IB_USER_CM_CMD_CREATE_ID,
@@ -299,8 +299,6 @@ struct ib_ucm_event_get {
 };
 
 struct ib_ucm_req_event_resp {
-       /* device */
-       /* port */
        struct ib_ucm_path_rec primary_path;
        struct ib_ucm_path_rec alternate_path;
        __be64                 remote_ca_guid;
@@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp {
        __u8  retry_count;
        __u8  rnr_retry_count;
        __u8  srq;
+       __u8  port;
 };
 
 struct ib_ucm_rep_event_resp {
@@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp {
 };
 
 struct ib_ucm_sidr_req_event_resp {
-       /* device */
-       /* port */
        __u16 pkey;
-       __u8  reserved[2];
+       __u8  port;
+       __u8  reserved;
 };
 
 struct ib_ucm_sidr_rep_event_resp {
index fd85725391a4e80a1267ffbf57190c0e356ace02..072f3a2edacece06258ed207dd360b78226f2367 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_VERBS_ABI_VERSION      2
+#define IB_USER_VERBS_ABI_VERSION      3
 
 enum {
-       IB_USER_VERBS_CMD_QUERY_PARAMS,
        IB_USER_VERBS_CMD_GET_CONTEXT,
        IB_USER_VERBS_CMD_QUERY_DEVICE,
        IB_USER_VERBS_CMD_QUERY_PORT,
-       IB_USER_VERBS_CMD_QUERY_GID,
-       IB_USER_VERBS_CMD_QUERY_PKEY,
        IB_USER_VERBS_CMD_ALLOC_PD,
        IB_USER_VERBS_CMD_DEALLOC_PD,
        IB_USER_VERBS_CMD_CREATE_AH,
@@ -65,6 +63,7 @@ enum {
        IB_USER_VERBS_CMD_ALLOC_MW,
        IB_USER_VERBS_CMD_BIND_MW,
        IB_USER_VERBS_CMD_DEALLOC_MW,
+       IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
        IB_USER_VERBS_CMD_CREATE_CQ,
        IB_USER_VERBS_CMD_RESIZE_CQ,
        IB_USER_VERBS_CMD_DESTROY_CQ,
@@ -90,8 +89,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ib_uverbs_async_event_desc {
@@ -118,27 +120,14 @@ struct ib_uverbs_cmd_hdr {
        __u16 out_words;
 };
 
-/*
- * No driver_data for "query params" command, since this is intended
- * to be a core function with no possible device dependence.
- */
-struct ib_uverbs_query_params {
-       __u64 response;
-};
-
-struct ib_uverbs_query_params_resp {
-       __u32 num_cq_events;
-};
-
 struct ib_uverbs_get_context {
        __u64 response;
-       __u64 cq_fd_tab;
        __u64 driver_data[0];
 };
 
 struct ib_uverbs_get_context_resp {
        __u32 async_fd;
-       __u32 reserved;
+       __u32 num_comp_vectors; /* second __u32: the struct is 8 bytes in total */
 };
 
 struct ib_uverbs_query_device {
@@ -220,31 +209,6 @@ struct ib_uverbs_query_port_resp {
        __u8  reserved[3];
 };
 
-struct ib_uverbs_query_gid {
-       __u64 response;
-       __u8  port_num;
-       __u8  index;
-       __u8  reserved[6];
-       __u64 driver_data[0];
-};
-
-struct ib_uverbs_query_gid_resp {
-       __u8  gid[16];
-};
-
-struct ib_uverbs_query_pkey {
-       __u64 response;
-       __u8  port_num;
-       __u8  index;
-       __u8  reserved[6];
-       __u64 driver_data[0];
-};
-
-struct ib_uverbs_query_pkey_resp {
-       __u16 pkey;
-       __u16 reserved;
-};
-
 struct ib_uverbs_alloc_pd {
        __u64 response;
        __u64 driver_data[0];
@@ -278,11 +242,21 @@ struct ib_uverbs_dereg_mr {
        __u32 mr_handle;
 };
 
+struct ib_uverbs_create_comp_channel { /* 8 bytes; NOTE(review): presumably the input for IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL -- confirm */
+       __u64 response; /* __u64 rather than a pointer type; presumably the user address of the _resp struct -- confirm */
+};
+
+struct ib_uverbs_create_comp_channel_resp { /* 4 bytes: not larger than 4, so no padding to 8 is required */
+       __u32 fd;
+};
+
 struct ib_uverbs_create_cq {
        __u64 response;
        __u64 user_handle;
        __u32 cqe;
-       __u32 event_handler;
+       __u32 comp_vector;
+       __s32 comp_channel; /* signed, unlike its neighbours; NOTE(review): presumably a channel fd where a negative value means "none" -- confirm */
+       __u32 reserved; /* pads the fixed part to 32 bytes (a multiple of 8) ahead of driver_data */
        __u64 driver_data[0];
 };
 
@@ -291,6 +265,41 @@ struct ib_uverbs_create_cq_resp {
        __u32 cqe;
 };
 
+struct ib_uverbs_poll_cq { /* 16 bytes (8 + 4 + 4); no explicit padding needed */
+       __u64 response;
+       __u32 cq_handle;
+       __u32 ne; /* NOTE(review): presumably the maximum number of entries to return -- confirm */
+};
+
+struct ib_uverbs_wc { /* 48 bytes; element type of ib_uverbs_poll_cq_resp.wc[] */
+       __u64 wr_id;
+       __u32 status;
+       __u32 opcode;
+       __u32 vendor_err;
+       __u32 byte_len;
+       __u32 imm_data;
+       __u32 qp_num;
+       __u32 src_qp;
+       __u32 wc_flags;
+       __u16 pkey_index;
+       __u16 slid;
+       __u8 sl;
+       __u8 dlid_path_bits;
+       __u8 port_num;
+       __u8 reserved; /* fourth __u8: rounds the struct up to 48 bytes (a multiple of 8) */
+};
+
+struct ib_uverbs_poll_cq_resp { /* 8-byte fixed header followed by wc[] entries */
+       __u32 count; /* NOTE(review): presumably the number of wc[] entries that follow -- confirm */
+       __u32 reserved; /* pads the header to 8 bytes so wc[] starts on an 8-byte boundary */
+       struct ib_uverbs_wc wc[0]; /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ib_uverbs_req_notify_cq { /* 8 bytes; unlike most commands in this file it carries no response field */
+       __u32 cq_handle;
+       __u32 solicited_only;
+};
+
 struct ib_uverbs_destroy_cq {
        __u64 response;
        __u32 cq_handle;
@@ -388,6 +397,127 @@ struct ib_uverbs_destroy_qp_resp {
        __u32 events_reported;
 };
 
+/*
+ * The ib_uverbs_sge structure isn't used anywhere, since we assume
+ * the ib_sge structure is packed the same way on 32-bit and 64-bit
+ * architectures in both kernel and user space.  It's just here to
+ * document the ABI.
+ */
+struct ib_uverbs_sge { /* 16 bytes (8 + 4 + 4) */
+       __u64 addr;
+       __u32 length;
+       __u32 lkey;
+};
+
+struct ib_uverbs_send_wr { /* 56 bytes: 24-byte header plus the 32-byte wr union */
+       __u64 wr_id; /* NOTE(review): presumably echoed back as ib_uverbs_wc.wr_id on completion -- confirm */
+       __u32 num_sge;
+       __u32 opcode;
+       __u32 send_flags;
+       __u32 imm_data;
+       union { /* sized by its largest member, atomic (32 bytes) */
+               struct { /* 16 bytes */
+                       __u64 remote_addr;
+                       __u32 rkey;
+                       __u32 reserved; /* pads this member to a multiple of 8 bytes */
+               } rdma;
+               struct { /* 32 bytes */
+                       __u64 remote_addr;
+                       __u64 compare_add;
+                       __u64 swap;
+                       __u32 rkey;
+                       __u32 reserved; /* pads this member to a multiple of 8 bytes */
+               } atomic;
+               struct { /* 16 bytes */
+                       __u32 ah;
+                       __u32 remote_qpn;
+                       __u32 remote_qkey;
+                       __u32 reserved; /* pads this member to a multiple of 8 bytes */
+               } ud;
+       } wr; /* NOTE(review): the valid member is presumably selected by opcode -- confirm */
+};
+
+struct ib_uverbs_post_send { /* 24-byte fixed header followed by send_wr[] entries */
+       __u64 response;
+       __u32 qp_handle;
+       __u32 wr_count; /* NOTE(review): presumably the number of send_wr[] entries -- confirm */
+       __u32 sge_count;
+       __u32 wqe_size; /* NOTE(review): presumably the byte size of one send_wr[] entry -- confirm */
+       struct ib_uverbs_send_wr send_wr[0]; /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ib_uverbs_post_send_resp { /* 4 bytes: not larger than 4, so no padding to 8 is required */
+       __u32 bad_wr;
+};
+
+struct ib_uverbs_recv_wr { /* 16 bytes; element type of the recv_wr[] / recv[] trailing arrays */
+       __u64 wr_id;
+       __u32 num_sge;
+       __u32 reserved; /* pads the struct to 16 bytes (a multiple of 8) */
+};
+
+struct ib_uverbs_post_recv { /* 24-byte fixed header followed by recv_wr[] entries */
+       __u64 response;
+       __u32 qp_handle;
+       __u32 wr_count; /* NOTE(review): presumably the number of recv_wr[] entries -- confirm */
+       __u32 sge_count;
+       __u32 wqe_size; /* NOTE(review): presumably the byte size of one recv_wr[] entry -- confirm */
+       struct ib_uverbs_recv_wr recv_wr[0]; /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ib_uverbs_post_recv_resp { /* 4 bytes: not larger than 4, so no padding to 8 is required */
+       __u32 bad_wr;
+};
+
+struct ib_uverbs_post_srq_recv { /* same layout as ib_uverbs_post_recv, with srq_handle in place of qp_handle */
+       __u64 response;
+       __u32 srq_handle;
+       __u32 wr_count;
+       __u32 sge_count;
+       __u32 wqe_size;
+       struct ib_uverbs_recv_wr recv[0]; /* named recv here, recv_wr in ib_uverbs_post_recv */
+};
+
+struct ib_uverbs_post_srq_recv_resp { /* 4 bytes: not larger than 4, so no padding to 8 is required */
+       __u32 bad_wr;
+};
+
+struct ib_uverbs_global_route { /* 24 bytes; embedded as ib_uverbs_ah_attr.grh */
+       __u8  dgid[16];
+       __u32 flow_label;    /* sits at offset 16, directly after the 16-byte dgid */
+       __u8  sgid_index;
+       __u8  hop_limit;
+       __u8  traffic_class;
+       __u8  reserved; /* fourth __u8: rounds the struct up to 24 bytes (a multiple of 8) */
+};
+
+struct ib_uverbs_ah_attr { /* 32 bytes; embedded as ib_uverbs_create_ah.attr */
+       struct ib_uverbs_global_route grh; /* 24 bytes */
+       __u16 dlid;
+       __u8  sl;
+       __u8  src_path_bits;
+       __u8  static_rate;
+       __u8  is_global;
+       __u8  port_num;
+       __u8  reserved; /* rounds the struct up to 32 bytes (a multiple of 8) */
+};
+
+struct ib_uverbs_create_ah { /* 56 bytes: 24-byte header plus the 32-byte attr */
+       __u64 response;
+       __u64 user_handle;
+       __u32 pd_handle;
+       __u32 reserved; /* explicit pad: without it the fields sum to 52 bytes, not a multiple of 8 */
+       struct ib_uverbs_ah_attr attr;
+};
+
+struct ib_uverbs_create_ah_resp { /* 4 bytes: not larger than 4, so no padding to 8 is required */
+       __u32 ah_handle;
+};
+
+struct ib_uverbs_destroy_ah { /* 4 bytes; carries no response field */
+       __u32 ah_handle;
+};
+
 struct ib_uverbs_attach_mcast {
        __u8  gid[16];
        __u32 qp_handle;
index e6f4c9e55df7b21d7ff0ccc6408b346594d314eb..a5a963cb56762568ef3fa0ac3998809ee8d5e1a7 100644 (file)
@@ -951,6 +951,9 @@ struct ib_device {
                IB_DEV_UNREGISTERED
        }                            reg_state;
 
+       u64                          uverbs_cmd_mask;
+       int                          uverbs_abi_ver;
+
        u8                           node_type;
        u8                           phys_port_cnt;
 };