Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/eq.c')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 299
1 file changed, 264 insertions(+), 35 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 8ffcc8808e50..ea5d8d37a75c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -54,6 +54,7 @@ enum {
MLX5_NUM_SPARE_EQE = 0x80,
MLX5_NUM_ASYNC_EQE = 0x100,
MLX5_NUM_CMD_EQE = 32,
+ MLX5_NUM_PF_DRAIN = 64,
};
enum {
@@ -153,6 +154,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_PAGE_REQUEST";
case MLX5_EVENT_TYPE_PAGE_FAULT:
return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
default:
return "Unrecognized event";
}
@@ -188,10 +191,193 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm)
mb();
}
-static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static void eqe_pf_action(struct work_struct *work)
+{
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
+ work);
+ struct mlx5_eq *eq = pfault->eq;
+
+ mlx5_core_page_fault(eq->dev, pfault);
+ mempool_free(pfault, eq->pf_ctx.pool);
+}
+
+static void eq_pf_process(struct mlx5_eq *eq)
+{
+ struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int set_ci = 0;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->pf_ctx.work);
+ break;
+ }
+
+ dma_rmb();
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+ pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+ eqe->sub_type, pfault->bytes_committed);
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ pfault->type, pfault->token,
+ pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ default:
+ mlx5_core_warn(dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, eqe_pf_action);
+ queue_work(eq->pf_ctx.wq, &pfault->work);
+
+ ++eq->cons_index;
+ ++set_ci;
+
+ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+ eq_update_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_update_ci(eq, 1);
+}
+
+static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
+{
+ struct mlx5_eq *eq = eq_ptr;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
+ eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
+ } else {
+ schedule_work(&eq->pf_ctx.work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
+
+ mempool_refill(eq->pf_ctx.pool);
+
+ spin_lock_irq(&eq->pf_ctx.lock);
+ eq_pf_process(eq);
+ spin_unlock_irq(&eq->pf_ctx.lock);
+}
+
+static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
+{
+ spin_lock_init(&pf_ctx->lock);
+ INIT_WORK(&pf_ctx->work, eq_pf_action);
+
+ pf_ctx->wq = alloc_ordered_workqueue(name,
+ WQ_MEM_RECLAIM);
+ if (!pf_ctx->wq)
+ return -ENOMEM;
+
+ pf_ctx->pool = mempool_create_kmalloc_pool
+ (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
+ if (!pf_ctx->pool)
+ goto err_wq;
+
+ return 0;
+err_wq:
+ destroy_workqueue(pf_ctx->wq);
+ return -ENOMEM;
+}
+
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
+ u32 wq_num, u8 type, int error)
+{
+ u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
+
+ MLX5_SET(page_fault_resume_in, in, opcode,
+ MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, token, token);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif
+
+static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
+ struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_core_dev *dev = eq->dev;
struct mlx5_eqe *eqe;
- int eqes_found = 0;
int set_ci = 0;
u32 cqn = -1;
u32 rsn;
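
The pf_ctx fields and the struct mlx5_pagefault members used in the hunk above are defined in the mlx5 headers touched elsewhere in this series. As a rough orientation only, here is a sketch of both, with fields inferred from their usage in eq_pf_process() and eqe_pf_action(); treat it as an assumption, not the authoritative header.

/* Sketch of the per-EQ page-fault context assumed above; field names are
 * inferred from this hunk, the real definition lives in the mlx5 headers.
 */
struct mlx5_eq_pagefault {
	struct work_struct	 work;	/* deferred drain, runs eq_pf_action() */
	spinlock_t		 lock;	/* serializes eq_pf_process() */
	struct workqueue_struct	*wq;	/* ordered queue for eqe_pf_action() items */
	mempool_t		*pool;	/* preallocated struct mlx5_pagefault entries */
};

/* Sketch of the page-fault descriptor filled in by eq_pf_process(); only the
 * fields referenced in this patch are shown.
 */
struct mlx5_pagefault {
	u32	bytes_committed;
	u32	token;
	u8	event_subtype;
	u8	type;
	union {
		struct {
			u32	r_key;
			u32	packet_size;
			u32	rdma_op_len;
			u64	rdma_va;
		} rdma;
		struct {
			u32	wq_num;
			u16	wqe_index;
			u32	packet_size;
		} wqe;
	};
	struct mlx5_eq		*eq;
	struct work_struct	work;
};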
@@ -276,12 +462,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
}
break;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- case MLX5_EVENT_TYPE_PAGE_FAULT:
- mlx5_eq_pagefault(dev, eqe);
- break;
-#endif
-
#ifdef CONFIG_MLX5_CORE_EN
case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
@@ -292,6 +472,10 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
mlx5_port_module_event(dev, eqe);
break;
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ if (dev->event)
+ dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe);
+ break;
default:
mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
eqe->type, eq->eqn);
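
The new PPS case hands the raw EQE to whoever registered dev->event, cast to an unsigned long. A minimal sketch of a consumer-side handler, assuming only what is visible at the call site above (the handler name is illustrative, not part of this patch):

/* Illustrative dev->event consumer for MLX5_DEV_EVENT_PPS; only the cast of
 * 'param' back to the EQE is taken from the hunk above, everything else is
 * a hypothetical placeholder.
 */
static void example_pps_event(struct mlx5_core_dev *dev,
			      enum mlx5_dev_event event,
			      unsigned long param)
{
	struct mlx5_eqe *eqe;

	if (event != MLX5_DEV_EVENT_PPS)
		return;

	eqe = (struct mlx5_eqe *)param;	/* cast back from mlx5_eq_int() */
	/* hand eqe off to the PTP/1PPS clock code here */
}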
@@ -299,7 +483,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
}
++eq->cons_index;
- eqes_found = 1;
++set_ci;
/* The HCA will think the queue has overflowed if we
@@ -319,17 +502,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (cqn != -1)
tasklet_schedule(&eq->tasklet_ctx.task);
- return eqes_found;
-}
-
-static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
-{
- struct mlx5_eq *eq = eq_ptr;
- struct mlx5_core_dev *dev = eq->dev;
-
- mlx5_eq_int(dev, eq);
-
- /* MSI-X vectors always belong to us */
return IRQ_HANDLED;
}
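
mlx5_core_page_fault_resume(), exported in the first hunk, is what an ODP consumer calls once a fault queued through eqe_pf_action() has been served, so the HCA can resume the stalled queue. A hedged sketch of such a caller, where resolve_odp_pages() is a hypothetical stand-in for the consumer's own fault-resolution logic:

/* Hypothetical ODP-side completion path; only the resume helper's signature
 * and the mlx5_pagefault fields are taken from this patch.
 */
static void example_page_fault_done(struct mlx5_core_dev *dev,
				    struct mlx5_pagefault *pfault)
{
	int err = resolve_odp_pages(dev, pfault);	/* hypothetical helper */
	/* For WQE faults the queue number comes from the EQE; for RDMA faults
	 * the token is reused as the queue identifier here (an assumption of
	 * this sketch, not spelled out in the patch).
	 */
	u32 wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
		     pfault->wqe.wq_num : pfault->token;

	/* error != 0 asks the HCA to complete the faulting WQE in error */
	mlx5_core_page_fault_resume(dev, pfault->token, wq_num,
				    pfault->type, err ? 1 : 0);
}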
@@ -345,22 +517,32 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name, struct mlx5_uar *uar)
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type)
{
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
+ irq_handler_t handler;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
+ eq->type = type;
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
if (err)
return err;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF)
+ handler = mlx5_eq_pf_int;
+ else
+#endif
+ handler = mlx5_eq_int;
+
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -380,7 +562,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
- MLX5_SET(eqc, eqc, uar_page, uar->index);
+ MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
MLX5_SET(eqc, eqc, intr, vecidx);
MLX5_SET(eqc, eqc, log_page_size,
eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
@@ -395,8 +577,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = priv->msix_arr[vecidx].vector;
eq->dev = dev;
- eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, mlx5_msix_handler, 0,
+ eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
+ err = request_irq(eq->irqn, handler, 0,
priv->irq_info[vecidx].name, eq);
if (err)
goto err_eq;
@@ -405,11 +587,20 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_irq;
- INIT_LIST_HEAD(&eq->tasklet_ctx.list);
- INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
- spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF) {
+ err = init_pf_ctx(&eq->pf_ctx, name);
+ if (err)
+ goto err_irq;
+ } else
+#endif
+ {
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+ }
/* EQs are created in ARMED state
*/
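
The new 'type' argument to mlx5_create_map_eq() selects between the tasklet-based completion path and the page-fault workqueue path. A minimal sketch of the selector enum, assuming it is introduced alongside this patch in the mlx5 headers and listing only the values visible at the call sites here:

/* Sketch of the EQ type selector; the authoritative definition is in the
 * mlx5 headers changed elsewhere in this series.
 */
enum mlx5_eq_type {
	MLX5_EQ_TYPE_COMP,	/* completion EQs: CQ tasklet path */
	MLX5_EQ_TYPE_ASYNC,	/* cmd/async/pages EQs: mlx5_eq_int() */
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	MLX5_EQ_TYPE_PF,	/* page-fault EQ: mlx5_eq_pf_int() */
#endif
};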
@@ -444,7 +635,16 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
- tasklet_disable(&eq->tasklet_ctx.task);
+
+ if (eq->type == MLX5_EQ_TYPE_COMP) {
+ tasklet_disable(&eq->tasklet_ctx.task);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ } else if (eq->type == MLX5_EQ_TYPE_PF) {
+ cancel_work_sync(&eq->pf_ctx.work);
+ destroy_workqueue(eq->pf_ctx.wq);
+ mempool_destroy(eq->pf_ctx.pool);
+#endif
+ }
mlx5_buf_free(dev, &eq->buf);
return err;
@@ -479,8 +679,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
int err;
- if (MLX5_CAP_GEN(dev, pg))
- async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
MLX5_CAP_GEN(dev, vport_group_manager) &&
@@ -492,9 +690,12 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
else
mlx5_core_dbg(dev, "port_module_event is not set\n");
+ if (MLX5_CAP_GEN(dev, pps))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
- "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
+ "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
return err;
@@ -504,7 +705,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
MLX5_NUM_ASYNC_EQE, async_event_mask,
- "mlx5_async_eq", &dev->priv.uuari.uars[0]);
+ "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
goto err1;
@@ -514,13 +715,33 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
MLX5_EQ_VEC_PAGES,
/* TODO: sriov max_vf + */ 1,
1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
- &dev->priv.uuari.uars[0]);
+ MLX5_EQ_TYPE_ASYNC);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
goto err2;
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_create_map_eq(dev, &table->pfault_eq,
+ MLX5_EQ_VEC_PFAULT,
+ MLX5_NUM_ASYNC_EQE,
+ 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ "mlx5_page_fault_eq",
+ MLX5_EQ_TYPE_PF);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
+ err);
+ goto err3;
+ }
+ }
+
return err;
+err3:
+ mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+#else
+ return err;
+#endif
err2:
mlx5_destroy_unmap_eq(dev, &table->async_eq);
@@ -536,6 +757,14 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = &dev->priv.eq_table;
int err;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
+ if (err)
+ return err;
+ }
+#endif
+
err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
if (err)
return err;