summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile10
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c497
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h10
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c82
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h15
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h13
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c292
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h102
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c53
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c39
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h24
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c375
-rw-r--r--drivers/infiniband/hw/hfi1/fault.h109
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c8
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h61
-rw-r--r--drivers/infiniband/hw/hfi1/init.c47
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c37
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c32
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c44
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c154
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c12
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c43
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h160
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c4
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c11
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h4
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c45
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h15
31 files changed, 1568 insertions, 804 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index ce4010bad9828..f451ba912f472 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -14,7 +14,15 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o vnic_main.o vnic_sdma.o
-hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
+
+ifdef CONFIG_DEBUG_FS
+hfi1-y += debugfs.o
+ifdef CONFIG_FAULT_INJECTION
+ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+hfi1-y += fault.o
+endif
+endif
+endif
CFLAGS_trace.o = -I$(src)
ifdef MVERSION
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index b5fab55cc2750..fbe7198a715a1 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -77,6 +77,58 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
set->gen = 0;
}
+/* Increment generation of CPU set if needed */
+static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
+{
+ if (cpumask_equal(&set->mask, &set->used)) {
+ /*
+ * We've used up all the CPUs, bump up the generation
+ * and reset the 'used' map
+ */
+ set->gen++;
+ cpumask_clear(&set->used);
+ }
+}
+
+static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
+{
+ if (cpumask_empty(&set->used) && set->gen) {
+ set->gen--;
+ cpumask_copy(&set->used, &set->mask);
+ }
+}
+
+/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
+static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
+{
+ int cpu;
+
+ if (!diff || !set)
+ return -EINVAL;
+
+ _cpu_mask_set_gen_inc(set);
+
+ /* Find out CPUs left in CPU mask */
+ cpumask_andnot(diff, &set->mask, &set->used);
+
+ cpu = cpumask_first(diff);
+ if (cpu >= nr_cpu_ids) /* empty */
+ cpu = -EINVAL;
+ else
+ cpumask_set_cpu(cpu, &set->used);
+
+ return cpu;
+}
+
+static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
+{
+ if (!set)
+ return;
+
+ cpumask_clear_cpu(cpu, &set->used);
+ _cpu_mask_set_gen_dec(set);
+}
+
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
@@ -156,7 +208,13 @@ int node_affinity_init(void)
return 0;
}
-void node_affinity_destroy(void)
+static void node_affinity_destroy(struct hfi1_affinity_node *entry)
+{
+ free_percpu(entry->comp_vect_affinity);
+ kfree(entry);
+}
+
+void node_affinity_destroy_all(void)
{
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
@@ -166,7 +224,7 @@ void node_affinity_destroy(void)
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
- kfree(entry);
+ node_affinity_destroy(entry);
}
mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
@@ -180,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node)
if (!entry)
return NULL;
entry->node = node;
+ entry->comp_vect_affinity = alloc_percpu(u16);
INIT_LIST_HEAD(&entry->list);
return entry;
@@ -209,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
return NULL;
}
+static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+ int ret_cpu;
+
+ if (!possible_cpumask) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ if (!comp_vect_affinity) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ ret_cpu = cpumask_first(possible_cpumask);
+ if (ret_cpu >= nr_cpu_ids) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr < prev_cntr) {
+ ret_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+ return ret_cpu;
+}
+
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ int max_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+
+ if (!possible_cpumask)
+ return -EINVAL;
+
+ if (!comp_vect_affinity)
+ return -EINVAL;
+
+ max_cpu = cpumask_first(possible_cpumask);
+ if (max_cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr > prev_cntr) {
+ max_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+ return max_cpu;
+}
+
+/*
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ cpumask_var_t non_intr_cpus,
+ cpumask_var_t available_cpus)
+ __must_hold(&node_affinity.lock)
+{
+ int cpu;
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!non_intr_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ if (!available_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ /* Available CPUs for pinning completion vectors */
+ _cpu_mask_set_gen_inc(set);
+ cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+ /* Available CPUs without SDMA engine interrupts */
+ cpumask_andnot(non_intr_cpus, available_cpus,
+ &entry->def_intr.used);
+
+ /* If there are non-interrupt CPUs available, use them first */
+ if (!cpumask_empty(non_intr_cpus))
+ cpu = cpumask_first(non_intr_cpus);
+ else /* Otherwise, use interrupt CPUs */
+ cpu = cpumask_first(available_cpus);
+
+ if (cpu >= nr_cpu_ids) { /* empty */
+ cpu = -1;
+ goto fail;
+ }
+ cpumask_set_cpu(cpu, &set->used);
+
+fail:
+ return cpu;
+}
+
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ if (cpu < 0)
+ return;
+
+ cpu_mask_set_put(set, cpu);
+}
+
+/* _dev_comp_vect_mappings_destroy() is reentrant */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+ int i, cpu;
+
+ if (!dd->comp_vect_mappings)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = dd->comp_vect_mappings[i];
+ _dev_comp_vect_cpu_put(dd, cpu);
+ dd->comp_vect_mappings[i] = -1;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Release CPU %d from completion vector %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+ }
+
+ kfree(dd->comp_vect_mappings);
+ dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * This function creates the table for looking up CPUs for completion vectors.
+ * num_comp_vectors needs to have been initilized before calling this function.
+ */
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu, ret;
+ cpumask_var_t non_intr_cpus;
+ cpumask_var_t available_cpus;
+
+ lockdep_assert_held(&node_affinity.lock);
+
+ if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+ free_cpumask_var(non_intr_cpus);
+ return -ENOMEM;
+ }
+
+ dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+ sizeof(*dd->comp_vect_mappings),
+ GFP_KERNEL);
+ if (!dd->comp_vect_mappings) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+ dd->comp_vect_mappings[i] = -1;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+ available_cpus);
+ if (cpu < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ dd->comp_vect_mappings[i] = cpu;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion Vector %d -> CPU %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+ }
+
+ return 0;
+
+fail:
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
+ _dev_comp_vect_mappings_destroy(dd);
+
+ return ret;
+}
+
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+ int ret;
+ struct hfi1_affinity_node *entry;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+
+ return ret;
+}
+
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+ _dev_comp_vect_mappings_destroy(dd);
+}
+
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+ if (!dd->comp_vect_mappings)
+ return -EINVAL;
+ if (comp_vect >= dd->comp_vect_possible_cpus)
+ return -EINVAL;
+
+ return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ bool first_dev_init)
+ __must_hold(&node_affinity.lock)
+{
+ int i, j, curr_cpu;
+ int possible_cpus_comp_vect = 0;
+ struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+ lockdep_assert_held(&node_affinity.lock);
+ /*
+ * If there's only one CPU available for completion vectors, then
+ * there will only be one completion vector available. Othewise,
+ * the number of completion vector available will be the number of
+ * available CPUs divide it by the number of devices in the
+ * local NUMA node.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+ possible_cpus_comp_vect = 1;
+ dd_dev_warn(dd,
+ "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+ } else {
+ possible_cpus_comp_vect +=
+ cpumask_weight(&entry->comp_vect_mask) /
+ hfi1_per_node_cntr[dd->node];
+
+ /*
+ * If the completion vector CPUs available doesn't divide
+ * evenly among devices, then the first device device to be
+ * initialized gets an extra CPU.
+ */
+ if (first_dev_init &&
+ cpumask_weight(&entry->comp_vect_mask) %
+ hfi1_per_node_cntr[dd->node] != 0)
+ possible_cpus_comp_vect++;
+ }
+
+ dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
+
+ /* Reserving CPUs for device completion vector */
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+ if (curr_cpu < 0)
+ goto fail;
+
+ cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
+ }
+
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion vector affinity CPU set(s) %*pbl",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
+ cpumask_pr_args(dev_comp_vect_mask));
+
+ return 0;
+
+fail:
+ for (j = 0; j < i; j++)
+ per_cpu_affinity_put_max(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+
+ return curr_cpu;
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!dd->comp_vect_possible_cpus)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
+ entry->comp_vect_affinity);
+ /* Clearing CPU in device completion vector cpu mask */
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
+ }
+
+ dd->comp_vect_possible_cpus = 0;
+}
+
/*
* Interrupt affinity.
*
@@ -225,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
- int curr_cpu, possible, i;
+ int curr_cpu, possible, i, ret;
+ bool new_entry = false;
if (node < 0)
node = numa_node_id();
@@ -247,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
- mutex_unlock(&node_affinity.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
+ new_entry = true;
+
init_cpu_mask_set(&entry->def_intr);
init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->comp_vect_mask);
cpumask_clear(&entry->general_intr_mask);
/* Use the "real" cpu mask of this node as the default */
cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
@@ -304,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- node_affinity_add_tail(entry);
+ /* Determine completion vector CPUs for the entire node */
+ cpumask_and(&entry->comp_vect_mask,
+ &node_affinity.real_cpu_mask, local_mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->rcv_intr.mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->general_intr_mask);
+
+ /*
+ * If there ends up being 0 CPU cores leftover for completion
+ * vectors, use the same CPU core as the general/control
+ * context.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 0)
+ cpumask_copy(&entry->comp_vect_mask,
+ &entry->general_intr_mask);
}
+
+ ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
+ if (ret < 0)
+ goto fail;
+
+ if (new_entry)
+ node_affinity_add_tail(entry);
+
mutex_unlock(&node_affinity.lock);
+
return 0;
+
+fail:
+ if (new_entry)
+ node_affinity_destroy(entry);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
+{
+ struct hfi1_affinity_node *entry;
+
+ if (dd->node < 0)
+ return;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ /*
+ * Free device completion vector CPUs to be used by future
+ * completion vectors
+ */
+ _dev_comp_vect_cpu_mask_clean_up(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+ dd->node = -1;
}
/*
@@ -456,17 +908,12 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
return -ENOMEM;
- if (cpumask_equal(&set->mask, &set->used)) {
- /*
- * We've used up all the CPUs, bump up the generation
- * and reset the 'used' map
- */
- set->gen++;
- cpumask_clear(&set->used);
+ cpu = cpu_mask_set_get_first(set, diff);
+ if (cpu < 0) {
+ free_cpumask_var(diff);
+ dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
+ return cpu;
}
- cpumask_andnot(diff, &set->mask, &set->used);
- cpu = cpumask_first(diff);
- cpumask_set_cpu(cpu, &set->used);
free_cpumask_var(diff);
}
@@ -526,10 +973,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
if (set) {
cpumask_andnot(&set->used, &set->used, &msix->mask);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
+ _cpu_mask_set_gen_dec(set);
}
irq_set_affinity_hint(msix->irq, NULL);
@@ -640,10 +1084,7 @@ int hfi1_get_proc_affinity(int node)
* If we've used all available HW threads, clear the mask and start
* overloading.
*/
- if (cpumask_equal(&set->mask, &set->used)) {
- set->gen++;
- cpumask_clear(&set->used);
- }
+ _cpu_mask_set_gen_inc(set);
/*
* If NUMA node has CPUs used by interrupt handlers, include them in the
@@ -767,11 +1208,7 @@ void hfi1_put_proc_affinity(int cpu)
return;
mutex_lock(&affinity->lock);
- cpumask_clear_cpu(cpu, &set->used);
+ cpu_mask_set_put(set, cpu);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
mutex_unlock(&affinity->lock);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 2a1e374169c0a..6a7e6ea4e4260 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
+ u16 __percpu *comp_vect_affinity;
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
struct cpumask general_intr_mask;
+ struct cpumask comp_vect_mask;
struct list_head list;
};
@@ -116,7 +118,11 @@ struct hfi1_affinity_node_list {
};
int node_affinity_init(void);
-void node_affinity_destroy(void);
+void node_affinity_destroy_all(void);
extern struct hfi1_affinity_node_list node_affinity;
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd);
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect);
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd);
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd);
#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e6bdd0c1e80a9..6deb101cdd438 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -65,6 +65,7 @@
#include "aspm.h"
#include "affinity.h"
#include "debugfs.h"
+#include "fault.h"
#define NUM_IB_PORTS 1
@@ -1032,8 +1033,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
u8 *remote_tx_rate, u16 *link_widths);
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths);
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
u8 *device_rev);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
@@ -6355,6 +6356,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
break;
+ case HREQ_LCB_RESET:
+ /* Put the LCB, RX FPE and TX FPE into reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET);
+ /* Make sure the write completed */
+ (void)read_csr(dd, DCC_CFG_RESET);
+ /* Hold the reset long enough to take effect */
+ udelay(1);
+ /* Take the LCB, RX FPE and TX FPE out of reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
+ hreq_response(dd, HREQ_SUCCESS, 0);
+
+ break;
case HREQ_CONFIG_DONE:
hreq_response(dd, HREQ_SUCCESS, 0);
break;
@@ -6465,8 +6478,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
reg = read_csr(dd, DCC_CFG_RESET);
write_csr(dd, DCC_CFG_RESET, reg |
- (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
- (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+ DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE);
(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
if (!abort) {
udelay(1); /* must hold for the longer of 16cclks or 20ns */
@@ -6531,7 +6543,7 @@ static void _dc_start(struct hfi1_devdata *dd)
__func__);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
- write_csr(dd, DCC_CFG_RESET, 0x10);
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
dd->dc_shutdown = 0;
@@ -6829,7 +6841,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ rcvmask |= rcd->rcvhdrtail_kvaddr ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -7352,7 +7364,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
u8 misc_bits, local_flags;
u16 active_tx, active_rx;
- read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
+ read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths);
tx = widths >> 12;
rx = (widths >> 8) & 0xf;
@@ -8355,7 +8367,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
u32 tail;
int present;
- if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+ if (!rcd->rcvhdrtail_kvaddr)
present = (rcd->seq_cnt ==
rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
else /* is RDMA rtail */
@@ -8824,29 +8836,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
GENERAL_CONFIG, frame);
}
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths)
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths)
{
u32 frame;
- read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
&frame);
*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
}
-static int write_vc_local_link_width(struct hfi1_devdata *dd,
- u8 misc_bits,
- u8 flag_bits,
- u16 link_widths)
+static int write_vc_local_link_mode(struct hfi1_devdata *dd,
+ u8 misc_bits,
+ u8 flag_bits,
+ u16 link_widths)
{
u32 frame;
frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
| (u32)link_widths << LINK_WIDTH_SHIFT;
- return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
frame);
}
@@ -9316,8 +9328,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (loopback == LOOPBACK_SERDES)
misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;
- ret = write_vc_local_link_width(dd, misc_bits, 0,
- opa_to_vc_link_widths(
+ /*
+ * An external device configuration request is used to reset the LCB
+ * to retry to obtain operational lanes when the first attempt is
+ * unsuccesful.
+ */
+ if (dd->dc8051_ver >= dc8051_ver(1, 25, 0))
+ misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT;
+
+ ret = write_vc_local_link_mode(dd, misc_bits, 0,
+ opa_to_vc_link_widths(
ppd->link_width_enabled));
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
@@ -10495,9 +10515,9 @@ u32 driver_pstate(struct hfi1_pportdata *ppd)
case HLS_DN_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_VERIFY_CAP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_UP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_LINK_COOLDOWN:
@@ -11823,7 +11843,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (rcd->rcvhdrtail_kvaddr)
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
@@ -11903,7 +11923,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -14620,7 +14640,9 @@ static void init_rxe(struct hfi1_devdata *dd)
/* Have 16 bytes (4DW) of bypass header available in header queue */
val = read_csr(dd, RCV_BYPASS);
- val |= (4ull << 16);
+ val &= ~RCV_BYPASS_HDR_SIZE_SMASK;
+ val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
+ RCV_BYPASS_HDR_SIZE_SHIFT);
write_csr(dd, RCV_BYPASS, val);
}
@@ -15022,13 +15044,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret < 0)
goto bail_cleanup;
- /* verify that reads actually work, save revision for reset check */
- dd->revision = read_csr(dd, CCE_REVISION);
- if (dd->revision == ~(u64)0) {
- dd_dev_err(dd, "cannot read chip CSRs\n");
- ret = -EINVAL;
- goto bail_cleanup;
- }
dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
& CCE_REVISION_CHIP_REV_MAJOR_MASK;
dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
@@ -15224,6 +15239,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ ret = hfi1_comp_vectors_set_up(dd);
+ if (ret)
+ goto bail_clear_intr;
+
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
@@ -15266,6 +15285,7 @@ bail_free_rcverr:
bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
+ hfi1_comp_vectors_clean_up(dd);
hfi1_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index c0d70f2550502..fdf389e46e19c 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -196,6 +196,15 @@
#define LSTATE_ARMED 0x3
#define LSTATE_ACTIVE 0x4
+/* DCC_CFG_RESET reset states */
+#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \
+ DCC_CFG_RESET_RESET_TX_FPE | \
+ DCC_CFG_RESET_RESET_RX_FPE | \
+ DCC_CFG_RESET_ENABLE_CCLK_BCC)
+ /* 0x17 */
+
+#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */
+
/* DC8051_STS_CUR_STATE port values (physical link states) */
#define PLS_DISABLED 0x30
#define PLS_OFFLINE 0x90
@@ -283,6 +292,7 @@
#define HREQ_SET_TX_EQ_ABS 0x04
#define HREQ_SET_TX_EQ_REL 0x05
#define HREQ_ENABLE 0x06
+#define HREQ_LCB_RESET 0x07
#define HREQ_CONFIG_DONE 0xfe
#define HREQ_INTERFACE_TEST 0xff
@@ -383,7 +393,7 @@
#define TX_SETTINGS 0x06
#define VERIFY_CAP_LOCAL_PHY 0x07
#define VERIFY_CAP_LOCAL_FABRIC 0x08
-#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
+#define VERIFY_CAP_LOCAL_LINK_MODE 0x09
#define LOCAL_DEVICE_ID 0x0a
#define RESERVED_REGISTERS 0x0b
#define LOCAL_LNI_INFO 0x0c
@@ -584,8 +594,9 @@ enum {
#define LOOPBACK_LCB 2
#define LOOPBACK_CABLE 3 /* external cable */
-/* set up serdes bit in MISC_CONFIG_BITS */
+/* set up bits in MISC_CONFIG_BITS */
#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0
+#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3
/* read and write hardware registers */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset);
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 793514f1d15fb..ee6dca5e2a2ff 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -97,8 +97,11 @@
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull
#define DCC_CFG_RESET (DCC_CSRS + 0x000000000000)
-#define DCC_CFG_RESET_RESET_LCB_SHIFT 0
-#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2
+#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0)
+#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1)
+#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2)
+#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3)
+#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4)
#define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028)
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40
@@ -635,6 +638,12 @@
#define RCV_BTH_QP_KDETH_QP_MASK 0xFFull
#define RCV_BTH_QP_KDETH_QP_SHIFT 16
#define RCV_BYPASS (RXE + 0x000000000038)
+#define RCV_BYPASS_HDR_SIZE_SHIFT 16
+#define RCV_BYPASS_HDR_SIZE_MASK 0x1Full
+#define RCV_BYPASS_HDR_SIZE_SMASK 0x1F0000ull
+#define RCV_BYPASS_BYPASS_CONTEXT_SHIFT 0
+#define RCV_BYPASS_BYPASS_CONTEXT_MASK 0xFFull
+#define RCV_BYPASS_BYPASS_CONTEXT_SMASK 0xFFull
#define RCV_CONTEXTS (RXE + 0x000000000010)
#define RCV_COUNTER_ARRAY32 (RXE + 0x000000000400)
#define RCV_COUNTER_ARRAY64 (RXE + 0x000000000500)
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 852173bf05d0b..9f992ae36c891 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -60,15 +60,13 @@
#include "device.h"
#include "qp.h"
#include "sdma.h"
+#include "fault.h"
static struct dentry *hfi1_dbg_root;
/* wrappers to enforce srcu in seq file */
-static ssize_t hfi1_seq_read(
- struct file *file,
- char __user *buf,
- size_t size,
- loff_t *ppos)
+ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
{
struct dentry *d = file->f_path.dentry;
ssize_t r;
@@ -81,10 +79,7 @@ static ssize_t hfi1_seq_read(
return r;
}
-static loff_t hfi1_seq_lseek(
- struct file *file,
- loff_t offset,
- int whence)
+loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *d = file->f_path.dentry;
loff_t r;
@@ -100,48 +95,6 @@ static loff_t hfi1_seq_lseek(
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
-#define DEBUGFS_SEQ_FILE_OPS(name) \
-static const struct seq_operations _##name##_seq_ops = { \
- .start = _##name##_seq_start, \
- .next = _##name##_seq_next, \
- .stop = _##name##_seq_stop, \
- .show = _##name##_seq_show \
-}
-
-#define DEBUGFS_SEQ_FILE_OPEN(name) \
-static int _##name##_open(struct inode *inode, struct file *s) \
-{ \
- struct seq_file *seq; \
- int ret; \
- ret = seq_open(s, &_##name##_seq_ops); \
- if (ret) \
- return ret; \
- seq = s->private_data; \
- seq->private = inode->i_private; \
- return 0; \
-}
-
-#define DEBUGFS_FILE_OPS(name) \
-static const struct file_operations _##name##_file_ops = { \
- .owner = THIS_MODULE, \
- .open = _##name##_open, \
- .read = hfi1_seq_read, \
- .llseek = hfi1_seq_lseek, \
- .release = seq_release \
-}
-
-#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
-do { \
- struct dentry *ent; \
- ent = debugfs_create_file(name, mode, parent, \
- data, ops); \
- if (!ent) \
- pr_warn("create of %s failed\n", name); \
-} while (0)
-
-#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
- DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
-
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
@@ -1160,232 +1113,6 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
-#ifdef CONFIG_FAULT_INJECTION
-static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct hfi1_opcode_stats_perctx *opstats;
-
- if (*pos >= ARRAY_SIZE(opstats->stats))
- return NULL;
- return pos;
-}
-
-static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct hfi1_opcode_stats_perctx *opstats;
-
- ++*pos;
- if (*pos >= ARRAY_SIZE(opstats->stats))
- return NULL;
- return pos;
-}
-
-static void _fault_stats_seq_stop(struct seq_file *s, void *v)
-{
-}
-
-static int _fault_stats_seq_show(struct seq_file *s, void *v)
-{
- loff_t *spos = v;
- loff_t i = *spos, j;
- u64 n_packets = 0, n_bytes = 0;
- struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
- struct hfi1_devdata *dd = dd_from_dev(ibd);
- struct hfi1_ctxtdata *rcd;
-
- for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
- rcd = hfi1_rcd_get_by_index(dd, j);
- if (rcd) {
- n_packets += rcd->opstats->stats[i].n_packets;
- n_bytes += rcd->opstats->stats[i].n_bytes;
- }
- hfi1_rcd_put(rcd);
- }
- for_each_possible_cpu(j) {
- struct hfi1_opcode_stats_perctx *sp =
- per_cpu_ptr(dd->tx_opstats, j);
-
- n_packets += sp->stats[i].n_packets;
- n_bytes += sp->stats[i].n_bytes;
- }
- if (!n_packets && !n_bytes)
- return SEQ_SKIP;
- if (!ibd->fault_opcode->n_rxfaults[i] &&
- !ibd->fault_opcode->n_txfaults[i])
- return SEQ_SKIP;
- seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
- (unsigned long long)n_packets,
- (unsigned long long)n_bytes,
- (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
- (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
- return 0;
-}
-
-DEBUGFS_SEQ_FILE_OPS(fault_stats);
-DEBUGFS_SEQ_FILE_OPEN(fault_stats);
-DEBUGFS_FILE_OPS(fault_stats);
-
-static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
-{
- debugfs_remove_recursive(ibd->fault_opcode->dir);
- kfree(ibd->fault_opcode);
- ibd->fault_opcode = NULL;
-}
-
-static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
-{
- struct dentry *parent = ibd->hfi1_ibdev_dbg;
-
- ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
- if (!ibd->fault_opcode)
- return -ENOMEM;
-
- ibd->fault_opcode->attr.interval = 1;
- ibd->fault_opcode->attr.require_end = ULONG_MAX;
- ibd->fault_opcode->attr.stacktrace_depth = 32;
- ibd->fault_opcode->attr.dname = NULL;
- ibd->fault_opcode->attr.verbose = 0;
- ibd->fault_opcode->fault_by_opcode = false;
- ibd->fault_opcode->opcode = 0;
- ibd->fault_opcode->mask = 0xff;
-
- ibd->fault_opcode->dir =
- fault_create_debugfs_attr("fault_opcode",
- parent,
- &ibd->fault_opcode->attr);
- if (IS_ERR(ibd->fault_opcode->dir)) {
- kfree(ibd->fault_opcode);
- return -ENOENT;
- }
-
- DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
- if (!debugfs_create_bool("fault_by_opcode", 0600,
- ibd->fault_opcode->dir,
- &ibd->fault_opcode->fault_by_opcode))
- goto fail;
- if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
- &ibd->fault_opcode->opcode))
- goto fail;
- if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
- &ibd->fault_opcode->mask))
- goto fail;
-
- return 0;
-fail:
- fault_exit_opcode_debugfs(ibd);
- return -ENOMEM;
-}
-
-static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
-{
- debugfs_remove_recursive(ibd->fault_packet->dir);
- kfree(ibd->fault_packet);
- ibd->fault_packet = NULL;
-}
-
-static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
-{
- struct dentry *parent = ibd->hfi1_ibdev_dbg;
-
- ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
- if (!ibd->fault_packet)
- return -ENOMEM;
-
- ibd->fault_packet->attr.interval = 1;
- ibd->fault_packet->attr.require_end = ULONG_MAX;
- ibd->fault_packet->attr.stacktrace_depth = 32;
- ibd->fault_packet->attr.dname = NULL;
- ibd->fault_packet->attr.verbose = 0;
- ibd->fault_packet->fault_by_packet = false;
-
- ibd->fault_packet->dir =
- fault_create_debugfs_attr("fault_packet",
- parent,
- &ibd->fault_opcode->attr);
- if (IS_ERR(ibd->fault_packet->dir)) {
- kfree(ibd->fault_packet);
- return -ENOENT;
- }
-
- if (!debugfs_create_bool("fault_by_packet", 0600,
- ibd->fault_packet->dir,
- &ibd->fault_packet->fault_by_packet))
- goto fail;
- if (!debugfs_create_u64("fault_stats", 0400,
- ibd->fault_packet->dir,
- &ibd->fault_packet->n_faults))
- goto fail;
-
- return 0;
-fail:
- fault_exit_packet_debugfs(ibd);
- return -ENOMEM;
-}
-
-static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
-{
- fault_exit_opcode_debugfs(ibd);
- fault_exit_packet_debugfs(ibd);
-}
-
-static int fault_init_debugfs(struct hfi1_ibdev *ibd)
-{
- int ret = 0;
-
- ret = fault_init_opcode_debugfs(ibd);
- if (ret)
- return ret;
-
- ret = fault_init_packet_debugfs(ibd);
- if (ret)
- fault_exit_opcode_debugfs(ibd);
-
- return ret;
-}
-
-bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return ibd->fault_suppress_err;
-}
-
-bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
-{
- bool ret = false;
- struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
-
- if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
- return false;
- if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
- return false;
- ret = should_fail(&ibd->fault_opcode->attr, 1);
- if (ret) {
- trace_hfi1_fault_opcode(qp, opcode);
- if (rx)
- ibd->fault_opcode->n_rxfaults[opcode]++;
- else
- ibd->fault_opcode->n_txfaults[opcode]++;
- }
- return ret;
-}
-
-bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
- struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
- bool ret = false;
-
- if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
- return false;
-
- ret = should_fail(&ibd->fault_packet->attr, 1);
- if (ret) {
- ++ibd->fault_packet->n_faults;
- trace_hfi1_fault_packet(packet);
- }
- return ret;
-}
-#endif
-
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1438,21 +1165,14 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
S_IRUGO : S_IRUGO | S_IWUSR);
}
-#ifdef CONFIG_FAULT_INJECTION
- debugfs_create_bool("fault_suppress_err", 0600,
- ibd->hfi1_ibdev_dbg,
- &ibd->fault_suppress_err);
- fault_init_debugfs(ibd);
-#endif
+ hfi1_fault_init_debugfs(ibd);
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
-#ifdef CONFIG_FAULT_INJECTION
- fault_exit_debugfs(ibd);
-#endif
+ hfi1_fault_exit_debugfs(ibd);
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 38c38a98156d6..d5d824459fcc6 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_DEBUGFS_H
#define _HFI1_DEBUGFS_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015, 2016, 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -48,51 +48,59 @@
*/
struct hfi1_ibdev;
-#ifdef CONFIG_DEBUG_FS
-void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
-void hfi1_dbg_init(void);
-void hfi1_dbg_exit(void);
-
-#ifdef CONFIG_FAULT_INJECTION
-#include <linux/fault-inject.h>
-struct fault_opcode {
- struct fault_attr attr;
- struct dentry *dir;
- bool fault_by_opcode;
- u64 n_rxfaults[256];
- u64 n_txfaults[256];
- u8 opcode;
- u8 mask;
-};
-struct fault_packet {
- struct fault_attr attr;
- struct dentry *dir;
- bool fault_by_packet;
- u64 n_faults;
-};
+#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
+do { \
+ struct dentry *ent; \
+ const char *__name = name; \
+ ent = debugfs_create_file(__name, mode, parent, \
+ data, ops); \
+ if (!ent) \
+ pr_warn("create of %s failed\n", __name); \
+} while (0)
-bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
-bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
-bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
-#else
-static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- return false;
+#define DEBUGFS_SEQ_FILE_OPS(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
}
-static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
- u32 opcode, bool rx)
-{
- return false;
+#define DEBUGFS_SEQ_FILE_OPEN(name) \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
}
-static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return false;
+#define DEBUGFS_FILE_OPS(name) \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
+ .release = seq_release \
}
-#endif
+
+#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
+ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444)
+
+ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos);
+loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence);
+
+#ifdef CONFIG_DEBUG_FS
+void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
+void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
+void hfi1_dbg_init(void);
+void hfi1_dbg_exit(void);
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -110,22 +118,6 @@ static inline void hfi1_dbg_init(void)
static inline void hfi1_dbg_exit(void)
{
}
-
-static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- return false;
-}
-
-static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
- u32 opcode, bool rx)
-{
- return false;
-}
-
-static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return false;
-}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index bd837a048bf49..94dca95db04f4 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -61,6 +61,7 @@
#include "sdma.h"
#include "debugfs.h"
#include "vnic.h"
+#include "fault.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1482,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
u8 l4;
- u8 grh_len;
packet->hdr = (struct hfi1_16b_header *)
hfi1_get_16B_header(packet->rcd->dd,
packet->rhf_addr);
- packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
-
l4 = hfi1_16B_get_l4(packet->hdr);
if (l4 == OPA_16B_L4_IB_LOCAL) {
- grh_len = 0;
packet->ohdr = packet->ebuf;
packet->grh = NULL;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES);
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
} else if (l4 == OPA_16B_L4_IB_GLOBAL) {
u32 vtf;
+ u8 grh_len = sizeof(struct ib_grh);
- grh_len = sizeof(struct ib_grh);
packet->ohdr = packet->ebuf + grh_len;
packet->grh = packet->ebuf;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
+
if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
vtf = be32_to_cpu(packet->grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
+ } else if (l4 == OPA_16B_L4_FM) {
+ packet->mgmt = packet->ebuf;
+ packet->ohdr = NULL;
+ packet->grh = NULL;
+ packet->opcode = IB_OPCODE_UD_SEND_ONLY;
+ packet->pad = OPA_16B_L4_FM_PAD;
+ packet->hlen = OPA_16B_L4_FM_HLEN;
+ packet->migrated = false;
} else {
goto drop;
}
/* Query commonly used fields from packet header */
- packet->opcode = ib_bth_get_opcode(packet->ohdr);
- /* hdr_len_by_opcode already has an IB LRH factored in */
- packet->hlen = hdr_len_by_opcode[packet->opcode] +
- (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
packet->slid = hfi1_16B_get_slid(packet->hdr);
packet->dlid = hfi1_16B_get_dlid(packet->hdr);
@@ -1523,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
16B);
packet->sc = hfi1_16B_get_sc(packet->hdr);
packet->sl = ibp->sc_to_sl[packet->sc];
- packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
packet->extra_byte = SIZE_OF_LT;
packet->pkey = hfi1_16B_get_pkey(packet->hdr);
- packet->migrated = opa_bth_is_migration(packet->ohdr);
if (hfi1_bypass_ingress_pkt_check(packet))
goto drop;
@@ -1565,10 +1577,10 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
- if (unlikely(hfi1_dbg_fault_packet(packet)))
+ if (hfi1_setup_9B_packet(packet))
return RHF_RCV_CONTINUE;
- if (hfi1_setup_9B_packet(packet))
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet);
@@ -1642,7 +1654,8 @@ int process_receive_error(struct hfi1_packet *packet)
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
- rhf_rcv_type_err(packet->rhf) == 3))
+ (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
+ packet->rhf & RHF_DC_ERR)))
return RHF_RCV_CONTINUE;
hfi1_setup_ib_header(packet);
@@ -1657,10 +1670,10 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
- if (unlikely(hfi1_dbg_fault_packet(packet)))
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
- hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1671,11 +1684,11 @@ int kdeth_process_expected(struct hfi1_packet *packet)
int kdeth_process_eager(struct hfi1_packet *packet)
{
- hfi1_setup_ib_header(packet);
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
- if (unlikely(hfi1_dbg_fault_packet(packet)))
- return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index 0af91675acc6d..1be49a0d9c11a 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -52,13 +52,24 @@
* exp_tid_group_init - initialize exp_tid_set
* @set - the set
*/
-void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
{
INIT_LIST_HEAD(&set->list);
set->count = 0;
}
/**
+ * hfi1_exp_tid_group_init - initialize rcd expected receive
+ * @rcd - the rcd
+ */
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
+{
+ hfi1_exp_tid_set_init(&rcd->tid_group_list);
+ hfi1_exp_tid_set_init(&rcd->tid_used_list);
+ hfi1_exp_tid_set_init(&rcd->tid_full_list);
+}
+
+/**
* alloc_ctxt_rcv_groups - initialize expected receive groups
* @rcd - the context to add the groupings to
*/
@@ -68,13 +79,17 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
u32 tidbase;
struct tid_group *grp;
int i;
+ u32 ngroups;
+ ngroups = rcd->expected_count / dd->rcv_entries.group_size;
+ rcd->groups =
+ kcalloc_node(ngroups, sizeof(*rcd->groups),
+ GFP_KERNEL, rcd->numa_id);
+ if (!rcd->groups)
+ return -ENOMEM;
tidbase = rcd->expected_base;
- for (i = 0; i < rcd->expected_count /
- dd->rcv_entries.group_size; i++) {
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp)
- goto bail;
+ for (i = 0; i < ngroups; i++) {
+ grp = &rcd->groups[i];
grp->size = dd->rcv_entries.group_size;
grp->base = tidbase;
tid_group_add_tail(grp, &rcd->tid_group_list);
@@ -82,9 +97,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
}
return 0;
-bail:
- hfi1_free_ctxt_rcv_groups(rcd);
- return -ENOMEM;
}
/**
@@ -100,15 +112,12 @@ bail:
*/
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
- struct tid_group *grp, *gptr;
-
WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
- list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
- tid_group_remove(grp, &rcd->tid_group_list);
- kfree(grp);
- }
+ kfree(rcd->groups);
+ rcd->groups = NULL;
+ hfi1_exp_tid_group_init(rcd);
hfi1_clear_tids(rcd);
}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index 08719047628a0..f25362015095b 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -183,8 +183,30 @@ static inline u32 rcventry2tidinfo(u32 rcventry)
EXP_TID_SET(CTRL, 1 << (rcventry - pair));
}
+/**
+ * hfi1_tid_group_to_idx - convert an index to a group
+ * @rcd - the receive context
+ * @grp - the group pointer
+ */
+static inline u16
+hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp)
+{
+ return grp - &rcd->groups[0];
+}
+
+/**
+ * hfi1_idx_to_tid_group - convert a group to an index
+ * @rcd - the receive context
+ * @idx - the index
+ */
+static inline struct tid_group *
+hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx)
+{
+ return &rcd->groups[idx];
+}
+
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
-void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd);
#endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
new file mode 100644
index 0000000000000..e2290f32c8d90
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bitmap.h>
+
+#include "debugfs.h"
+#include "fault.h"
+#include "trace.h"
+
+#define HFI1_FAULT_DIR_TX BIT(0)
+#define HFI1_FAULT_DIR_RX BIT(1)
+#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
+
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ rcd = hfi1_rcd_get_by_index(dd, j);
+ if (rcd) {
+ n_packets += rcd->opstats->stats[i].n_packets;
+ n_bytes += rcd->opstats->stats[i].n_bytes;
+ }
+ hfi1_rcd_put(rcd);
+ }
+ for_each_possible_cpu(j) {
+ struct hfi1_opcode_stats_perctx *sp =
+ per_cpu_ptr(dd->tx_opstats, j);
+
+ n_packets += sp->stats[i].n_packets;
+ n_bytes += sp->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault->n_rxfaults[i],
+ (unsigned long long)ibd->fault->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static int fault_opcodes_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
+ size_t copy, datalen = 1280;
+ char *data, *token, *ptr, *end;
+ struct fault *fault = file->private_data;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ copy = min(len, datalen - 1);
+ if (copy_from_user(data, buf, copy))
+ return -EFAULT;
+
+ ret = debugfs_file_get(file->f_path.dentry);
+ if (unlikely(ret))
+ return ret;
+ ptr = data;
+ token = ptr;
+ for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
+ char *dash;
+ unsigned long range_start, range_end, i;
+ bool remove = false;
+
+ end = strchr(ptr, ',');
+ if (end)
+ *end = '\0';
+ if (token[0] == '-') {
+ remove = true;
+ token++;
+ }
+ dash = strchr(token, '-');
+ if (dash)
+ *dash = '\0';
+ if (kstrtoul(token, 0, &range_start))
+ break;
+ if (dash) {
+ token = dash + 1;
+ if (kstrtoul(token, 0, &range_end))
+ break;
+ } else {
+ range_end = range_start;
+ }
+ if (range_start == range_end && range_start == -1UL) {
+ bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
+ BITS_PER_BYTE);
+ break;
+ }
+ for (i = range_start; i <= range_end; i++) {
+ if (remove)
+ clear_bit(i, fault->opcodes);
+ else
+ set_bit(i, fault->opcodes);
+ }
+ if (!end)
+ break;
+ }
+ ret = len;
+
+ debugfs_file_put(file->f_path.dentry);
+ kfree(data);
+ return ret;
+}
+
+static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ char *data;
+ size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
+ unsigned long bit = 0, zero = 0;
+ struct fault *fault = file->private_data;
+ size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ ret = debugfs_file_get(file->f_path.dentry);
+ if (unlikely(ret))
+ return ret;
+ bit = find_first_bit(fault->opcodes, bitsize);
+ while (bit < bitsize) {
+ zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
+ if (zero - 1 != bit)
+ size += snprintf(data + size,
+ datalen - size - 1,
+ "0x%lx-0x%lx,", bit, zero - 1);
+ else
+ size += snprintf(data + size,
+ datalen - size - 1, "0x%lx,",
+ bit);
+ bit = find_next_bit(fault->opcodes, bitsize, zero);
+ }
+ debugfs_file_put(file->f_path.dentry);
+ data[size - 1] = '\n';
+ data[size] = '\0';
+ ret = simple_read_from_buffer(buf, len, pos, data, size);
+ kfree(data);
+ return ret;
+}
+
+static const struct file_operations __fault_opcodes_fops = {
+ .owner = THIS_MODULE,
+ .open = fault_opcodes_open,
+ .read = fault_opcodes_read,
+ .write = fault_opcodes_write,
+ .llseek = no_llseek
+};
+
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ debugfs_remove_recursive(ibd->fault->dir);
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+}
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
+ if (!ibd->fault)
+ return -ENOMEM;
+
+ ibd->fault->attr.interval = 1;
+ ibd->fault->attr.require_end = ULONG_MAX;
+ ibd->fault->attr.stacktrace_depth = 32;
+ ibd->fault->attr.dname = NULL;
+ ibd->fault->attr.verbose = 0;
+ ibd->fault->enable = false;
+ ibd->fault->opcode = false;
+ ibd->fault->fault_skip = 0;
+ ibd->fault->skip = 0;
+ ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
+ ibd->fault->suppress_err = false;
+ bitmap_zero(ibd->fault->opcodes,
+ sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
+
+ ibd->fault->dir =
+ fault_create_debugfs_attr("fault", parent,
+ &ibd->fault->attr);
+ if (IS_ERR(ibd->fault->dir)) {
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
+ if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
+ &ibd->fault->enable))
+ goto fail;
+ if (!debugfs_create_bool("suppress_err", 0600,
+ ibd->fault->dir,
+ &ibd->fault->suppress_err))
+ goto fail;
+ if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
+ &ibd->fault->opcode))
+ goto fail;
+ if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
+ ibd->fault, &__fault_opcodes_fops))
+ goto fail;
+ if (!debugfs_create_u64("skip_pkts", 0600,
+ ibd->fault->dir,
+ &ibd->fault->fault_skip))
+ goto fail;
+ if (!debugfs_create_u64("skip_usec", 0600,
+ ibd->fault->dir,
+ &ibd->fault->fault_skip_usec))
+ goto fail;
+ if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
+ &ibd->fault->direction))
+ goto fail;
+
+ return 0;
+fail:
+ hfi1_fault_exit_debugfs(ibd);
+ return -ENOMEM;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ return ibd->fault->suppress_err;
+ return false;
+}
+
+static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
+ u8 direction)
+{
+ bool ret = false;
+
+ if (!ibd->fault || !ibd->fault->enable)
+ return false;
+ if (!(ibd->fault->direction & direction))
+ return false;
+ if (ibd->fault->opcode) {
+ if (bitmap_empty(ibd->fault->opcodes,
+ (sizeof(ibd->fault->opcodes) *
+ BITS_PER_BYTE)))
+ return false;
+ if (!(test_bit(opcode, ibd->fault->opcodes)))
+ return false;
+ }
+ if (ibd->fault->fault_skip_usec &&
+ time_before(jiffies, ibd->fault->skip_usec))
+ return false;
+ if (ibd->fault->fault_skip && ibd->fault->skip) {
+ ibd->fault->skip--;
+ return false;
+ }
+ ret = should_fail(&ibd->fault->attr, 1);
+ if (ret) {
+ ibd->fault->skip = ibd->fault->fault_skip;
+ ibd->fault->skip_usec = jiffies +
+ usecs_to_jiffies(ibd->fault->fault_skip_usec);
+ }
+ return ret;
+}
+
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
+{
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ ibd->fault->n_txfaults[opcode]++;
+ return true;
+ }
+ return false;
+}
+
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
+
+ if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
+ trace_hfi1_fault_packet(packet);
+ ibd->fault->n_rxfaults[packet->opcode]++;
+ return true;
+ }
+ return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
new file mode 100644
index 0000000000000..a83382700a7c0
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -0,0 +1,109 @@
+#ifndef _HFI1_FAULT_H
+#define _HFI1_FAULT_H
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/fault-inject.h>
+#include <linux/dcache.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <rdma/rdma_vt.h>
+
+#include "hfi.h"
+
+struct hfi1_ibdev;
+
+#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS)
+struct fault {
+ struct fault_attr attr;
+ struct dentry *dir;
+ u64 n_rxfaults[(1U << BITS_PER_BYTE)];
+ u64 n_txfaults[(1U << BITS_PER_BYTE)];
+ u64 fault_skip;
+ u64 skip;
+ u64 fault_skip_usec;
+ unsigned long skip_usec;
+ unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG];
+ bool enable;
+ bool suppress_err;
+ bool opcode;
+ u8 direction;
+};
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd);
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode);
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd);
+
+#else
+
+static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ return 0;
+}
+
+static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp,
+ u32 opcode)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+
+static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+}
+#endif
+#endif /* _HFI1_FAULT_H */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index da4aa1a95b110..0fc4aa9455c32 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -110,7 +110,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long arg);
-static int vma_fault(struct vm_fault *vmf);
+static vm_fault_t vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -505,7 +505,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if (flags & VM_WRITE) {
+ if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
ret = -EPERM;
goto done;
}
@@ -591,7 +591,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_fault *vmf)
+static vm_fault_t vma_fault(struct vm_fault *vmf)
{
struct page *page;
@@ -689,8 +689,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
* checks to default and disable the send context.
*/
if (uctxt->sc) {
- set_pio_integrity(uctxt->sc);
sc_disable(uctxt->sc);
+ set_pio_integrity(uctxt->sc);
}
hfi1_free_ctxt_rcv_groups(uctxt);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index cac2c62bc42d6..4ab8b5bfbed13 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -231,20 +231,22 @@ struct hfi1_ctxtdata {
/* job key */
u16 jkey;
/* number of RcvArray groups for this context. */
- u32 rcv_array_groups;
+ u16 rcv_array_groups;
/* index of first eager TID entry. */
- u32 eager_base;
+ u16 eager_base;
/* number of expected TID entries */
- u32 expected_count;
+ u16 expected_count;
/* index of first expected TID entry. */
- u32 expected_base;
+ u16 expected_base;
+ /* array of tid_groups */
+ struct tid_group *groups;
struct exp_tid_set tid_group_list;
struct exp_tid_set tid_used_list;
struct exp_tid_set tid_full_list;
- /* lock protecting all Expected TID data */
- struct mutex exp_lock;
+ /* lock protecting all Expected TID data of user contexts */
+ struct mutex exp_mutex;
/* per-context configuration flags */
unsigned long flags;
/* per-context event flags for fileops/intr communication */
@@ -282,7 +284,7 @@ struct hfi1_ctxtdata {
/* interrupt handling */
u64 imask; /* clear interrupt mask */
int ireg; /* clear interrupt register */
- unsigned numa_id; /* numa node of this context */
+ int numa_id; /* numa node of this context */
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
@@ -333,6 +335,7 @@ struct hfi1_packet {
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
struct ib_grh *grh;
+ struct opa_16b_mgmt *mgmt;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -392,10 +395,17 @@ struct hfi1_packet {
*/
#define OPA_16B_L4_9B 0x00
#define OPA_16B_L2_TYPE 0x02
+#define OPA_16B_L4_FM 0x08
#define OPA_16B_L4_IB_LOCAL 0x09
#define OPA_16B_L4_IB_GLOBAL 0x0A
#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR
+/*
+ * OPA 16B Management
+ */
+#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */
+#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */
+
static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
{
return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
@@ -472,6 +482,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
OPA_16B_BTH_PAD_MASK);
}
+/*
+ * 16B Management
+ */
+#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF
+static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
+ u32 dest_qp, u32 src_qp)
+{
+ mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK);
+ mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
+}
+
struct rvt_sge_state;
/*
@@ -880,9 +911,9 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
#define RHF_RCV_REPROCESS 2 /* stop. retain this packet */
struct rcv_array_data {
- u8 group_size;
u16 ngroups;
u16 nctxt_extra;
+ u8 group_size;
};
struct per_vl_data {
@@ -1263,6 +1294,9 @@ struct hfi1_devdata {
/* Save the enabled LCB error bits */
u64 lcb_err_en;
+ struct cpu_mask_set *comp_vect;
+ int *comp_vect_mappings;
+ u32 comp_vect_possible_cpus;
/*
* Capability to have different send engines simply by changing a
@@ -1856,6 +1890,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_HAS_SDMA_TIMEOUT 0x8
#define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */
#define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */
+#define HFI1_SHUTDOWN 0x100 /* device is shutting down */
/* IB dword length mask in PBC (lower 11 bits); same for all chips */
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
@@ -2048,7 +2083,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
+#ifndef CONFIG_FAULT_INJECTION
| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK
+#endif
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
@@ -2061,7 +2098,11 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
if (ctxt_type == SC_USER)
- base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY;
+ base_sc_integrity |=
+#ifndef CONFIG_FAULT_INJECTION
+ SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
+#endif
+ HFI1_PKT_USER_SC_INTEGRITY;
else
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 6309edf811df6..f110842b91f56 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -113,8 +113,8 @@ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)");
static uint hfi1_hdrq_entsize = 32;
-module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, S_IRUGO);
-MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B (default), 32 - 128B");
+module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444);
+MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)");
unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
@@ -361,16 +361,14 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
}
INIT_LIST_HEAD(&rcd->qp_wait_list);
- hfi1_exp_tid_group_init(&rcd->tid_group_list);
- hfi1_exp_tid_group_init(&rcd->tid_used_list);
- hfi1_exp_tid_group_init(&rcd->tid_full_list);
+ hfi1_exp_tid_group_init(rcd);
rcd->ppd = ppd;
rcd->dd = dd;
__set_bit(0, rcd->in_use_ctxts);
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
- mutex_init(&rcd->exp_lock);
+ mutex_init(&rcd->exp_mutex);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@@ -1058,6 +1056,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
unsigned pidx;
int i;
+ if (dd->flags & HFI1_SHUTDOWN)
+ return;
+ dd->flags |= HFI1_SHUTDOWN;
+
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1240,6 +1242,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
dd->rcv_limit = NULL;
dd->send_schedule = NULL;
dd->tx_opstats = NULL;
+ kfree(dd->comp_vect);
+ dd->comp_vect = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
@@ -1296,6 +1300,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
dd->unit = ret;
list_add(&dd->list, &hfi1_dev_list);
}
+ dd->node = -1;
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
idr_preload_end();
@@ -1348,6 +1353,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
goto bail;
}
+ dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
+ if (!dd->comp_vect) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
kobject_init(&dd->kobj, &hfi1_devdata_type);
return dd;
@@ -1391,6 +1402,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd)
static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
+static void shutdown_one(struct pci_dev *);
#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
@@ -1407,6 +1419,7 @@ static struct pci_driver hfi1_pci_driver = {
.name = DRIVER_NAME,
.probe = init_one,
.remove = remove_one,
+ .shutdown = shutdown_one,
.id_table = hfi1_pci_tbl,
.err_handler = &hfi1_pci_err_handler,
};
@@ -1515,7 +1528,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
- node_affinity_destroy();
+ node_affinity_destroy_all();
hfi1_wss_exit();
hfi1_dbg_exit();
@@ -1599,6 +1612,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
static void postinit_cleanup(struct hfi1_devdata *dd)
{
hfi1_start_cleanup(dd);
+ hfi1_comp_vectors_clean_up(dd);
+ hfi1_dev_affinity_clean_up(dd);
hfi1_pcie_ddcleanup(dd);
hfi1_pcie_cleanup(dd->pcidev);
@@ -1816,6 +1831,13 @@ static void remove_one(struct pci_dev *pdev)
postinit_cleanup(dd);
}
+static void shutdown_one(struct pci_dev *pdev)
+{
+ struct hfi1_devdata *dd = pci_get_drvdata(pdev);
+
+ shutdown_device(dd);
+}
+
/**
* hfi1_create_rcvhdrq - create a receive header queue
* @dd: the hfi1_ib device
@@ -1831,7 +1853,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1856,13 +1877,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
goto bail;
}
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+ HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
- gfp_flags);
+ &dd->pcidev->dev, PAGE_SIZE,
+ &rcd->rcvhdrqtailaddr_dma, gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index e9962c65c68f9..0307405491e01 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1238,7 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
}
static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
- u32 logical_state, u32 phys_state)
+ u32 logical_state, u32 phys_state, int local_mad)
{
struct hfi1_devdata *dd = ppd->dd;
u32 link_state;
@@ -1314,7 +1314,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* Don't send a reply if the response would be sent
* through the disabled port.
*/
- if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
+ if (link_state == HLS_DN_DISABLE && !local_mad)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
break;
case IB_PORT_ARMED:
@@ -1350,7 +1350,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
struct ib_event event;
@@ -1634,7 +1634,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
*/
if (!invalid) {
- ret = set_port_states(ppd, smp, ls_new, ps_new);
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
if (ret)
return ret;
}
@@ -2085,7 +2085,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -2122,7 +2122,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
}
if (!invalid) {
- ret = set_port_states(ppd, smp, ls_new, ps_new);
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
if (ret)
return ret;
}
@@ -3424,6 +3424,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
+ rsp->port_number = port;
/* PortRcvErrorInfo */
rsp->port_rcv_ei.status_and_code =
@@ -4190,7 +4191,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4198,7 +4199,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
- resp_len, max_len);
+ resp_len, max_len, local_mad);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
@@ -4222,7 +4223,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
- resp_len, max_len);
+ resp_len, max_len, local_mad);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
@@ -4314,7 +4315,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
static int subn_set_opa_aggregate(struct opa_smp *smp,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, int local_mad)
{
int i;
u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
@@ -4344,7 +4345,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
}
(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL, (u32)agg_data_len);
+ ibdev, port, NULL, (u32)agg_data_len,
+ local_mad);
+
if (smp->status & IB_SMP_INVALID_FIELD)
break;
if (smp->status & ~IB_SMP_DIRECTION) {
@@ -4519,7 +4522,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
u8 port, const struct opa_mad *in_mad,
struct opa_mad *out_mad,
- u32 *resp_len)
+ u32 *resp_len, int local_mad)
{
struct opa_smp *smp = (struct opa_smp *)out_mad;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4588,11 +4591,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
ibdev, port, resp_len,
- data_size);
+ data_size, local_mad);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
- resp_len);
+ resp_len, local_mad);
break;
}
break;
@@ -4832,6 +4835,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
{
int ret;
int pkey_idx;
+ int local_mad = 0;
u32 resp_len = 0;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4846,13 +4850,14 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- if (is_local_mad(ibp, in_mad, in_wc)) {
+ local_mad = is_local_mad(ibp, in_mad, in_wc);
+ if (local_mad) {
ret = opa_local_smp_check(ibp, in_wc);
if (ret)
return IB_MAD_RESULT_FAILURE;
}
ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
- out_mad, &resp_len);
+ out_mad, &resp_len, local_mad);
goto bail;
case IB_MGMT_CLASS_PERF_MGMT:
ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index c1c982908b4bb..4d4371bf2c7c8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -56,11 +56,6 @@
#include "chip_registers.h"
#include "aspm.h"
-/* link speed vector for Gen3 speed - not in Linux headers */
-#define GEN1_SPEED_VECTOR 0x1
-#define GEN2_SPEED_VECTOR 0x2
-#define GEN3_SPEED_VECTOR 0x3
-
/*
* This file contains PCIe utility routines.
*/
@@ -183,6 +178,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -ENOMEM;
}
dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+
+ /* verify that reads actually work, save revision for reset check */
+ dd->revision = readq(dd->kregbase1 + CCE_REVISION);
+ if (dd->revision == ~(u64)0) {
+ dd_dev_err(dd, "Cannot read chip CSRs\n");
+ goto nomem;
+ }
+
dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
@@ -262,7 +265,7 @@ static u32 extract_speed(u16 linkstat)
case PCI_EXP_LNKSTA_CLS_5_0GB:
speed = 5000; /* Gen 2, 5GHz */
break;
- case GEN3_SPEED_VECTOR:
+ case PCI_EXP_LNKSTA_CLS_8_0GB:
speed = 8000; /* Gen 3, 8GHz */
break;
}
@@ -317,7 +320,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
return ret;
}
- if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
+ if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
dd_dev_info(dd,
"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
linkcap & PCI_EXP_LNKCAP_SLS);
@@ -694,9 +697,6 @@ const struct pci_error_handlers hfi1_pci_err_handler = {
/* gasket block secondary bus reset delay */
#define SBR_DELAY_US 200000 /* 200ms */
-/* mask for PCIe capability register lnkctl2 target link speed */
-#define LNKCTL2_TARGET_LINK_SPEED_MASK 0xf
-
static uint pcie_target = 3;
module_param(pcie_target, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_target, "PCIe target speed (0 skip, 1-3 Gen1-3)");
@@ -1045,13 +1045,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
if (pcie_target == 1) { /* target Gen1 */
- target_vector = GEN1_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_2_5GT;
target_speed = 2500;
} else if (pcie_target == 2) { /* target Gen2 */
- target_vector = GEN2_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_5_0GT;
target_speed = 5000;
} else if (pcie_target == 3) { /* target Gen3 */
- target_vector = GEN3_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_8_0GT;
target_speed = 8000;
} else {
/* off or invalid target - skip */
@@ -1290,8 +1290,8 @@ retry:
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
- if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
- lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
+ if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
+ lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
@@ -1316,7 +1316,7 @@ retry:
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
- lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
+ lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 40dac4d16eb89..9cac15d10c4ff 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -50,8 +50,6 @@
#include "qp.h"
#include "trace.h"
-#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */
-
#define SC(name) SEND_CTXT_##name
/*
* Send Context functions
@@ -961,15 +959,40 @@ void sc_disable(struct send_context *sc)
}
/* return SendEgressCtxtStatus.PacketOccupancy */
-#define packet_occupancy(r) \
- (((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
- >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)
+static u64 packet_occupancy(u64 reg)
+{
+ return (reg &
+ SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
+ >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
+}
/* is egress halted on the context? */
-#define egress_halted(r) \
- ((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)
+static bool egress_halted(u64 reg)
+{
+ return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
+}
-/* wait for packet egress, optionally pause for credit return */
+/* is the send context halted? */
+static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
+{
+ return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) &
+ SC(STATUS_CTXT_HALTED_SMASK));
+}
+
+/**
+ * sc_wait_for_packet_egress
+ * @sc: valid send context
+ * @pause: wait for credit return
+ *
+ * Wait for packet egress, optionally pause for credit return
+ *
+ * Egress halt and Context halt are not necessarily the same thing, so
+ * check for both.
+ *
+ * NOTE: The context halt bit may not be set immediately. Because of this,
+ * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW
+ * context bit to determine if the context is halted.
+ */
static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
{
struct hfi1_devdata *dd = sc->dd;
@@ -981,8 +1004,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
reg_prev = reg;
reg = read_csr(dd, sc->hw_context * 8 +
SEND_EGRESS_CTXT_STATUS);
- /* done if egress is stopped */
- if (egress_halted(reg))
+ /* done if any halt bits, SW or HW are set */
+ if (sc->flags & SCF_HALTED ||
+ is_sc_halted(dd, sc->hw_context) || egress_halted(reg))
break;
reg = packet_occupancy(reg);
if (reg == 0)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index da58046a02eaa..1a1a47ac53c6f 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2012,7 +2012,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
unsigned long nsec = 1024 * ccti_timer;
hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_PINNED);
}
spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
@@ -2123,7 +2123,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2149,7 +2149,7 @@ send_middle:
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2159,7 +2159,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
case OP(SEND_ONLY_WITH_INVALIDATE):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2271,7 +2271,7 @@ send_last:
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index c0071ca4147ae..ef4c566e206f9 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -53,156 +53,6 @@
#include "verbs_txreq.h"
#include "trace.h"
-/*
- * Validate a RWQE and fill in the SGE state.
- * Return 1 if OK.
- */
-static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
-{
- int i, j, ret;
- struct ib_wc wc;
- struct rvt_lkey_table *rkt;
- struct rvt_pd *pd;
- struct rvt_sge_state *ss;
-
- rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
- pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
- ss = &qp->r_sge;
- ss->sg_list = qp->r_sg_list;
- qp->r_len = 0;
- for (i = j = 0; i < wqe->num_sge; i++) {
- if (wqe->sg_list[i].length == 0)
- continue;
- /* Check LKEY */
- ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- NULL, &wqe->sg_list[i],
- IB_ACCESS_LOCAL_WRITE);
- if (unlikely(ret <= 0))
- goto bad_lkey;
- qp->r_len += wqe->sg_list[i].length;
- j++;
- }
- ss->num_sge = j;
- ss->total_len = qp->r_len;
- ret = 1;
- goto bail;
-
-bad_lkey:
- while (j) {
- struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
-
- rvt_put_mr(sge->mr);
- }
- ss->num_sge = 0;
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr_id;
- wc.status = IB_WC_LOC_PROT_ERR;
- wc.opcode = IB_WC_RECV;
- wc.qp = &qp->ibqp;
- /* Signal solicited completion event. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
- ret = 0;
-bail:
- return ret;
-}
-
-/**
- * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return -1 if there is a local error, 0 if no RWQE is available,
- * otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
-{
- unsigned long flags;
- struct rvt_rq *rq;
- struct rvt_rwq *wq;
- struct rvt_srq *srq;
- struct rvt_rwqe *wqe;
- void (*handler)(struct ib_event *, void *);
- u32 tail;
- int ret;
-
- if (qp->ibqp.srq) {
- srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
- handler = srq->ibsrq.event_handler;
- rq = &srq->rq;
- } else {
- srq = NULL;
- handler = NULL;
- rq = &qp->r_rq;
- }
-
- spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
- ret = 0;
- goto unlock;
- }
-
- wq = rq->wq;
- tail = wq->tail;
- /* Validate tail before using it since it is user writable. */
- if (tail >= rq->size)
- tail = 0;
- if (unlikely(tail == wq->head)) {
- ret = 0;
- goto unlock;
- }
- /* Make sure entry is read after head index is read. */
- smp_rmb();
- wqe = rvt_get_rwqe_ptr(rq, tail);
- /*
- * Even though we update the tail index in memory, the verbs
- * consumer is not supposed to post more entries until a
- * completion is generated.
- */
- if (++tail >= rq->size)
- tail = 0;
- wq->tail = tail;
- if (!wr_id_only && !init_sge(qp, wqe)) {
- ret = -1;
- goto unlock;
- }
- qp->r_wr_id = wqe->wr_id;
-
- ret = 1;
- set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
- if (handler) {
- u32 n;
-
- /*
- * Validate head pointer value and compute
- * the number of remaining WQEs.
- */
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
- }
- }
-unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
-bail:
- return ret;
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -423,7 +273,7 @@ again:
/* FALLTHROUGH */
case IB_WR_SEND:
send:
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -435,7 +285,7 @@ send:
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 1f203309cf24b..298e0e3fc0c9f 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -923,9 +923,10 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
cpumask_var_t mask, new_mask;
unsigned long cpu;
int ret, vl, sz;
+ struct sdma_rht_node *rht_node;
vl = sdma_engine_get_vl(sde);
- if (unlikely(vl < 0))
+ if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
return -EINVAL;
ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
@@ -953,19 +954,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
mutex_lock(&process_to_sde_mutex);
for_each_cpu(cpu, mask) {
- struct sdma_rht_node *rht_node;
-
/* Check if we have this already mapped */
if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
cpumask_set_cpu(cpu, new_mask);
continue;
}
- if (vl >= ARRAY_SIZE(rht_node->map)) {
- ret = -EINVAL;
- goto out;
- }
-
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 89bd9851065b8..7c8aed0ffc07e 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -63,13 +63,20 @@ static u8 __get_ib_hdr_len(struct ib_header *hdr)
static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr)
{
- struct ib_other_headers *ohdr;
+ struct ib_other_headers *ohdr = NULL;
u8 opcode;
+ u8 l4 = hfi1_16B_get_l4(hdr);
+
+ if (l4 == OPA_16B_L4_FM) {
+ opcode = IB_OPCODE_UD_SEND_ONLY;
+ return (8 + 8); /* No BTH */
+ }
- if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL)
+ if (l4 == OPA_16B_L4_IB_LOCAL)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
+
opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8);
@@ -234,17 +241,24 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
#define BTH_16B_PRN \
"op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \
"qpn:0x%.6x a:%d psn:0x%.8x"
-const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, bool becn, bool fecn, u8 mig,
- u8 se, u8 pad, u8 opcode, const char *opname,
- u8 tver, u16 pkey, u32 psn, u32 qpn)
+#define L4_FM_16B_PRN \
+ "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x"
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn)
{
const char *ret = trace_seq_buffer_ptr(p);
if (bypass)
- trace_seq_printf(p, BTH_16B_PRN,
- opcode, opname,
- se, mig, pad, tver, qpn, ack, psn);
+ if (l4 == OPA_16B_L4_FM)
+ trace_seq_printf(p, L4_FM_16B_PRN,
+ opcode, opname, dest_qpn, src_qpn);
+ else
+ trace_seq_printf(p, BTH_16B_PRN,
+ opcode, opname,
+ se, mig, pad, tver, qpn, ack, psn);
else
trace_seq_printf(p, BTH_9B_PRN,
@@ -258,12 +272,17 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
const char *parse_everbs_hdrs(
struct trace_seq *p,
- u8 opcode,
+ u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn,
void *ehdrs)
{
union ib_ehdrs *eh = ehdrs;
const char *ret = trace_seq_buffer_ptr(p);
+ if (l4 == OPA_16B_L4_FM) {
+ trace_seq_printf(p, "mgmt pkt");
+ goto out;
+ }
+
switch (opcode) {
/* imm */
case OP(RC, SEND_LAST_WITH_IMMEDIATE):
@@ -334,6 +353,7 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->ieth));
break;
}
+out:
trace_seq_putc(p, 0);
return ret;
}
@@ -374,6 +394,7 @@ const char *print_u32_array(
return ret;
}
+__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
__hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 0e7d929530c5b..e62171fb7379f 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
* hfi1_cdbg(LVL, fmt, ...); as well as take care of all
* the debugfs stuff.
*/
+__hfi1_trace_def(AFFINITY);
__hfi1_trace_def(PKT);
__hfi1_trace_def(PROC);
__hfi1_trace_def(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 2847626d3819e..1dc2c28fc96e7 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -96,7 +96,9 @@ __print_symbolic(opcode, \
ib_opcode_name(CNP))
u8 ibhdr_exhdr_len(struct ib_header *hdr);
-const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode,
+ u8 l4, u32 dest_qpn, u32 src_qpn,
+ void *ehdrs);
u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah);
u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet);
const char *hfi1_trace_get_packet_l4_str(u8 l4);
@@ -123,14 +125,16 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
u8 rc, u8 sc, u8 sl, u16 entropy,
u16 len, u16 pkey, u32 dlid, u32 slid);
-const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, bool becn, bool fecn, u8 mig,
- u8 se, u8 pad, u8 opcode, const char *opname,
- u8 tver, u16 pkey, u32 psn, u32 qpn);
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn);
const char *hfi1_trace_get_packet_l2_str(u8 l2);
-#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
+#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \
+ parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs)
#define lrh_name(lrh) { HFI1_##lrh, #lrh }
#define show_lnh(lrh) \
@@ -169,6 +173,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__field(u32, psn)
__field(u32, qpn)
__field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
/* extended headers */
__dynamic_array(u8, ehdrs,
hfi1_trace_packet_hdr_len(packet))
@@ -178,6 +184,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__entry->etype = packet->etype;
__entry->l2 = hfi1_16B_get_l2(packet->hdr);
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
if (__entry->etype == RHF_RCV_TYPE_BYPASS) {
hfi1_trace_parse_16b_hdr(packet->hdr,
&__entry->age,
@@ -192,16 +200,23 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
&__entry->dlid,
&__entry->slid);
- hfi1_trace_parse_16b_bth(packet->ohdr,
- &__entry->ack,
- &__entry->mig,
- &__entry->opcode,
- &__entry->pad,
- &__entry->se,
- &__entry->tver,
- &__entry->psn,
- &__entry->qpn);
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt);
+ } else {
+ hfi1_trace_parse_16b_bth(packet->ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
} else {
+ __entry->l4 = OPA_16B_L4_9B;
hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
&__entry->lnh,
&__entry->lver,
@@ -223,8 +238,9 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
&__entry->pkey,
&__entry->psn,
&__entry->qpn);
- }
- /* extended headers */
+ }
+ /* extended headers */
+ if (__entry->l4 != OPA_16B_L4_FM)
memcpy(__get_dynamic_array(ehdrs),
&packet->ohdr->u,
__get_dynamic_array_len(ehdrs));
@@ -253,25 +269,31 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__entry->pkey,
__entry->dlid,
__entry->slid),
- hfi1_trace_fmt_bth(p,
- __entry->etype ==
+ hfi1_trace_fmt_rest(p,
+ __entry->etype ==
RHF_RCV_TYPE_BYPASS,
- __entry->ack,
- __entry->becn,
- __entry->fecn,
- __entry->mig,
- __entry->se,
- __entry->pad,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->tver,
- __entry->pkey,
- __entry->psn,
- __entry->qpn),
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
(void *)__get_dynamic_array(ehdrs))
)
);
@@ -310,6 +332,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
__field(u32, psn)
__field(u32, qpn)
__field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
/* extended headers */
__dynamic_array(u8, ehdrs,
hfi1_trace_opa_hdr_len(opah))
@@ -320,6 +344,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
DD_DEV_ASSIGN(dd);
__entry->hdr_type = opah->hdr_type;
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
if (__entry->hdr_type) {
hfi1_trace_parse_16b_hdr(&opah->opah,
&__entry->age,
@@ -334,19 +360,26 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
&__entry->dlid,
&__entry->slid);
- if (__entry->l4 == OPA_16B_L4_IB_LOCAL)
- ohdr = &opah->opah.u.oth;
- else
- ohdr = &opah->opah.u.l.oth;
- hfi1_trace_parse_16b_bth(ohdr,
- &__entry->ack,
- &__entry->mig,
- &__entry->opcode,
- &__entry->pad,
- &__entry->se,
- &__entry->tver,
- &__entry->psn,
- &__entry->qpn);
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ ohdr = NULL;
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt);
+ } else {
+ if (__entry->l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &opah->opah.u.oth;
+ else
+ ohdr = &opah->opah.u.l.oth;
+ hfi1_trace_parse_16b_bth(ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
} else {
__entry->l4 = OPA_16B_L4_9B;
hfi1_trace_parse_9b_hdr(&opah->ibh, sc5,
@@ -376,8 +409,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
}
/* extended headers */
- memcpy(__get_dynamic_array(ehdrs),
- &ohdr->u, __get_dynamic_array_len(ehdrs));
+ if (__entry->l4 != OPA_16B_L4_FM)
+ memcpy(__get_dynamic_array(ehdrs),
+ &ohdr->u, __get_dynamic_array_len(ehdrs));
),
TP_printk("[%s] (%s) %s %s hlen:%d %s",
__get_str(dev),
@@ -399,24 +433,30 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
__entry->pkey,
__entry->dlid,
__entry->slid),
- hfi1_trace_fmt_bth(p,
- !!__entry->hdr_type,
- __entry->ack,
- __entry->becn,
- __entry->fecn,
- __entry->mig,
- __entry->se,
- __entry->pad,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->tver,
- __entry->pkey,
- __entry->psn,
- __entry->qpn),
+ hfi1_trace_fmt_rest(p,
+ !!__entry->hdr_type,
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
(void *)__get_dynamic_array(ehdrs))
)
);
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 9d7a3110c14c6..b7b671017e594 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -397,7 +397,7 @@ send_first:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
qp->r_sge = qp->s_rdma_read_sge;
} else {
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -542,7 +542,7 @@ rdma_last_imm:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
rvt_put_ss(&qp->s_rdma_read_sge);
} else {
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 69c17a5ef0387..1ab332f1866e8 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -163,7 +163,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
@@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
u32 dlid, slid, nwords, extra_bytes;
+ u32 dest_qp = wqe->ud_wr.remote_qpn;
+ u32 src_qp = qp->ibqp.qp_num;
u16 len, pkey;
u8 l4, sc5;
+ bool is_mgmt = false;
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
- ps->s_txreq->hdr_dwords = 9;
- if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
- ps->s_txreq->hdr_dwords++;
+
+ /*
+ * Build 16B Management Packet if either the destination
+ * or source queue pair number is 0 or 1.
+ */
+ if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
+ /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
+ ps->s_txreq->hdr_dwords = 6;
+ is_mgmt = true;
+ } else {
+ /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
+ ps->s_txreq->hdr_dwords = 9;
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ ps->s_txreq->hdr_dwords++;
+ }
/* SW provides space for CRC and LT for bypass packets. */
extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
@@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
- hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
+ if (is_mgmt) {
+ l4 = OPA_16B_L4_FM;
+ pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
+ dest_qp, src_qp);
+ } else {
+ hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
+ }
/* Convert dwords to flits */
len = (ps->s_txreq->hdr_dwords + nwords) >> 1;
@@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct ib_other_headers *ohdr = packet->ohdr;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
- u32 qkey;
u32 src_qp;
u16 pkey;
int mgmt_pkey_idx = -1;
@@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 dlid = packet->dlid;
u32 slid = packet->slid;
u8 extra_bytes;
+ u8 l4 = 0;
bool dlid_is_permissive;
bool slid_is_permissive;
+ bool solicited = false;
extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);
- qkey = ib_get_qkey(ohdr);
- src_qp = ib_get_sqpn(ohdr);
if (packet->etype == RHF_RCV_TYPE_BYPASS) {
u32 permissive_lid =
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);
+ l4 = hfi1_16B_get_l4(packet->hdr);
pkey = hfi1_16B_get_pkey(packet->hdr);
dlid_is_permissive = (dlid == permissive_lid);
slid_is_permissive = (slid == permissive_lid);
} else {
- pkey = ib_bth_get_pkey(ohdr);
+ pkey = ib_bth_get_pkey(packet->ohdr);
dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
}
sl_from_sc = ibp->sc_to_sl[sc5];
+ if (likely(l4 != OPA_16B_L4_FM)) {
+ src_qp = ib_get_sqpn(packet->ohdr);
+ solicited = ib_bth_is_solicited(packet->ohdr);
+ } else {
+ src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
+ }
+
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
@@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
- if (unlikely(qkey != qp->qkey)) /* Silent drop */
- return;
+ if (unlikely(l4 != OPA_16B_L4_FM &&
+ ib_get_qkey(packet->ohdr) != qp->qkey))
+ return; /* Silent drop */
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
@@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
- wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
@@ -974,7 +1002,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
@@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
return;
drop:
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 0d5330b7353d2..dbe7d14a5c76d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -375,7 +375,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
* From this point on, we are going to be using shared (between master
* and subcontexts) context resources. We need to take the lock.
*/
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
/*
* The first step is to program the RcvArray entries which are complete
* groups.
@@ -437,7 +437,6 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
hfi1_cdbg(TID,
"Failed to program RcvArray entries %d",
ret);
- ret = -EFAULT;
goto unlock;
} else if (ret > 0) {
if (grp->used == grp->size)
@@ -462,7 +461,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
}
}
unlock:
- mutex_unlock(&uctxt->exp_lock);
+ mutex_unlock(&uctxt->exp_mutex);
nomem:
hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
mapped_pages, ret);
@@ -518,7 +517,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
if (ret) {
@@ -531,7 +530,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
fd->tid_used -= tididx;
spin_unlock(&fd->tid_lock);
tinfo->tidcnt = tididx;
- mutex_unlock(&uctxt->exp_lock);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(tidinfo);
return ret;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index a3d1924243448..d2bc77f75253f 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_USER_SDMA_H
#define _HFI1_USER_SDMA_H
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -122,8 +122,6 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
(req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
##__VA_ARGS__)
-extern uint extended_psn;
-
struct hfi1_user_sdma_pkt_q {
u16 ctxt;
u16 subctxt;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index c8cf4d4984d34..08991874c0e2c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -63,6 +63,8 @@
#include "verbs_txreq.h"
#include "debugfs.h"
#include "vnic.h"
+#include "fault.h"
+#include "affinity.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -615,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
wake_up(&mcast->wait);
} else {
/* Get the destination QP number. */
- qp_num = ib_bth_get_qpn(packet->ohdr);
+ if (packet->etype == RHF_RCV_TYPE_BYPASS &&
+ hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
+ qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
+ else
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
@@ -624,10 +631,6 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
if (hfi1_do_pkey_check(packet))
goto unlock_drop;
- if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
- true)))
- goto unlock_drop;
-
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -934,8 +937,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode,
- false)))
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
@@ -1088,7 +1090,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false)))
+
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
@@ -1310,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct ib_other_headers *ohdr;
+ struct ib_other_headers *ohdr = NULL;
send_routine sr;
int ret;
u16 pkey;
u32 slid;
+ u8 l4 = 0;
/* locate the pkey within the headers */
if (ps->s_txreq->phdr.hdr.hdr_type) {
struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;
- u8 l4 = hfi1_16B_get_l4(hdr);
- if (l4 == OPA_16B_L4_IB_GLOBAL)
- ohdr = &hdr->u.l.oth;
- else
+ l4 = hfi1_16B_get_l4(hdr);
+ if (l4 == OPA_16B_L4_IB_LOCAL)
ohdr = &hdr->u.oth;
+ else if (l4 == OPA_16B_L4_IB_GLOBAL)
+ ohdr = &hdr->u.l.oth;
+
slid = hfi1_16B_get_slid(hdr);
pkey = hfi1_16B_get_pkey(hdr);
} else {
@@ -1339,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
pkey = ib_bth_get_pkey(ohdr);
}
- ps->opcode = ib_bth_get_opcode(ohdr);
+ if (likely(l4 != OPA_16B_L4_FM))
+ ps->opcode = ib_bth_get_opcode(ohdr);
+ else
+ ps->opcode = IB_OPCODE_UD_SEND_ONLY;
+
sr = get_send_routine(qp, ps);
ret = egress_pkey_check(dd->pport, slid, pkey,
priv->s_sc, qp->s_pkey_index);
@@ -1937,11 +1946,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
+ hfi1_comp_vect_mappings_lookup;
/* completeion queue */
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "hfi1_cq%d", dd->unit);
+ dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
dd->verbs_dev.rdi.dparms.node = dd->node;
/* misc settings */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 2d787b8346cab..a4d06502f06df 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -110,6 +110,12 @@ enum {
#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
+/* 24Bits for qpn, upper 8Bits reserved */
+struct opa_16b_mgmt {
+ __be32 dest_qpn;
+ __be32 src_qpn;
+};
+
struct hfi1_16b_header {
u32 lrh[4];
union {
@@ -118,6 +124,7 @@ struct hfi1_16b_header {
struct ib_other_headers oth;
} l;
struct ib_other_headers oth;
+ struct opa_16b_mgmt mgmt;
} u;
} __packed;
@@ -227,9 +234,7 @@ struct hfi1_ibdev {
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
#ifdef CONFIG_FAULT_INJECTION
- struct fault_opcode *fault_opcode;
- struct fault_packet *fault_packet;
- bool fault_suppress_err;
+ struct fault *fault;
#endif
#endif
};
@@ -330,8 +335,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-
void hfi1_migrate_qp(struct rvt_qp *qp);
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,