Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig                     |  59
-rw-r--r--  lib/Kconfig.debug               |  67
-rw-r--r--  lib/Kconfig.ubsan               |   7
-rw-r--r--  lib/Makefile                    |   8
-rw-r--r--  lib/bitmap.c                    |   2
-rw-r--r--  lib/dma-debug.c                 |  65
-rw-r--r--  lib/dma-direct.c                |  32
-rw-r--r--  lib/dma-noncoherent.c           | 102
-rw-r--r--  lib/errseq.c                    |  23
-rw-r--r--  lib/find_bit_benchmark.c        |   7
-rw-r--r--  lib/iommu-common.c              | 267
-rw-r--r--  lib/iommu-helper.c              |  14
-rw-r--r--  lib/iov_iter.c                  |  65
-rw-r--r--  lib/kobject.c                   |  11
-rw-r--r--  lib/list_debug.c                |  14
-rw-r--r--  lib/lockref.c                   |  28
-rw-r--r--  lib/logic_pio.c                 | 280
-rw-r--r--  lib/radix-tree.c                |  13
-rw-r--r--  lib/raid6/.gitignore            |   1
-rw-r--r--  lib/raid6/Makefile              |  27
-rw-r--r--  lib/raid6/algos.c               |   4
-rw-r--r--  lib/raid6/altivec.uc            |   3
-rw-r--r--  lib/raid6/test/Makefile         |  22
-rw-r--r--  lib/raid6/vpermxor.uc           | 105
-rw-r--r--  lib/reed_solomon/decode_rs.c    |  34
-rw-r--r--  lib/reed_solomon/encode_rs.c    |  15
-rw-r--r--  lib/reed_solomon/reed_solomon.c | 240
-rw-r--r--  lib/sbitmap.c                   | 113
-rw-r--r--  lib/sha256.c                    | 283
-rw-r--r--  lib/swiotlb.c                   |  19
-rw-r--r--  lib/test_bitmap.c               |  35
-rw-r--r--  lib/test_firmware.c             |   1
-rw-r--r--  lib/test_kasan.c                |   8
-rw-r--r--  lib/test_ubsan.c                | 144
-rw-r--r--  lib/textsearch.c                |  40
-rw-r--r--  lib/vsprintf.c                  |  48
36 files changed, 1610 insertions, 596 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index e960894993710..7a913937888b6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -55,6 +55,22 @@ config ARCH_USE_CMPXCHG_LOCKREF
config ARCH_HAS_FAST_MULTIPLIER
bool
+config INDIRECT_PIO
+ bool "Access I/O in non-MMIO mode"
+ depends on ARM64
+ help
+ On some platforms where no separate I/O space exists, there are I/O
+ hosts that cannot be accessed in MMIO mode. Using the logical PIO
+ mechanism, the host-local I/O resource can be mapped into the
+ system's logical PIO space, shared with MMIO hosts such as PCI/PCIe,
+ so that the system can access the I/O devices through the mapped
+ logical PIO with the normal I/O accessors.
+
+ This mechanism has a relatively small I/O performance cost. Please
+ make sure your devices really need this configuration item enabled.
+
+ When in doubt, say N.
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
@@ -413,15 +429,50 @@ config SGL_ALLOC
bool
default n
+config NEED_SG_DMA_LENGTH
+ bool
+
+config NEED_DMA_MAP_STATE
+ bool
+
+config ARCH_DMA_ADDR_T_64BIT
+ def_bool 64BIT || PHYS_ADDR_T_64BIT
+
+config IOMMU_HELPER
+ bool
+
+config ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ bool
+
+config ARCH_HAS_SYNC_DMA_FOR_CPU
+ bool
+ select NEED_DMA_MAP_STATE
+
config DMA_DIRECT_OPS
bool
- depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
- default n
+ depends on HAS_DMA
+
+config DMA_NONCOHERENT_OPS
+ bool
+ depends on HAS_DMA
+ select DMA_DIRECT_OPS
+
+config DMA_NONCOHERENT_MMAP
+ bool
+ depends on DMA_NONCOHERENT_OPS
+
+config DMA_NONCOHERENT_CACHE_SYNC
+ bool
+ depends on DMA_NONCOHERENT_OPS
config DMA_VIRT_OPS
bool
- depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
- default n
+ depends on HAS_DMA
+
+config SWIOTLB
+ bool
+ select DMA_DIRECT_OPS
+ select NEED_DMA_MAP_STATE
config CHECK_SIGNATURE
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 51c6bf0d93c61..76555479ae369 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -800,6 +800,30 @@ config SOFTLOCKUP_DETECTOR
chance to run. The current stack trace is displayed upon
detection and the system will stay locked up.
+config BOOTPARAM_SOFTLOCKUP_PANIC
+ bool "Panic (Reboot) On Soft Lockups"
+ depends on SOFTLOCKUP_DETECTOR
+ help
+ Say Y here to enable the kernel to panic on "soft lockups",
+ which are bugs that cause the kernel to loop in kernel
+ mode for more than 20 seconds (configurable using the watchdog_thresh
+ sysctl), without giving other tasks a chance to run.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ lockup has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a lockup must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+ int
+ depends on SOFTLOCKUP_DETECTOR
+ range 0 1
+ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
config HARDLOCKUP_DETECTOR_PERF
bool
select SOFTLOCKUP_DETECTOR
@@ -849,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
default 1 if BOOTPARAM_HARDLOCKUP_PANIC
-config BOOTPARAM_SOFTLOCKUP_PANIC
- bool "Panic (Reboot) On Soft Lockups"
- depends on SOFTLOCKUP_DETECTOR
- help
- Say Y here to enable the kernel to panic on "soft lockups",
- which are bugs that cause the kernel to loop in kernel
- mode for more than 20 seconds (configurable using the watchdog_thresh
- sysctl), without giving other tasks a chance to run.
-
- The panic can be used in combination with panic_timeout,
- to cause the system to reboot automatically after a
- lockup has been detected. This feature is useful for
- high-availability systems that have uptime guarantees and
- where a lockup must be resolved ASAP.
-
- Say N if unsure.
-
-config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
- int
- depends on SOFTLOCKUP_DETECTOR
- range 0 1
- default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
- default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
-
config DETECT_HUNG_TASK
bool "Detect Hung Tasks"
depends on DEBUG_KERNEL
@@ -1634,7 +1634,7 @@ config PROVIDE_OHCI1394_DMA_INIT
config DMA_API_DEBUG
bool "Enable debugging of DMA-API usage"
- depends on HAVE_DMA_API_DEBUG
+ select NEED_DMA_MAP_STATE
help
Enable this option to debug the use of the DMA API by device drivers.
With this option you will be able to detect common bugs in device
@@ -1651,6 +1651,23 @@ config DMA_API_DEBUG
If unsure, say N.
+config DMA_API_DEBUG_SG
+ bool "Debug DMA scatter-gather usage"
+ default y
+ depends on DMA_API_DEBUG
+ help
+ Perform extra checking that callers of dma_map_sg() have respected the
+ appropriate segment length/boundary limits for the given device when
+ preparing DMA scatterlists.
+
+ This is particularly likely to have been overlooked in cases where the
+ dma_map_sg() API is used for general bulk mapping of pages rather than
+ preparing literal scatter-gather descriptors, where there is a risk of
+ unexpected behaviour from DMA API implementations if the scatterlist
+ is technically out-of-spec.
+
+ If unsure, say N.
+
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
def_bool y
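
The DMA_API_DEBUG_SG check added above compares each scatterlist segment against the limits a driver advertises through dev->dma_parms. A minimal, hedged sketch of the kind of mapping it is meant to flag (the driver function and sizes are made up; it assumes the bus has already allocated dev->dma_parms):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/sizes.h>

/*
 * Hypothetical driver fragment: the device claims a 64 KiB maximum
 * segment size, but the scatterlist carries one 128 KiB segment.
 * With CONFIG_DMA_API_DEBUG_SG the dma_map_sg() call below reports
 * "mapping sg segment longer than device claims to support".
 */
static int demo_map_too_long(struct device *dev, void *buf)
{
	struct scatterlist sg;

	dma_set_max_seg_size(dev, SZ_64K);	/* stored in dev->dma_parms */
	sg_init_one(&sg, buf, SZ_128K);		/* a single 128 KiB segment */

	if (dma_map_sg(dev, &sg, 1, DMA_TO_DEVICE) == 0)
		return -ENOMEM;

	dma_unmap_sg(dev, &sg, 1, DMA_TO_DEVICE);
	return 0;
}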
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index a669c193b8785..19d42ea75ec22 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -46,3 +46,10 @@ config UBSAN_NULL
help
This option enables detection of memory accesses via a
null pointer.
+
+config TEST_UBSAN
+ tristate "Module for testing for undefined behavior detection"
+ depends on m && UBSAN
+ help
+ This is a test module for UBSAN.
+ It triggers various kinds of undefined behaviour so that UBSAN can detect them.
diff --git a/lib/Makefile b/lib/Makefile
index 0bd50d71f423f..9f18c81522814 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,6 +30,7 @@ lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
+lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
lib-y += kobject.o klist.o
@@ -53,6 +54,9 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+CFLAGS_test_kasan.o += -fno-builtin
+obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
+UBSAN_SANITIZE_test_ubsan.o := y
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
obj-$(CONFIG_TEST_LKM) += test_module.o
@@ -82,6 +86,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+obj-y += logic_pio.o
+
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
@@ -142,7 +148,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 9e498c77ed0e8..a42eff7e8c48b 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
/* if no digit is after '-', it's wrong*/
if (at_start && in_range)
return -EINVAL;
- if (!(a <= b) || !(used_size <= group_size))
+ if (!(a <= b) || group_size == 0 || !(used_size <= group_size))
return -EINVAL;
if (b >= nmaskbits)
return -ERANGE;
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7f5cdc1e6b298..c007d25bee098 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -41,6 +41,11 @@
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK (HASH_SIZE - 1)
+/* allow architectures to override this if absolutely required */
+#ifndef PREALLOC_DMA_DEBUG_ENTRIES
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+#endif
+
enum {
dma_debug_single,
dma_debug_page,
@@ -127,7 +132,7 @@ static u32 min_free_entries;
static u32 nr_total_entries;
/* number of preallocated entries requested by kernel cmdline */
-static u32 req_entries;
+static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
/* debugfs dentry's for the stuff above */
static struct dentry *dma_debug_dent __read_mostly;
@@ -439,7 +444,6 @@ void debug_dma_dump_mappings(struct device *dev)
spin_unlock_irqrestore(&bucket->lock, flags);
}
}
-EXPORT_SYMBOL(debug_dma_dump_mappings);
/*
* For each mapping (initial cacheline in the case of
@@ -748,7 +752,6 @@ int dma_debug_resize_entries(u32 num_entries)
return ret;
}
-EXPORT_SYMBOL(dma_debug_resize_entries);
/*
* DMA-API debugging init code
@@ -1004,10 +1007,7 @@ void dma_debug_add_bus(struct bus_type *bus)
bus_register_notifier(bus, nb);
}
-/*
- * Let the architectures decide how many entries should be preallocated.
- */
-void dma_debug_init(u32 num_entries)
+static int dma_debug_init(void)
{
int i;
@@ -1015,7 +1015,7 @@ void dma_debug_init(u32 num_entries)
* called to set dma_debug_initialized
*/
if (global_disable)
- return;
+ return 0;
for (i = 0; i < HASH_SIZE; ++i) {
INIT_LIST_HEAD(&dma_entry_hash[i].list);
@@ -1026,17 +1026,14 @@ void dma_debug_init(u32 num_entries)
pr_err("DMA-API: error creating debugfs entries - disabling\n");
global_disable = true;
- return;
+ return 0;
}
- if (req_entries)
- num_entries = req_entries;
-
- if (prealloc_memory(num_entries) != 0) {
+ if (prealloc_memory(nr_prealloc_entries) != 0) {
pr_err("DMA-API: debugging out of memory error - disabled\n");
global_disable = true;
- return;
+ return 0;
}
nr_total_entries = num_free_entries;
@@ -1044,7 +1041,9 @@ void dma_debug_init(u32 num_entries)
dma_debug_initialized = true;
pr_info("DMA-API: debugging enabled by kernel config\n");
+ return 0;
}
+core_initcall(dma_debug_init);
static __init int dma_debug_cmdline(char *str)
{
@@ -1061,16 +1060,10 @@ static __init int dma_debug_cmdline(char *str)
static __init int dma_debug_entries_cmdline(char *str)
{
- int res;
-
if (!str)
return -EINVAL;
-
- res = get_option(&str, &req_entries);
-
- if (!res)
- req_entries = 0;
-
+ if (!get_option(&str, &nr_prealloc_entries))
+ nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
return 0;
}
@@ -1293,6 +1286,32 @@ out:
put_hash_bucket(bucket, &flags);
}
+static void check_sg_segment(struct device *dev, struct scatterlist *sg)
+{
+#ifdef CONFIG_DMA_API_DEBUG_SG
+ unsigned int max_seg = dma_get_max_seg_size(dev);
+ u64 start, end, boundary = dma_get_seg_boundary(dev);
+
+ /*
+ * Either the driver forgot to set dma_parms appropriately, or
+ * whoever generated the list forgot to check them.
+ */
+ if (sg->length > max_seg)
+ err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
+ sg->length, max_seg);
+ /*
+ * In some cases this could potentially be the DMA API
+ * implementation's fault, but it would usually imply that
+ * the scatterlist was built inappropriately to begin with.
+ */
+ start = sg_dma_address(sg);
+ end = start + sg_dma_len(sg) - 1;
+ if ((start ^ end) & ~boundary)
+ err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
+ start, end, boundary);
+#endif
+}
+
void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
size_t size, int direction, dma_addr_t dma_addr,
bool map_single)
@@ -1423,6 +1442,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
}
+ check_sg_segment(dev, s);
+
add_dma_entry(entry);
}
}
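
The new check_sg_segment() boundary test relies on dma_get_seg_boundary() returning a mask of the form 2^n - 1: two addresses sit in the same 2^n-aligned window exactly when their XOR has no bits set above that mask. A small standalone illustration with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t boundary = 0xffff;		/* 64 KiB boundary mask (2^16 - 1) */
	uint64_t start = 0x1fff0;		/* segment starts just below 0x20000 */
	uint64_t end = start + 0x20 - 1;	/* ...and ends just above it */

	/* Non-zero result: start and end are in different 64 KiB windows. */
	printf("crosses boundary: %s\n",
	       ((start ^ end) & ~boundary) ? "yes" : "no");
	return 0;
}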
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index c0bba30fef0ac..8be8106270c24 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -34,6 +34,13 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
{
if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
+ if (!dev->dma_mask) {
+ dev_err(dev,
+ "%s: call on device without dma_mask\n",
+ caller);
+ return false;
+ }
+
if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
dev_err(dev,
"%s: overflow %pad+%zu of device mask %llx\n",
@@ -84,7 +91,15 @@ again:
__free_pages(page, page_order);
page = NULL;
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
+ !(gfp & (GFP_DMA32 | GFP_DMA))) {
+ gfp |= GFP_DMA32;
+ goto again;
+ }
+
+ if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+ dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
!(gfp & GFP_DMA)) {
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
goto again;
@@ -120,7 +135,7 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
free_pages((unsigned long)cpu_addr, page_order);
}
-static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
@@ -131,8 +146,8 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
return dma_addr;
}
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir, unsigned long attrs)
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs)
{
int i;
struct scatterlist *sg;
@@ -164,10 +179,16 @@ int dma_direct_supported(struct device *dev, u64 mask)
if (mask < DMA_BIT_MASK(32))
return 0;
#endif
+ /*
+ * Various PCI/PCIe bridges have broken support for > 32bit DMA even
+ * if the device itself might support it.
+ */
+ if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+ return 0;
return 1;
}
-static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == DIRECT_MAPPING_ERROR;
}
@@ -179,6 +200,5 @@ const struct dma_map_ops dma_direct_ops = {
.map_sg = dma_direct_map_sg,
.dma_supported = dma_direct_supported,
.mapping_error = dma_direct_mapping_error,
- .is_phys = 1,
};
EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
new file mode 100644
index 0000000000000..79e9a757387f6
--- /dev/null
+++ b/lib/dma-noncoherent.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without providing cache
+ * coherence.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/scatterlist.h>
+
+static void dma_noncoherent_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ dma_addr_t addr;
+
+ addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
+ size, dir);
+ return addr;
+}
+
+static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
+ if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
+ return nents;
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+}
+
+static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+#endif
+
+const struct dma_map_ops dma_noncoherent_ops = {
+ .alloc = arch_dma_alloc,
+ .free = arch_dma_free,
+ .mmap = arch_dma_mmap,
+ .sync_single_for_device = dma_noncoherent_sync_single_for_device,
+ .sync_sg_for_device = dma_noncoherent_sync_sg_for_device,
+ .map_page = dma_noncoherent_map_page,
+ .map_sg = dma_noncoherent_map_sg,
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+ .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu,
+ .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu,
+ .unmap_page = dma_noncoherent_unmap_page,
+ .unmap_sg = dma_noncoherent_unmap_sg,
+#endif
+ .dma_supported = dma_direct_supported,
+ .mapping_error = dma_direct_mapping_error,
+ .cache_sync = arch_dma_cache_sync,
+};
+EXPORT_SYMBOL(dma_noncoherent_ops);
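
dma_noncoherent_ops above leaves allocation and cache maintenance to the architecture: it expects arch_dma_alloc(), arch_dma_free(), arch_dma_mmap(), arch_dma_cache_sync() and the arch_sync_dma_for_{device,cpu}() hooks. A hedged sketch of how an architecture might wire up the two sync hooks (the cache-maintenance helpers are hypothetical placeholders for the arch's own primitives):

#include <linux/dma-noncoherent.h>

/* Hypothetical arch-internal cache maintenance primitives. */
void my_arch_cache_wback(phys_addr_t paddr, size_t size);
void my_arch_cache_inv(phys_addr_t paddr, size_t size);

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
			      size_t size, enum dma_data_direction dir)
{
	/* CPU -> device: write back dirty lines so the device sees them. */
	my_arch_cache_wback(paddr, size);
}

void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
			   size_t size, enum dma_data_direction dir)
{
	/* Device -> CPU: invalidate stale lines before the CPU reads. */
	if (dir != DMA_TO_DEVICE)
		my_arch_cache_inv(paddr, size);
}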
diff --git a/lib/errseq.c b/lib/errseq.c
index df782418b333e..81f9e33aa7e72 100644
--- a/lib/errseq.c
+++ b/lib/errseq.c
@@ -111,27 +111,22 @@ EXPORT_SYMBOL(errseq_set);
* errseq_sample() - Grab current errseq_t value.
* @eseq: Pointer to errseq_t to be sampled.
*
- * This function allows callers to sample an errseq_t value, marking it as
- * "seen" if required.
+ * This function allows callers to initialise their errseq_t variable.
+ * If the error has been "seen", new callers will not see an old error.
+ * If there is an unseen error in @eseq, the caller of this function will
+ * see it the next time it checks for an error.
*
+ * Context: Any context.
* Return: The current errseq value.
*/
errseq_t errseq_sample(errseq_t *eseq)
{
errseq_t old = READ_ONCE(*eseq);
- errseq_t new = old;
- /*
- * For the common case of no errors ever having been set, we can skip
- * marking the SEEN bit. Once an error has been set, the value will
- * never go back to zero.
- */
- if (old != 0) {
- new |= ERRSEQ_SEEN;
- if (old != new)
- cmpxchg(eseq, old, new);
- }
- return new;
+ /* If nobody has seen this error yet, then we can be the first. */
+ if (!(old & ERRSEQ_SEEN))
+ old = 0;
+ return old;
}
EXPORT_SYMBOL(errseq_sample);
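
The reworked errseq_sample() above is easiest to read from a caller's point of view: a cursor sampled while an error is still unseen starts at 0, so that error is still reported later. A minimal hedged sketch (the writeback-style names are illustrative, not an existing API):

#include <linux/errseq.h>
#include <linux/errno.h>

static errseq_t wb_err;		/* shared cursor, e.g. per address_space */
static errseq_t file_err;	/* per-"file" sample of wb_err */

static void demo_open(void)
{
	/* Unseen errors are returned as 0, so demo_fsync() still sees them. */
	file_err = errseq_sample(&wb_err);
}

static void demo_write_failed(void)
{
	errseq_set(&wb_err, -EIO);	/* record a writeback error */
}

static int demo_fsync(void)
{
	/* Report and consume any error raised since demo_open(). */
	return errseq_check_and_advance(&wb_err, &file_err);
}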
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5985a25e6cbcf..5367ffa5c18f9 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
- test_find_first_bit(bitmap, BITMAP_LEN);
+
+ /*
+ * test_find_first_bit() may take some time, so
+ * traverse only part of bitmap to avoid soft lockup.
+ */
+ test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
deleted file mode 100644
index 55b00de106b51..0000000000000
--- a/lib/iommu-common.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * IOMMU mmap management and range allocation functions.
- * Based almost entirely upon the powerpc iommu allocator.
- */
-
-#include <linux/export.h>
-#include <linux/bitmap.h>
-#include <linux/bug.h>
-#include <linux/iommu-helper.h>
-#include <linux/iommu-common.h>
-#include <linux/dma-mapping.h>
-#include <linux/hash.h>
-
-static unsigned long iommu_large_alloc = 15;
-
-static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
-
-static inline bool need_flush(struct iommu_map_table *iommu)
-{
- return ((iommu->flags & IOMMU_NEED_FLUSH) != 0);
-}
-
-static inline void set_flush(struct iommu_map_table *iommu)
-{
- iommu->flags |= IOMMU_NEED_FLUSH;
-}
-
-static inline void clear_flush(struct iommu_map_table *iommu)
-{
- iommu->flags &= ~IOMMU_NEED_FLUSH;
-}
-
-static void setup_iommu_pool_hash(void)
-{
- unsigned int i;
- static bool do_once;
-
- if (do_once)
- return;
- do_once = true;
- for_each_possible_cpu(i)
- per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
-/*
- * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
- * is the number of table entries. If `large_pool' is set to true,
- * the top 1/4 of the table will be set aside for pool allocations
- * of more than iommu_large_alloc pages.
- */
-void iommu_tbl_pool_init(struct iommu_map_table *iommu,
- unsigned long num_entries,
- u32 table_shift,
- void (*lazy_flush)(struct iommu_map_table *),
- bool large_pool, u32 npools,
- bool skip_span_boundary_check)
-{
- unsigned int start, i;
- struct iommu_pool *p = &(iommu->large_pool);
-
- setup_iommu_pool_hash();
- if (npools == 0)
- iommu->nr_pools = IOMMU_NR_POOLS;
- else
- iommu->nr_pools = npools;
- BUG_ON(npools > IOMMU_NR_POOLS);
-
- iommu->table_shift = table_shift;
- iommu->lazy_flush = lazy_flush;
- start = 0;
- if (skip_span_boundary_check)
- iommu->flags |= IOMMU_NO_SPAN_BOUND;
- if (large_pool)
- iommu->flags |= IOMMU_HAS_LARGE_POOL;
-
- if (!large_pool)
- iommu->poolsize = num_entries/iommu->nr_pools;
- else
- iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
- for (i = 0; i < iommu->nr_pools; i++) {
- spin_lock_init(&(iommu->pools[i].lock));
- iommu->pools[i].start = start;
- iommu->pools[i].hint = start;
- start += iommu->poolsize; /* start for next pool */
- iommu->pools[i].end = start - 1;
- }
- if (!large_pool)
- return;
- /* initialize large_pool */
- spin_lock_init(&(p->lock));
- p->start = start;
- p->hint = p->start;
- p->end = num_entries;
-}
-EXPORT_SYMBOL(iommu_tbl_pool_init);
-
-unsigned long iommu_tbl_range_alloc(struct device *dev,
- struct iommu_map_table *iommu,
- unsigned long npages,
- unsigned long *handle,
- unsigned long mask,
- unsigned int align_order)
-{
- unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
- unsigned long n, end, start, limit, boundary_size;
- struct iommu_pool *pool;
- int pass = 0;
- unsigned int pool_nr;
- unsigned int npools = iommu->nr_pools;
- unsigned long flags;
- bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
- bool largealloc = (large_pool && npages > iommu_large_alloc);
- unsigned long shift;
- unsigned long align_mask = 0;
-
- if (align_order > 0)
- align_mask = ~0ul >> (BITS_PER_LONG - align_order);
-
- /* Sanity check */
- if (unlikely(npages == 0)) {
- WARN_ON_ONCE(1);
- return IOMMU_ERROR_CODE;
- }
-
- if (largealloc) {
- pool = &(iommu->large_pool);
- pool_nr = 0; /* to keep compiler happy */
- } else {
- /* pick out pool_nr */
- pool_nr = pool_hash & (npools - 1);
- pool = &(iommu->pools[pool_nr]);
- }
- spin_lock_irqsave(&pool->lock, flags);
-
- again:
- if (pass == 0 && handle && *handle &&
- (*handle >= pool->start) && (*handle < pool->end))
- start = *handle;
- else
- start = pool->hint;
-
- limit = pool->end;
-
- /* The case below can happen if we have a small segment appended
- * to a large, or when the previous alloc was at the very end of
- * the available space. If so, go back to the beginning. If a
- * flush is needed, it will get done based on the return value
- * from iommu_area_alloc() below.
- */
- if (start >= limit)
- start = pool->start;
- shift = iommu->table_map_base >> iommu->table_shift;
- if (limit + shift > mask) {
- limit = mask - shift + 1;
- /* If we're constrained on address range, first try
- * at the masked hint to avoid O(n) search complexity,
- * but on second pass, start at 0 in pool 0.
- */
- if ((start & mask) >= limit || pass > 0) {
- spin_unlock(&(pool->lock));
- pool = &(iommu->pools[0]);
- spin_lock(&(pool->lock));
- start = pool->start;
- } else {
- start &= mask;
- }
- }
-
- if (dev)
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- 1 << iommu->table_shift);
- else
- boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
-
- boundary_size = boundary_size >> iommu->table_shift;
- /*
- * if the skip_span_boundary_check had been set during init, we set
- * things up so that iommu_is_span_boundary() merely checks if the
- * (index + npages) < num_tsb_entries
- */
- if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
- shift = 0;
- boundary_size = iommu->poolsize * iommu->nr_pools;
- }
- n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
- boundary_size, align_mask);
- if (n == -1) {
- if (likely(pass == 0)) {
- /* First failure, rescan from the beginning. */
- pool->hint = pool->start;
- set_flush(iommu);
- pass++;
- goto again;
- } else if (!largealloc && pass <= iommu->nr_pools) {
- spin_unlock(&(pool->lock));
- pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
- pool = &(iommu->pools[pool_nr]);
- spin_lock(&(pool->lock));
- pool->hint = pool->start;
- set_flush(iommu);
- pass++;
- goto again;
- } else {
- /* give up */
- n = IOMMU_ERROR_CODE;
- goto bail;
- }
- }
- if (iommu->lazy_flush &&
- (n < pool->hint || need_flush(iommu))) {
- clear_flush(iommu);
- iommu->lazy_flush(iommu);
- }
-
- end = n + npages;
- pool->hint = end;
-
- /* Update handle for SG allocations */
- if (handle)
- *handle = end;
-bail:
- spin_unlock_irqrestore(&(pool->lock), flags);
-
- return n;
-}
-EXPORT_SYMBOL(iommu_tbl_range_alloc);
-
-static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
- unsigned long entry)
-{
- struct iommu_pool *p;
- unsigned long largepool_start = tbl->large_pool.start;
- bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
-
- /* The large pool is the last pool at the top of the table */
- if (large_pool && entry >= largepool_start) {
- p = &tbl->large_pool;
- } else {
- unsigned int pool_nr = entry / tbl->poolsize;
-
- BUG_ON(pool_nr >= tbl->nr_pools);
- p = &tbl->pools[pool_nr];
- }
- return p;
-}
-
-/* Caller supplies the index of the entry into the iommu map table
- * itself when the mapping from dma_addr to the entry is not the
- * default addr->entry mapping below.
- */
-void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
- unsigned long npages, unsigned long entry)
-{
- struct iommu_pool *pool;
- unsigned long flags;
- unsigned long shift = iommu->table_shift;
-
- if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
- entry = (dma_addr - iommu->table_map_base) >> shift;
- pool = get_pool(iommu, entry);
-
- spin_lock_irqsave(&(pool->lock), flags);
- bitmap_clear(iommu->map, entry, npages);
- spin_unlock_irqrestore(&(pool->lock), flags);
-}
-EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 23633c0fda4a6..92a9f243c0e25 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,19 +3,8 @@
* IOMMU helper functions for the free area management
*/
-#include <linux/export.h>
#include <linux/bitmap.h>
-#include <linux/bug.h>
-
-int iommu_is_span_boundary(unsigned int index, unsigned int nr,
- unsigned long shift,
- unsigned long boundary_size)
-{
- BUG_ON(!is_power_of_2(boundary_size));
-
- shift = (shift + index) & (boundary_size - 1);
- return shift + nr > boundary_size;
-}
+#include <linux/iommu-helper.h>
unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr,
@@ -38,4 +27,3 @@ again:
}
return -1;
}
-EXPORT_SYMBOL(iommu_area_alloc);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 970212670b6a1..7e43cd54c84ca 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+{
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ kasan_check_read(from, n);
+ n = copy_to_user_mcsafe((__force void *) to, from, n);
+ }
+ return n;
+}
+
+static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+ const char *from, size_t len)
+{
+ unsigned long ret;
+ char *to;
+
+ to = kmap_atomic(page);
+ ret = memcpy_mcsafe(to + offset, from, len);
+ kunmap_atomic(to);
+
+ return ret;
+}
+
+size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+{
+ const char *from = addr;
+ unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
+
+ if (unlikely(i->type & ITER_PIPE)) {
+ WARN_ON(1);
+ return 0;
+ }
+ if (iter_is_iovec(i))
+ might_fault();
+ iterate_and_advance(i, bytes, v,
+ copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ ({
+ rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
+ if (rem) {
+ curr_addr = (unsigned long) from;
+ bytes = curr_addr - s_addr - rem;
+ return bytes;
+ }
+ }),
+ ({
+ rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
+ v.iov_len);
+ if (rem) {
+ curr_addr = (unsigned long) from;
+ bytes = curr_addr - s_addr - rem;
+ return bytes;
+ }
+ })
+ )
+
+ return bytes;
+}
+EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
+#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
@@ -1012,7 +1073,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
-static inline size_t __pipe_get_pages(struct iov_iter *i,
+static inline ssize_t __pipe_get_pages(struct iov_iter *i,
size_t maxsize,
struct page **pages,
int idx,
@@ -1102,7 +1163,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
size_t *start)
{
struct page **p;
- size_t n;
+ ssize_t n;
int idx;
int npages;
diff --git a/lib/kobject.c b/lib/kobject.c
index e1d1f290bf354..18989b5b3b56b 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -233,13 +233,12 @@ static int kobject_add_internal(struct kobject *kobj)
/* be noisy on error issues */
if (error == -EEXIST)
- WARN(1,
- "%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
- __func__, kobject_name(kobj));
+ pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
+ __func__, kobject_name(kobj));
else
- WARN(1, "%s failed for %s (error: %d parent: %s)\n",
- __func__, kobject_name(kobj), error,
- parent ? kobject_name(parent) : "'none'");
+ pr_err("%s failed for %s (error: %d parent: %s)\n",
+ __func__, kobject_name(kobj), error,
+ parent ? kobject_name(parent) : "'none'");
} else
kobj->state_in_sysfs = 1;
diff --git a/lib/list_debug.c b/lib/list_debug.c
index a34db8d276676..5d5424b51b746 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
if (CHECK_DATA_CORRUPTION(next->prev != prev,
- "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+ "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
CHECK_DATA_CORRUPTION(prev->next != next,
- "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+ "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
next, prev->next, prev) ||
CHECK_DATA_CORRUPTION(new == prev || new == next,
- "list_add double add: new=%p, prev=%p, next=%p.\n",
+ "list_add double add: new=%px, prev=%px, next=%px.\n",
new, prev, next))
return false;
@@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry)
next = entry->next;
if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
- "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+ "list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
- "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+ "list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
entry, LIST_POISON2) ||
CHECK_DATA_CORRUPTION(prev->next != entry,
- "list_del corruption. prev->next should be %p, but was %p\n",
+ "list_del corruption. prev->next should be %px, but was %px\n",
entry, prev->next) ||
CHECK_DATA_CORRUPTION(next->prev != entry,
- "list_del corruption. next->prev should be %p, but was %p\n",
+ "list_del corruption. next->prev should be %px, but was %px\n",
entry, next->prev))
return false;
diff --git a/lib/lockref.c b/lib/lockref.c
index 47169ed7e964f..3d468b53d4c93 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -81,6 +81,34 @@ int lockref_get_not_zero(struct lockref *lockref)
EXPORT_SYMBOL(lockref_get_not_zero);
/**
+ * lockref_put_not_zero - Decrements count unless count <= 1 before decrement
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count would become zero
+ */
+int lockref_put_not_zero(struct lockref *lockref)
+{
+ int retval;
+
+ CMPXCHG_LOOP(
+ new.count--;
+ if (old.count <= 1)
+ return 0;
+ ,
+ return 1;
+ );
+
+ spin_lock(&lockref->lock);
+ retval = 0;
+ if (lockref->count > 1) {
+ lockref->count--;
+ retval = 1;
+ }
+ spin_unlock(&lockref->lock);
+ return retval;
+}
+EXPORT_SYMBOL(lockref_put_not_zero);
+
+/**
* lockref_get_or_lock - Increments count unless the count is 0 or dead
* @lockref: pointer to lockref structure
* Return: 1 if count updated successfully or 0 if count was zero
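
lockref_put_not_zero() is the put-side mirror of lockref_get_not_zero(): it only drops a reference while at least one other reference will remain, so the caller keeps a lock-protected slow path for the final put. A hedged usage sketch (the object type and teardown are hypothetical):

#include <linux/lockref.h>
#include <linux/slab.h>

struct demo_obj {
	struct lockref ref;	/* ->count updated via cmpxchg or under ->lock */
	/* ... payload ... */
};

static void demo_obj_put(struct demo_obj *obj)
{
	/* Fast path: drop a reference as long as we are not the last one. */
	if (lockref_put_not_zero(&obj->ref))
		return;

	/* Slow path: we may hold the last reference, take the lock. */
	spin_lock(&obj->ref.lock);
	if (--obj->ref.count == 0) {
		spin_unlock(&obj->ref.lock);
		kfree(obj);		/* hypothetical teardown */
		return;
	}
	spin_unlock(&obj->ref.lock);
}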
diff --git a/lib/logic_pio.c b/lib/logic_pio.c
new file mode 100644
index 0000000000000..feea48fd1a0dd
--- /dev/null
+++ b/lib/logic_pio.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved.
+ * Author: Gabriele Paoloni <gabriele.paoloni@huawei.com>
+ * Author: Zhichang Yuan <yuanzhichang@hisilicon.com>
+ */
+
+#define pr_fmt(fmt) "LOGIC PIO: " fmt
+
+#include <linux/of.h>
+#include <linux/io.h>
+#include <linux/logic_pio.h>
+#include <linux/mm.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+/* The unique hardware address list */
+static LIST_HEAD(io_range_list);
+static DEFINE_MUTEX(io_range_mutex);
+
+/* Consider a kernel general helper for this */
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+
+/**
+ * logic_pio_register_range - register logical PIO range for a host
+ * @new_range: pointer to the IO range to be registered.
+ *
+ * Returns 0 on success, the error code in case of failure.
+ *
+ * Register a new IO range node in the IO range list.
+ */
+int logic_pio_register_range(struct logic_pio_hwaddr *new_range)
+{
+ struct logic_pio_hwaddr *range;
+ resource_size_t start;
+ resource_size_t end;
+ resource_size_t mmio_sz = 0;
+ resource_size_t iio_sz = MMIO_UPPER_LIMIT;
+ int ret = 0;
+
+ if (!new_range || !new_range->fwnode || !new_range->size)
+ return -EINVAL;
+
+ start = new_range->hw_start;
+ end = new_range->hw_start + new_range->size;
+
+ mutex_lock(&io_range_mutex);
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->fwnode == new_range->fwnode) {
+ /* range already there */
+ goto end_register;
+ }
+ if (range->flags == LOGIC_PIO_CPU_MMIO &&
+ new_range->flags == LOGIC_PIO_CPU_MMIO) {
+ /* for MMIO ranges we need to check for overlap */
+ if (start >= range->hw_start + range->size ||
+ end < range->hw_start) {
+ mmio_sz += range->size;
+ } else {
+ ret = -EFAULT;
+ goto end_register;
+ }
+ } else if (range->flags == LOGIC_PIO_INDIRECT &&
+ new_range->flags == LOGIC_PIO_INDIRECT) {
+ iio_sz += range->size;
+ }
+ }
+
+ /* range not registered yet, check for available space */
+ if (new_range->flags == LOGIC_PIO_CPU_MMIO) {
+ if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) {
+ /* if it's too big check if 64K space can be reserved */
+ if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) {
+ ret = -E2BIG;
+ goto end_register;
+ }
+ new_range->size = SZ_64K;
+ pr_warn("Requested IO range too big, new size set to 64K\n");
+ }
+ new_range->io_start = mmio_sz;
+ } else if (new_range->flags == LOGIC_PIO_INDIRECT) {
+ if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) {
+ ret = -E2BIG;
+ goto end_register;
+ }
+ new_range->io_start = iio_sz;
+ } else {
+ /* invalid flag */
+ ret = -EINVAL;
+ goto end_register;
+ }
+
+ list_add_tail_rcu(&new_range->list, &io_range_list);
+
+end_register:
+ mutex_unlock(&io_range_mutex);
+ return ret;
+}
+
+/**
+ * find_io_range_by_fwnode - find logical PIO range for given FW node
+ * @fwnode: FW node handle associated with logical PIO range
+ *
+ * Returns pointer to node on success, NULL otherwise.
+ *
+ * Traverse the io_range_list to find the registered node for @fwnode.
+ */
+struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->fwnode == fwnode)
+ return range;
+ }
+ return NULL;
+}
+
+/* Return a registered range given an input PIO token */
+static struct logic_pio_hwaddr *find_io_range(unsigned long pio)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (in_range(pio, range->io_start, range->size))
+ return range;
+ }
+ pr_err("PIO entry token %lx invalid\n", pio);
+ return NULL;
+}
+
+/**
+ * logic_pio_to_hwaddr - translate logical PIO to HW address
+ * @pio: logical PIO value
+ *
+ * Returns HW address if valid, ~0 otherwise.
+ *
+ * Translate the input logical PIO to the corresponding hardware address.
+ * The input PIO should be unique in the whole logical PIO space.
+ */
+resource_size_t logic_pio_to_hwaddr(unsigned long pio)
+{
+ struct logic_pio_hwaddr *range;
+
+ range = find_io_range(pio);
+ if (range)
+ return range->hw_start + pio - range->io_start;
+
+ return (resource_size_t)~0;
+}
+
+/**
+ * logic_pio_trans_hwaddr - translate HW address to logical PIO
+ * @fwnode: FW node reference for the host
+ * @addr: Host-relative HW address
+ * @size: size to translate
+ *
+ * Returns Logical PIO value if successful, ~0UL otherwise
+ */
+unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode,
+ resource_size_t addr, resource_size_t size)
+{
+ struct logic_pio_hwaddr *range;
+
+ range = find_io_range_by_fwnode(fwnode);
+ if (!range || range->flags == LOGIC_PIO_CPU_MMIO) {
+ pr_err("IO range not found or invalid\n");
+ return ~0UL;
+ }
+ if (range->size < size) {
+ pr_err("resource size %pa cannot fit in IO range size %pa\n",
+ &size, &range->size);
+ return ~0UL;
+ }
+ return addr - range->hw_start + range->io_start;
+}
+
+unsigned long logic_pio_trans_cpuaddr(resource_size_t addr)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->flags != LOGIC_PIO_CPU_MMIO)
+ continue;
+ if (in_range(addr, range->hw_start, range->size))
+ return addr - range->hw_start + range->io_start;
+ }
+ pr_err("addr %llx not registered in io_range_list\n",
+ (unsigned long long) addr);
+ return ~0UL;
+}
+
+#if defined(CONFIG_INDIRECT_PIO) && defined(PCI_IOBASE)
+#define BUILD_LOGIC_IO(bw, type) \
+type logic_in##bw(unsigned long addr) \
+{ \
+ type ret = (type)~0; \
+ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ ret = read##bw(PCI_IOBASE + addr); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ ret = entry->ops->in(entry->hostdata, \
+ addr, sizeof(type)); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+ return ret; \
+} \
+ \
+void logic_out##bw(type value, unsigned long addr) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ write##bw(value, PCI_IOBASE + addr); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->out(entry->hostdata, \
+ addr, value, sizeof(type)); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+} \
+ \
+void logic_ins##bw(unsigned long addr, void *buffer, \
+ unsigned int count) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ reads##bw(PCI_IOBASE + addr, buffer, count); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->ins(entry->hostdata, \
+ addr, buffer, sizeof(type), count); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+ \
+} \
+ \
+void logic_outs##bw(unsigned long addr, const void *buffer, \
+ unsigned int count) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ writes##bw(PCI_IOBASE + addr, buffer, count); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->outs(entry->hostdata, \
+ addr, buffer, sizeof(type), count); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+}
+
+BUILD_LOGIC_IO(b, u8)
+EXPORT_SYMBOL(logic_inb);
+EXPORT_SYMBOL(logic_insb);
+EXPORT_SYMBOL(logic_outb);
+EXPORT_SYMBOL(logic_outsb);
+
+BUILD_LOGIC_IO(w, u16)
+EXPORT_SYMBOL(logic_inw);
+EXPORT_SYMBOL(logic_insw);
+EXPORT_SYMBOL(logic_outw);
+EXPORT_SYMBOL(logic_outsw);
+
+BUILD_LOGIC_IO(l, u32)
+EXPORT_SYMBOL(logic_inl);
+EXPORT_SYMBOL(logic_insl);
+EXPORT_SYMBOL(logic_outl);
+EXPORT_SYMBOL(logic_outsl);
+
+#endif /* CONFIG_INDIRECT_PIO && PCI_IOBASE */
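
A host controller driver that owns a non-MMIO I/O range would feed it into the logical PIO space roughly as follows. This is a hedged sketch: only logic_pio_register_range() and the logic_pio_hwaddr fields used below appear in the code above; the ops structure name, callback prototypes and my_host layout are assumptions (.ins/.outs omitted for brevity).

#include <linux/logic_pio.h>
#include <linux/slab.h>

struct my_host {
	struct logic_pio_hwaddr iorange;
	/* ... indirect-access register window, locks, ... */
};

static u32 my_host_in(void *hostdata, unsigned long addr, size_t dwidth)
{
	/* ... issue an indirect read of 'dwidth' bytes at 'addr' ... */
	return 0;
}

static void my_host_out(void *hostdata, unsigned long addr, u32 val,
			size_t dwidth)
{
	/* ... issue an indirect write ... */
}

static const struct logic_pio_host_ops my_host_ops = {	/* name assumed */
	.in  = my_host_in,
	.out = my_host_out,
};

static int my_host_probe(struct fwnode_handle *fwnode)
{
	struct my_host *host = kzalloc(sizeof(*host), GFP_KERNEL);

	if (!host)
		return -ENOMEM;

	host->iorange.fwnode   = fwnode;
	host->iorange.flags    = LOGIC_PIO_INDIRECT;
	host->iorange.hw_start = 0;		/* host-local I/O base */
	host->iorange.size     = 0x400;		/* hypothetical 1 KiB window */
	host->iorange.hostdata = host;
	host->iorange.ops      = &my_host_ops;

	/* On success, inb()/outb() on iorange.io_start reach this host. */
	return logic_pio_register_range(&host->iorange);
}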
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e00138d593fd..a9e41aed6de4b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -146,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
- return root->gfp_mask & __GFP_BITS_MASK;
+ return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter,
static void __rcu **skip_siblings(struct radix_tree_node **nodep,
void __rcu **slot, struct radix_tree_iter *iter)
{
- void *sib = node_to_entry(slot - 1);
-
while (iter->index < iter->next_index) {
*nodep = rcu_dereference_raw(*slot);
- if (*nodep && *nodep != sib)
+ if (*nodep && !is_sibling_entry(iter->node, *nodep))
return slot;
slot++;
iter->index = __radix_tree_iter_add(iter, 1);
@@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot,
struct radix_tree_iter *iter, unsigned flags)
{
unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
- struct radix_tree_node *node = rcu_dereference_raw(*slot);
+ struct radix_tree_node *node;
slot = skip_siblings(&node, slot, iter);
@@ -2036,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node = NULL;
- void __rcu **slot;
+ void __rcu **slot = NULL;
void *entry;
entry = __radix_tree_lookup(root, index, &node, &slot);
+ if (!slot)
+ return NULL;
if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
get_slot_offset(node, slot))))
return NULL;
@@ -2285,6 +2285,7 @@ void __init radix_tree_init(void)
int ret;
BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
+ BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index f01b1cb04f919..3de0d89212860 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -4,3 +4,4 @@ int*.c
tables.c
neon?.c
s390vx?.c
+vpermxor*.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 44d6b46df051b..2f8b61dfd9b0e 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index c65aa80d67ed4..5065b1e7e3275 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+ &raid6_vpermxor1,
+ &raid6_vpermxor2,
+ &raid6_vpermxor4,
+ &raid6_vpermxor8,
#endif
#if defined(CONFIG_S390)
&raid6_s390vx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8a1fef2..d20ed0d114111 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index fabc477b14170..5d73f5cb4d8a7 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+vpermxor1.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
int1.c: int.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -117,7 +131,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
spotless: clean
rm -f *~
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000000000..10475dc423c15
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which support both Altivec and the vpermxor instruction.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+ 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+ 0x06, 0x04, 0x02, 0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+ 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+ 0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+ void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+ unative_t wp$$, wq$$, wd$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += NSIZE*$#) {
+ wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+ for (z = z0-1; z>=0; z--) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ /* P syndrome */
+ wp$$ = vec_xor(wp$$, wd$$);
+
+ /* Q syndrome */
+ asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+ wq$$ = vec_xor(wq$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+ /* Check if arch has both altivec and the vpermxor instructions */
+# ifdef __KERNEL__
+ return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+ return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+ raid6_vpermxor$#_gen_syndrome,
+ NULL,
+ raid6_have_altivec_vpermxor,
+ "vpermxor$#",
+ 0
+};
+#endif
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index 0ec3f257ffdf9..1db74eb098d0e 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -1,22 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * lib/reed_solomon/decode_rs.c
- *
- * Overview:
- * Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
*
* Copyright 2002, Phil Karn, KA9Q
* May be used under the terms of the GNU General Public License (GPL)
*
* Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
*
- * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
+ * Generic data width independent code which is included by the wrappers.
*/
{
+ struct rs_codec *rs = rsc->codec;
int deg_lambda, el, deg_omega;
int i, j, r, k, pad;
int nn = rs->nn;
@@ -27,16 +21,22 @@
uint16_t *alpha_to = rs->alpha_to;
uint16_t *index_of = rs->index_of;
uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
- /* Err+Eras Locator poly and syndrome poly The maximum value
- * of nroots is 8. So the necessary stack size will be about
- * 220 bytes max.
- */
- uint16_t lambda[nroots + 1], syn[nroots];
- uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1];
- uint16_t root[nroots], reg[nroots + 1], loc[nroots];
int count = 0;
uint16_t msk = (uint16_t) rs->nn;
+ /*
+ * The decoder buffers are in the rs control struct. They are
+ * arrays sized [nroots + 1]
+ */
+ uint16_t *lambda = rsc->buffers + RS_DECODE_LAMBDA * (nroots + 1);
+ uint16_t *syn = rsc->buffers + RS_DECODE_SYN * (nroots + 1);
+ uint16_t *b = rsc->buffers + RS_DECODE_B * (nroots + 1);
+ uint16_t *t = rsc->buffers + RS_DECODE_T * (nroots + 1);
+ uint16_t *omega = rsc->buffers + RS_DECODE_OMEGA * (nroots + 1);
+ uint16_t *root = rsc->buffers + RS_DECODE_ROOT * (nroots + 1);
+ uint16_t *reg = rsc->buffers + RS_DECODE_REG * (nroots + 1);
+ uint16_t *loc = rsc->buffers + RS_DECODE_LOC * (nroots + 1);
+
/* Check length parameter for validity */
pad = nn - nroots - len;
BUG_ON(pad < 0 || pad >= nn);
diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c
index 0b5b1a6728ec0..9112d46e869ee 100644
--- a/lib/reed_solomon/encode_rs.c
+++ b/lib/reed_solomon/encode_rs.c
@@ -1,23 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * lib/reed_solomon/encode_rs.c
- *
- * Overview:
- * Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
*
* Copyright 2002, Phil Karn, KA9Q
* May be used under the terms of the GNU General Public License (GPL)
*
* Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
*
- * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
- * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par)
+ * Generic data width independent code which is included by the wrappers.
*/
{
+ struct rs_codec *rs = rsc->codec;
int i, j, pad;
int nn = rs->nn;
int nroots = rs->nroots;
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index 06d04cfa93390..dfcf54242fb92 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -1,43 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * lib/reed_solomon/reed_solomon.c
- *
- * Overview:
- * Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
*
* Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
*
* Reed Solomon code lifted from reed solomon library written by Phil Karn
* Copyright 2002 Phil Karn, KA9Q
*
- * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Description:
*
* The generic Reed Solomon library provides runtime configurable
* encoding / decoding of RS codes.
- * Each user must call init_rs to get a pointer to a rs_control
- * structure for the given rs parameters. This structure is either
- * generated or a already available matching control structure is used.
- * If a structure is generated then the polynomial arrays for
- * fast encoding / decoding are built. This can take some time so
- * make sure not to call this function from a time critical path.
- * Usually a module / driver should initialize the necessary
- * rs_control structure on module / driver init and release it
- * on exit.
- * The encoding puts the calculated syndrome into a given syndrome
- * buffer.
- * The decoding is a two step process. The first step calculates
- * the syndrome over the received (data + syndrome) and calls the
- * second stage, which does the decoding / error correction itself.
- * Many hw encoders provide a syndrome calculation over the received
- * data + syndrome and can call the second stage directly.
*
+ * Each user must call init_rs to get a pointer to a rs_control structure
+ * for the given rs parameters. The control struct is unique per instance.
+ * It points to a codec which can be shared by multiple control structures.
+ * If a codec is newly allocated then the polynomial arrays for fast
+ * encoding / decoding are built. This can take some time so make sure not
+ * to call this function from a time critical path. Usually a module /
+ * driver should initialize the necessary rs_control structure on module /
+ * driver init and release it on exit.
+ *
+ * The encoding puts the calculated syndrome into a given syndrome buffer.
+ *
+ * The decoding is a two step process. The first step calculates the
+ * syndrome over the received (data + syndrome) and calls the second stage,
+ * which does the decoding / error correction itself. Many hw encoders
+ * provide a syndrome calculation over the received data + syndrome and can
+ * call the second stage directly.
*/
-
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -46,32 +37,44 @@
#include <linux/slab.h>
#include <linux/mutex.h>
-/* This list holds all currently allocated rs control structures */
-static LIST_HEAD (rslist);
+enum {
+ RS_DECODE_LAMBDA,
+ RS_DECODE_SYN,
+ RS_DECODE_B,
+ RS_DECODE_T,
+ RS_DECODE_OMEGA,
+ RS_DECODE_ROOT,
+ RS_DECODE_REG,
+ RS_DECODE_LOC,
+ RS_DECODE_NUM_BUFFERS
+};
+
+/* This list holds all currently allocated rs codec structures */
+static LIST_HEAD(codec_list);
/* Protection for the list */
static DEFINE_MUTEX(rslistlock);
/**
- * rs_init - Initialize a Reed-Solomon codec
+ * codec_init - Initialize a Reed-Solomon codec
* @symsize: symbol size, bits (1-8)
* @gfpoly: Field generator polynomial coefficients
* @gffunc: Field generator function
* @fcr: first root of RS code generator polynomial, index form
* @prim: primitive element to generate polynomial roots
* @nroots: RS code generator polynomial degree (number of roots)
+ * @gfp: GFP_ flags for allocations
*
- * Allocate a control structure and the polynom arrays for faster
+ * Allocate a codec structure and the polynomial arrays for faster
* en/decoding. Fill the arrays according to the given parameters.
*/
-static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
- int fcr, int prim, int nroots)
+static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int),
+ int fcr, int prim, int nroots, gfp_t gfp)
{
- struct rs_control *rs;
int i, j, sr, root, iprim;
+ struct rs_codec *rs;
- /* Allocate the control structure */
- rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL);
- if (rs == NULL)
+ rs = kzalloc(sizeof(*rs), gfp);
+ if (!rs)
return NULL;
INIT_LIST_HEAD(&rs->list);
@@ -85,17 +88,17 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
rs->gffunc = gffunc;
/* Allocate the arrays */
- rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+ rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
if (rs->alpha_to == NULL)
- goto errrs;
+ goto err;
- rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+ rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
if (rs->index_of == NULL)
- goto erralp;
+ goto err;
- rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL);
+ rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp);
if(rs->genpoly == NULL)
- goto erridx;
+ goto err;
/* Generate Galois field lookup tables */
rs->index_of[0] = rs->nn; /* log(zero) = -inf */
@@ -120,7 +123,7 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
}
/* If it's not primitive, exit */
if(sr != rs->alpha_to[0])
- goto errpol;
+ goto err;
/* Find prim-th root of 1, used in decoding */
for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn);
@@ -148,42 +151,52 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
/* convert rs->genpoly[] to index form for quicker encoding */
for (i = 0; i <= nroots; i++)
rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
+
+ rs->users = 1;
+ list_add(&rs->list, &codec_list);
return rs;
- /* Error exit */
-errpol:
+err:
kfree(rs->genpoly);
-erridx:
kfree(rs->index_of);
-erralp:
kfree(rs->alpha_to);
-errrs:
kfree(rs);
return NULL;
}
/**
- * free_rs - Free the rs control structure, if it is no longer used
- * @rs: the control structure which is not longer used by the
+ * free_rs - Free the rs control structure
+ * @rs:	The control structure which is no longer used by the
* caller
+ *
+ * Free the control structure. If @rs is the last user of the associated
+ * codec, free the codec as well.
*/
void free_rs(struct rs_control *rs)
{
+ struct rs_codec *cd;
+
+ if (!rs)
+ return;
+
+ cd = rs->codec;
mutex_lock(&rslistlock);
- rs->users--;
- if(!rs->users) {
- list_del(&rs->list);
- kfree(rs->alpha_to);
- kfree(rs->index_of);
- kfree(rs->genpoly);
- kfree(rs);
+ cd->users--;
+ if(!cd->users) {
+ list_del(&cd->list);
+ kfree(cd->alpha_to);
+ kfree(cd->index_of);
+ kfree(cd->genpoly);
+ kfree(cd);
}
mutex_unlock(&rslistlock);
+ kfree(rs);
}
+EXPORT_SYMBOL_GPL(free_rs);
/**
- * init_rs_internal - Find a matching or allocate a new rs control structure
+ * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one
* @symsize: the symbol size (number of bits)
* @gfpoly: the extended Galois field generator polynomial coefficients,
* with the 0th coefficient in the low order bit. The polynomial
@@ -191,55 +204,69 @@ void free_rs(struct rs_control *rs)
* @gffunc: pointer to function to generate the next field element,
* or the multiplicative identity element if given 0. Used
* instead of gfpoly if gfpoly is 0
- * @fcr: the first consecutive root of the rs code generator polynomial
+ * @fcr: the first consecutive root of the rs code generator polynomial
* in index form
* @prim: primitive element to generate polynomial roots
* @nroots: RS code generator polynomial degree (number of roots)
+ * @gfp: GFP_ flags for allocations
*/
static struct rs_control *init_rs_internal(int symsize, int gfpoly,
- int (*gffunc)(int), int fcr,
- int prim, int nroots)
+ int (*gffunc)(int), int fcr,
+ int prim, int nroots, gfp_t gfp)
{
- struct list_head *tmp;
- struct rs_control *rs;
+ struct list_head *tmp;
+ struct rs_control *rs;
+ unsigned int bsize;
/* Sanity checks */
if (symsize < 1)
return NULL;
if (fcr < 0 || fcr >= (1<<symsize))
- return NULL;
+ return NULL;
if (prim <= 0 || prim >= (1<<symsize))
- return NULL;
+ return NULL;
if (nroots < 0 || nroots >= (1<<symsize))
return NULL;
+ /*
+ * The decoder needs buffers in each control struct instance to
+ * avoid variable size or large fixed size allocations on
+ * the stack. Size the buffers to arrays of [nroots + 1].
+ */
+ bsize = sizeof(uint16_t) * RS_DECODE_NUM_BUFFERS * (nroots + 1);
+ rs = kzalloc(sizeof(*rs) + bsize, gfp);
+ if (!rs)
+ return NULL;
+
mutex_lock(&rslistlock);
/* Walk through the list and look for a matching entry */
- list_for_each(tmp, &rslist) {
- rs = list_entry(tmp, struct rs_control, list);
- if (symsize != rs->mm)
+ list_for_each(tmp, &codec_list) {
+ struct rs_codec *cd = list_entry(tmp, struct rs_codec, list);
+
+ if (symsize != cd->mm)
continue;
- if (gfpoly != rs->gfpoly)
+ if (gfpoly != cd->gfpoly)
continue;
- if (gffunc != rs->gffunc)
+ if (gffunc != cd->gffunc)
continue;
- if (fcr != rs->fcr)
+ if (fcr != cd->fcr)
continue;
- if (prim != rs->prim)
+ if (prim != cd->prim)
continue;
- if (nroots != rs->nroots)
+ if (nroots != cd->nroots)
continue;
/* We have a matching one already */
- rs->users++;
+ cd->users++;
+ rs->codec = cd;
goto out;
}
/* Create a new one */
- rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots);
- if (rs) {
- rs->users = 1;
- list_add(&rs->list, &rslist);
+ rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp);
+ if (!rs->codec) {
+ kfree(rs);
+ rs = NULL;
}
out:
mutex_unlock(&rslistlock);
@@ -247,45 +274,48 @@ out:
}
/**
- * init_rs - Find a matching or allocate a new rs control structure
+ * init_rs_gfp - Create an RS control struct and initialize it
* @symsize: the symbol size (number of bits)
* @gfpoly: the extended Galois field generator polynomial coefficients,
* with the 0th coefficient in the low order bit. The polynomial
* must be primitive;
- * @fcr: the first consecutive root of the rs code generator polynomial
+ * @fcr: the first consecutive root of the rs code generator polynomial
* in index form
* @prim: primitive element to generate polynomial roots
* @nroots: RS code generator polynomial degree (number of roots)
+ * @gfp: GFP_ flags for allocations
*/
-struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
- int nroots)
+struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim,
+ int nroots, gfp_t gfp)
{
- return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots);
+ return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots, gfp);
}
+EXPORT_SYMBOL_GPL(init_rs_gfp);
/**
- * init_rs_non_canonical - Find a matching or allocate a new rs control
- * structure, for fields with non-canonical
- * representation
+ * init_rs_non_canonical - Allocate rs control struct for fields with
+ * non-canonical representation
* @symsize: the symbol size (number of bits)
* @gffunc: pointer to function to generate the next field element,
* or the multiplicative identity element if given 0. Used
* instead of gfpoly if gfpoly is 0
- * @fcr: the first consecutive root of the rs code generator polynomial
+ * @fcr: the first consecutive root of the rs code generator polynomial
* in index form
* @prim: primitive element to generate polynomial roots
* @nroots: RS code generator polynomial degree (number of roots)
*/
struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
- int fcr, int prim, int nroots)
+ int fcr, int prim, int nroots)
{
- return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots);
+ return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots,
+ GFP_KERNEL);
}
+EXPORT_SYMBOL_GPL(init_rs_non_canonical);
#ifdef CONFIG_REED_SOLOMON_ENC8
/**
* encode_rs8 - Calculate the parity for data values (8bit data width)
- * @rs: the rs control structure
+ * @rsc: the rs control structure
* @data: data field of a given type
* @len: data length
* @par: parity data, must be initialized by caller (usually all 0)
@@ -295,7 +325,7 @@ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
* symbol size > 8. The calling code must take care of encoding of the
* syndrome result for storage itself.
*/
-int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
+int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par,
uint16_t invmsk)
{
#include "encode_rs.c"
@@ -306,7 +336,7 @@ EXPORT_SYMBOL_GPL(encode_rs8);
#ifdef CONFIG_REED_SOLOMON_DEC8
/**
* decode_rs8 - Decode codeword (8bit data width)
- * @rs: the rs control structure
+ * @rsc: the rs control structure
* @data: data field of a given type
* @par: received parity data field
* @len: data length
@@ -319,9 +349,14 @@ EXPORT_SYMBOL_GPL(encode_rs8);
* The syndrome and parity uses a uint16_t data type to enable
* symbol size > 8. The calling code must take care of decoding of the
* syndrome result and the received parity before calling this code.
+ *
+ * Note: The rs_control struct @rsc contains buffers which are used for
+ * decoding, so the caller has to ensure that decoder invocations are
+ * serialized.
+ *
* Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
*/
-int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len,
+int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
uint16_t *corr)
{
@@ -333,7 +368,7 @@ EXPORT_SYMBOL_GPL(decode_rs8);
#ifdef CONFIG_REED_SOLOMON_ENC16
/**
* encode_rs16 - Calculate the parity for data values (16bit data width)
- * @rs: the rs control structure
+ * @rsc: the rs control structure
* @data: data field of a given type
* @len: data length
* @par: parity data, must be initialized by caller (usually all 0)
@@ -341,7 +376,7 @@ EXPORT_SYMBOL_GPL(decode_rs8);
*
* Each field in the data array contains up to symbol size bits of valid data.
*/
-int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par,
+int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par,
uint16_t invmsk)
{
#include "encode_rs.c"
@@ -352,7 +387,7 @@ EXPORT_SYMBOL_GPL(encode_rs16);
#ifdef CONFIG_REED_SOLOMON_DEC16
/**
* decode_rs16 - Decode codeword (16bit data width)
- * @rs: the rs control structure
+ * @rsc: the rs control structure
* @data: data field of a given type
* @par: received parity data field
* @len: data length
@@ -363,9 +398,14 @@ EXPORT_SYMBOL_GPL(encode_rs16);
* @corr: buffer to store correction bitmask on eras_pos
*
* Each field in the data array contains up to symbol size bits of valid data.
+ *
+ * Note: The rs_control struct @rsc contains buffers which are used for
+ * decoding, so the caller has to ensure that decoder invocations are
+ * serialized.
+ *
* Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
*/
-int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
+int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
uint16_t *corr)
{
@@ -374,10 +414,6 @@ int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
EXPORT_SYMBOL_GPL(decode_rs16);
#endif
-EXPORT_SYMBOL_GPL(init_rs);
-EXPORT_SYMBOL_GPL(init_rs_non_canonical);
-EXPORT_SYMBOL_GPL(free_rs);
-
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Reed Solomon encoder/decoder");
MODULE_AUTHOR("Phil Karn, Thomas Gleixner");
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70cb..6fdc6267f4a82 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
unsigned int wake_batch;
+ unsigned int shallow_depth;
/*
* For each batch, we wake up one queue. We need to make sure that our
- * batch size is small enough that the full depth of the bitmap is
- * enough to wake up all of the queues.
+ * batch size is small enough that the full depth of the bitmap,
+ * potentially limited by a shallow depth, is enough to wake up all of
+ * the queues.
+ *
+ * Each full word of the bitmap has bits_per_word bits, and there might
+ * be a partial word. There are depth / bits_per_word full words and
+ * depth % bits_per_word bits left over. In bitwise arithmetic:
+ *
+ * bits_per_word = 1 << shift
+ * depth / bits_per_word = depth >> shift
+ * depth % bits_per_word = depth & ((1 << shift) - 1)
+ *
+ * Each word can be limited to sbq->min_shallow_depth bits.
*/
- wake_batch = SBQ_WAKE_BATCH;
- if (wake_batch > depth / SBQ_WAIT_QUEUES)
- wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+ depth = ((depth >> sbq->sb.shift) * shallow_depth +
+ min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+ wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+ SBQ_WAKE_BATCH);
return wake_batch;
}
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
}
- sbq->wake_batch = sbq_calc_wake_batch(depth);
+ sbq->min_shallow_depth = UINT_MAX;
+ sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
- unsigned int wake_batch = sbq_calc_wake_batch(depth);
+ unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
WRITE_ONCE(sbq->wake_batch, wake_batch);
/*
- * Pairs with the memory barrier in sbq_wake_up() to ensure that
- * the batch size is updated before the wait counts.
+ * Pairs with the memory barrier in sbitmap_queue_wake_up()
+ * to ensure that the batch size is updated before the wait
+ * counts.
*/
smp_mb__before_atomic();
for (i = 0; i < SBQ_WAIT_QUEUES; i++)
atomic_set(&sbq->ws[i].wait_cnt, 1);
}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+ sbitmap_queue_update_wake_batch(sbq, depth);
sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
unsigned int hint, depth;
int nr;
+ WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
hint = this_cpu_read(*sbq->alloc_hint);
depth = READ_ONCE(sbq->sb.depth);
if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+ unsigned int min_shallow_depth)
+{
+ sbq->min_shallow_depth = min_shallow_depth;
+ sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
int i, wake_index;
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
return NULL;
}
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
{
struct sbq_wait_state *ws;
unsigned int wake_batch;
int wait_cnt;
- /*
- * Pairs with the memory barrier in set_current_state() to ensure the
- * proper ordering of clear_bit()/waitqueue_active() in the waker and
- * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
- * waiter. See the comment on waitqueue_active(). This is __after_atomic
- * because we just did clear_bit_unlock() in the caller.
- */
- smp_mb__after_atomic();
-
ws = sbq_wake_ptr(sbq);
if (!ws)
- return;
+ return false;
wait_cnt = atomic_dec_return(&ws->wait_cnt);
if (wait_cnt <= 0) {
+ int ret;
+
wake_batch = READ_ONCE(sbq->wake_batch);
+
/*
* Pairs with the memory barrier in sbitmap_queue_resize() to
* ensure that we see the batch size update before the wait
* count is reset.
*/
smp_mb__before_atomic();
+
/*
- * If there are concurrent callers to sbq_wake_up(), the last
- * one to decrement the wait count below zero will bump it back
- * up. If there is a concurrent resize, the count reset will
- * either cause the cmpxchg to fail or overwrite after the
- * cmpxchg.
+ * For concurrent callers of this, the one that failed the
+ * atomic_cmpxhcg() race should call this function again
+ * to wakeup a new batch on a different 'ws'.
*/
- atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
- sbq_index_atomic_inc(&sbq->wake_index);
- wake_up_nr(&ws->wait, wake_batch);
+ ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+ if (ret == wait_cnt) {
+ sbq_index_atomic_inc(&sbq->wake_index);
+ wake_up_nr(&ws->wait, wake_batch);
+ return false;
+ }
+
+ return true;
}
+
+ return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+ while (__sbq_wake_up(sbq))
+ ;
}
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
sbitmap_clear_bit_unlock(&sbq->sb, nr);
- sbq_wake_up(sbq);
+ /*
+ * Pairs with the memory barrier in set_current_state() to ensure the
+ * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+ * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+ * waiter. See the comment on waitqueue_active().
+ */
+ smp_mb__after_atomic();
+ sbitmap_queue_wake_up(sbq);
+
if (likely(!sbq->round_robin && nr < sbq->sb.depth))
*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
/*
* Pairs with the memory barrier in set_current_state() like in
- * sbq_wake_up().
+ * sbitmap_queue_wake_up().
*/
smp_mb();
wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_puts(m, "}\n");
seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+ seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);
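The wake-batch arithmetic described in the comment above can be restated as a standalone sketch (not kernel code); SBQ_WAIT_QUEUES and SBQ_WAKE_BATCH are assumed to be 8, as defined in include/linux/sbitmap.h.

/* Standalone illustration of sbq_calc_wake_batch() with a shallow depth limit. */
#define SBQ_WAIT_QUEUES	8
#define SBQ_WAKE_BATCH	8

static unsigned int demo_calc_wake_batch(unsigned int depth, unsigned int shift,
					 unsigned int min_shallow_depth)
{
	unsigned int bits_per_word = 1U << shift;
	/* Each word contributes at most min_shallow_depth usable bits. */
	unsigned int shallow = bits_per_word < min_shallow_depth ?
			       bits_per_word : min_shallow_depth;
	unsigned int full_words = depth >> shift;		/* depth / bits_per_word */
	unsigned int tail_bits = depth & (bits_per_word - 1);	/* depth % bits_per_word */
	unsigned int tail = tail_bits < shallow ? tail_bits : shallow;
	unsigned int usable = full_words * shallow + tail;
	unsigned int batch = usable / SBQ_WAIT_QUEUES;

	/* Clamp into [1, SBQ_WAKE_BATCH] so the depth can wake every queue. */
	if (batch < 1)
		batch = 1;
	if (batch > SBQ_WAKE_BATCH)
		batch = SBQ_WAKE_BATCH;
	return batch;
}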
diff --git a/lib/sha256.c b/lib/sha256.c
new file mode 100644
index 0000000000000..4400c832e2aaa
--- /dev/null
+++ b/lib/sha256.c
@@ -0,0 +1,283 @@
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/sha256.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+ return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (z & (x | y));
+}
+
+#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+ W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+ W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+static void sha256_transform(u32 *state, const u8 *input)
+{
+ u32 a, b, c, d, e, f, g, h, t1, t2;
+ u32 W[64];
+ int i;
+
+ /* load the input */
+ for (i = 0; i < 16; i++)
+ LOAD_OP(i, W, input);
+
+ /* now blend */
+ for (i = 16; i < 64; i++)
+ BLEND_OP(i, W);
+
+ /* load the state into our registers */
+ a = state[0]; b = state[1]; c = state[2]; d = state[3];
+ e = state[4]; f = state[5]; g = state[6]; h = state[7];
+
+ /* now iterate */
+ t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+ state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+
+ /* clear any sensitive info... */
+ a = b = c = d = e = f = g = h = t1 = t2 = 0;
+ memset(W, 0, 64 * sizeof(u32));
+}
+
+int sha256_init(struct sha256_state *sctx)
+{
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+ unsigned int partial, done;
+ const u8 *src;
+
+ partial = sctx->count & 0x3f;
+ sctx->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) > 63) {
+ if (partial) {
+ done = -partial;
+ memcpy(sctx->buf + partial, data, done + 64);
+ src = sctx->buf;
+ }
+
+ do {
+ sha256_transform(sctx->state, src);
+ done += 64;
+ src = data + done;
+ } while (done + 63 < len);
+
+ partial = 0;
+ }
+ memcpy(sctx->buf + partial, src, len - done);
+
+ return 0;
+}
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ unsigned int index, pad_len;
+ int i;
+ static const u8 padding[64] = { 0x80, };
+
+ /* Save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64. */
+ index = sctx->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+ sha256_update(sctx, padding, pad_len);
+
+ /* Append length (before padding) */
+ sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
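A caller-side sketch (not part of this patch) of the new helpers; struct sha256_state comes from the <linux/sha256.h> header included above, and the 32-byte digest size is standard for SHA-256.

#include <linux/sha256.h>

/* One-shot hash of a buffer into a 32-byte digest. */
static void demo_sha256(const u8 *data, unsigned int len, u8 digest[32])
{
	struct sha256_state sctx;

	sha256_init(&sctx);
	sha256_update(&sctx, data, len);
	sha256_final(&sctx, digest);	/* sha256_final() also zeroizes sctx */
}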
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 47aeb04c1997b..04b68d9dfface 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -593,9 +593,8 @@ found:
}
/*
- * Allocates bounce buffer and returns its kernel virtual address.
+ * Allocates bounce buffer and returns its physical address.
*/
-
static phys_addr_t
map_single(struct device *hwdev, phys_addr_t phys, size_t size,
enum dma_data_direction dir, unsigned long attrs)
@@ -614,7 +613,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
}
/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ * tlb_addr is the physical address of the bounce buffer to unmap.
*/
void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
@@ -692,7 +691,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}
}
-#ifdef CONFIG_DMA_DIRECT_OPS
static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
size_t size)
{
@@ -714,12 +712,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
phys_addr = swiotlb_tbl_map_single(dev,
__phys_to_dma(dev, io_tlb_start),
- 0, size, DMA_FROM_DEVICE, 0);
+ 0, size, DMA_FROM_DEVICE, attrs);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
*dma_handle = __phys_to_dma(dev, phys_addr);
- if (dma_coherent_ok(dev, *dma_handle, size))
+ if (!dma_coherent_ok(dev, *dma_handle, size))
goto out_unmap;
memset(phys_to_virt(phys_addr), 0, size);
@@ -727,7 +725,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
out_unmap:
dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)(dev ? dev->coherent_dma_mask : 0),
+ (unsigned long long)dev->coherent_dma_mask,
(unsigned long long)*dma_handle);
/*
@@ -737,7 +735,7 @@ out_unmap:
swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
out_warn:
- if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
+ if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
dev_warn(dev,
"swiotlb: coherent allocation failed, size=%zu\n",
size);
@@ -764,7 +762,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
DMA_ATTR_SKIP_CPU_SYNC);
return true;
}
-#endif
static void
swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
@@ -1045,7 +1042,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
-#ifdef CONFIG_DMA_DIRECT_OPS
void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
@@ -1087,6 +1083,5 @@ const struct dma_map_ops swiotlb_dma_ops = {
.unmap_sg = swiotlb_unmap_sg_attrs,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
- .dma_supported = swiotlb_dma_supported,
+ .dma_supported = dma_direct_supported,
};
-#endif /* CONFIG_DMA_DIRECT_OPS */
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index b3f235baa05d6..6cd7d07400059 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
{-EINVAL, "-1", NULL, 8, 0},
{-EINVAL, "-0", NULL, 8, 0},
{-EINVAL, "10-1", NULL, 8, 0},
+ {-EINVAL, "0-31:", NULL, 8, 0},
+ {-EINVAL, "0-31:0", NULL, 8, 0},
+ {-EINVAL, "0-31:0/0", NULL, 8, 0},
+ {-EINVAL, "0-31:1/0", NULL, 8, 0},
{-EINVAL, "0-31:10/1", NULL, 8, 0},
};
@@ -292,15 +296,17 @@ static void __init test_bitmap_parselist(void)
}
}
+#define EXP_BYTES (sizeof(exp) * 8)
+
static void __init test_bitmap_arr32(void)
{
- unsigned int nbits, next_bit, len = sizeof(exp) * 8;
+ unsigned int nbits, next_bit;
u32 arr[sizeof(exp) / 4];
- DECLARE_BITMAP(bmap2, len);
+ DECLARE_BITMAP(bmap2, EXP_BYTES);
memset(arr, 0xa5, sizeof(arr));
- for (nbits = 0; nbits < len; ++nbits) {
+ for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
bitmap_to_arr32(arr, exp, nbits);
bitmap_from_arr32(bmap2, arr, nbits);
expect_eq_bitmap(bmap2, exp, nbits);
@@ -312,7 +318,7 @@ static void __init test_bitmap_arr32(void)
" tail is not safely cleared: %d\n",
nbits, next_bit);
- if (nbits < len - 32)
+ if (nbits < EXP_BYTES - 32)
expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
0xa5a5a5a5);
}
@@ -325,23 +331,32 @@ static void noinline __init test_mem_optimisations(void)
unsigned int start, nbits;
for (start = 0; start < 1024; start += 8) {
- memset(bmap1, 0x5a, sizeof(bmap1));
- memset(bmap2, 0x5a, sizeof(bmap2));
for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+ memset(bmap1, 0x5a, sizeof(bmap1));
+ memset(bmap2, 0x5a, sizeof(bmap2));
+
bitmap_set(bmap1, start, nbits);
__bitmap_set(bmap2, start, nbits);
- if (!bitmap_equal(bmap1, bmap2, 1024))
+ if (!bitmap_equal(bmap1, bmap2, 1024)) {
printk("set not equal %d %d\n", start, nbits);
- if (!__bitmap_equal(bmap1, bmap2, 1024))
+ failed_tests++;
+ }
+ if (!__bitmap_equal(bmap1, bmap2, 1024)) {
printk("set not __equal %d %d\n", start, nbits);
+ failed_tests++;
+ }
bitmap_clear(bmap1, start, nbits);
__bitmap_clear(bmap2, start, nbits);
- if (!bitmap_equal(bmap1, bmap2, 1024))
+ if (!bitmap_equal(bmap1, bmap2, 1024)) {
printk("clear not equal %d %d\n", start, nbits);
- if (!__bitmap_equal(bmap1, bmap2, 1024))
+ failed_tests++;
+ }
+ if (!__bitmap_equal(bmap1, bmap2, 1024)) {
printk("clear not __equal %d %d\n", start,
nbits);
+ failed_tests++;
+ }
}
}
}
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 078a614805732..cee000ac54d8d 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -21,6 +21,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/vmalloc.h>
#define TEST_FIRMWARE_NAME "test-firmware.bin"
#define TEST_FIRMWARE_NUM_REQS 4
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 98854a64b014d..ec657105edbf0 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -567,7 +567,15 @@ static noinline void __init kmem_cache_invalid_free(void)
return;
}
+	/* Trigger an invalid free; the object doesn't get freed */
kmem_cache_free(cache, p + 1);
+
+ /*
+ * Properly free the object to prevent the "Objects remaining in
+ * test_cache on __kmem_cache_shutdown" BUG failure.
+ */
+ kmem_cache_free(cache, p);
+
kmem_cache_destroy(cache);
}
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
new file mode 100644
index 0000000000000..280f4979d00ed
--- /dev/null
+++ b/lib/test_ubsan.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+typedef void(*test_ubsan_fp)(void);
+
+static void test_ubsan_add_overflow(void)
+{
+ volatile int val = INT_MAX;
+
+ val += 2;
+}
+
+static void test_ubsan_sub_overflow(void)
+{
+ volatile int val = INT_MIN;
+ volatile int val2 = 2;
+
+ val -= val2;
+}
+
+static void test_ubsan_mul_overflow(void)
+{
+ volatile int val = INT_MAX / 2;
+
+ val *= 3;
+}
+
+static void test_ubsan_negate_overflow(void)
+{
+ volatile int val = INT_MIN;
+
+ val = -val;
+}
+
+static void test_ubsan_divrem_overflow(void)
+{
+ volatile int val = 16;
+ volatile int val2 = 0;
+
+ val /= val2;
+}
+
+static void test_ubsan_vla_bound_not_positive(void)
+{
+ volatile int size = -1;
+ char buf[size];
+
+ (void)buf;
+}
+
+static void test_ubsan_shift_out_of_bounds(void)
+{
+ volatile int val = -1;
+ int val2 = 10;
+
+ val2 <<= val;
+}
+
+static void test_ubsan_out_of_bounds(void)
+{
+ volatile int i = 4, j = 5;
+ volatile int arr[i];
+
+ arr[j] = i;
+}
+
+static void test_ubsan_load_invalid_value(void)
+{
+ volatile char *dst, *src;
+ bool val, val2, *ptr;
+ char c = 4;
+
+ dst = (char *)&val;
+ src = &c;
+ *dst = *src;
+
+ ptr = &val2;
+ val2 = val;
+}
+
+static void test_ubsan_null_ptr_deref(void)
+{
+ volatile int *ptr = NULL;
+ int val;
+
+ val = *ptr;
+}
+
+static void test_ubsan_misaligned_access(void)
+{
+ volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
+ volatile int *ptr, val = 6;
+
+ ptr = (int *)(arr + 1);
+ *ptr = val;
+}
+
+static void test_ubsan_object_size_mismatch(void)
+{
+ /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
+ volatile int val __aligned(8) = 4;
+ volatile long long *ptr, val2;
+
+ ptr = (long long *)&val;
+ val2 = *ptr;
+}
+
+static const test_ubsan_fp test_ubsan_array[] = {
+ test_ubsan_add_overflow,
+ test_ubsan_sub_overflow,
+ test_ubsan_mul_overflow,
+ test_ubsan_negate_overflow,
+ test_ubsan_divrem_overflow,
+ test_ubsan_vla_bound_not_positive,
+ test_ubsan_shift_out_of_bounds,
+ test_ubsan_out_of_bounds,
+ test_ubsan_load_invalid_value,
+	//test_ubsan_null_ptr_deref, /* excluded because it causes a crash */
+ test_ubsan_misaligned_access,
+ test_ubsan_object_size_mismatch,
+};
+
+static int __init test_ubsan_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++)
+ test_ubsan_array[i]();
+
+	(void)test_ubsan_null_ptr_deref; /* to avoid unused-function warning */
+ return 0;
+}
+module_init(test_ubsan_init);
+
+static void __exit test_ubsan_exit(void)
+{
+ /* do nothing */
+}
+module_exit(test_ubsan_exit);
+
+MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 0b79908dfe892..5939549c0e7bc 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -10,7 +10,10 @@
* Pablo Neira Ayuso <pablo@netfilter.org>
*
* ==========================================================================
- *
+ */
+
+/**
+ * DOC: ts_intro
* INTRODUCTION
*
* The textsearch infrastructure provides text searching facilities for
@@ -19,7 +22,9 @@
*
* ARCHITECTURE
*
- * User
+ * .. code-block:: none
+ *
+ * User
* +----------------+
* | finish()|<--------------(6)-----------------+
* |get_next_block()|<--------------(5)---------------+ |
@@ -33,21 +38,21 @@
* | (3)|----->| find()/next() |-----------+ |
* | (7)|----->| destroy() |----------------------+
* +----------------+ +---------------+
- *
- * (1) User configures a search by calling _prepare() specifying the
- * search parameters such as the pattern and algorithm name.
+ *
+ * (1) User configures a search by calling textsearch_prepare() specifying
+ * the search parameters such as the pattern and algorithm name.
* (2) Core requests the algorithm to allocate and initialize a search
* configuration according to the specified parameters.
- * (3) User starts the search(es) by calling _find() or _next() to
- * fetch subsequent occurrences. A state variable is provided
- * to the algorithm to store persistent variables.
+ * (3) User starts the search(es) by calling textsearch_find() or
+ * textsearch_next() to fetch subsequent occurrences. A state variable
+ * is provided to the algorithm to store persistent variables.
* (4) Core eventually resets the search offset and forwards the find()
* request to the algorithm.
* (5) Algorithm calls get_next_block() provided by the user continuously
* to fetch the data to be searched in block by block.
* (6) Algorithm invokes finish() after the last call to get_next_block
* to clean up any leftovers from get_next_block. (Optional)
- * (7) User destroys the configuration by calling _destroy().
+ * (7) User destroys the configuration by calling textsearch_destroy().
* (8) Core notifies the algorithm to destroy algorithm specific
* allocations. (Optional)
*
@@ -62,9 +67,10 @@
* amount of times and even in parallel as long as a separate struct
* ts_state variable is provided to every instance.
*
- * The actual search is performed by either calling textsearch_find_-
- * continuous() for linear data or by providing an own get_next_block()
- * implementation and calling textsearch_find(). Both functions return
+ * The actual search is performed by either calling
+ * textsearch_find_continuous() for linear data or by providing
+ * an own get_next_block() implementation and
+ * calling textsearch_find(). Both functions return
* the position of the first occurrence of the pattern or UINT_MAX if
* no match was found. Subsequent occurrences can be found by calling
* textsearch_next() regardless of the linearity of the data.
@@ -72,7 +78,7 @@
* Once you're done using a configuration it must be given back via
* textsearch_destroy.
*
- * EXAMPLE
+ * EXAMPLE::
*
* int pos;
* struct ts_config *conf;
@@ -87,13 +93,13 @@
* goto errout;
* }
*
- * pos = textsearch_find_continuous(conf, &state, example, strlen(example));
+ * pos = textsearch_find_continuous(conf, \&state, example, strlen(example));
* if (pos != UINT_MAX)
- * panic("Oh my god, dancing chickens at %d\n", pos);
+ * panic("Oh my god, dancing chickens at \%d\n", pos);
*
* textsearch_destroy(conf);
- * ==========================================================================
*/
+/* ========================================================================== */
#include <linux/module.h>
#include <linux/types.h>
@@ -225,7 +231,7 @@ static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
*
* Returns the position of first occurrence of the pattern or
* %UINT_MAX if no occurrence was found.
- */
+ */
unsigned int textsearch_find_continuous(struct ts_config *conf,
struct ts_state *state,
const void *data, unsigned int len)
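A compile-oriented variant of the EXAMPLE in the DOC block above (sketch only, not part of this patch); the "kmp" algorithm name and the TS_AUTOLOAD flag are assumptions based on the in-tree textsearch backends.

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/textsearch.h>

static int demo_search(const char *haystack)
{
	static const char pattern[] = "chicken";
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	conf = textsearch_prepare("kmp", pattern, strlen(pattern),
				  GFP_KERNEL, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return PTR_ERR(conf);

	pos = textsearch_find_continuous(conf, &state, haystack,
					 strlen(haystack));
	textsearch_destroy(conf);

	return pos != UINT_MAX ? (int)pos : -ENOENT;
}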
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 8999202ad43b0..a48aaa79d3523 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n)
*
* If speed is not important, use snprintf(). It's easy to read the code.
*/
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width)
{
/* put_dec requires 2-byte alignment of the buffer. */
char tmp[sizeof(num) * 3] __aligned(2);
@@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
len = put_dec(tmp, num) - tmp;
}
- if (len > size)
+ if (len > size || width > size)
return 0;
+
+ if (width > len) {
+ width = width - len;
+ for (idx = 0; idx < width; idx++)
+ buf[idx] = ' ';
+ } else {
+ width = 0;
+ }
+
for (idx = 0; idx < len; ++idx)
- buf[idx] = tmp[len - idx - 1];
- return len;
+ buf[idx + width] = tmp[len - idx - 1];
+
+ return len + width;
}
#define SIGN 1 /* unsigned/signed, must be 1 */
@@ -1641,19 +1651,22 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
return widen_string(buf, buf - buf_start, end, spec);
}
-static bool have_filled_random_ptr_key __read_mostly;
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
static siphash_key_t ptr_key __read_mostly;
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static void enable_ptr_key_workfn(struct work_struct *work)
{
get_random_bytes(&ptr_key, sizeof(ptr_key));
- /*
- * have_filled_random_ptr_key==true is dependent on get_random_bytes().
- * ptr_to_id() needs to see have_filled_random_ptr_key==true
- * after get_random_bytes() returns.
- */
- smp_mb();
- WRITE_ONCE(have_filled_random_ptr_key, true);
+ /* Needs to run from preemptible context */
+ static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+ /* This may be in an interrupt handler. */
+ queue_work(system_unbound_wq, &enable_ptr_key_work);
}
static struct random_ready_callback random_ready = {
@@ -1667,7 +1680,8 @@ static int __init initialize_ptr_random(void)
if (!ret) {
return 0;
} else if (ret == -EALREADY) {
- fill_random_ptr_key(&random_ready);
+ /* This is in preemptible context */
+ enable_ptr_key_workfn(&enable_ptr_key_work);
return 0;
}
@@ -1681,7 +1695,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
unsigned long hashval;
- if (unlikely(!have_filled_random_ptr_key)) {
+ if (static_branch_unlikely(&not_filled_random_ptr_key)) {
spec.field_width = 2 * sizeof(ptr);
/* string length must be less than default_width */
return string(buf, end, str, spec);
@@ -2562,6 +2576,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
case 's':
case 'F':
case 'f':
+ case 'x':
+ case 'K':
save_arg(void *);
break;
default:
@@ -2736,6 +2752,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
case 's':
case 'F':
case 'f':
+ case 'x':
+ case 'K':
process = true;
break;
default: