From ebf43500ef148a380bd132743c3fc530111ac620 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 27 Apr 2006 08:46:01 +0200
Subject: [PATCH] Add find_get_pages_contig(): contiguous variant of find_get_pages()

find_get_pages_contig() will break out if we hit a hole in the page cache.
From Andrew Morton, small modifications and documentation by me.

Signed-off-by: Jens Axboe
---
 mm/filemap.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'mm')

diff --git a/mm/filemap.c b/mm/filemap.c
index 3ef20739e725..fd57442186cb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -697,6 +697,38 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 	return ret;
 }
 
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @index:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+			       unsigned int nr_pages, struct page **pages)
+{
+	unsigned int i;
+	unsigned int ret;
+
+	read_lock_irq(&mapping->tree_lock);
+	ret = radix_tree_gang_lookup(&mapping->page_tree,
+				(void **)pages, index, nr_pages);
+	for (i = 0; i < ret; i++) {
+		if (pages[i]->mapping == NULL || pages[i]->index != index)
+			break;
+
+		page_cache_get(pages[i]);
+		index++;
+	}
+	read_unlock_irq(&mapping->tree_lock);
+	return i;
+}
+
 /*
  * Like find_get_pages, except we only return pages which are tagged with
  * `tag'.   We update *index to index the next page for the traversal.
--
cgit v1.2.3
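For illustration only (not part of the patch above): a hypothetical caller of the new helper. The batch size of 16 and the process_page() consumer are made up.

/*
 * Illustrative sketch: grab up to 16 contiguous pages starting at 'index'.
 * find_get_pages_contig() stops at the first hole, so 'nr' may be short.
 */
static void example_contig_lookup(struct address_space *mapping, pgoff_t index)
{
	struct page *pages[16];
	unsigned int nr, i;

	nr = find_get_pages_contig(mapping, index, 16, pages);
	for (i = 0; i < nr; i++) {
		process_page(pages[i]);		/* made-up consumer */
		page_cache_release(pages[i]);	/* drop the reference taken by the lookup */
	}
}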
From 693f7d362055261882659475d2ef022e32edbff1 Mon Sep 17 00:00:00 2001
From: "shin, jacob"
Date: Fri, 28 Apr 2006 10:54:37 -0500
Subject: [PATCH] slab: fix crash on __drain_alien_cache() during CPU Hotplug

transfer_objects() should only be called when all of the cpus in the node
are online.  The CPU_DEAD notifier callback sets l3->shared to NULL.

Signed-off-by: Jacob Shin
Signed-off-by: Linus Torvalds
---
 mm/slab.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slab.c b/mm/slab.c
index af5c5237e11a..c32af7e7581e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -979,7 +979,8 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	 * That way we could avoid the overhead of putting the objects
 	 * into the free lists and getting them back later.
 	 */
-	transfer_objects(rl3->shared, ac, ac->limit);
+	if (rl3->shared)
+		transfer_objects(rl3->shared, ac, ac->limit);
 
 	free_block(cachep, ac->entry, ac->avail, node);
 	ac->avail = 0;
--
cgit v1.2.3

From 4c28f81193b6778f7b49090930d88e6d12bcb928 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 1 May 2006 12:16:08 -0700
Subject: [PATCH] page migration: Fix fallback behavior for dirty pages

Currently we check PageDirty() in order to make the decision to swap out
the page.  However, the dirty information may only be contained in the ptes
pointing to the page.  We need to first unmap the ptes before checking for
PageDirty().  If unmap is successful then the page count of the page will
also be decreased so that pageout() works properly.

This is a fix necessary for 2.6.17.  Without this fix we may migrate dirty
pages for filesystems without migration functions.  Filesystems may keep
pointers to dirty pages.  Migration of dirty pages can result in the
filesystem keeping pointers to freed pages.

Unmapping is currently not separated out from removing all the references
to a page and moving the mapping.  Therefore try_to_unmap will be called
again in migrate_page() if the writeout is successful.  However, it won't
do anything since the ptes are already removed.

The coming updates to the page migration code will restructure the code
so that this is no longer necessary.

Signed-off-by: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/migrate.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'mm')

diff --git a/mm/migrate.c b/mm/migrate.c
index d444229f2599..1c25040693d2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -439,6 +439,17 @@ redo:
 		goto unlock_both;
 	}
 
+	/* Make sure the dirty bit is up to date */
+	if (try_to_unmap(page, 1) == SWAP_FAIL) {
+		rc = -EPERM;
+		goto unlock_both;
+	}
+
+	if (page_mapcount(page)) {
+		rc = -EAGAIN;
+		goto unlock_both;
+	}
+
 	/*
 	 * Default handling if a filesystem does not provide
 	 * a migration function. We can only migrate clean
--
cgit v1.2.3
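For context, a simplified sketch of the fallback ordering this fix establishes: the ptes are unmapped first so that pte dirty bits reach PageDirty() before the clean/dirty decision is made. This is not the actual mm/migrate.c code; the helpers suffixed _sketch are invented names.

/* Simplified, illustrative ordering only -- see mm/migrate.c for the real code */
static int fallback_migrate_sketch(struct page *page)
{
	/* 1. Unmap the ptes; this transfers pte dirty bits to PageDirty() */
	if (try_to_unmap(page, 1) == SWAP_FAIL)
		return -EPERM;

	/* 2. Still mapped by someone? Try again later. */
	if (page_mapcount(page))
		return -EAGAIN;

	/* 3. Only now is PageDirty() reliable */
	if (PageDirty(page))
		return writeout_sketch(page);		/* invented helper: write the page out */

	return move_clean_page_sketch(page);		/* invented helper: migrate a clean page */
}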
From 46a66eecdf7bc12562ecb492297447ed0e1ecf59 Mon Sep 17 00:00:00 2001
From: Mike Kravetz
Date: Mon, 1 May 2006 12:16:09 -0700
Subject: [PATCH] sparsemem interaction with memory add bug fixes

This patch fixes two bugs with the way sparsemem interacts with memory add.
They are:

- memory leak if memmap for section already exists
- calling alloc_bootmem_node() after boot

These bugs were discovered and a first cut at the fixes was provided by
Arnd Bergmann and Joel Schopp.

Signed-off-by: Mike Kravetz
Signed-off-by: Joel Schopp
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/sparse.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/sparse.c b/mm/sparse.c
index 0a51f36ba3a1..d7c32de99ee8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -32,7 +32,10 @@ static struct mem_section *sparse_index_alloc(int nid)
 	unsigned long array_size = SECTIONS_PER_ROOT *
 				   sizeof(struct mem_section);
 
-	section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+	if (system_state == SYSTEM_RUNNING)
+		section = kmalloc_node(array_size, GFP_KERNEL, nid);
+	else
+		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
 
 	if (section)
 		memset(section, 0, array_size);
@@ -281,9 +284,9 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 
 	ret = sparse_init_one_section(ms, section_nr, memmap);
 
-	if (ret <= 0)
-		__kfree_section_memmap(memmap, nr_pages);
 out:
 	pgdat_resize_unlock(pgdat, &flags);
+	if (ret <= 0)
+		__kfree_section_memmap(memmap, nr_pages);
 	return ret;
 }
--
cgit v1.2.3

From bed120c64eb07b6838bb758109811484af8cebba Mon Sep 17 00:00:00 2001
From: Joel H Schopp
Date: Mon, 1 May 2006 12:16:11 -0700
Subject: [PATCH] spufs: fix for CONFIG_NUMA

Based on an older patch from Mike Kravetz.

We need to have a mem_map for high addresses in order to make fops->no_page
work on spufs mem and register files.  So far, we have used the
memory_present() function during early bootup, but that did not work when
CONFIG_NUMA was enabled.

We now use the __add_pages() function to add the mem_map when loading the
spufs module, which is a lot nicer.

Signed-off-by: Arnd Bergmann
Cc: Paul Mackerras
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/powerpc/platforms/cell/Kconfig    |  3 +-
 arch/powerpc/platforms/cell/setup.c    | 78 ++--------------------------------
 arch/powerpc/platforms/cell/spu_base.c | 72 +++++++++++++++++++++++++------
 mm/memory_hotplug.c                    |  6 ++-
 4 files changed, 70 insertions(+), 89 deletions(-)

(limited to 'mm')

diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index c2a3db8edb0c..6a02d51086c8 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -12,7 +12,8 @@ config SPU_FS
 
 config SPUFS_MMAP
 	bool
-	depends on SPU_FS && SPARSEMEM && !PPC_64K_PAGES
+	depends on SPU_FS && SPARSEMEM
+	select MEMORY_HOTPLUG
 	default y
 
 endmenu

diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index dac5d0365fde..6574b22b3cf3 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
@@ -46,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "interrupt.h"
 #include "iommu.h"
@@ -69,77 +72,6 @@ static void cell_show_cpuinfo(struct seq_file *m)
 	of_node_put(root);
 }
 
-#ifdef CONFIG_SPARSEMEM
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-#ifdef CONFIG_NUMA
-	struct device_node *cpu;
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-#else
-	id = NULL;
-#endif
-	return id ? *id : 0;
-}
-
-static void __init cell_spuprop_present(struct device_node *spe,
-				       const char *prop, int early)
-{
-	struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *p;
-	int proplen;
-
-	unsigned long start_pfn, end_pfn, pfn;
-	int node_id;
-
-	p = (void*)get_property(spe, prop, &proplen);
-	WARN_ON(proplen != sizeof (*p));
-
-	node_id = find_spu_node_id(spe);
-
-	start_pfn = p->address >> PAGE_SHIFT;
-	end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	/* We need to call memory_present *before* the call to sparse_init,
-	   but we can initialize the page structs only *after* that call.
-	   Thus, we're being called twice. */
-	if (early)
-		memory_present(node_id, start_pfn, end_pfn);
-	else {
-		/* As the pages backing SPU LS and I/O are outside the range
-		   of regular memory, their page structs were not initialized
-		   by free_area_init. Do it here instead. */
-		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-			struct page *page = pfn_to_page(pfn);
-			set_page_links(page, ZONE_DMA, node_id, pfn);
-			init_page_count(page);
-			reset_page_mapcount(page);
-			SetPageReserved(page);
-			INIT_LIST_HEAD(&page->lru);
-		}
-	}
-}
-
-static void __init cell_spumem_init(int early)
-{
-	struct device_node *node;
-	for (node = of_find_node_by_type(NULL, "spe");
-			node;
-			node = of_find_node_by_type(node, "spe")) {
-		cell_spuprop_present(node, "local-store", early);
-		cell_spuprop_present(node, "problem", early);
-		cell_spuprop_present(node, "priv1", early);
-		cell_spuprop_present(node, "priv2", early);
-	}
-}
-#else
-static void __init cell_spumem_init(int early)
-{
-}
-#endif
-
 static void cell_progress(char *s, unsigned short hex)
 {
 	printk("*** %04x : %s\n", hex, s ? s : "");
@@ -172,8 +104,6 @@ static void __init cell_setup_arch(void)
 #endif
 
 	mmio_nvram_init();
-
-	cell_spumem_init(0);
 }
 
 /*
@@ -189,8 +119,6 @@ static void __init cell_init_early(void)
 
 	ppc64_interrupt_controller = IC_CELL_PIC;
 
-	cell_spumem_init(1);
-
 	DBG(" <- cell_init_early()\n");
 }
 
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index ef47a6239d48..b788e165c557 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -520,6 +520,56 @@ void spu_irq_setaffinity(struct spu *spu, int cpu)
 }
 EXPORT_SYMBOL_GPL(spu_irq_setaffinity);
 
+static int __init find_spu_node_id(struct device_node *spe)
+{
+	unsigned int *id;
+	struct device_node *cpu;
+	cpu = spe->parent->parent;
+	id = (unsigned int *)get_property(cpu, "node-id", NULL);
+	return id ? *id : 0;
+}
+
+static int __init cell_spuprop_present(struct device_node *spe,
+		const char *prop)
+{
+	static DEFINE_MUTEX(add_spumem_mutex);
+
+	struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *p;
+	int proplen;
+
+	unsigned long start_pfn, nr_pages;
+	int node_id;
+	struct pglist_data *pgdata;
+	struct zone *zone;
+	int ret;
+
+	p = (void*)get_property(spe, prop, &proplen);
+	WARN_ON(proplen != sizeof (*p));
+
+	start_pfn = p->address >> PAGE_SHIFT;
+	nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	/*
+	 * XXX need to get the correct NUMA node in here. This may
+	 * be different from the spe::node_id property, e.g. when
+	 * the host firmware is not NUMA aware.
+	 */
+	node_id = 0;
+
+	pgdata = NODE_DATA(node_id);
+	zone = pgdata->node_zones;
+
+	/* XXX rethink locking here */
+	mutex_lock(&add_spumem_mutex);
+	ret = __add_pages(zone, start_pfn, nr_pages);
+	mutex_unlock(&add_spumem_mutex);
+
+	return ret;
+}
+
 static void __iomem * __init map_spe_prop(struct device_node *n,
 		const char *name)
 {
@@ -530,6 +580,8 @@ static void __iomem * __init map_spe_prop(struct device_node *n,
 	void *p;
 	int proplen;
 
+	void* ret = NULL;
+	int err = 0;
+
 	p = get_property(n, name, &proplen);
 	if (proplen != sizeof (struct address_prop))
@@ -537,7 +589,14 @@ static void __iomem * __init map_spe_prop(struct device_node *n,
 
 	prop = p;
 
-	return ioremap(prop->address, prop->len);
+	err = cell_spuprop_present(n, name);
+	if (err && (err != -EEXIST))
+		goto out;
+
+	ret = ioremap(prop->address, prop->len);
+
+ out:
+	return ret;
 }
 
 static void spu_unmap(struct spu *spu)
@@ -597,17 +656,6 @@ out:
 	return ret;
 }
 
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-	struct device_node *cpu;
-
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-
-	return id ? *id : 0;
-}
-
 static int __init create_spu(struct device_node *spe)
 {
 	struct spu *spu;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1fe76d963ac2..1ae2b2cc3a54 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -69,12 +69,16 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
 	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) {
 		err = __add_section(zone, phys_start_pfn + i);
 
-		if (err)
+		/* We want to keep adding the rest of the
+		 * sections if the first ones already exist
+		 */
+		if (err && (err != -EEXIST))
 			break;
 	}
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(__add_pages);
 
 static void grow_zone_span(struct zone *zone,
 		unsigned long start_pfn, unsigned long end_pfn)
--
cgit v1.2.3
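The EXPORT_SYMBOL_GPL(__add_pages) above is what allows a module such as spufs to add a mem_map at load time. Below is a minimal sketch of that calling pattern, mirroring cell_spuprop_present() from this patch; my_add_device_memory() and the fixed node 0 are illustrative assumptions, not part of the patch.

/* Illustrative only: how a module might add struct pages for device memory */
static int my_add_device_memory(unsigned long phys_addr, unsigned long size)
{
	unsigned long start_pfn = phys_addr >> PAGE_SHIFT;
	unsigned long nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct pglist_data *pgdata = NODE_DATA(0);	/* assumption: node 0 */
	struct zone *zone = pgdata->node_zones;		/* first zone, as spu_base.c does */
	int err;

	err = __add_pages(zone, start_pfn, nr_pages);
	if (err == -EEXIST)	/* a memmap that already exists is not a failure for this caller */
		err = 0;
	return err;
}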