summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2018-04-27 20:46:22 +0100
committerDavid Howells <dhowells@redhat.com>2018-05-14 13:17:35 +0100
commitb61f7dcf4eb2653e870c9079b02d11a0834cfe39 (patch)
treedc5bd17741a867d22407e0d55d05b1476d7dc42c /fs
parentf0ab773f5c96c29a5227234c4b5a820f5591b74d (diff)
downloadlinux-0-day-b61f7dcf4eb2653e870c9079b02d11a0834cfe39.tar.gz
linux-0-day-b61f7dcf4eb2653e870c9079b02d11a0834cfe39.tar.xz
afs: Fix directory page locking
The afs directory loading code (primarily afs_read_dir()) locks all the pages that hold a directory's content blob to defend against getdents/getdents races and getdents/lookup races where the competitors issue conflicting reads on the same data. As the reads will complete consecutively, they may retrieve different versions of the data and one may overwrite the data that the other is busy parsing. Fix this by not locking the pages at all, but rather by turning the validation lock into an rwsem and getting an exclusive lock on it whilst reading the data or validating the attributes and a shared lock whilst parsing the data. Sharing the attribute validation lock should be fine as the data fetch will retrieve the attributes also. The individual page locks aren't needed at all as the only place they're being used is to serialise data loading. Without this patch, the: if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { ... } part of afs_read_dir() may be skipped, leaving the pages unlocked when we hit the success: clause - in which case we try to unlock the not-locked pages, leading to the following oops: page:ffffe38b405b4300 count:3 mapcount:0 mapping:ffff98156c83a978 index:0x0 flags: 0xfffe000001004(referenced|private) raw: 000fffe000001004 ffff98156c83a978 0000000000000000 00000003ffffffff raw: dead000000000100 dead000000000200 0000000000000001 ffff98156b27c000 page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffff98156b27c000 ------------[ cut here ]------------ kernel BUG at mm/filemap.c:1205! ... RIP: 0010:unlock_page+0x43/0x50 ... Call Trace: afs_dir_iterate+0x789/0x8f0 [kafs] ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x166/0x1d0 ? afs_do_lookup+0x69/0x490 [kafs] ? afs_do_lookup+0x101/0x490 [kafs] ? key_default_cmp+0x20/0x20 ? request_key+0x3c/0x80 ? afs_lookup+0xf1/0x340 [kafs] ? __lookup_slow+0x97/0x150 ? lookup_slow+0x35/0x50 ? walk_component+0x1bf/0x490 ? path_lookupat.isra.52+0x75/0x200 ? filename_lookup.part.66+0xa0/0x170 ? afs_end_vnode_operation+0x41/0x60 [kafs] ? __check_object_size+0x9c/0x171 ? strncpy_from_user+0x4a/0x170 ? vfs_statx+0x73/0xe0 ? __do_sys_newlstat+0x39/0x70 ? __x64_sys_getdents+0xc9/0x140 ? __x64_sys_getdents+0x140/0x140 ? do_syscall_64+0x5b/0x160 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: f3ddee8dc4e2 ("afs: Fix directory handling") Reported-by: Marc Dionne <marc.dionne@auristor.com> Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c36
-rw-r--r--fs/afs/inode.c6
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/super.c2
4 files changed, 22 insertions, 24 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5889f70d4d273..2853acd644828 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
* get reclaimed during the iteration.
*/
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
+ __acquires(&dvnode->validate_lock)
{
struct afs_read *req;
loff_t i_size;
@@ -261,18 +262,21 @@ retry:
/* If we're going to reload, we need to lock all the pages to prevent
* races.
*/
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- ret = -ERESTARTSYS;
- for (i = 0; i < req->nr_pages; i++)
- if (lock_page_killable(req->pages[i]) < 0)
- goto error_unlock;
+ ret = -ERESTARTSYS;
+ if (down_read_killable(&dvnode->validate_lock) < 0)
+ goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ goto success;
+
+ up_read(&dvnode->validate_lock);
+ if (down_write_killable(&dvnode->validate_lock) < 0)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
ret = afs_fetch_data(dvnode, key, req);
if (ret < 0)
- goto error_unlock_all;
+ goto error_unlock;
task_io_account_read(PAGE_SIZE * req->nr_pages);
@@ -284,33 +288,26 @@ retry:
for (i = 0; i < req->nr_pages; i++)
if (!afs_dir_check_page(dvnode, req->pages[i],
req->actual_len))
- goto error_unlock_all;
+ goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
}
+ downgrade_write(&dvnode->validate_lock);
success:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
return req;
-error_unlock_all:
- i = req->nr_pages;
error_unlock:
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
error:
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);
content_has_grown:
- i = req->nr_pages;
- while (i > 0)
- unlock_page(req->pages[--i]);
+ up_write(&dvnode->validate_lock);
afs_put_read(req);
goto retry;
}
@@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
}
out:
+ up_read(&dvnode->validate_lock);
afs_put_read(req);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 06194cfe9724c..e855c6e5cf285 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -415,7 +415,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (valid)
goto valid;
- mutex_lock(&vnode->validate_lock);
+ down_write(&vnode->validate_lock);
/* if the promise has expired, we need to check the server again to get
* a new promise - note that if the (parent) directory's metadata was
@@ -444,13 +444,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* different */
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
valid:
_leave(" = 0");
return 0;
error_unlock:
- mutex_unlock(&vnode->validate_lock);
+ up_write(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f8086ec95e241..468be1e0dffbc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -494,7 +494,7 @@ struct afs_vnode {
#endif
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
struct mutex io_lock; /* Lock for serialising I/O on this mutex */
- struct mutex validate_lock; /* lock for validating this vnode */
+ struct rw_semaphore validate_lock; /* lock for validating this vnode */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 65081ec3c36e5..b02838cd95259 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
mutex_init(&vnode->io_lock);
- mutex_init(&vnode->validate_lock);
+ init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
INIT_LIST_HEAD(&vnode->wb_keys);