From 03e0990fc88f82c85abeaf90aabe1921e4e0b72f Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:31:33 +1100 Subject: ext2: remove support for DAX PMD faults DAX PMD support was added via the following commit: commit e7b1ea2ad658 ("ext2: huge page fault support") I believe this path to be untested as ext2 doesn't reliably provide block allocations that are aligned to 2MiB. In my testing I've been unable to get ext2 to actually fault in a PMD. It always fails with a "pfn unaligned" message because the sector returned by ext2_get_block() isn't aligned. I've tried various settings for the "stride" and "stripe_width" extended options to mkfs.ext2, without any luck. Since we can't reliably get PMDs, remove support so that we don't have an untested code path that we may someday traverse when we happen to get an aligned block allocation. This should also make 4k DAX faults in ext2 a bit faster since they will no longer have to call the PMD fault handler only to get a response of VM_FAULT_FALLBACK. 
Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/ext2/file.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'fs/ext2') diff --git a/fs/ext2/file.c b/fs/ext2/file.c index a0e1478dfd04..fb88b51ca947 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -107,27 +107,6 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; } -static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags) -{ - struct inode *inode = file_inode(vma->vm_file); - struct ext2_inode_info *ei = EXT2_I(inode); - int ret; - - if (flags & FAULT_FLAG_WRITE) { - sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); - } - down_read(&ei->dax_sem); - - ret = dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block); - - up_read(&ei->dax_sem); - if (flags & FAULT_FLAG_WRITE) - sb_end_pagefault(inode->i_sb); - return ret; -} - static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -154,7 +133,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, - .pmd_fault = ext2_dax_pmd_fault, + /* + * .pmd_fault is not supported for DAX because allocation in ext2 + * cannot be reliably aligned to huge page sizes and so pmd faults + * will always fail and fall back to regular faults. 
+ */ .page_mkwrite = ext2_dax_fault, .pfn_mkwrite = ext2_dax_pfn_mkwrite, }; @@ -166,7 +149,7 @@ static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &ext2_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_MIXEDMAP; return 0; } #else -- cgit v1.2.3 From 11c59c92f44d9272db7655a462608658a6d95013 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:32:46 +1100 Subject: dax: correct dax iomap code namespace The recently added DAX functions that use the new struct iomap data structure were named iomap_dax_rw(), iomap_dax_fault() and iomap_dax_actor(). These are actually defined in fs/dax.c, though, so should be part of the "dax" namespace and not the "iomap" namespace. Rename them to dax_iomap_rw(), dax_iomap_fault() and dax_iomap_actor() respectively. Signed-off-by: Ross Zwisler Suggested-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 16 ++++++++-------- fs/ext2/file.c | 6 +++--- fs/xfs/xfs_file.c | 8 ++++---- include/linux/dax.h | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/ext2') diff --git a/fs/dax.c b/fs/dax.c index 3d0b1032c555..fdbd7a1ec6cf 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1031,7 +1031,7 @@ EXPORT_SYMBOL_GPL(dax_truncate_page); #ifdef CONFIG_FS_IOMAP static loff_t -iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap) { struct iov_iter *iter = data; @@ -1088,7 +1088,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, } /** - * iomap_dax_rw - Perform I/O to a DAX file + * dax_iomap_rw - Perform I/O to a DAX file * @iocb: The control block for this I/O * @iter: The addresses to do I/O from or to * @ops: iomap ops passed from the file system @@ -1098,7 +1098,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t 
length, void *data, * and evicting any page cache pages in the region under I/O. */ ssize_t -iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, +dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -1128,7 +1128,7 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, - iter, iomap_dax_actor); + iter, dax_iomap_actor); if (ret <= 0) break; pos += ret; @@ -1138,10 +1138,10 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, iocb->ki_pos += done; return done ? done : ret; } -EXPORT_SYMBOL_GPL(iomap_dax_rw); +EXPORT_SYMBOL_GPL(dax_iomap_rw); /** - * iomap_dax_fault - handle a page fault on a DAX file + * dax_iomap_fault - handle a page fault on a DAX file * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault * @ops: iomap ops passed from the file system @@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(iomap_dax_rw); * or mkwrite handler for DAX files. Assumes the caller has done all the * necessary locking for the page fault to proceed successfully. 
*/ -int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, +int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops) { struct address_space *mapping = vma->vm_file->f_mapping; @@ -1252,5 +1252,5 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, return VM_FAULT_SIGBUS | major; return VM_FAULT_NOPAGE | major; } -EXPORT_SYMBOL_GPL(iomap_dax_fault); +EXPORT_SYMBOL_GPL(dax_iomap_fault); #endif /* CONFIG_FS_IOMAP */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index fb88b51ca947..b0f241528a30 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -38,7 +38,7 @@ static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) return 0; /* skip atime */ inode_lock_shared(inode); - ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops); + ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); @@ -62,7 +62,7 @@ static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out_unlock; - ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops); + ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); mark_inode_dirty(inode); @@ -99,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } down_read(&ei->dax_sem); - ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a314fc7b56fa..e7f35d548cfc 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -344,7 +344,7 @@ xfs_file_dax_read( return 0; /* skip atime */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -691,7 +691,7 @@ xfs_file_dax_write( 
trace_xfs_file_dax_write(ip, count, pos); - ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1640,7 +1640,7 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); } else { ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); @@ -1674,7 +1674,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/include/linux/dax.h b/include/linux/dax.h index 0f74866edae6..a3dfee4cb03f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -11,13 +11,13 @@ struct iomap_ops; /* We use lowest available exceptional entry bit for locking */ #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, +ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops); ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, +int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -- cgit v1.2.3