diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/cell.c | 17 | ||||
-rw-r--r-- | fs/afs/dynroot.c | 2 | ||||
-rw-r--r-- | fs/afs/internal.h | 4 | ||||
-rw-r--r-- | fs/afs/main.c | 2 | ||||
-rw-r--r-- | fs/afs/proc.c | 7 | ||||
-rw-r--r-- | fs/afs/rxrpc.c | 2 | ||||
-rw-r--r-- | fs/cachefiles/namei.c | 2 | ||||
-rw-r--r-- | fs/dax.c | 13 | ||||
-rw-r--r-- | fs/fat/fatent.c | 1 | ||||
-rw-r--r-- | fs/fscache/cookie.c | 31 | ||||
-rw-r--r-- | fs/fscache/internal.h | 1 | ||||
-rw-r--r-- | fs/fscache/main.c | 4 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 2 | ||||
-rw-r--r-- | fs/ubifs/super.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 200 |
16 files changed, 216 insertions, 78 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index f3d0bef16d78b..6127f0fcd62c4 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -514,6 +514,8 @@ static int afs_alloc_anon_key(struct afs_cell *cell) */ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) { + struct hlist_node **p; + struct afs_cell *pcell; int ret; if (!cell->anonymous_key) { @@ -534,7 +536,18 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) return ret; mutex_lock(&net->proc_cells_lock); - list_add_tail(&cell->proc_link, &net->proc_cells); + for (p = &net->proc_cells.first; *p; p = &(*p)->next) { + pcell = hlist_entry(*p, struct afs_cell, proc_link); + if (strcmp(cell->name, pcell->name) < 0) + break; + } + + cell->proc_link.pprev = p; + cell->proc_link.next = *p; + rcu_assign_pointer(*p, &cell->proc_link.next); + if (cell->proc_link.next) + cell->proc_link.next->pprev = &cell->proc_link.next; + afs_dynroot_mkdir(net, cell); mutex_unlock(&net->proc_cells_lock); return 0; @@ -550,7 +563,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) afs_proc_cell_remove(cell); mutex_lock(&net->proc_cells_lock); - list_del_init(&cell->proc_link); + hlist_del_rcu(&cell->proc_link); afs_dynroot_rmdir(net, cell); mutex_unlock(&net->proc_cells_lock); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 1cde710a80133..f29c6dade7f62 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -265,7 +265,7 @@ int afs_dynroot_populate(struct super_block *sb) return -ERESTARTSYS; net->dynroot_sb = sb; - list_for_each_entry(cell, &net->proc_cells, proc_link) { + hlist_for_each_entry(cell, &net->proc_cells, proc_link) { ret = afs_dynroot_mkdir(net, cell); if (ret < 0) goto error; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 871a228d7f37c..34c02fdcc25f1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -242,7 +242,7 @@ struct afs_net { seqlock_t cells_lock; struct mutex proc_cells_lock; - struct list_head proc_cells; + struct hlist_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one * cell, but in practice, people create aliases and subsets and there's @@ -320,7 +320,7 @@ struct afs_cell { struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager; /* Manager for init/deinit/dns */ - struct list_head proc_link; /* /proc cell list link */ + struct hlist_node proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif diff --git a/fs/afs/main.c b/fs/afs/main.c index e84fe822a9607..107427688eddd 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -87,7 +87,7 @@ static int __net_init afs_net_init(struct net *net_ns) timer_setup(&net->cells_timer, afs_cells_timer, 0); mutex_init(&net->proc_cells_lock); - INIT_LIST_HEAD(&net->proc_cells); + INIT_HLIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); net->fs_servers = RB_ROOT; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 476dcbb79713d..9101f62707af2 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -33,9 +33,8 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m) static int afs_proc_cells_show(struct seq_file *m, void *v) { struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); - struct afs_net *net = afs_seq2net(m); - if (v == &net->proc_cells) { + if (v == SEQ_START_TOKEN) { /* display header on line 1 */ seq_puts(m, "USE NAME\n"); return 0; @@ -50,12 +49,12 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { rcu_read_lock(); - return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos); } static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos); + return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos); } static void afs_proc_cells_stop(struct seq_file *m, void *v) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 35f2ae30f31f7..77a83790a31f3 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply[0] = NULL; - /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the * call because the work item may be queued again. diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index af2b17b21b94b..95983c744164a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -343,7 +343,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); diff --git a/fs/dax.c b/fs/dax.c index 4becbf168b7f0..0fb270f0a0ef6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + pgoff_t nr_pages = 1; + for (i = 0; i < pagevec_count(&pvec); i++) { struct page *pvec_ent = pvec.pages[i]; void *entry; @@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping) xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (entry) + if (entry) { page = dax_busy_page(entry); + /* + * Account for multi-order entries at + * the end of the pagevec. + */ + if (i + 1 >= pagevec_count(&pvec)) + nr_pages = 1UL << dax_radix_order(entry); + } put_unlocked_mapping_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (page) @@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - index++; + index += nr_pages; if (page) break; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index defc2168de915..f58c0cacc531d 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 83bfe04456b6a..c550512ce3350 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie) } /* - * initialise an cookie jar slab element prior to any use - */ -void fscache_cookie_init_once(void *_cookie) -{ - struct fscache_cookie *cookie = _cookie; - - memset(cookie, 0, sizeof(*cookie)); - spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); - INIT_HLIST_HEAD(&cookie->backing_objects); -} - -/* - * Set the index key in a cookie. The cookie struct has space for a 12-byte + * Set the index key in a cookie. The cookie struct has space for a 16-byte * key plus length and hash, but if that's not big enough, it's instead a * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then * the key data. @@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie, { unsigned long long h; u32 *buf; + int bufs; int i; - cookie->key_len = index_key_len; + bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); if (index_key_len > sizeof(cookie->inline_key)) { - buf = kzalloc(index_key_len, GFP_KERNEL); + buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; cookie->key = buf; } else { buf = (u32 *)cookie->inline_key; - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; } memcpy(buf, index_key, index_key_len); @@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie, */ h = (unsigned long)cookie->parent; h += index_key_len + cookie->type; - for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++) + + for (i = 0; i < bufs; i++) h += buf[i]; cookie->key_hash = h ^ (h >> 32); @@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie( struct fscache_cookie *cookie; /* allocate and initialise a cookie */ - cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); if (!cookie) return NULL; @@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie( cookie->netfs_data = netfs_data; cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); cookie->type = def->type; + spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f83328a7f0482..d6209022e9658 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object( extern struct kmem_cache *fscache_cookie_jar; extern void fscache_free_cookie(struct fscache_cookie *); -extern void fscache_cookie_init_once(void *); extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, const struct fscache_cookie_def *, const void *, size_t, diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 7dce110bf17d0..30ad89db1efcc 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -143,9 +143,7 @@ static int __init fscache_init(void) fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", sizeof(struct fscache_cookie), - 0, - 0, - fscache_cookie_init_once); + 0, 0, NULL); if (!fscache_cookie_jar) { pr_notice("Failed to allocate a cookie jar\n"); ret = -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 03128ed1f34e8..84544a4f012d7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1057,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } } release_metapath(&mp); - if (gfs2_is_jdata(ip)) + if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8e712b614e6e2..933aac5da1934 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +#endif static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bf000c8aeffbb..fec62e9dfbe6a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2337,8 +2337,8 @@ late_initcall(ubifs_init); static void __exit ubifs_exit(void) { - WARN_ON(list_empty(&ubifs_infos)); - WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + WARN_ON(!list_empty(&ubifs_infos)); + WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); ubifs_compressors_exit(); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5289e22cb081d..42ea7bab9144c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1220,35 +1220,92 @@ retry: return 0; } +/* Unlock both inodes after they've been prepped for a range clone. */ +STATIC void +xfs_reflink_remap_unlock( + struct file *file_in, + struct file *file_out) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + bool same_inode = (inode_in == inode_out); + + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); + if (!same_inode) + inode_unlock_shared(inode_in); +} + /* - * Link a range of blocks from one file to another. + * If we're reflinking to a point past the destination file's EOF, we must + * zero any speculative post-EOF preallocations that sit between the old EOF + * and the destination file offset. */ -int -xfs_reflink_remap_range( +static int +xfs_reflink_zero_posteof( + struct xfs_inode *ip, + loff_t pos) +{ + loff_t isize = i_size_read(VFS_I(ip)); + + if (pos <= isize) + return 0; + + trace_xfs_zero_eof(ip, isize, pos - isize); + return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, + &xfs_iomap_ops); +} + +/* + * Prepare two files for range cloning. Upon a successful return both inodes + * will have the iolock and mmaplock held, the page cache of the out file will + * be truncated, and any leases on the out file will have been broken. This + * function borrows heavily from xfs_file_aio_write_checks. + * + * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't + * checked that the bytes beyond EOF physically match. Hence we cannot use the + * EOF block in the source dedupe range because it's not a complete block match, + * hence can introduce a corruption into the file that has it's block replaced. + * + * In similar fashion, the VFS file cloning also allows partial EOF blocks to be + * "block aligned" for the purposes of cloning entire files. However, if the + * source file range includes the EOF block and it lands within the existing EOF + * of the destination file, then we can expose stale data from beyond the source + * file EOF in the destination file. + * + * XFS doesn't support partial block sharing, so in both cases we have check + * these cases ourselves. For dedupe, we can simply round the length to dedupe + * down to the previous whole block and ignore the partial EOF block. While this + * means we can't dedupe the last block of a file, this is an acceptible + * tradeoff for simplicity on implementation. + * + * For cloning, we want to share the partial EOF block if it is also the new EOF + * block of the destination file. If the partial EOF block lies inside the + * existing destination EOF, then we have to abort the clone to avoid exposing + * stale data in the destination file. Hence we reject these clone attempts with + * -EINVAL in this case. + */ +STATIC int +xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + u64 *len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); - struct xfs_mount *mp = src->i_mount; bool same_inode = (inode_in == inode_out); - xfs_fileoff_t sfsbno, dfsbno; - xfs_filblks_t fsblen; - xfs_extlen_t cowextsize; + u64 blkmask = i_blocksize(inode_in) - 1; ssize_t ret; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return -EOPNOTSUPP; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - /* Lock both files against IO */ ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); if (ret) @@ -1270,33 +1327,115 @@ xfs_reflink_remap_range( goto out_unlock; ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, - &len, is_dedupe); + len, is_dedupe); if (ret <= 0) goto out_unlock; + /* + * If the dedupe data matches, chop off the partial EOF block + * from the source file so we don't try to dedupe the partial + * EOF block. + */ + if (is_dedupe) { + *len &= ~blkmask; + } else if (*len & blkmask) { + /* + * The user is attempting to share a partial EOF block, + * if it's inside the destination EOF then reject it. + */ + if (pos_out + *len < i_size_read(inode_out)) { + ret = -EINVAL; + goto out_unlock; + } + } + /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) goto out_unlock; - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - /* - * Clear out post-eof preallocations because we don't have page cache - * backing the delayed allocations and they'll never get freed on - * their own. + * Zero existing post-eof speculative preallocations in the destination + * file. */ - if (xfs_can_free_eofblocks(dest, true)) { - ret = xfs_free_eofblocks(dest); - if (ret) - goto out_unlock; - } + ret = xfs_reflink_zero_posteof(dest, pos_out); + if (ret) + goto out_unlock; /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + *len) - 1); + + /* If we're altering the file contents... */ + if (!is_dedupe) { + /* + * ...update the timestamps (which will grab the ilock again + * from xfs_fs_dirty_inode, so we have to call it before we + * take the ilock). + */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + goto out_unlock; + } + + /* + * ...clear the security bits if the process is not being run + * by root. This keeps people from modifying setuid and setgid + * binaries. + */ + ret = file_remove_privs(file_out); + if (ret) + goto out_unlock; + } + + return 1; +out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + return ret; +} + +/* + * Link a range of blocks from one file to another. + */ +int +xfs_reflink_remap_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + struct xfs_mount *mp = src->i_mount; + xfs_fileoff_t sfsbno, dfsbno; + xfs_filblks_t fsblen; + xfs_extlen_t cowextsize; + ssize_t ret; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Prepare and then clone file data. */ + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, + &len, is_dedupe); + if (ret <= 0) + return ret; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); @@ -1305,10 +1444,6 @@ xfs_reflink_remap_range( if (ret) goto out_unlock; - /* Zap any page cache for the destination file's range. */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file @@ -1325,12 +1460,7 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_SHARED); - inode_unlock(inode_out); - if (!same_inode) - inode_unlock_shared(inode_in); + xfs_reflink_remap_unlock(file_in, file_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; |