summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c78
-rw-r--r--fs/afs/fsclient.c221
-rw-r--r--fs/afs/internal.h14
-rw-r--r--fs/afs/rxrpc.c73
-rw-r--r--fs/afs/vlclient.c11
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/btrfs/backref.c1
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delayed-ref.c7
-rw-r--r--fs/btrfs/disk-io.c56
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c185
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/file.c28
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c37
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/qgroup.c62
-rw-r--r--fs/btrfs/qgroup.h36
-rw-r--r--fs/btrfs/relocation.c126
-rw-r--r--fs/btrfs/root-tree.c27
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/btrfs/tree-log.c21
-rw-r--r--fs/btrfs/tree-log.h5
-rw-r--r--fs/btrfs/volumes.c27
-rw-r--r--fs/dlm/debug_fs.c62
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/super.c18
-rw-r--r--fs/ext4/xattr.c53
-rw-r--r--fs/ext4/xattr.h1
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/f2fs/f2fs.h12
-rw-r--r--fs/f2fs/file.c13
-rw-r--r--fs/f2fs/node.c47
-rw-r--r--fs/f2fs/super.c6
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.h3
-rw-r--r--fs/nfs/blocklayout/extent_tree.c10
-rw-r--r--fs/nfs/callback.c1
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c10
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c45
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c23
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/nfs42proc.c34
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4proc.c89
-rw-r--r--fs/nfs/nfs4session.c53
-rw-r--r--fs/nfs/nfs4session.h7
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/super.c19
-rw-r--r--fs/seq_file.c4
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/xattr.c5
57 files changed, 1032 insertions, 571 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6da6740..85737e96ab8b5 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
case 1:
_debug("extract FID count");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
_debug("extract FID array");
ret = afs_extract_data(call, skb, last, call->buffer,
call->count * 3 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
_debug("unmarshall FID array");
call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
case 3:
_debug("extract CB count");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
tmp = ntohl(call->tmp);
_debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
_debug("extract CB array");
ret = afs_extract_data(call, skb, last, call->request,
call->count * 3 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
_debug("unmarshall CB array");
cb = call->request;
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
call->unmarshall++;
case 5:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* Record that the message was unmarshalled successfully so
* that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
break;
}
- if (!last)
- return 0;
call->state = AFS_CALL_REPLYING;
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
{
struct afs_server *server;
struct in_addr addr;
+ int ret;
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
_enter(",{%u},%d", skb->len, last);
+ /* There are some arguments that we ignore */
+ afs_data_consumed(call, skb);
if (!last)
- return 0;
+ return -EAGAIN;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
bool last)
{
+ int ret;
+
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
switch (call->unmarshall) {
case 0:
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
break;
}
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
call->state = AFS_CALL_REPLYING;
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
struct sk_buff *skb, bool last)
{
+ int ret;
+
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930ec28889..9312b92e54bed 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
case 1:
_debug("extract data length (MSW)");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
case 2:
_debug("extract data length");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
ret = afs_extract_data(call, skb, last, buffer,
call->count);
kunmap_atomic(buffer);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
call->offset = 0;
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
case 4:
ret = afs_extract_data(call, skb, last, call->buffer,
(21 + 3 + 6) * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
call->unmarshall++;
case 5:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
break;
}
- if (!last)
- return 0;
-
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
{
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG; /* shouldn't be any reply data */
- return 0;
+ /* shouldn't be any reply data */
+ return afs_data_complete(call, skb, last);
}
/*
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call,
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call,
{
struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call,
{
struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last) {
- _leave(" = 0 [more]");
- return 0;
- }
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
afs_dataversion_t *store_version;
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last) {
- _leave(" = 0 [more]");
- return 0;
- }
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
store_version = NULL;
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
_debug("extract status");
ret = afs_extract_data(call, skb, last, call->buffer,
12 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
bp = call->buffer;
xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the volume name length */
case 2:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 4:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the offline message length */
case 5:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 7:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the message of the day length */
case 8:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 10:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
no_motd_padding:
case 11:
- _debug("trailer %d", skb->len);
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
break;
}
- if (!last)
- return 0;
-
_leave(" = 0 [done]");
return 0;
}
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
struct sk_buff *skb, bool last)
{
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982312f3d..df976b2a7f40f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *);
*/
extern int afs_open_socket(void);
extern void afs_close_socket(void);
+extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
const struct afs_wait_mode *);
extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
size_t, size_t);
extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
extern void afs_send_empty_reply(struct afs_call *);
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
size_t);
+static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
+ bool last)
+{
+ if (skb->len > 0)
+ return -EBADMSG;
+ afs_data_consumed(call, skb);
+ if (!last)
+ return -EAGAIN;
+ return 0;
+}
+
/*
* security.c
*/
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 4832de84d52cb..14d04c848465a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -150,10 +150,9 @@ void afs_close_socket(void)
}
/*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Note that the data in a socket buffer is now consumed.
*/
-static void afs_data_delivered(struct sk_buff *skb)
+void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
{
if (!skb) {
_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb)
} else {
_debug("DLVR %p{%u} [%d]",
skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- if (atomic_dec_return(&afs_outstanding_skbs) == -1)
- BUG();
- rxrpc_kernel_data_delivered(skb);
+ rxrpc_kernel_data_consumed(call->rxcall, skb);
}
}
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call)
last = rxrpc_kernel_is_data_last(skb);
ret = call->type->deliver(call, skb, last);
switch (ret) {
+ case -EAGAIN:
+ if (last) {
+ _debug("short data");
+ goto unmarshal_error;
+ }
+ break;
case 0:
- if (last &&
- call->state == AFS_CALL_AWAIT_REPLY)
+ ASSERT(last);
+ if (call->state == AFS_CALL_AWAIT_REPLY)
call->state = AFS_CALL_COMPLETE;
break;
case -ENOTCONN:
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call)
abort_code = RX_INVALID_OPERATION;
goto do_abort;
default:
+ unmarshal_error:
abort_code = RXGEN_CC_UNMARSHAL;
if (call->state != AFS_CALL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call)
call->state = AFS_CALL_ERROR;
break;
}
- afs_data_delivered(skb);
- skb = NULL;
- continue;
+ break;
case RXRPC_SKB_MARK_FINAL_ACK:
_debug("Rcv ACK");
call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call)
}
/*
- * empty a socket buffer into a flat reply buffer
+ * Empty a socket buffer into a flat reply buffer.
*/
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
{
size_t len = skb->len;
- if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
- BUG();
- call->reply_size += len;
+ if (len > call->reply_max - call->reply_size) {
+ _leave(" = -EBADMSG [%zu > %u]",
+ len, call->reply_max - call->reply_size);
+ return -EBADMSG;
+ }
+
+ if (len > 0) {
+ if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
+ len) < 0)
+ BUG();
+ call->reply_size += len;
+ }
+
+ afs_data_consumed(call, skb);
+ if (!last)
+ return -EAGAIN;
+
+ if (call->reply_size != call->reply_max) {
+ _leave(" = -EBADMSG [%u != %u]",
+ call->reply_size, call->reply_max);
+ return -EBADMSG;
+ }
+ return 0;
}
/*
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
}
/*
- * grab the operation ID from an incoming cache manager call
+ * Grab the operation ID from an incoming cache manager call. The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
*/
static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
bool last)
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
call->offset += len;
if (call->offset < 4) {
- if (last) {
- _leave(" = -EBADMSG [op ID short]");
- return -EBADMSG;
- }
- _leave(" = 0 [incomplete]");
- return 0;
+ afs_data_consumed(call, skb);
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
}
call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
}
/*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
*/
int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
call->offset += len;
if (call->offset < count) {
- if (last) {
- _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
- return -EBADMSG;
- }
+ afs_data_consumed(call, skb);
_leave(" = -EAGAIN");
return -EAGAIN;
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0cd1829..f94d1abdc3ebc 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
struct afs_cache_vlocation *entry;
__be32 *bp;
u32 tmp;
- int loop;
+ int loop, ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
entry = call->reply;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7f6aff3f72eba..e5495f37c6ed5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
+ install_exec_creds(bprm);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
@@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
- install_exec_creds(bprm);
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c3cdde87cc8c6..08ae99343d92f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
* thaw_bdev drops it.
*/
sb = get_super(bdev);
- drop_super(sb);
+ if (sb)
+ drop_super(sb);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
@@ -646,7 +647,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type,
{
struct dentry *dent;
dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
- if (dent)
+ if (!IS_ERR(dent))
dent->d_sb->s_iflags |= SB_I_CGROUPWB;
return dent;
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2b88439c2ee86..455a6b2fd5395 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
list_del(&ref2->list);
kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+ cond_resched();
}
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2fe8f89091a30..eff3993c77b33 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1028,6 +1028,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
+ bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
/* filesystem state */
unsigned long fs_state;
@@ -1079,6 +1080,8 @@ struct btrfs_fs_info {
struct list_head pinned_chunks;
int creating_free_space_tree;
+ /* Used to record internally whether fs has been frozen */
+ int fs_frozen;
};
struct btrfs_subvolume_writers {
@@ -2578,7 +2581,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d9ddcfc18c91f..ac02e041464bc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_qgroup_extent_record *qexisting;
int count_mod = 1;
int must_insert_reserved = 0;
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
- qexisting = btrfs_qgroup_insert_dirty_extent(fs_info,
- delayed_refs,
- qrecord);
- if (qexisting)
+ if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
+ delayed_refs, qrecord))
kfree(qrecord);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59febfb8d04a3..54bc8c7c6bcd3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
u32 nritems = btrfs_header_nritems(leaf);
int slot;
- if (nritems == 0)
+ if (nritems == 0) {
+ struct btrfs_root *check_root;
+
+ key.objectid = btrfs_header_owner(leaf);
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots has not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ /* if leaf is the root, then it's fine */
+ if (leaf->start !=
+ btrfs_root_bytenr(&check_root->root_item)) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, root, 0);
+ return -EIO;
+ }
+ }
return 0;
+ }
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+ unsigned long nr = btrfs_header_nritems(node);
+
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+ btrfs_crit(root->fs_info,
+ "corrupt node: block %llu root %llu nritems %lu",
+ node->start, root->objectid, nr);
+ return -EIO;
+ }
+ return 0;
+}
+
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
}
+ if (found_level > 0 && check_node(root, eb))
+ ret = -EIO;
+
if (!ret)
set_extent_buffer_uptodate(eb);
err:
@@ -1618,8 +1655,8 @@ fail:
return ret;
}
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
- u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+ u64 root_id)
{
struct btrfs_root *root;
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
fs_info->qgroup_ulist = NULL;
+ fs_info->qgroup_rescan_running = false;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
+ fs_info->fs_frozen = 0;
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
btrfs_free_log(NULL, root);
+ if (root->reloc_root) {
+ free_extent_buffer(root->reloc_root->node);
+ free_extent_buffer(root->reloc_root->commit_root);
+ btrfs_put_fs_root(root->reloc_root);
+ root->reloc_root = NULL;
+ }
+ }
if (root->free_ino_pinned)
__btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
smp_mb();
/* wait for the qgroup rescan worker to stop */
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b3207a0e09f79..f19a982f5a4f1 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
struct btrfs_key *location);
int btrfs_init_fs_root(struct btrfs_root *root);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+ u64 root_id);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 61b494e8e604e..0450dc4105339 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@ enum {
CHUNK_ALLOC_FORCE = 2,
};
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- * ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- * bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
- RESERVE_FREE = 0,
- RESERVE_ALLOC = 1,
- RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve,
- int delalloc);
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 ram_bytes, u64 num_bytes, int delalloc);
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@ again:
dcs = BTRFS_DC_SETUP;
else if (ret == -ENOSPC)
set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
- btrfs_free_reserved_data_space(inode, 0, num_pages);
out_put:
iput(inode);
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
}
}
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ * - return 0 if it doesn't need to allocate a new chunk,
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags, int force)
{
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans, root);
- if (ret == -ENOSPC)
+ if (ret > 0 || ret == -ENOSPC)
ret = 0;
break;
case COMMIT_TRANS:
@@ -6497,19 +6491,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
}
/**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
+ * btrfs_add_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
+ * @ram_bytes: The number of bytes of file content, and will be same to
+ * @num_bytes except for the compress path.
* @num_bytes: The number of bytes in question
- * @reserve: One of the reservation enums
* @delalloc: The blocks are allocated for the delalloc write
*
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk. For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+ * This is called by the allocator when it reserves space. Metadata
+ * reservations should be called with RESERVE_ALLOC so we do the proper
* ENOSPC accounting. For data we handle the reservation through clearing the
* delalloc bits in the io_tree. We have to do this since we could end up
* allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6509,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
* make the reservation and return -EAGAIN, otherwise this function always
* succeeds.
*/
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int delalloc)
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 ram_bytes, u64 num_bytes, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
- if (reserve != RESERVE_FREE) {
- if (cache->ro) {
- ret = -EAGAIN;
- } else {
- cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
- if (reserve == RESERVE_ALLOC) {
- trace_btrfs_space_reservation(cache->fs_info,
- "space_info", space_info->flags,
- num_bytes, 0);
- space_info->bytes_may_use -= num_bytes;
- }
-
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
- }
+ if (cache->ro) {
+ ret = -EAGAIN;
} else {
- if (cache->ro)
- space_info->bytes_readonly += num_bytes;
- cache->reserved -= num_bytes;
- space_info->bytes_reserved -= num_bytes;
+ cache->reserved += num_bytes;
+ space_info->bytes_reserved += num_bytes;
+ trace_btrfs_space_reservation(cache->fs_info,
+ "space_info", space_info->flags,
+ ram_bytes, 0);
+ space_info->bytes_may_use -= ram_bytes;
if (delalloc)
- cache->delalloc_bytes -= num_bytes;
+ cache->delalloc_bytes += num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
}
+/**
+ * btrfs_free_reserved_bytes - update the block_group and space info counters
+ * @cache: The cache we are manipulating
+ * @num_bytes: The number of bytes in question
+ * @delalloc: The blocks are allocated for the delalloc write
+ *
+ * This is called by somebody who is freeing space that was never actually used
+ * on disk. For example if you reserve some space for a new leaf in transaction
+ * A and before transaction A commits you free that leaf, you call this with
+ * reserve set to 0 in order to clear the reservation.
+ */
+
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int delalloc)
+{
+ struct btrfs_space_info *space_info = cache->space_info;
+ int ret = 0;
+
+ spin_lock(&space_info->lock);
+ spin_lock(&cache->lock);
+ if (cache->ro)
+ space_info->bytes_readonly += num_bytes;
+ cache->reserved -= num_bytes;
+ space_info->bytes_reserved -= num_bytes;
+
+ if (delalloc)
+ cache->delalloc_bytes -= num_bytes;
+ spin_unlock(&cache->lock);
+ spin_unlock(&space_info->lock);
+ return ret;
+}
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -7191,7 +7200,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
- btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+ btrfs_free_reserved_bytes(cache, buf->len, 0);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
@@ -7416,9 +7425,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
* the free space extent currently.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
- u64 num_bytes, u64 empty_size,
- u64 hint_byte, struct btrfs_key *ins,
- u64 flags, int delalloc)
+ u64 ram_bytes, u64 num_bytes, u64 empty_size,
+ u64 hint_byte, struct btrfs_key *ins,
+ u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7439,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_space_info *space_info;
int loop = 0;
int index = __get_raid_index(flags);
- int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
- RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
@@ -7763,8 +7770,8 @@ checks:
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_update_reserved_bytes(block_group, num_bytes,
- alloc_type, delalloc);
+ ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+ num_bytes, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
@@ -7936,7 +7943,7 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7955,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
- flags, delalloc);
+ ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+ hint_byte, ins, flags, delalloc);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(root->fs_info,
ins->objectid);
@@ -7958,6 +7965,7 @@ again:
num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
+ ram_bytes = num_bytes;
if (num_bytes == min_alloc_size)
final_tried = true;
goto again;
@@ -7995,7 +8003,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
if (btrfs_test_opt(root->fs_info, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+ btrfs_free_reserved_bytes(cache, len, delalloc);
trace_btrfs_reserved_extent_free(root, start, len);
}
@@ -8223,8 +8231,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
if (!block_group)
return -EINVAL;
- ret = btrfs_update_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT, 0);
+ ret = btrfs_add_reserved_bytes(block_group, ins->offset,
+ ins->offset, 0);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
@@ -8368,7 +8376,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
- ret = btrfs_reserve_extent(root, blocksize, blocksize,
+ ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
empty_size, hint, &ins, 0, 0);
if (ret)
goto out_unuse;
@@ -8521,35 +8529,6 @@ reada:
wc->reada_slot = slot;
}
-/*
- * These may not be seen by the usual inc/dec ref code so we have to
- * add them here.
- */
-static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes)
-{
- struct btrfs_qgroup_extent_record *qrecord;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
- if (!qrecord)
- return -ENOMEM;
-
- qrecord->bytenr = bytenr;
- qrecord->num_bytes = num_bytes;
- qrecord->old_roots = NULL;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
- delayed_refs, qrecord))
- kfree(qrecord);
- spin_unlock(&delayed_refs->lock);
-
- return 0;
-}
-
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
@@ -8583,7 +8562,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
- ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+ ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+ bytenr, num_bytes, GFP_NOFS);
if (ret)
return ret;
}
@@ -8732,8 +8712,9 @@ walk_down:
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
- ret = record_one_subtree_extent(trans, root, child_bytenr,
- root->nodesize);
+ ret = btrfs_qgroup_insert_dirty_extent(trans,
+ root->fs_info, child_bytenr,
+ root->nodesize, GFP_NOFS);
if (ret)
goto out;
}
@@ -9906,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root,
} else {
ret = 0;
}
+ free_extent_map(em);
goto out;
}
path->slots[0]++;
@@ -9942,6 +9924,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
block_group->iref = 0;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
+ ASSERT(block_group->io_ctl.inode == NULL);
iput(inode);
last = block_group->key.objectid + block_group->key.offset;
btrfs_put_block_group(block_group);
@@ -9999,6 +9982,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
free_excluded_extents(info->extent_root, block_group);
btrfs_remove_free_space_cache(block_group);
+ ASSERT(list_empty(&block_group->dirty_list));
+ ASSERT(list_empty(&block_group->io_list));
+ ASSERT(list_empty(&block_group->bg_list));
+ ASSERT(atomic_read(&block_group->count) == 1);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc2729a7612db..28cd88fccc7ea 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,6 +20,7 @@
#define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1U << 15)
#define EXTENT_QGROUP_RESERVED (1U << 16)
+#define EXTENT_CLEAR_DATA_RESV (1U << 17)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5842423f8f47b..fea31a4a6e368 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2070,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
trans->sync = true;
- btrfs_init_log_ctx(&ctx);
+ btrfs_init_log_ctx(&ctx, inode);
ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) {
@@ -2675,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_start = round_down(offset, blocksize);
alloc_end = round_up(offset + len, blocksize);
+ cur_offset = alloc_start;
/* Make sure we aren't being give some crap mode */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2767,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
- cur_offset = alloc_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
@@ -2794,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
last_byte - cur_offset);
if (ret < 0)
break;
+ } else {
+ /*
+ * Do not need to reserve unwritten extent for this
+ * range, free reserved data space first, otherwise
+ * it'll result in false ENOSPC error.
+ */
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ last_byte - cur_offset);
}
free_extent_map(em);
cur_offset = last_byte;
@@ -2811,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
range->start,
range->len, 1 << inode->i_blkbits,
offset + len, &alloc_hint);
+ else
+ btrfs_free_reserved_data_space(inode, range->start,
+ range->len);
list_del(&range->list);
kfree(range);
}
@@ -2845,18 +2856,11 @@ out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_KERNEL);
out:
- /*
- * As we waited the extent range, the data_rsv_map must be empty
- * in the range, as written data range will be released from it.
- * And for prealloacted extent, it will also be released when
- * its metadata is written.
- * So this is completely used as cleanup.
- */
- btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
inode_unlock(inode);
/* Let go of our reservation. */
- btrfs_free_reserved_data_space(inode, alloc_start,
- alloc_end - alloc_start);
+ if (ret != 0)
+ btrfs_free_reserved_data_space(inode, alloc_start,
+ alloc_end - cur_offset);
return ret;
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index aa6fabaee72ed..359ee861b5a4b 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -495,10 +495,9 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_space(inode, 0, prealloc);
+ btrfs_delalloc_release_metadata(inode, prealloc);
goto out_put;
}
- btrfs_free_reserved_data_space(inode, 0, prealloc);
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 08dfc57e22705..e6811c42e41ef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -566,6 +566,8 @@ cont:
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ end - start + 1);
goto free_pages_out;
}
}
@@ -742,7 +744,7 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1);
- ret = btrfs_reserve_extent(root,
+ ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_DEFRAG, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
-
+ btrfs_free_reserved_data_space_noquota(inode, start,
+ end - start + 1);
*nr_written = *nr_written +
(end - start + PAGE_SIZE) / PAGE_SIZE;
*page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(root, cur_alloc_size,
+ ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
@@ -1489,8 +1492,10 @@ out_check:
extent_clear_unlock_delalloc(inode, cur_offset,
cur_offset + num_bytes - 1,
locked_page, EXTENT_LOCKED |
- EXTENT_DELALLOC, PAGE_UNLOCK |
- PAGE_SET_PRIVATE2);
+ EXTENT_DELALLOC |
+ EXTENT_CLEAR_DATA_RESV,
+ PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
- && do_list && !(state->state & EXTENT_NORESERVE))
+ && do_list && !(state->state & EXTENT_NORESERVE)
+ && (*bits & (EXTENT_DO_ACCOUNTING |
+ EXTENT_CLEAR_DATA_RESV)))
btrfs_free_reserved_data_space_noquota(inode,
state->start, len);
@@ -7251,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
int ret;
alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+ ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
alloc_hint, &ins, 1, 1);
if (ret)
return ERR_PTR(ret);
@@ -7751,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
ret = PTR_ERR(em2);
goto unlock_err;
}
+ /*
+ * For inode marked NODATACOW or extent marked PREALLOC,
+ * use the existing or preallocated extent, so does not
+ * need to adjust btrfs_space_info's bytes_may_use.
+ */
+ btrfs_free_reserved_data_space_noquota(inode,
+ start, len);
goto unlock;
}
}
@@ -7785,7 +7799,6 @@ unlock:
i_size_write(inode, start + len);
adjust_dio_outstanding_extents(inode, dio_data, len);
- btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
dio_data->unsubmitted_oe_range_end = start + len;
@@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 last_alloc = (u64)-1;
int ret = 0;
bool own_trans = true;
+ u64 end = start + num_bytes - 1;
if (trans)
own_trans = false;
@@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
* sized chunks.
*/
cur_bytes = min(cur_bytes, last_alloc);
- ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
- *alloc_hint, &ins, 1, 0);
+ ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+ min_size, 0, *alloc_hint, &ins, 1, 0);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -10414,6 +10428,9 @@ next:
if (own_trans)
btrfs_end_transaction(trans, root);
}
+ if (cur_offset < end)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ end - cur_offset + 1);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 14ed1e9e6bc83..b2a2da5893af5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return btrfs_qgroup_wait_for_completion(root->fs_info);
+ return btrfs_qgroup_wait_for_completion(root->fs_info, true);
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93ee1c18ef9d4..8db2e29fdcf41 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
goto out;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
return ret;
}
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record)
+int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
else if (bytenr > entry->bytenr)
p = &(*p)->rb_right;
else
- return entry;
+ return 1;
}
rb_link_node(&record->node, parent_node, p);
rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
- return NULL;
+ return 0;
+}
+
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+ gfp_t gfp_flag)
+{
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ int ret;
+
+ if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
+ return 0;
+ if (WARN_ON(trans == NULL))
+ return -EINVAL;
+ record = kmalloc(sizeof(*record), gfp_flag);
+ if (!record)
+ return -ENOMEM;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ record->bytenr = bytenr;
+ record->num_bytes = num_bytes;
+ record->old_roots = NULL;
+
+ spin_lock(&delayed_refs->lock);
+ ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
+ record);
+ spin_unlock(&delayed_refs->lock);
+ if (ret > 0)
+ kfree(record);
+ return 0;
}
#define UPDATE_NEW 0
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
int err = -ENOMEM;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = true;
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
path = btrfs_alloc_path();
if (!path)
goto out;
@@ -2369,6 +2402,9 @@ out:
}
done:
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = false;
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
complete_all(&fs_info->qgroup_rescan_completion);
}
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
return 0;
}
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible)
{
int running;
int ret = 0;
mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
- running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ running = fs_info->qgroup_rescan_running;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- if (running)
+ if (!running)
+ return 0;
+
+ if (interruptible)
ret = wait_for_completion_interruptible(
&fs_info->qgroup_rescan_completion);
+ else
+ wait_for_completion(&fs_info->qgroup_rescan_completion);
return ret;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 710887c06aaf4..1bc64c864b626 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_qgroup_extent_record *record);
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
+ *
+ * Return 0 for success insert
+ * Return >0 for existing record, caller can free @record safely.
+ * Error is not possible
+ */
+int btrfs_qgroup_insert_dirty_extent_nolock(
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record);
+
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * Better encapsulated version.
+ *
+ * Return 0 if the operation is done.
+ * Return <0 for error, like memory allocation failure or invalid parameter
+ * (NULL trans)
+ */
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+ gfp_t gfp_flag);
+
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5aea41b4a..8a2c2a07987b2 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
#include "async-thread.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "qgroup.h"
/*
* backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
u64 num_bytes;
int nr = 0;
int ret = 0;
+ u64 prealloc_start = cluster->start - offset;
+ u64 prealloc_end = cluster->end - offset;
+ u64 cur_offset;
BUG_ON(cluster->start != cluster->boundary[0]);
inode_lock(inode);
- ret = btrfs_check_data_free_space(inode, cluster->start,
- cluster->end + 1 - cluster->start);
+ ret = btrfs_check_data_free_space(inode, prealloc_start,
+ prealloc_end + 1 - prealloc_start);
if (ret)
goto out;
+ cur_offset = prealloc_start;
while (nr < cluster->nr) {
start = cluster->boundary[nr] - offset;
if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
num_bytes = end + 1 - start;
+ if (cur_offset < start)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ start - cur_offset);
ret = btrfs_prealloc_file_range(inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
+ cur_offset = end + 1;
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
if (ret)
break;
nr++;
}
- btrfs_free_reserved_data_space(inode, cluster->start,
- cluster->end + 1 - cluster->start);
+ if (cur_offset < prealloc_end)
+ btrfs_free_reserved_data_space(inode, cur_offset,
+ prealloc_end + 1 - cur_offset);
out:
inode_unlock(inode);
return ret;
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
return 0;
}
+/*
+ * Qgroup fixer for data chunk relocation.
+ * The data relocation is done in the following steps
+ * 1) Copy data extents into data reloc tree
+ * 2) Create tree reloc tree(special snapshot) for related subvolumes
+ * 3) Modify file extents in tree reloc tree
+ * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
+ *
+ * The problem is, data and tree reloc tree are not accounted to qgroup,
+ * and 4) will only info qgroup to track tree blocks change, not file extents
+ * in the tree blocks.
+ *
+ * The good news is, related data extents are all in data reloc tree, so we
+ * only need to info qgroup to track all file extents in data reloc tree
+ * before commit trans.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+ struct reloc_control *rc)
+{
+ struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+ struct inode *inode = rc->data_inode;
+ struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret = 0;
+
+ if (!fs_info->quota_enabled)
+ return 0;
+
+ /*
+ * Only for stage where we update data pointers the qgroup fix is
+ * valid.
+ * For MOVING_DATA stage, we will miss the timing of swapping tree
+ * blocks, and won't fix it.
+ */
+ if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+ while (1) {
+ struct btrfs_file_extent_item *fi;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid > btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ goto next;
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(path->nodes[0], fi) !=
+ BTRFS_FILE_EXTENT_REG)
+ goto next;
+ ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+ btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+ btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+ GFP_NOFS);
+ if (ret < 0)
+ break;
+next:
+ ret = btrfs_next_item(data_reloc_root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4196,16 @@ restart:
/* get rid of pinned extents */
trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- else
- btrfs_commit_transaction(trans, rc->extent_root);
+ goto out_free;
+ }
+ err = qgroup_fix_relocated_data_extents(trans, rc);
+ if (err < 0) {
+ btrfs_abort_transaction(trans, err);
+ goto out_free;
+ }
+ btrfs_commit_transaction(trans, rc->extent_root);
out_free:
btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
btrfs_free_path(path);
@@ -4468,10 +4568,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
unset_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- else
- err = btrfs_commit_transaction(trans, rc->extent_root);
+ goto out_free;
+ }
+ err = qgroup_fix_relocated_data_extents(trans, rc);
+ if (err < 0) {
+ btrfs_abort_transaction(trans, err);
+ goto out_free;
+ }
+ err = btrfs_commit_transaction(trans, rc->extent_root);
out_free:
kfree(rc);
out:
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7fd7e1830cfe6..091296062456b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
root_key.objectid = key.offset;
key.offset++;
+ /*
+ * The root might have been inserted already, as before we look
+ * for orphan roots, log replay might have happened, which
+ * triggers a transaction commit and qgroup accounting, which
+ * in turn reads and inserts fs roots while doing backref
+ * walking.
+ */
+ root = btrfs_lookup_fs_root(tree_root->fs_info,
+ root_key.objectid);
+ if (root) {
+ WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+ &root->state));
+ if (btrfs_root_refs(&root->root_item) == 0)
+ btrfs_add_dead_root(root);
+ continue;
+ }
+
root = btrfs_read_fs_root(tree_root, &root_key);
err = PTR_ERR_OR_ZERO(root);
if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
err = btrfs_insert_fs_root(root->fs_info, root);
- /*
- * The root might have been inserted already, as before we look
- * for orphan roots, log replay might have happened, which
- * triggers a transaction commit and qgroup accounting, which
- * in turn reads and inserts fs roots while doing backref
- * walking.
- */
- if (err == -EEXIST)
- err = 0;
if (err) {
+ BUG_ON(err == -EEXIST);
btrfs_free_fs_root(root);
break;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 864ce334f696c..4071fe2bd0981 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+ root->fs_info->fs_frozen = 1;
+ /*
+ * We don't need a barrier here, we'll wait for any transaction that
+ * could be in progress on other threads (and do delayed iputs that
+ * we want to avoid on a frozen filesystem), or do the commit
+ * ourselves.
+ */
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
/* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
return btrfs_commit_transaction(trans, root);
}
+static int btrfs_unfreeze(struct super_block *sb)
+{
+ struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+ root->fs_info->fs_frozen = 0;
+ return 0;
+}
+
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
.statfs = btrfs_statfs,
.remount_fs = btrfs_remount,
.freeze_fs = btrfs_freeze,
+ .unfreeze_fs = btrfs_unfreeze,
};
static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9cca0a7219618..95d41919d034e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ /*
+ * If fs has been frozen, we can not handle delayed iputs, otherwise
+ * it'll result in deadlock about SB_FREEZE_FS.
+ */
if (current != root->fs_info->transaction_kthread &&
- current != root->fs_info->cleaner_kthread)
+ current != root->fs_info->cleaner_kthread &&
+ !root->fs_info->fs_frozen)
btrfs_run_delayed_iputs(root);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fff3f3efa4360..e935035ac0343 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
#include "backref.h"
#include "hash.h"
#include "compression.h"
+#include "qgroup.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
offset = key->offset - btrfs_file_extent_offset(eb, item);
+ /*
+ * Manually record dirty extent, as here we did a shallow
+ * file extent item copy and skip normal backref update,
+ * but modifying extent tree all by ourselves.
+ * So need to manually record dirty extent for qgroup,
+ * as the owner of the file extent changed from log tree
+ * (doesn't affect qgroup) to fs/file tree(affects qgroup)
+ */
+ ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+ btrfs_file_extent_disk_bytenr(eb, item),
+ btrfs_file_extent_disk_num_bytes(eb, item),
+ GFP_NOFS);
+ if (ret < 0)
+ goto out;
+
if (ins.objectid > 0) {
u64 csum_start;
u64 csum_end;
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
mutex_unlock(&root->log_mutex);
- btrfs_init_log_ctx(&root_log_ctx);
+ btrfs_init_log_ctx(&root_log_ctx, NULL);
mutex_lock(&log_root_tree->log_mutex);
atomic_inc(&log_root_tree->log_batch);
@@ -4741,7 +4757,8 @@ again:
if (ret < 0) {
err = ret;
goto out_unlock;
- } else if (ret > 0) {
+ } else if (ret > 0 && ctx &&
+ other_ino != btrfs_ino(ctx->inode)) {
struct btrfs_key inode_key;
struct inode *other_inode;
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75d080d3..ab858e31ccbc2 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
int log_transid;
int io_err;
bool log_new_dentries;
+ struct inode *inode;
struct list_head list;
};
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+ struct inode *inode)
{
ctx->log_ret = 0;
ctx->log_transid = 0;
ctx->io_err = 0;
ctx->log_new_dentries = false;
+ ctx->inode = inode;
INIT_LIST_HEAD(&ctx->list);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 51f125508771a..035efce603a9c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work)
struct btrfs_device *device;
device = container_of(work, struct btrfs_device, rcu_work);
-
- if (device->bdev)
- blkdev_put(device->bdev, device->mode);
-
rcu_string_free(device->name);
kfree(device);
}
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head)
schedule_work(&device->rcu_work);
}
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+ if (device->bdev && device->writeable) {
+ sync_blockdev(device->bdev);
+ invalidate_bdev(device->bdev);
+ }
+
+ if (device->bdev)
+ blkdev_put(device->bdev, device->mode);
+}
+
static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (device->missing)
fs_devices->missing_devices--;
- if (device->bdev && device->writeable) {
- sync_blockdev(device->bdev);
- invalidate_bdev(device->bdev);
- }
+ btrfs_close_bdev(device);
new_device = btrfs_alloc_device(NULL, &device->devid,
device->uuid);
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
}
+ btrfs_close_bdev(device);
+
call_rcu(&device->rcu, free_device);
num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
/* zero out the old super if it is writable */
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
}
+
+ btrfs_close_bdev(srcdev);
+
call_rcu(&srcdev->rcu, free_device);
/*
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
* the device_list_mutex lock.
*/
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+ btrfs_close_bdev(tgtdev);
call_rcu(&tgtdev->rcu, free_device);
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea64912c9c0a..466f7d60edc27 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops;
static const struct file_operations format3_fops;
static const struct file_operations format4_fops;
-static int table_open(struct inode *inode, struct file *file)
+static int table_open1(struct inode *inode, struct file *file)
{
struct seq_file *seq;
- int ret = -1;
+ int ret;
- if (file->f_op == &format1_fops)
- ret = seq_open(file, &format1_seq_ops);
- else if (file->f_op == &format2_fops)
- ret = seq_open(file, &format2_seq_ops);
- else if (file->f_op == &format3_fops)
- ret = seq_open(file, &format3_seq_ops);
- else if (file->f_op == &format4_fops)
- ret = seq_open(file, &format4_seq_ops);
+ ret = seq_open(file, &format1_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open2(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &format2_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open3(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &format3_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private; /* the dlm_ls */
+ return 0;
+}
+
+static int table_open4(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+ ret = seq_open(file, &format4_seq_ops);
if (ret)
return ret;
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file)
static const struct file_operations format1_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open1,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = {
static const struct file_operations format2_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open2,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = {
static const struct file_operations format3_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open3,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = {
static const struct file_operations format4_fops = {
.owner = THIS_MODULE,
- .open = table_open,
+ .open = table_open4,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3131747199e16..c6ea25a190f88 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
sbi->s_want_extra_isize,
iloc, handle);
if (ret) {
- ext4_set_inode_state(inode,
- EXT4_STATE_NO_EXPAND);
if (mnt_count !=
le16_to_cpu(sbi->s_es->s_mnt_count)) {
ext4_warning(inode->i_sb,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1c593aa0218ee..3ec8708989ca0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2211,6 +2211,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors(struct super_block *sb,
+ ext4_fsblk_t sb_block,
ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2241,6 +2242,11 @@ static int ext4_check_descriptors(struct super_block *sb,
grp = i;
block_bitmap = ext4_block_bitmap(sb, gdp);
+ if (block_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "superblock", i);
+ }
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Block bitmap for group %u not in group "
@@ -2248,6 +2254,11 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
+ if (inode_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "superblock", i);
+ }
if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode bitmap for group %u not in group "
@@ -2255,6 +2266,11 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_table = ext4_inode_table(sb, gdp);
+ if (inode_table == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "superblock", i);
+ }
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -3757,7 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
- if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+ if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 39e9cfb1b3715..2eb935ca5d9ef 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1353,15 +1353,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
size_t min_offs, free;
int total_ino;
void *base, *start, *end;
- int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
+ int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+ int isize_diff; /* How much do we need to grow i_extra_isize */
down_write(&EXT4_I(inode)->xattr_sem);
+ /*
+ * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+ */
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
retry:
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
- up_write(&EXT4_I(inode)->xattr_sem);
- return 0;
- }
+ isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+ if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+ goto out;
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
@@ -1382,7 +1386,7 @@ retry:
goto cleanup;
free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
- if (free >= new_extra_isize) {
+ if (free >= isize_diff) {
entry = IFIRST(header);
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- new_extra_isize, (void *)raw_inode +
@@ -1390,8 +1394,7 @@ retry:
(void *)header, total_ino,
inode->i_sb->s_blocksize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
- error = 0;
- goto cleanup;
+ goto out;
}
/*
@@ -1414,7 +1417,7 @@ retry:
end = bh->b_data + bh->b_size;
min_offs = end - base;
free = ext4_xattr_free_space(first, &min_offs, base, NULL);
- if (free < new_extra_isize) {
+ if (free < isize_diff) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
@@ -1428,7 +1431,7 @@ retry:
free = inode->i_sb->s_blocksize;
}
- while (new_extra_isize > 0) {
+ while (isize_diff > 0) {
size_t offs, size, entry_size;
struct ext4_xattr_entry *small_entry = NULL;
struct ext4_xattr_info i = {
@@ -1459,7 +1462,7 @@ retry:
EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
EXT4_XATTR_LEN(last->e_name_len);
if (total_size <= free && total_size < min_total_size) {
- if (total_size < new_extra_isize) {
+ if (total_size < isize_diff) {
small_entry = last;
} else {
entry = last;
@@ -1514,22 +1517,22 @@ retry:
error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto cleanup;
+ total_ino -= entry_size;
entry = IFIRST(header);
- if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
- shift_bytes = new_extra_isize;
+ if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+ shift_bytes = isize_diff;
else
- shift_bytes = entry_size + size;
+ shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
/* Adjust the offsets and shift the remaining entries ahead */
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
- shift_bytes, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
- (void *)header, total_ino - entry_size,
- inode->i_sb->s_blocksize);
+ ext4_xattr_shift_entries(entry, -shift_bytes,
+ (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize + shift_bytes,
+ (void *)header, total_ino, inode->i_sb->s_blocksize);
- extra_isize += shift_bytes;
- new_extra_isize -= shift_bytes;
- EXT4_I(inode)->i_extra_isize = extra_isize;
+ isize_diff -= shift_bytes;
+ EXT4_I(inode)->i_extra_isize += shift_bytes;
+ header = IHDR(inode, raw_inode);
i.name = b_entry_name;
i.value = buffer;
@@ -1551,6 +1554,8 @@ retry:
kfree(bs);
}
brelse(bh);
+out:
+ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
@@ -1562,6 +1567,10 @@ cleanup:
kfree(is);
kfree(bs);
brelse(bh);
+ /*
+ * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
+ * size expansion failed.
+ */
up_write(&EXT4_I(inode)->xattr_sem);
return error;
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 69dd3e6566e02..a92e783fa0570 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -24,6 +24,7 @@
#define EXT4_XATTR_INDEX_SYSTEM 7
#define EXT4_XATTR_INDEX_RICHACL 8
#define EXT4_XATTR_INDEX_ENCRYPTION 9
+#define EXT4_XATTR_INDEX_HURD 10 /* Reserved for Hurd */
struct ext4_xattr_header {
__le32 h_magic; /* magic number for identification */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d64d2a515cb2c..ccb401eebc112 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
set_page_dirty(page);
- f2fs_put_page(page, 1);
if (pos + copied > i_size_read(inode))
f2fs_i_size_write(inode, pos + copied);
+ f2fs_put_page(page, 1);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 675fa79d86f65..14f5fe2b841e2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -538,7 +538,7 @@ struct f2fs_nm_info {
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
+ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
@@ -787,7 +787,7 @@ struct f2fs_sb_info {
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
- struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */
+ struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
wait_queue_head_t cp_wait;
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
@@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- percpu_down_read(&sbi->cp_rwsem);
+ down_read(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- percpu_up_read(&sbi->cp_rwsem);
+ up_read(&sbi->cp_rwsem);
}
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- percpu_down_write(&sbi->cp_rwsem);
+ down_write(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
- percpu_up_write(&sbi->cp_rwsem);
+ up_write(&sbi->cp_rwsem);
}
static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e493f63ea414..47abb96098e49 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2086,15 +2086,19 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
if (unlikely(f2fs_readonly(src->i_sb)))
return -EROFS;
- if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
- return -EISDIR;
+ if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
+ return -EINVAL;
if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
return -EOPNOTSUPP;
inode_lock(src);
- if (src != dst)
- inode_lock(dst);
+ if (src != dst) {
+ if (!inode_trylock(dst)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
ret = -EINVAL;
if (pos_in + len > src->i_size || pos_in + len < pos_in)
@@ -2152,6 +2156,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
out_unlock:
if (src != dst)
inode_unlock(dst);
+out:
inode_unlock(src);
return ret;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b2fa4b615925b..f75d197d5beb0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool need = false;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
need = true;
}
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need;
}
@@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool is_cp = true;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool need_update = true;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
- percpu_down_write(&nm_i->nat_tree_lock);
+ if (!down_write_trylock(&nm_i->nat_tree_lock))
+ return 0;
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
@@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->nid = nid;
/* Check nat cache */
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return;
}
@@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
/*
@@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
up_read(&curseg->journal_rwsem);
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt)
return;
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
/*
* if there are no enough space in journal to store dirty nat
@@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
@@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
- if (percpu_init_rwsem(&nm_i->nat_tree_lock))
- return -ENOMEM;
+ init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kmem_cache_free(nat_entry_set_slab, setvec[idx]);
}
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
- percpu_free_rwsem(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
sbi->nm_info = NULL;
kfree(nm_i);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1b86d3f638efc..7f863a645ab1d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
percpu_counter_destroy(&sbi->nr_pages[i]);
percpu_counter_destroy(&sbi->alloc_valid_block_count);
percpu_counter_destroy(&sbi->total_valid_inode_count);
-
- percpu_free_rwsem(&sbi->cp_rwsem);
}
static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
{
int i, err;
- if (percpu_init_rwsem(&sbi->cp_rwsem))
- return -ENOMEM;
-
for (i = 0; i < NR_COUNT_TYPE; i++) {
err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
if (err)
@@ -1686,6 +1681,7 @@ try_onemore:
sbi->write_io[i].bio = NULL;
}
+ init_rwsem(&sbi->cp_rwsem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f55a4e7560470..217847679f0ea 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work)
PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
- (end - start) >> SECTOR_SHIFT);
+ (end - start) >> SECTOR_SHIFT, end);
}
pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 18e6fd0b9506e..efc007f007427 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -141,6 +141,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
bool bl_scsi_layout;
+ u64 bl_lwb;
};
static inline struct pnfs_block_layout *
@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl,
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
sector_t end);
int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
- sector_t len);
+ sector_t len, u64 lwb);
bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent *ret, bool rw);
int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 992bcb19c11e7..c85fbfd2d0d99 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
int
ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
- sector_t len)
+ sector_t len, u64 lwb)
{
struct rb_root *root = &bl->bl_ext_rw;
sector_t end = start + len;
@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
}
}
out:
+ if (bl->bl_lwb < lwb)
+ bl->bl_lwb = lwb;
spin_unlock(&bl->bl_ext_lock);
__ext_put_deviceids(&tmp);
@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
}
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
- size_t buffer_size, size_t *count)
+ size_t buffer_size, size_t *count, __u64 *lastbyte)
{
struct pnfs_block_extent *be;
int ret = 0;
@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
}
+ *lastbyte = bl->bl_lwb - 1;
+ bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock);
return ret;
@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
arg->layoutupdate_pages = &arg->layoutupdate_page;
retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a7f2e6e333052..52a28311e2a4b 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
err_socks:
svc_rpcb_cleanup(serv, net);
err_bind:
+ nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %p\n", ret, net);
return ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c92a75e066a6f..f953ef6b2f2e7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp,
((u32 *)&rclist->rcl_sessionid.data)[3],
ref->rc_sequenceid, ref->rc_slotid);
- spin_lock(&tbl->slot_tbl_lock);
- status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
- tbl->slots[ref->rc_slotid].seq_nr ==
- ref->rc_sequenceid);
- spin_unlock(&tbl->slot_tbl_lock);
+ status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
+ ref->rc_sequenceid, HZ >> 1) < 0;
if (status)
goto out;
}
@@ -487,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
goto out;
tbl = &clp->cl_session->bc_slot_table;
- slot = tbl->slots + args->csa_slotid;
/* Set up res before grabbing the spinlock */
memcpy(&res->csr_sessionid, &args->csa_sessionid,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 003ebce4bbc49..1e106780a2375 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready);
* Initialise the timeout values for a connection
*/
void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
- unsigned int timeo, unsigned int retrans)
+ int timeo, int retrans)
{
to->to_initval = timeo * HZ / 10;
to->to_retries = retrans;
@@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
switch (proto) {
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
- if (to->to_retries == 0)
+ if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
- if (to->to_initval == 0)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
@@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_exponential = 0;
break;
case XPRT_TRANSPORT_UDP:
- if (to->to_retries == 0)
+ if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
- if (!to->to_initval)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
to->to_initval = NFS_MAX_UDP_TIMEOUT;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e6206eaf2bdf3..51b51369704c5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
if (ffl) {
INIT_LIST_HEAD(&ffl->error_list);
INIT_LIST_HEAD(&ffl->mirrors);
+ ffl->last_report_time = ktime_get();
return &ffl->generic_hdr;
} else
return NULL;
@@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
{
static const ktime_t notime = {0};
s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
+ struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
if (ktime_equal(mirror->start_time, notime))
mirror->start_time = now;
- if (ktime_equal(mirror->last_report_time, notime))
- mirror->last_report_time = now;
if (mirror->report_interval != 0)
report_interval = (s64)mirror->report_interval * 1000LL;
else if (layoutstats_timer != 0)
report_interval = (s64)layoutstats_timer * 1000LL;
- if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+ if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
report_interval) {
- mirror->last_report_time = now;
+ ffl->last_report_time = now;
return true;
}
@@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_pnfs_ds *ds;
+ bool fail_return = false;
int idx;
/* mirrors are sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
- ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+ if (idx+1 == fls->mirror_array_cnt)
+ fail_return = true;
+ ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
if (ds) {
*best_idx = idx;
return ds;
@@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
int ds_idx;
+retry:
/* Use full layout for now */
if (!pgio->pg_lseg)
ff_layout_pg_get_read(pgio, req, false);
@@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
if (!ds) {
- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
- goto out_pnfs;
- else
+ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
}
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
@@ -890,12 +897,6 @@ out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_read_mds(pgio);
- return;
-
-out_pnfs:
- pnfs_set_lo_fail(pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
}
static void
@@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int i;
int status;
+retry:
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
if (!ds) {
- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
- goto out_pnfs;
- else
+ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
}
pgm = &pgio->pg_mirrors[i];
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -956,12 +961,6 @@ out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_write_mds(pgio);
- return;
-
-out_pnfs:
- pnfs_set_lo_fail(pgio->pg_lseg);
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
}
static unsigned int
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 1bcdb15d0c41a..3ee0c9fcea763 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror {
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
- ktime_t last_report_time;
u32 report_interval;
};
@@ -101,6 +100,7 @@ struct nfs4_flexfile_layout {
struct pnfs_ds_commit_info commit_info;
struct list_head mirrors;
struct list_head error_list; /* nfs4_ff_layout_ds_err */
+ ktime_t last_report_time; /* Layoutstat report times */
};
static inline struct nfs4_flexfile_layout *
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 0aa36be71fcea..f7a3f6b05369a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -17,8 +17,8 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
-static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_retrans;
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
@@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
devid = &mirror->mirror_ds->id_node;
if (ff_layout_test_devid_unavailable(devid))
- goto out;
+ goto out_fail;
ds = mirror->mirror_ds->ds;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].rsize = max_payload;
if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
mirror->mirror_ds->ds_versions[0].wsize = max_payload;
- } else {
- ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
- mirror, lseg->pls_range.offset,
- lseg->pls_range.length, NFS4ERR_NXIO,
- OP_ILLEGAL, GFP_NOIO);
- if (fail_return || !ff_layout_has_available_ds(lseg))
- pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;
+ goto out;
}
+ ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
+ mirror, lseg->pls_range.offset,
+ lseg->pls_range.length, NFS4ERR_NXIO,
+ OP_ILLEGAL, GFP_NOIO);
+out_fail:
+ if (fail_return || !ff_layout_has_available_ds(lseg))
+ pnfs_error_mark_layout_for_return(ino, lseg);
+ ds = NULL;
out:
return ds;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7ce5e023c3c3c..74935a19e4bfc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -58,6 +58,9 @@ struct nfs_clone_mount {
*/
#define NFS_UNSPEC_PORT (-1)
+#define NFS_UNSPEC_RETRANS (UINT_MAX)
+#define NFS_UNSPEC_TIMEO (UINT_MAX)
+
/*
* Maximum number of pages that readdir can use for creating
* a vmapped array of pages.
@@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
void nfs_server_remove_lists(struct nfs_server *);
-void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
+void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
struct nfs_server *nfs_alloc_server(void);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6f47527348042..64b43b4ad9dd8 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -318,10 +318,22 @@ static void
nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
- struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layout_hdr *lo;
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ rpc_exit(task, 0);
+ return;
+ }
+ nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid);
+ spin_unlock(&inode->i_lock);
nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
&data->res.seq_res, task);
+
}
static void
@@ -341,11 +353,11 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
- if (lo && nfs4_stateid_match(&data->args.stateid,
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match(&data->args.stateid,
&lo->plh_stateid)) {
LIST_HEAD(head);
@@ -359,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
} else
spin_unlock(&inode->i_lock);
break;
+ case -NFS4ERR_OLD_STATEID:
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&data->args.stateid,
+ &lo->plh_stateid)) {
+ /* Do we need to delay before resending? */
+ if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+ &data->args.stateid))
+ rpc_delay(task, HZ);
+ rpc_restart_call_prepare(task);
+ }
+ spin_unlock(&inode->i_lock);
+ break;
case -ENOTSUPP:
case -EOPNOTSUPP:
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
- default:
- break;
}
dprintk("%s server returns %d\n", __func__, task->tk_status);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8d7d08d4f95f1..cd3b7cfdde16a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server,
goto error;
}
+ if (server->nfs_client == clp) {
+ error = -ELOOP;
+ goto error;
+ }
+
/*
* Query for the lease time on clientid setup or renewal
*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1949bbd806ebd..f5aecaabcb7c7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -634,15 +634,11 @@ out_sleep:
}
EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
-static int nfs40_sequence_done(struct rpc_task *task,
- struct nfs4_sequence_res *res)
+static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
{
struct nfs4_slot *slot = res->sr_slot;
struct nfs4_slot_table *tbl;
- if (slot == NULL)
- goto out;
-
tbl = slot->table;
spin_lock(&tbl->slot_tbl_lock);
if (!nfs41_wake_and_assign_slot(tbl, slot))
@@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task,
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slot = NULL;
-out:
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL)
+ nfs40_sequence_free_slot(res);
return 1;
}
@@ -666,6 +668,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
tbl = slot->table;
session = tbl->session;
+ /* Bump the slot sequence number */
+ if (slot->seq_done)
+ slot->seq_nr++;
+ slot->seq_done = 0;
+
spin_lock(&tbl->slot_tbl_lock);
/* Be nice to the server: try to ensure that the last transmitted
* value for highest_user_slotid <= target_highest_slotid
@@ -686,9 +693,12 @@ out_unlock:
res->sr_slot = NULL;
if (send_new_highest_used_slotid)
nfs41_notify_server(session->clp);
+ if (waitqueue_active(&tbl->slot_waitq))
+ wake_up_all(&tbl->slot_waitq);
}
-int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+static int nfs41_sequence_process(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot *slot = res->sr_slot;
@@ -714,7 +724,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
switch (res->sr_status) {
case 0:
/* Update the slot's sequence and clientid lease timer */
- ++slot->seq_nr;
+ slot->seq_done = 1;
clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
@@ -769,16 +779,16 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
goto retry_nowait;
default:
/* Just update the slot sequence no. */
- ++slot->seq_nr;
+ slot->seq_done = 1;
}
out:
/* The session may be reset by one of the error handlers. */
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
- nfs41_sequence_free_slot(res);
out_noaction:
return ret;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
+ nfs41_sequence_free_slot(res);
task->tk_status = 0;
ret = 0;
}
@@ -789,8 +799,37 @@ out_retry:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return 0;
}
+
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ if (!nfs41_sequence_process(task, res))
+ return 0;
+ if (res->sr_slot != NULL)
+ nfs41_sequence_free_slot(res);
+ return 1;
+
+}
EXPORT_SYMBOL_GPL(nfs41_sequence_done);
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot == NULL)
+ return 1;
+ if (res->sr_slot->table->session != NULL)
+ return nfs41_sequence_process(task, res);
+ return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL) {
+ if (res->sr_slot->table->session != NULL)
+ nfs41_sequence_free_slot(res);
+ else
+ nfs40_sequence_free_slot(res);
+ }
+}
+
int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
{
if (res->sr_slot == NULL)
@@ -920,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server,
args, res, task);
}
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+ return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+ if (res->sr_slot != NULL)
+ nfs40_sequence_free_slot(res);
+}
+
int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -1197,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref)
struct super_block *sb = p->dentry->d_sb;
nfs_free_seqid(p->o_arg.seqid);
+ nfs4_sequence_free_slot(&p->o_res.seq_res);
if (p->state != NULL)
nfs4_put_open_state(p->state);
nfs4_put_state_owner(p->owner);
@@ -1656,9 +1707,14 @@ err:
static struct nfs4_state *
nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
{
+ struct nfs4_state *ret;
+
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
- return _nfs4_opendata_reclaim_to_nfs4_state(data);
- return _nfs4_opendata_to_nfs4_state(data);
+ ret =_nfs4_opendata_reclaim_to_nfs4_state(data);
+ else
+ ret = _nfs4_opendata_to_nfs4_state(data);
+ nfs4_sequence_free_slot(&data->o_res.seq_res);
+ return ret;
}
static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -2056,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
- if (!nfs4_sequence_done(task, &data->o_res.seq_res))
+ if (!nfs4_sequence_process(task, &data->o_res.seq_res))
return;
if (task->tk_status == 0) {
@@ -7864,7 +7920,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs4_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
- nfs41_sequence_done(task, &lgp->res.seq_res);
+ nfs41_sequence_process(task, &lgp->res.seq_res);
dprintk("<-- %s\n", __func__);
}
@@ -8080,6 +8136,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
@@ -8106,7 +8163,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
dprintk("--> %s\n", __func__);
- if (!nfs41_sequence_done(task, &lrp->res.seq_res))
+ if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
server = NFS_SERVER(lrp->args.inode);
@@ -8118,6 +8175,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
rpc_restart_call_prepare(task);
return;
}
@@ -8138,6 +8196,7 @@ static void nfs4_layoutreturn_release(void *calldata)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&lo->plh_inode->i_lock);
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
pnfs_free_lseg_list(&freeme);
pnfs_put_layout_hdr(lrp->args.layout);
nfs_iput_and_deactive(lrp->inode);
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 332d06e64fa91..b62973045a3e0 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
tbl->highest_used_slotid = NFS4_NO_SLOT;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+ init_waitqueue_head(&tbl->slot_waitq);
init_completion(&tbl->complete);
}
@@ -172,6 +173,58 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
return ERR_PTR(-E2BIG);
}
+static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
+ u32 *seq_nr)
+ __must_hold(&tbl->slot_tbl_lock)
+{
+ struct nfs4_slot *slot;
+
+ slot = nfs4_lookup_slot(tbl, slotid);
+ if (IS_ERR(slot))
+ return PTR_ERR(slot);
+ *seq_nr = slot->seq_nr;
+ return 0;
+}
+
+/*
+ * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use
+ *
+ * Given a slot table, slot id and sequence number, determine if the
+ * RPC call in question is still in flight. This function is mainly
+ * intended for use by the callback channel.
+ */
+static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr)
+{
+ u32 cur_seq;
+ bool ret = false;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 &&
+ cur_seq == seq_nr && test_bit(slotid, tbl->used_slots))
+ ret = true;
+ spin_unlock(&tbl->slot_tbl_lock);
+ return ret;
+}
+
+/*
+ * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete
+ *
+ * Given a slot table, slot id and sequence number, wait until the
+ * corresponding RPC call completes. This function is mainly
+ * intended for use by the callback channel.
+ */
+int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr,
+ unsigned long timeout)
+{
+ if (wait_event_timeout(tbl->slot_waitq,
+ !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr),
+ timeout) == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+
/*
* nfs4_alloc_slot - efficiently look for a free slot
*
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 5b51298d1d037..f703b755351bd 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -21,7 +21,8 @@ struct nfs4_slot {
unsigned long generation;
u32 slot_nr;
u32 seq_nr;
- unsigned int interrupted : 1;
+ unsigned int interrupted : 1,
+ seq_done : 1;
};
/* Sessions */
@@ -36,6 +37,7 @@ struct nfs4_slot_table {
unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
spinlock_t slot_tbl_lock;
struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */
+ wait_queue_head_t slot_waitq; /* Completion wait on slot */
u32 max_slots; /* # slots in table */
u32 max_slotid; /* Max allowed slotid value */
u32 highest_used_slotid; /* sent to server on each SEQ.
@@ -78,6 +80,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+ u32 slotid, u32 seq_nr,
+ unsigned long timeout);
extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 70806cae0d36b..6daf034645c83 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1555,6 +1555,7 @@ pnfs_update_layout(struct inode *ino,
}
lookup_again:
+ nfs4_client_recover_expired_lease(clp);
first = false;
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
@@ -2510,7 +2511,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
data->args.fh = NFS_FH(inode);
data->args.inode = inode;
- nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
status = ld->prepare_layoutstats(&data->args);
if (status)
goto out_free;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 18d446e1a82bb..d39601381adf5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data) {
+ data->timeo = NFS_UNSPEC_TIMEO;
+ data->retrans = NFS_UNSPEC_RETRANS;
data->acregmin = NFS_DEF_ACREGMIN;
data->acregmax = NFS_DEF_ACREGMAX;
data->acdirmin = NFS_DEF_ACDIRMIN;
@@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option)
return rc;
}
+static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
+ unsigned long l_bound, unsigned long u_bound)
+{
+ int ret;
+
+ ret = nfs_get_option_ul(args, option);
+ if (ret != 0)
+ return ret;
+ if (*option < l_bound || *option > u_bound)
+ return -ERANGE;
+ return 0;
+}
+
/*
* Error-check and convert a string of mount options from user space into
* a data structure. The whole mount string is processed; bad options are
@@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw,
mnt->bsize = option;
break;
case Opt_timeo:
- if (nfs_get_option_ul(args, &option) || option == 0)
+ if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
goto out_invalid_value;
mnt->timeo = option;
break;
case Opt_retrans:
- if (nfs_get_option_ul(args, &option) || option == 0)
+ if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
goto out_invalid_value;
mnt->retrans = option;
break;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 19f532e7d35e9..6dc4296eed62c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
size -= n;
buf += n;
copied += n;
- if (!m->count)
+ if (!m->count) {
+ m->from = 0;
m->index++;
+ }
if (!size)
goto Done;
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index b45345d701e77..51157da3f76ed 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
p = c->gap_lebs;
do {
- ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
+ ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
written = layout_leb_in_gaps(c, p);
if (written < 0) {
err = written;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e237811f09ce5..11a004114eba5 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler,
dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
inode->i_ino, dentry, size);
- return __ubifs_getxattr(inode, name, buffer, size);
+ name = xattr_full_name(handler, name);
+ return __ubifs_getxattr(inode, name, buffer, size);
}
static int ubifs_xattr_set(const struct xattr_handler *handler,
@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler,
dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
name, inode->i_ino, dentry, size);
+ name = xattr_full_name(handler, name);
+
if (value)
return __ubifs_setxattr(inode, name, value, size, flags);
else