summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/dlmglue.c119
-rw-r--r--fs/ocfs2/dlmglue.h1
-rw-r--r--fs/ocfs2/file.c10
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/mmap.c44
-rw-r--r--fs/ocfs2/namei.c3
-rw-r--r--fs/ocfs2/ocfs2_fs.h8
8 files changed, 121 insertions, 68 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 97a972efab83b..68728de128646 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -788,35 +788,34 @@ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
spin_unlock(&lockres->l_lock);
}
-static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
- struct ocfs2_lock_holder *oh)
-{
- spin_lock(&lockres->l_lock);
- list_del(&oh->oh_list);
- spin_unlock(&lockres->l_lock);
-
- put_pid(oh->oh_owner_pid);
-}
-
-static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
+static struct ocfs2_lock_holder *
+ocfs2_pid_holder(struct ocfs2_lock_res *lockres,
+ struct pid *pid)
{
struct ocfs2_lock_holder *oh;
- struct pid *pid;
- /* look in the list of holders for one with the current task as owner */
spin_lock(&lockres->l_lock);
- pid = task_pid(current);
list_for_each_entry(oh, &lockres->l_holders, oh_list) {
if (oh->oh_owner_pid == pid) {
spin_unlock(&lockres->l_lock);
- return 1;
+ return oh;
}
}
spin_unlock(&lockres->l_lock);
+ return NULL;
+}
- return 0;
+static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
+ struct ocfs2_lock_holder *oh)
+{
+ spin_lock(&lockres->l_lock);
+ list_del(&oh->oh_list);
+ spin_unlock(&lockres->l_lock);
+
+ put_pid(oh->oh_owner_pid);
}
+
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
int level)
{
@@ -2610,34 +2609,93 @@ void ocfs2_inode_unlock(struct inode *inode,
*
* return < 0 on error, return == 0 if there's no lock holder on the stack
* before this call, return == 1 if this call would be a recursive locking.
+ * return == -EINVAL if this lock attempt will cause an upgrade which is forbidden.
+ *
+ * When taking lock levels into account, we face some different situations.
+ *
+ * 1. no lock is held
+ * In this case, just lock the inode as requested and return 0
+ *
+ * 2. We are holding a lock
+ * For this situation, things diverge into several cases
+ *
+ * wanted holding what to do
+ * ex ex see 2.1 below
+ * ex pr see 2.2 below
+ * pr ex see 2.1 below
+ * pr pr see 2.1 below
+ *
+ * 2.1 lock level that is being held is compatible
+ * with the wanted level, so no lock action will be taken.
+ *
+ * 2.2 Otherwise, an upgrade is needed, but it is forbidden.
+ *
+ * The reason why upgrade within a process is forbidden is that
+ * lock upgrade may cause deadlock. The following illustrates
+ * how it happens.
+ *
+ * thread on node1 thread on node2
+ * ocfs2_inode_lock_tracker(ex=0)
+ *
+ * <====== ocfs2_inode_lock_tracker(ex=1)
+ *
+ * ocfs2_inode_lock_tracker(ex=1)
*/
int ocfs2_inode_lock_tracker(struct inode *inode,
struct buffer_head **ret_bh,
int ex,
struct ocfs2_lock_holder *oh)
{
- int status;
- int arg_flags = 0, has_locked;
+ int status = 0;
struct ocfs2_lock_res *lockres;
+ struct ocfs2_lock_holder *tmp_oh;
+ struct pid *pid = task_pid(current);
+
lockres = &OCFS2_I(inode)->ip_inode_lockres;
- has_locked = ocfs2_is_locked_by_me(lockres);
- /* Just get buffer head if the cluster lock has been taken */
- if (has_locked)
- arg_flags = OCFS2_META_LOCK_GETBH;
+ tmp_oh = ocfs2_pid_holder(lockres, pid);
- if (likely(!has_locked || ret_bh)) {
- status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
+ if (!tmp_oh) {
+ /*
+ * This corresponds to the case 1.
+ * We haven't got any lock before.
+ */
+ status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
return status;
}
- }
- if (!has_locked)
+
+ oh->oh_ex = ex;
ocfs2_add_holder(lockres, oh);
+ return 0;
+ }
- return has_locked;
+ if (unlikely(ex && !tmp_oh->oh_ex)) {
+ /*
+ * case 2.2 upgrade may cause deadlock, forbid it.
+ */
+ mlog(ML_ERROR, "Recursive locking is not permitted to "
+ "upgrade to EX level from PR level.\n");
+ dump_stack();
+ return -EINVAL;
+ }
+
+ /*
+ * case 2.1 OCFS2_META_LOCK_GETBH flag makes ocfs2_inode_lock_full
+ * ignore the lock level and just update it.
+ */
+ if (ret_bh) {
+ status = ocfs2_inode_lock_full(inode, ret_bh, ex,
+ OCFS2_META_LOCK_GETBH);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ return status;
+ }
+ }
+ return tmp_oh ? 1 : 0;
}
void ocfs2_inode_unlock_tracker(struct inode *inode,
@@ -2649,12 +2707,13 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
lockres = &OCFS2_I(inode)->ip_inode_lockres;
/* had_lock means that the currect process already takes the cluster
- * lock previously. If had_lock is 1, we have nothing to do here, and
- * it will get unlocked where we got the lock.
+ * lock previously.
+ * If had_lock is 1, we have nothing to do here.
+ * If had_lock is 0, we will release the lock.
*/
if (!had_lock) {
+ ocfs2_inode_unlock(inode, oh->oh_ex);
ocfs2_remove_holder(lockres, oh);
- ocfs2_inode_unlock(inode, ex);
}
}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 256e0a9067b8c..4ec1c828f6e08 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -96,6 +96,7 @@ struct ocfs2_trim_fs_info {
struct ocfs2_lock_holder {
struct list_head oh_list;
struct pid *oh_owner_pid;
+ int oh_ex;
};
/* ocfs2_inode_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6ee94bc23f5b1..a2a8603d27e0c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -563,8 +563,8 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
return ret;
}
-static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
- u32 clusters_to_add, int mark_unwritten)
+static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten)
{
int status = 0;
int restart_func = 0;
@@ -1035,8 +1035,8 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
clusters_to_add -= oi->ip_clusters;
if (clusters_to_add) {
- ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
- clusters_to_add, 0);
+ ret = ocfs2_extend_allocation(inode, oi->ip_clusters,
+ clusters_to_add, 0);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1493,7 +1493,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
goto next;
}
- ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
+ ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
if (ret) {
if (ret != -ENOSPC)
mlog_errno(ret);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 1fdc9839cd931..7eb7f03531f6b 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -65,8 +65,6 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
-int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
- u32 clusters_to_add, int mark_unwritten);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index ab30c005cc4bc..994726ada857c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -402,7 +402,7 @@ out_err:
static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist,
unsigned int chunksize)
{
- int index;
+ u32 index;
index = __ilog2_u32(chunksize);
if (index >= OCFS2_INFO_MAX_HIST)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index fb9a20e3d6085..05220b365fb96 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,11 +44,11 @@
#include "ocfs2_trace.h"
-static int ocfs2_fault(struct vm_fault *vmf)
+static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
sigset_t oldset;
- int ret;
+ vm_fault_t ret;
ocfs2_block_signals(&oldset);
ret = filemap_fault(vmf);
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_fault *vmf)
return ret;
}
-static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
- struct page *page)
+static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
+ struct buffer_head *di_bh, struct page *page)
{
- int ret = VM_FAULT_NOPAGE;
+ int err;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
loff_t pos = page_offset(page);
@@ -105,15 +106,12 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
if (page->index == last_index)
len = ((size - 1) & ~PAGE_MASK) + 1;
- ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
+ err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
&locked_page, &fsdata, di_bh, page);
- if (ret) {
- if (ret != -ENOSPC)
- mlog_errno(ret);
- if (ret == -ENOMEM)
- ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_SIGBUS;
+ if (err) {
+ if (err != -ENOSPC)
+ mlog_errno(err);
+ ret = vmf_error(err);
goto out;
}
@@ -121,20 +119,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
ret = VM_FAULT_NOPAGE;
goto out;
}
- ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
- BUG_ON(ret != len);
+ err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
+ BUG_ON(err != len);
ret = VM_FAULT_LOCKED;
out:
return ret;
}
-static int ocfs2_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct buffer_head *di_bh = NULL;
sigset_t oldset;
- int ret;
+ int err;
+ vm_fault_t ret;
sb_start_pagefault(inode->i_sb);
ocfs2_block_signals(&oldset);
@@ -144,13 +143,10 @@ static int ocfs2_page_mkwrite(struct vm_fault *vmf)
* node. Taking the data lock will also ensure that we don't
* attempt page truncation as part of a downconvert.
*/
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
- mlog_errno(ret);
- if (ret == -ENOMEM)
- ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_SIGBUS;
+ err = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (err < 0) {
+ mlog_errno(err);
+ ret = vmf_error(err);
goto out;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8dd6f703c819d..b7ca84bc3df73 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2332,8 +2332,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
struct buffer_head *orphan_dir_bh,
bool dio)
{
- const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
- char name[namelen + 1];
+ char name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
struct ocfs2_dinode *orphan_fe;
int status = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5bb4a89f90453..7071ad0dec900 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -807,11 +807,11 @@ struct ocfs2_dir_block_trailer {
* in this block. (unused) */
/*10*/ __u8 db_signature[8]; /* Signature for verification */
__le64 db_reserved2;
- __le64 db_free_next; /* Next block in list (unused) */
-/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */
- __le64 db_parent_dinode; /* dinode which owns me, in
+/*20*/ __le64 db_free_next; /* Next block in list (unused) */
+ __le64 db_blkno; /* Offset on disk, in blocks */
+/*30*/ __le64 db_parent_dinode; /* dinode which owns me, in
blocks */
-/*30*/ struct ocfs2_block_check db_check; /* Error checking */
+ struct ocfs2_block_check db_check; /* Error checking */
/*40*/
};