Fix race issue in fid contention.
Eric's and Greg's patches offer a mechanism to fix the
open-unlink-f*syscall bug in 9p, but they leave a race in parallel
accesses to a fid.
Greg's patch stores the fids of all opened files in the corresponding
inode, so fid lookups can preferentially retrieve a fid from the inode.
However, there is no mechanism to handle contention on those fids: two
threads may obtain the same fid at the same time, and one of them may
clunk the fid before the other is done with it. This can lead to fatal
problems, up to a kernel crash.
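Roughly, the window looks like this (an illustrative interleaving; the
function names are real, the exact call sites vary):

  task A                                   task B
  fid = v9fs_fid_find(dentry, uid, any);   fid = v9fs_fid_find(dentry, uid, any);
        /* both get the same cached fid */
  ...                                      p9_client_clunk(fid);
                                           /* fid destroyed and freed */
  p9_client_read(fid, ...);
        /* task A now uses a stale, freed fid */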
Introduce a mechanism to close this race: add a reference counter to
struct p9_fid. Whenever a fid is obtained from an inode or dentry, the
counter is incremented; it is decremented when the user is done with
the fid. A fid is guaranteed not to be clunked until its reference
counter drops to 0, so no thread can be left holding a clunked fid.
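With the counter in place, the rule for every user of a looked-up fid
becomes (a minimal sketch; error handling trimmed, but v9fs_fid_lookup()
and p9_client_clunk() are the real entry points):

	fid = v9fs_fid_lookup(dentry);	/* lookup takes a reference */
	if (IS_ERR(fid))
		return PTR_ERR(fid);

	/* ... use fid ... */

	p9_client_clunk(fid);		/* drop the reference; the TCLUNK is
					 * only sent once fid->count hits 0 */

This is the pattern applied throughout the diff below, e.g. in
v9fs_xattr_get() and v9fs_xattr_set().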
tests:
race issue test from the old test case:
for file in {01..50}; do touch f.${file}; done
seq 1 1000 | xargs -n 1 -P 50 -I{} cat f.* > /dev/null
open-unlink-f*syscall test:
Tested the following f*syscalls: ftruncate, fstat, fchown, fchmod,
faccessat.
Link: http://lkml.kernel.org/r/20200923141146.90046-5-jianyong.wu@arm.com
Fixes: 478ba09edc1f ("fs/9p: search open fids first")
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid)
{
+ atomic_set(&fid->count, 1);
hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata);
}
break;
}
}
+ if (ret && !IS_ERR(ret))
+ atomic_inc(&ret->count);
spin_unlock(&inode->i_lock);
return ret;
}
void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid)
{
spin_lock(&inode->i_lock);
+ atomic_set(&fid->count, 1);
hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private);
spin_unlock(&inode->i_lock);
}
hlist_for_each_entry(fid, h, dlist) {
if (any || uid_eq(fid->uid, uid)) {
ret = fid;
+ atomic_inc(&ret->count);
break;
}
}
fid = v9fs_fid_find(ds, uid, any);
if (fid) {
/* Found the parent fid do a lookup with that */
- fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1);
+ struct p9_fid *ofid = fid;
+
+ fid = p9_client_walk(ofid, 1, &dentry->d_name.name, 1);
+ p9_client_clunk(ofid);
goto fid_out;
}
up_read(&v9ses->rename_sem);
v9fs_fid_add(dentry->d_sb->s_root, fid);
}
/* If we are root ourself just return that */
- if (dentry->d_sb->s_root == dentry)
+ if (dentry->d_sb->s_root == dentry) {
+ atomic_inc(&fid->count);
return fid;
+ }
/*
* Do a multipath walk with attached root.
* When walking parent we need to make sure we
fid = ERR_PTR(-ENOENT);
} else {
__add_fid(dentry, fid);
+ atomic_inc(&fid->count);
spin_unlock(&dentry->d_lock);
}
}
struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
{
int err;
- struct p9_fid *fid;
+ struct p9_fid *fid, *ofid;
- fid = clone_fid(v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0));
+ ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
+ fid = clone_fid(ofid); /* clone_fid() propagates an IS_ERR(ofid) */
if (IS_ERR(fid))
goto error_out;
+ p9_client_clunk(ofid);
/*
* writeback fid will only be used to write back the
* dirty pages. We always request for the open fid in read-write
}
static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
{
- return clone_fid(v9fs_fid_lookup(dentry));
+ struct p9_fid *fid, *nfid;
+
+ fid = v9fs_fid_lookup(dentry);
+ if (!fid || IS_ERR(fid))
+ return fid;
+
+ nfid = p9_client_walk(fid, 0, NULL, 1);
+ p9_client_clunk(fid);
+ return nfid;
}
#endif
retval = v9fs_refresh_inode_dotl(fid, inode);
else
retval = v9fs_refresh_inode(fid, inode);
+ p9_client_clunk(fid);
+
if (retval == -ENOENT)
return 0;
if (retval < 0)
fid = filp->private_data;
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
- spin_lock(&inode->i_lock);
- hlist_del(&fid->ilist);
- spin_unlock(&inode->i_lock);
- if (fid)
+ if (fid) {
+ spin_lock(&inode->i_lock);
+ hlist_del(&fid->ilist);
+ spin_unlock(&inode->i_lock);
p9_client_clunk(fid);
+ }
return 0;
}
if (v9fs_proto_dotl(v9ses))
retval = p9_client_unlinkat(dfid, dentry->d_name.name,
v9fs_at_to_dotl_flags(flags));
+ p9_client_clunk(dfid);
if (retval == -EOPNOTSUPP) {
/* Try the one based on path */
v9fid = v9fs_fid_clone(dentry);
{
int err;
const unsigned char *name;
- struct p9_fid *dfid, *ofid, *fid;
+ struct p9_fid *dfid, *ofid = NULL, *fid = NULL;
struct inode *inode;
p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
err = 0;
- ofid = NULL;
- fid = NULL;
name = dentry->d_name.name;
dfid = v9fs_parent_fid(dentry);
if (IS_ERR(dfid)) {
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_client_clunk(dfid);
return ERR_PTR(err);
}
err = p9_client_fcreate(ofid, name, perm, mode, extension);
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
+ p9_client_clunk(dfid);
goto error;
}
p9_debug(P9_DEBUG_VFS,
"p9_client_walk failed %d\n", err);
fid = NULL;
+ p9_client_clunk(dfid);
goto error;
}
/*
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS,
"inode creation failed %d\n", err);
+ p9_client_clunk(dfid);
goto error;
}
v9fs_fid_add(dentry, fid);
d_instantiate(dentry, inode);
}
+ p9_client_clunk(dfid);
return ofid;
error:
if (ofid)
*/
name = dentry->d_name.name;
fid = p9_client_walk(dfid, 1, &name, 1);
+ p9_client_clunk(dfid);
if (fid == ERR_PTR(-ENOENT))
inode = NULL;
else if (IS_ERR(fid))
struct inode *old_inode;
struct inode *new_inode;
struct v9fs_session_info *v9ses;
- struct p9_fid *oldfid;
+ struct p9_fid *oldfid, *dfid;
struct p9_fid *olddirfid;
struct p9_fid *newdirfid;
struct p9_wstat wstat;
if (IS_ERR(oldfid))
return PTR_ERR(oldfid);
- olddirfid = clone_fid(v9fs_parent_fid(old_dentry));
+ dfid = v9fs_parent_fid(old_dentry);
+ olddirfid = clone_fid(dfid);
+ if (dfid && !IS_ERR(dfid))
+ p9_client_clunk(dfid);
+
if (IS_ERR(olddirfid)) {
retval = PTR_ERR(olddirfid);
goto done;
}
- newdirfid = clone_fid(v9fs_parent_fid(new_dentry));
+ dfid = v9fs_parent_fid(new_dentry);
+ newdirfid = clone_fid(dfid);
+ p9_client_clunk(dfid);
+
if (IS_ERR(newdirfid)) {
retval = PTR_ERR(newdirfid);
goto clunk_olddir;
p9_client_clunk(olddirfid);
done:
+ p9_client_clunk(oldfid);
return retval;
}
return PTR_ERR(fid);
st = p9_client_stat(fid);
+ p9_client_clunk(fid);
if (IS_ERR(st))
return PTR_ERR(st);
static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
- int retval;
+ int retval, use_dentry = 0;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL;
struct p9_wstat wstat;
fid = iattr->ia_file->private_data;
WARN_ON(!fid);
}
- if (!fid)
+ if (!fid) {
fid = v9fs_fid_lookup(dentry);
+ use_dentry = 1;
+ }
if(IS_ERR(fid))
return PTR_ERR(fid);
filemap_write_and_wait(d_inode(dentry)->i_mapping);
retval = p9_client_wstat(fid, &wstat);
+
+ if (use_dentry)
+ p9_client_clunk(fid);
+
if (retval < 0)
return retval;
return ERR_PTR(-EBADF);
st = p9_client_stat(fid);
+ p9_client_clunk(fid);
if (IS_ERR(st))
return ERR_CAST(st);
/* instantiate inode and assign the unopened fid to the dentry */
fid = p9_client_walk(dfid, 1, &name, 1);
+ p9_client_clunk(dfid);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
if (err < 0)
goto error;
-
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
if (fid)
p9_client_clunk(fid);
v9fs_put_acl(dacl, pacl);
+ p9_client_clunk(dfid);
return err;
}
*/
st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ p9_client_clunk(fid);
if (IS_ERR(st))
return PTR_ERR(st);
int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
{
- int retval;
+ int retval, use_dentry = 0;
struct p9_fid *fid = NULL;
struct p9_iattr_dotl p9attr;
struct inode *inode = d_inode(dentry);
fid = iattr->ia_file->private_data;
WARN_ON(!fid);
}
- if (!fid)
+ if (!fid) {
fid = v9fs_fid_lookup(dentry);
+ use_dentry = 1;
+ }
if (IS_ERR(fid))
return PTR_ERR(fid);
filemap_write_and_wait(inode->i_mapping);
retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
+ if (retval < 0) {
+ if (use_dentry)
+ p9_client_clunk(fid);
return retval;
+ }
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode))
if (iattr->ia_valid & ATTR_MODE) {
/* We also want to update ACL when we update mode bits */
retval = v9fs_acl_chmod(inode, fid);
- if (retval < 0)
+ if (retval < 0) {
+ if (use_dentry)
+ p9_client_clunk(fid);
return retval;
+ }
}
+ if (use_dentry)
+ p9_client_clunk(fid);
+
return 0;
}
if (fid)
p9_client_clunk(fid);
+ p9_client_clunk(dfid);
return err;
}
return PTR_ERR(dfid);
oldfid = v9fs_fid_lookup(old_dentry);
- if (IS_ERR(oldfid))
+ if (IS_ERR(oldfid)) {
+ p9_client_clunk(dfid);
return PTR_ERR(oldfid);
+ }
err = p9_client_link(dfid, oldfid, dentry->d_name.name);
+ p9_client_clunk(dfid);
+ p9_client_clunk(oldfid);
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
return err;
return PTR_ERR(fid);
v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
+ p9_client_clunk(fid);
}
ihold(d_inode(old_dentry));
d_instantiate(dentry, d_inode(old_dentry));
if (fid)
p9_client_clunk(fid);
v9fs_put_acl(dacl, pacl);
+ p9_client_clunk(dfid);
+
return err;
}
if (IS_ERR(fid))
return ERR_CAST(fid);
retval = p9_client_readlink(fid, &target);
+ p9_client_clunk(fid);
if (retval)
return ERR_PTR(retval);
set_delayed_call(done, kfree_link, target);
}
res = simple_statfs(dentry, buf);
done:
+ p9_client_clunk(fid);
return res;
}
void *buffer, size_t buffer_size)
{
struct p9_fid *fid;
+ int ret;
p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
name, buffer_size);
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
+ ret = v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+ p9_client_clunk(fid);
- return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+ return ret;
}
/*
int v9fs_xattr_set(struct dentry *dentry, const char *name,
const void *value, size_t value_len, int flags)
{
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
- return v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+ int ret;
+ struct p9_fid *fid;
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+ ret = v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+ p9_client_clunk(fid);
+ return ret;
}
int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
*
* TODO: This needs lots of explanation.
*/
+enum fid_source {
+ FID_FROM_OTHER,
+ FID_FROM_INODE,
+ FID_FROM_DENTRY,
+};
struct p9_fid {
struct p9_client *clnt;
u32 fid;
+ atomic_t count;
int mode;
struct p9_qid qid;
u32 iounit;
fid->clnt = clnt;
fid->rdir = NULL;
fid->fid = 0;
+ atomic_set(&fid->count, 1);
idr_preload(GFP_KERNEL);
spin_lock_irq(&clnt->lock);
GFP_NOWAIT);
spin_unlock_irq(&clnt->lock);
idr_preload_end();
-
if (!ret)
return fid;
p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
-
req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
nwname, wnames);
if (IS_ERR(req)) {
struct p9_req_t *req;
int retries = 0;
- if (!fid) {
- pr_warn("%s (%d): Trying to clunk with NULL fid\n",
+ if (!fid || IS_ERR(fid)) {
+ pr_warn("%s (%d): Trying to clunk with invalid fid\n",
__func__, task_pid_nr(current));
dump_stack();
return 0;
}
+ if (!atomic_dec_and_test(&fid->count))
+ return 0;
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,