Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torvalds/linux.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c7
-rw-r--r--fs/9p/vfs_inode_dotl.c7
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/afs/dir.c45
-rw-r--r--fs/afs/dynroot.c25
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/aio.c232
-rw-r--r--fs/anon_inodes.c30
-rw-r--r--fs/attr.c5
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/btrfs/acl.c13
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/check-integrity.c9
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/ctree.c53
-rw-r--r--fs/btrfs/ctree.h89
-rw-r--r--fs/btrfs/delayed-inode.c14
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c43
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dev-replace.c29
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/disk-io.c113
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c883
-rw-r--r--fs/btrfs/extent_io.c156
-rw-r--r--fs/btrfs/extent_io.h16
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c128
-rw-r--r--fs/btrfs/free-space-cache.c19
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode-map.c12
-rw-r--r--fs/btrfs/inode.c267
-rw-r--r--fs/btrfs/ioctl.c69
-rw-r--r--fs/btrfs/ordered-data.c138
-rw-r--r--fs/btrfs/ordered-data.h23
-rw-r--r--fs/btrfs/print-tree.c39
-rw-r--r--fs/btrfs/qgroup.c270
-rw-r--r--fs/btrfs/qgroup.h46
-rw-r--r--fs/btrfs/raid56.c109
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/relocation.c216
-rw-r--r--fs/btrfs/root-tree.c22
-rw-r--r--fs/btrfs/scrub.c679
-rw-r--r--fs/btrfs/send.c172
-rw-r--r--fs/btrfs/struct-funcs.c1
-rw-r--r--fs/btrfs/super.c115
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c24
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-checker.c115
-rw-r--r--fs/btrfs/tree-log.c270
-rw-r--r--fs/btrfs/volumes.c611
-rw-r--r--fs/btrfs/volumes.h31
-rw-r--r--fs/buffer.c76
-rw-r--r--fs/ceph/file.c7
-rw-r--r--fs/ceph/super.h3
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/dir.c7
-rw-r--r--fs/dcache.c81
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/ext2/ialloc.c3
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/file_table.c85
-rw-r--r--fs/fuse/dir.c25
-rw-r--r--fs/gfs2/inode.c32
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hostfs/hostfs_kern.c28
-rw-r--r--fs/hpfs/dir.c23
-rw-r--r--fs/hugetlbfs/inode.c54
-rw-r--r--fs/inode.c53
-rw-r--r--fs/internal.h7
-rw-r--r--fs/iomap.c532
-rw-r--r--fs/jfs/jfs_imap.c8
-rw-r--r--fs/jfs/jfs_inode.c10
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/locks.c20
-rw-r--r--fs/namei.c261
-rw-r--r--fs/namespace.c28
-rw-r--r--fs/nfs/dir.c23
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/open.c88
-rw-r--r--fs/pipe.c43
-rw-r--r--fs/timerfd.c14
-rw-r--r--fs/udf/namei.c12
-rw-r--r--fs/ufs/ialloc.c3
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/xfs_iomap.c6
-rw-r--r--fs/xfs/xfs_iops.c2
97 files changed, 3025 insertions, 3779 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 42e102e2e74a..85ff859d3af5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
static int
v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened)
+ struct file *file, unsigned flags, umode_t mode)
{
int err;
u32 perm;
@@ -917,7 +916,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9inode->writeback_fid = (void *) inode_fid;
}
mutex_unlock(&v9inode->v_mutex);
- err = finish_open(file, dentry, generic_file_open, opened);
+ err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -925,7 +924,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(d_inode(dentry), file);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
out:
dput(res);
return err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..4823e1c46999 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
static int
v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t omode,
- int *opened)
+ struct file *file, unsigned flags, umode_t omode)
{
int err = 0;
kgid_t gid;
@@ -352,13 +351,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
}
mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
- err = finish_open(file, dentry, generic_file_open, opened);
+ err = finish_open(file, dentry, generic_file_open);
if (err)
goto err_clunk_old_fid;
file->private_data = ofid;
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(inode, file);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
out:
v9fs_put_acl(dacl, pacl);
dput(res);
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index c836c425ca94..e91028d4340a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -287,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
ADFS_I(inode)->mmu_private = inode->i_size;
}
- insert_inode_hash(inode);
+ inode_fake_hash(inode);
out:
return inode;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 71fa525d63a0..7e099a7a4eb1 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -291,6 +291,7 @@ static void destroy_inodecache(void)
static const struct super_operations adfs_sops = {
.alloc_inode = adfs_alloc_inode,
.destroy_inode = adfs_destroy_inode,
+ .drop_inode = generic_delete_inode,
.write_inode = adfs_write_inode,
.put_super = adfs_put_super,
.statfs = adfs_statfs,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 7d623008157f..855bf2b79fed 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -822,6 +822,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
{
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct inode *inode;
+ struct dentry *d;
struct key *key;
int ret;
@@ -862,43 +863,17 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
afs_stat_v(dvnode, n_lookup);
inode = afs_do_lookup(dir, dentry, key);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- if (ret == -ENOENT) {
- inode = afs_try_auto_mntpt(dentry, dir);
- if (!IS_ERR(inode)) {
- key_put(key);
- goto success;
- }
-
- ret = PTR_ERR(inode);
- }
-
- key_put(key);
- if (ret == -ENOENT) {
- d_add(dentry, NULL);
- _leave(" = NULL [negative]");
- return NULL;
- }
- _leave(" = %d [do]", ret);
- return ERR_PTR(ret);
- }
- dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version;
-
- /* instantiate the dentry */
key_put(key);
- if (IS_ERR(inode)) {
- _leave(" = %ld", PTR_ERR(inode));
- return ERR_CAST(inode);
+ if (inode == ERR_PTR(-ENOENT)) {
+ inode = afs_try_auto_mntpt(dentry, dir);
+ } else {
+ dentry->d_fsdata =
+ (void *)(unsigned long)dvnode->status.data_version;
}
-
-success:
- d_add(dentry, inode);
- _leave(" = 0 { ino=%lu v=%u }",
- d_inode(dentry)->i_ino,
- d_inode(dentry)->i_generation);
-
- return NULL;
+ d = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(d))
+ d->d_fsdata = dentry->d_fsdata;
+ return d;
}
/*
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 174e843f0633..1cde710a8013 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -83,7 +83,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
out:
_leave("= %d", ret);
- return ERR_PTR(ret);
+ return ret == -ENOENT ? NULL : ERR_PTR(ret);
}
/*
@@ -141,12 +141,6 @@ out_p:
static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
- struct afs_vnode *vnode;
- struct inode *inode;
- int ret;
-
- vnode = AFS_FS_I(dir);
-
_enter("%pd", dentry);
ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -160,22 +154,7 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
memcmp(dentry->d_name.name, "@cell", 5) == 0)
return afs_lookup_atcell(dentry);
- inode = afs_try_auto_mntpt(dentry, dir);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- if (ret == -ENOENT) {
- d_add(dentry, NULL);
- _leave(" = NULL [negative]");
- return NULL;
- }
- _leave(" = %d [do]", ret);
- return ERR_PTR(ret);
- }
-
- d_add(dentry, inode);
- _leave(" = 0 { ino=%lu v=%u }",
- d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
- return NULL;
+ return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry);
}
const struct inode_operations afs_dynroot_inode_operations = {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
trace_afs_notify_call(rxcall, call);
call->need_attention = true;
- u = __atomic_add_unless(&call->usage, 1, 0);
+ u = atomic_fetch_add_unless(&call->usage, 1, 0);
if (u != 0) {
trace_afs_call(call, afs_call_trace_wake, u,
atomic_read(&call->net->nr_outstanding_calls),
diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..b9350f3360c6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -18,6 +19,7 @@
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/backing-dev.h>
+#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/sched/signal.h>
@@ -164,10 +166,21 @@ struct fsync_iocb {
bool datasync;
};
+struct poll_iocb {
+ struct file *file;
+ struct wait_queue_head *head;
+ __poll_t events;
+ bool woken;
+ bool cancelled;
+ struct wait_queue_entry wait;
+ struct work_struct work;
+};
+
struct aio_kiocb {
union {
struct kiocb rw;
struct fsync_iocb fsync;
+ struct poll_iocb poll;
};
struct kioctx *ki_ctx;
@@ -178,6 +191,7 @@ struct aio_kiocb {
struct list_head ki_list; /* the aio core uses this
* for cancellation */
+ refcount_t ki_refcnt;
/*
* If the aio_resfd field of the userspace iocb is not zero,
@@ -202,9 +216,7 @@ static const struct address_space_operations aio_ctx_aops;
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
- struct qstr this = QSTR_INIT("[aio]", 5);
struct file *file;
- struct path path;
struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -213,31 +225,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
inode->i_mapping->private_data = ctx;
inode->i_size = PAGE_SIZE * nr_pages;
- path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
- if (!path.dentry) {
+ file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+ O_RDWR, &aio_ring_fops);
+ if (IS_ERR(file))
iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- path.mnt = mntget(aio_mnt);
-
- d_instantiate(path.dentry, inode);
- file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
- if (IS_ERR(file)) {
- path_put(&path);
- return file;
- }
-
- file->f_flags = O_RDWR;
return file;
}
static struct dentry *aio_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
- struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
AIO_RING_MAGIC);
if (!IS_ERR(root))
@@ -1015,6 +1013,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
percpu_ref_get(&ctx->reqs);
INIT_LIST_HEAD(&req->ki_list);
+ refcount_set(&req->ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
out_put:
@@ -1049,6 +1048,15 @@ out:
return ret;
}
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+ if (refcount_read(&iocb->ki_refcnt) == 0 ||
+ refcount_dec_and_test(&iocb->ki_refcnt)) {
+ percpu_ref_put(&iocb->ki_ctx->reqs);
+ kmem_cache_free(kiocb_cachep, iocb);
+ }
+}
+
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1118,8 +1126,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
- kmem_cache_free(kiocb_cachep, iocb);
-
/*
* We have to order our ring_info tail store above and test
* of the wait list below outside the wait lock. This is
@@ -1130,8 +1136,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
-
- percpu_ref_put(&ctx->reqs);
+ iocb_put(iocb);
}
/* aio_read_events_ring
@@ -1592,6 +1597,182 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
return 0;
}
+static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+ struct file *file = iocb->poll.file;
+
+ aio_complete(iocb, mangle_poll(mask), 0);
+ fput(file);
+}
+
+static void aio_poll_complete_work(struct work_struct *work)
+{
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct poll_table_struct pt = { ._key = req->events };
+ struct kioctx *ctx = iocb->ki_ctx;
+ __poll_t mask = 0;
+
+ if (!READ_ONCE(req->cancelled))
+ mask = vfs_poll(req->file, &pt) & req->events;
+
+ /*
+ * Note that ->ki_cancel callers also delete iocb from active_reqs after
+ * calling ->ki_cancel. We need the ctx_lock roundtrip here to
+ * synchronize with them. In the cancellation case the list_del_init
+ * itself is not actually needed, but harmless so we keep it in to
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
+ if (!mask && !READ_ONCE(req->cancelled)) {
+ add_wait_queue(req->head, &req->wait);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
+ list_del_init(&iocb->ki_list);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ aio_poll_complete(iocb, mask);
+}
+
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+
+ spin_lock(&req->head->lock);
+ WRITE_ONCE(req->cancelled, true);
+ if (!list_empty(&req->wait.entry)) {
+ list_del_init(&req->wait.entry);
+ schedule_work(&aiocb->poll.work);
+ }
+ spin_unlock(&req->head->lock);
+
+ return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ __poll_t mask = key_to_poll(key);
+
+ req->woken = true;
+
+ /* for instances that support it check for an event match first: */
+ if (mask) {
+ if (!(mask & req->events))
+ return 0;
+
+ /* try to complete the iocb inline if we can: */
+ if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+ list_del(&iocb->ki_list);
+ spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+ list_del_init(&req->wait.entry);
+ aio_poll_complete(iocb, mask);
+ return 1;
+ }
+ }
+
+ list_del_init(&req->wait.entry);
+ schedule_work(&req->work);
+ return 1;
+}
+
+struct aio_poll_table {
+ struct poll_table_struct pt;
+ struct aio_kiocb *iocb;
+ int error;
+};
+
+static void
+aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct poll_table_struct *p)
+{
+ struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+ /* multiple wait queues per file are not supported */
+ if (unlikely(pt->iocb->poll.head)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
+ pt->error = 0;
+ pt->iocb->poll.head = head;
+ add_wait_queue(head, &pt->iocb->poll.wait);
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+ struct kioctx *ctx = aiocb->ki_ctx;
+ struct poll_iocb *req = &aiocb->poll;
+ struct aio_poll_table apt;
+ __poll_t mask;
+
+ /* reject any unknown events outside the normal event mask. */
+ if ((u16)iocb->aio_buf != iocb->aio_buf)
+ return -EINVAL;
+ /* reject fields that are not defined for poll */
+ if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+ return -EINVAL;
+
+ INIT_WORK(&req->work, aio_poll_complete_work);
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+ apt.iocb = aiocb;
+ apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+ /* initialized the list so that we can do list_empty checks */
+ INIT_LIST_HEAD(&req->wait.entry);
+ init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+
+ /* one for removal from waitqueue, one for this function */
+ refcount_set(&aiocb->ki_refcnt, 2);
+
+ mask = vfs_poll(req->file, &apt.pt) & req->events;
+ if (unlikely(!req->head)) {
+ /* we did not manage to set up a waitqueue, done */
+ goto out;
+ }
+
+ spin_lock_irq(&ctx->ctx_lock);
+ spin_lock(&req->head->lock);
+ if (req->woken) {
+ /* wake_up context handles the rest */
+ mask = 0;
+ apt.error = 0;
+ } else if (mask || apt.error) {
+ /* if we get an error or a mask we are done */
+ WARN_ON_ONCE(list_empty(&req->wait.entry));
+ list_del_init(&req->wait.entry);
+ } else {
+ /* actually waiting for an event */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+out:
+ if (unlikely(apt.error)) {
+ fput(req->file);
+ return apt.error;
+ }
+
+ if (mask)
+ aio_poll_complete(aiocb, mask);
+ iocb_put(aiocb);
+ return 0;
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
@@ -1665,6 +1846,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true);
break;
+ case IOCB_CMD_POLL:
+ ret = aio_poll(req, &iocb);
+ break;
default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3168ee4e77f4..91262c34b797 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -71,8 +71,6 @@ struct file *anon_inode_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags)
{
- struct qstr this;
- struct path path;
struct file *file;
if (IS_ERR(anon_inode_inode))
@@ -82,39 +80,23 @@ struct file *anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
/*
- * Link the inode to a directory entry by creating a unique name
- * using the inode sequence number.
- */
- file = ERR_PTR(-ENOMEM);
- this.name = name;
- this.len = strlen(name);
- this.hash = 0;
- path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
- if (!path.dentry)
- goto err_module;
-
- path.mnt = mntget(anon_inode_mnt);
- /*
* We know the anon_inode inode count is always greater than zero,
* so ihold() is safe.
*/
ihold(anon_inode_inode);
-
- d_instantiate(path.dentry, anon_inode_inode);
-
- file = alloc_file(&path, OPEN_FMODE(flags), fops);
+ file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name,
+ flags & (O_ACCMODE | O_NONBLOCK), fops);
if (IS_ERR(file))
- goto err_dput;
+ goto err;
+
file->f_mapping = anon_inode_inode->i_mapping;
- file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
file->private_data = priv;
return file;
-err_dput:
- path_put(&path);
-err_module:
+err:
+ iput(anon_inode_inode);
module_put(fops->owner);
return file;
}
diff --git a/fs/attr.c b/fs/attr.c
index e3d53bf12240..d22e8187477f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -120,7 +120,6 @@ EXPORT_SYMBOL(setattr_prepare);
* inode_newsize_ok - may this inode be truncated to a given size
* @inode: the inode to be truncated
* @offset: the new size to assign to the inode
- * @Returns: 0 on success, -ve errno on failure
*
* inode_newsize_ok must be called with i_mutex held.
*
@@ -130,6 +129,8 @@ EXPORT_SYMBOL(setattr_prepare);
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
+ *
+ * Return: 0 on success, -ve errno on failure
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
@@ -205,7 +206,7 @@ EXPORT_SYMBOL(setattr_copy);
/**
* notify_change - modify attributes of a filesytem object
* @dentry: object affected
- * @iattr: new attributes
+ * @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
*
* The caller must hold the i_mutex on the affected object.
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 125e8bbd22a2..8035d2a44561 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
struct file *file, unsigned int open_flag,
- umode_t create_mode, int *opened)
+ umode_t create_mode)
{
return -EIO;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 816cc921cf36..efae2fb0930a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1751,7 +1751,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
if (regset->core_note_type && regset->get &&
- (!regset->active || regset->active(t->task, regset))) {
+ (!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset_size(t->task, regset);
void *data = kmalloc(size, GFP_KERNEL);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 4b5fff31ef27..aa4a7a23ff99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
goto error;
if (fmt->flags & MISC_FMT_OPEN_FILE) {
- interp_file = filp_clone_open(fmt->interp_file);
+ interp_file = file_clone_open(fmt->interp_file);
if (!IS_ERR(interp_file))
deny_write_access(interp_file);
} else {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15e1dfef56a5..3b66c957ea6f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -30,23 +30,22 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
- BUG();
+ return ERR_PTR(-EINVAL);
}
- size = btrfs_getxattr(inode, name, "", 0);
+ size = btrfs_getxattr(inode, name, NULL, 0);
if (size > 0) {
value = kzalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = btrfs_getxattr(inode, name, value, size);
}
- if (size > 0) {
+ if (size > 0)
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- } else if (size == -ERANGE || size == -ENODATA || size == 0) {
+ else if (size == -ENODATA || size == 0)
acl = NULL;
- } else {
- acl = ERR_PTR(-EIO);
- }
+ else
+ acl = ERR_PTR(size);
kfree(value);
return acl;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0a8e2e29a66b..ae750b1574a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -925,7 +925,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
type = btrfs_get_extent_inline_ref_type(leaf, iref,
BTRFS_REF_TYPE_ANY);
if (type == BTRFS_REF_TYPE_INVALID)
- return -EINVAL;
+ return -EUCLEAN;
offset = btrfs_extent_inline_ref_offset(leaf, iref);
@@ -1793,7 +1793,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
*out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
BTRFS_REF_TYPE_ANY);
if (*out_type == BTRFS_REF_TYPE_INVALID)
- return -EINVAL;
+ return -EUCLEAN;
*ptr += btrfs_extent_inline_ref_size(*out_type);
WARN_ON(*ptr > end);
@@ -2225,7 +2225,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
fspath = init_data_container(total_bytes);
if (IS_ERR(fspath))
- return (void *)fspath;
+ return ERR_CAST(fspath);
ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
if (!ifp) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7e075343daa5..1343ac57b438 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -178,7 +178,7 @@ struct btrfs_inode {
struct btrfs_delayed_node *delayed_node;
/* File creation time. */
- struct timespec i_otime;
+ struct timespec64 i_otime;
/* Hook into fs_info->delayed_iputs */
struct list_head delayed_iput;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index a3fdb4fe967d..833cf3c35b4d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1539,7 +1539,12 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
}
device = multi->stripes[0].dev;
- block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
+ !device->bdev || !device->name)
+ block_ctx_out->dev = NULL;
+ else
+ block_ctx_out->dev = btrfsic_dev_state_lookup(
+ device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
@@ -1624,7 +1629,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
bio = btrfs_io_bio_alloc(num_pages - i);
bio_set_dev(bio, block_ctx->dev->bdev);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3e447b45bf7..9bfa66592aa7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -5,7 +5,6 @@
#include <linux/kernel.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -14,10 +13,7 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
-#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
@@ -303,7 +299,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct bio *bio = NULL;
struct compressed_bio *cb;
unsigned long bytes_left;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int pg_index = 0;
struct page *page;
u64 first_byte = disk_start;
@@ -342,9 +337,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- submit = io_tree->ops->merge_bio_hook(page, 0,
- PAGE_SIZE,
- bio, 0);
+ submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0);
page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
@@ -613,7 +606,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = bio->bi_iter.bi_size;
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
- bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
+ comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
refcount_set(&cb->pending_bios, 1);
@@ -626,9 +619,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- submit = tree->ops->merge_bio_hook(page, 0,
- PAGE_SIZE,
- comp_bio, 0);
+ submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE,
+ comp_bio, 0);
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
@@ -660,7 +652,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
- bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
+ comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bc326df472e..d436fb4c002e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -888,11 +888,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
- btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
- return 1;
-#endif
+
return 0;
}
@@ -3128,8 +3124,7 @@ again:
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
{
int i;
@@ -3181,7 +3176,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_set_item_key(eb, &disk_key, slot);
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/*
@@ -3359,17 +3354,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
root_add_used(root, fs_info->nodesize);
- memzero_extent_buffer(c, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
- btrfs_set_header_level(c, level);
- btrfs_set_header_bytenr(c, c->start);
- btrfs_set_header_generation(c, trans->transid);
- btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(c, root->root_key.objectid);
-
- write_extent_buffer_fsid(c, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid);
-
btrfs_set_node_key(c, &lower_key, 0);
btrfs_set_node_blockptr(c, 0, lower->start);
lower_gen = btrfs_header_generation(lower);
@@ -3498,15 +3483,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
return PTR_ERR(split);
root_add_used(root, fs_info->nodesize);
-
- memzero_extent_buffer(split, 0, sizeof(struct btrfs_header));
- btrfs_set_header_level(split, btrfs_header_level(c));
- btrfs_set_header_bytenr(split, split->start);
- btrfs_set_header_generation(split, trans->transid);
- btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(split, root->root_key.objectid);
- write_extent_buffer_fsid(split, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid);
+ ASSERT(btrfs_header_level(c) == level);
ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
if (ret) {
@@ -3945,7 +3922,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
clean_tree_block(fs_info, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -4292,15 +4269,6 @@ again:
root_add_used(root, fs_info->nodesize);
- memzero_extent_buffer(right, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(right, right->start);
- btrfs_set_header_generation(right, trans->transid);
- btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(right, root->root_key.objectid);
- btrfs_set_header_level(right, 0);
- write_extent_buffer_fsid(right, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid);
-
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
@@ -4320,7 +4288,7 @@ again:
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/*
* We create a new leaf 'right' for the required ins_len and
@@ -4642,7 +4610,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
item = btrfs_item_nr(slot);
@@ -4744,7 +4712,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (path->slots[0] == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -4886,7 +4854,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
int ret;
@@ -4919,7 +4886,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, level + 1);
+ fixup_low_keys(path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
}
@@ -5022,7 +4989,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..318be7864072 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -55,8 +55,6 @@ struct btrfs_ordered_sum;
#define BTRFS_OLDEST_GENERATION 0ULL
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
/*
* the max metadata block size. This limit is somewhat artificial,
* but the memmove costs go through the roof for larger blocks.
@@ -86,6 +84,14 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
+/*
+ * Use large batch size to reduce overhead of metadata updates. On the reader
+ * side, we only read it when we are close to ENOSPC and the read overhead is
+ * mostly related to the number of CPUs, so it is OK to use arbitrary large
+ * value here.
+ */
+#define BTRFS_TOTAL_BYTES_PINNED_BATCH SZ_128M
+
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
@@ -342,8 +348,8 @@ struct btrfs_path {
sizeof(struct btrfs_item))
struct btrfs_dev_replace {
u64 replace_state; /* see #define above */
- u64 time_started; /* seconds since 1-Jan-1970 */
- u64 time_stopped; /* seconds since 1-Jan-1970 */
+ time64_t time_started; /* seconds since 1-Jan-1970 */
+ time64_t time_stopped; /* seconds since 1-Jan-1970 */
atomic64_t num_write_errors;
atomic64_t num_uncorrectable_read_errors;
@@ -359,8 +365,6 @@ struct btrfs_dev_replace {
struct btrfs_device *srcdev;
struct btrfs_device *tgtdev;
- pid_t lock_owner;
- atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
rwlock_t lock;
atomic_t read_locks;
@@ -1213,7 +1217,6 @@ struct btrfs_root {
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
- char *name;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
@@ -2428,32 +2431,6 @@ static inline u32 btrfs_file_extent_inline_item_len(
return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
- int slot,
- const struct btrfs_file_extent_item *fi)
-{
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
- /*
- * return the space used on disk if this item isn't
- * compressed or encoded
- */
- if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
- btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
- btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
- return btrfs_file_extent_inline_item_len(eb,
- btrfs_item_nr(slot));
- }
-
- /* otherwise use the ram bytes field */
- return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
-}
-
-
/* btrfs_dev_stats_item */
static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
const struct btrfs_dev_stats_item *ptr,
@@ -2676,7 +2653,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
@@ -2716,15 +2692,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytes_used,
- u64 type, u64 chunk_offset, u64 size);
+ u64 bytes_used, u64 type, u64 chunk_offset,
+ u64 size);
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 group_start,
- struct extent_map *em);
+ u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
@@ -2786,7 +2761,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
@@ -2803,8 +2777,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@ -2812,8 +2785,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
@@ -2822,10 +2794,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
-void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const u64 type);
+void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
u64 start, u64 end);
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3011,16 +2983,14 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len);
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len);
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 sequence, const char *name,
+ int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+ int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const struct btrfs_key *key);
+ const struct btrfs_key *key);
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key,
struct btrfs_root_item *item);
@@ -3196,7 +3166,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end);
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
@@ -3452,7 +3422,7 @@ do { \
#ifdef CONFIG_BTRFS_ASSERT
__cold
-static inline void assfail(char *expr, char *file, int line)
+static inline void assfail(const char *expr, const char *file, int line)
{
pr_err("assertion failed: %s, file: %s, line: %d\n",
expr, file, line);
@@ -3465,6 +3435,13 @@ static inline void assfail(char *expr, char *file, int line)
#define ASSERT(expr) ((void)0)
#endif
+__cold
+static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
+{
+ btrfs_err(fs_info,
+"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
+}
+
__printf(5, 6)
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe6caa7e698b..f51b509f2d9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1222,7 +1222,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1418,7 +1418,6 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
const char *name, int name_len,
struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
@@ -1458,11 +1457,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
*/
BUG_ON(ret);
-
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
name_len, name, delayed_node->root->objectid,
delayed_node->inode_id, ret);
@@ -1495,7 +1493,6 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
}
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
@@ -1511,7 +1508,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
item_key.type = BTRFS_DIR_INDEX_KEY;
item_key.offset = index;
- ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
+ ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
+ &item_key);
if (!ret)
goto end;
@@ -1533,7 +1531,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&node->mutex);
ret = __btrfs_add_delayed_deletion_item(node, item);
if (unlikely(ret)) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
index, node->root->objectid, node->inode_id, ret);
BUG();
@@ -1837,7 +1835,7 @@ release_node:
int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
/*
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index ca7a97f3ab6b..33536cd681d4 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -86,14 +86,12 @@ static inline void btrfs_init_delayed_root(
}
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
const char *name, int name_len,
struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_inode *dir, u64 index);
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 03dec673d12a..62ff545ba1f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -709,13 +709,13 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -730,27 +730,33 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (!ref)
return -ENOMEM;
+ head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+ if (!head_ref) {
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ return -ENOMEM;
+ }
+
+ if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
+ is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record) {
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+ return -ENOMEM;
+ }
+ }
+
if (parent)
ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
else
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
ref_root, action, ref_type);
ref->root = ref_root;
ref->parent = parent;
ref->level = level;
- head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
- if (!head_ref)
- goto free_ref;
-
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
- is_fstree(ref_root)) {
- record = kmalloc(sizeof(*record), GFP_NOFS);
- if (!record)
- goto free_head_ref;
- }
-
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
ref_root, 0, action, false, is_system);
head_ref->extent_op = extent_op;
@@ -779,25 +785,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
-
-free_head_ref:
- kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
-free_ref:
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
-
- return -ENOMEM;
}
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
int *old_ref_mod, int *new_ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ea1aecb6a50d..d9f2a4ebd5db 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -234,14 +234,12 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
}
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e2ba0419297a..dec01970d8c5 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -6,14 +6,9 @@
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
#include <linux/kthread.h>
#include <linux/math64.h>
-#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -465,7 +460,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
- dev_replace->time_started = get_seconds();
+ dev_replace->time_started = ktime_get_real_seconds();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
@@ -511,7 +506,7 @@ leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
btrfs_dev_replace_write_unlock(dev_replace);
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
return ret;
}
@@ -618,7 +613,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
/* replace old device with new one in mapping tree */
@@ -637,7 +632,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
btrfs_rm_dev_replace_unblocked(fs_info);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@@ -663,7 +658,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
- btrfs_assign_next_active_device(fs_info, src_device, tgt_device);
+ btrfs_assign_next_active_device(src_device, tgt_device);
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
@@ -672,11 +667,17 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_rm_dev_replace_blocked(fs_info);
- btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
+ btrfs_rm_dev_replace_remove_srcdev(src_device);
btrfs_rm_dev_replace_unblocked(fs_info);
/*
+ * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
+ * update on-disk dev stats value during commit transaction
+ */
+ atomic_inc(&tgt_device->dev_stats_ccnt);
+
+ /*
* this is again a consistent state where no dev_replace procedure
* is running, the target device is part of the filesystem, the
* source device is not part of the filesystem anymore and its 1st
@@ -807,7 +808,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
@@ -826,7 +827,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
btrfs_dev_name(tgt_device));
if (tgt_device)
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
leave:
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@@ -848,7 +849,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_info(fs_info, "suspending dev_replace for unmount");
break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 39e9766d1cbd..a678b07fcf01 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -160,8 +160,8 @@ second_insert:
}
btrfs_release_path(path);
- ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
- name_len, dir, &disk_key, type, index);
+ ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
+ &disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092dc9390..5124c15705ce 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -5,8 +5,6 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
-#include <linux/scatterlist.h>
-#include <linux/swap.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
@@ -54,7 +52,6 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
-static void free_fs_root(struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -108,12 +105,9 @@ void __cold btrfs_end_io_wq_exit(void)
*/
struct async_submit_bio {
void *private_data;
- struct btrfs_fs_info *fs_info;
struct bio *bio;
extent_submit_bio_start_t *submit_bio_start;
- extent_submit_bio_done_t *submit_bio_done;
int mirror_num;
- unsigned long bio_flags;
/*
* bio_offset is optional, can be used if the pages in the bio
* can't tell us where in the file the bio should go
@@ -212,7 +206,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
@@ -615,8 +609,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
- found_start, eb->start);
+ btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
+ eb->start, found_start);
ret = -EIO;
goto err;
}
@@ -628,8 +622,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "bad tree block level %d",
- (int)btrfs_header_level(eb));
+ btrfs_err(fs_info, "bad tree block level %d on %llu",
+ (int)btrfs_header_level(eb), eb->start);
ret = -EIO;
goto err;
}
@@ -779,7 +773,7 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
+ btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
}
static void run_one_async_free(struct btrfs_work *work)
@@ -793,8 +787,7 @@ static void run_one_async_free(struct btrfs_work *work)
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
- extent_submit_bio_start_t *submit_bio_start,
- extent_submit_bio_done_t *submit_bio_done)
+ extent_submit_bio_start_t *submit_bio_start)
{
struct async_submit_bio *async;
@@ -803,16 +796,13 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
return BLK_STS_RESOURCE;
async->private_data = private_data;
- async->fs_info = fs_info;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
- async->submit_bio_done = submit_bio_done;
btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
run_one_async_done, run_one_async_free);
- async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
async->status = 0;
@@ -851,24 +841,6 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
- int mirror_num)
-{
- struct inode *inode = private_data;
- blk_status_t ret;
-
- /*
- * when we're called for a write, we're already in the async
- * submission context. Just jump into btrfs_map_bio
- */
- ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
- if (ret) {
- bio->bi_status = ret;
- bio_endio(bio);
- }
- return ret;
-}
-
static int check_async_write(struct btrfs_inode *bi)
{
if (atomic_read(&bi->sync_writers))
@@ -911,8 +883,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
bio_offset, private_data,
- btree_submit_bio_start,
- btree_submit_bio_done);
+ btree_submit_bio_start);
}
if (ret)
@@ -961,8 +932,9 @@ static int btree_writepages(struct address_space *mapping,
fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret < 0)
return 0;
}
@@ -1181,7 +1153,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->highest_objectid = 0;
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
- root->name = NULL;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1292,15 +1263,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
goto fail;
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
-
- write_extent_buffer_fsid(leaf, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
@@ -1374,14 +1337,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
return ERR_CAST(leaf);
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
root->node = leaf;
- write_extent_buffer_fsid(root->node, fs_info->fsid);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
return root;
@@ -1546,7 +1503,7 @@ int btrfs_init_fs_root(struct btrfs_root *root)
return 0;
fail:
- /* the caller is responsible to call free_fs_root */
+ /* The caller is responsible to call btrfs_free_fs_root */
return ret;
}
@@ -1651,14 +1608,14 @@ again:
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
if (ret == -EEXIST) {
- free_fs_root(root);
+ btrfs_free_fs_root(root);
goto again;
}
goto fail;
}
return root;
fail:
- free_fs_root(root);
+ btrfs_free_fs_root(root);
return ERR_PTR(ret);
}
@@ -1803,7 +1760,7 @@ static int transaction_kthread(void *arg)
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
u64 transid;
- unsigned long now;
+ time64_t now;
unsigned long delay;
bool cannot_commit;
@@ -1819,7 +1776,7 @@ static int transaction_kthread(void *arg)
goto sleep;
}
- now = get_seconds();
+ now = ktime_get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
@@ -2196,8 +2153,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
{
- fs_info->dev_replace.lock_owner = 0;
- atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
rwlock_init(&fs_info->dev_replace.lock);
atomic_set(&fs_info->dev_replace.read_locks, 0);
@@ -3075,6 +3030,13 @@ retry_root_backup:
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
+ ret = btrfs_verify_dev_extents(fs_info);
+ if (ret) {
+ btrfs_err(fs_info,
+ "failed to verify dev extents against chunks: %d",
+ ret);
+ goto fail_block_groups;
+ }
ret = btrfs_recover_balance(fs_info);
if (ret) {
btrfs_err(fs_info, "failed to recover balance: %d", ret);
@@ -3875,10 +3837,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
__btrfs_remove_free_space_cache(root->free_ino_pinned);
if (root->free_ino_ctl)
__btrfs_remove_free_space_cache(root->free_ino_ctl);
- free_fs_root(root);
+ btrfs_free_fs_root(root);
}
-static void free_fs_root(struct btrfs_root *root)
+void btrfs_free_fs_root(struct btrfs_root *root)
{
iput(root->ino_cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
@@ -3890,15 +3852,9 @@ static void free_fs_root(struct btrfs_root *root)
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
- kfree(root->name);
btrfs_put_fs_root(root);
}
-void btrfs_free_fs_root(struct btrfs_root *root)
-{
- free_fs_root(root);
-}
-
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
@@ -4104,10 +4060,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
* This is a fast path so only do this check if we have sanity tests
- * enabled. Normal people shouldn't be marking dummy buffers as dirty
+ * enabled. Normal people shouldn't be using umapped buffers as dirty
* outside of the sanity tests.
*/
- if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
+ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
return;
#endif
root = BTRFS_I(buf->pages[0]->mapping->host)->root;
@@ -4150,8 +4106,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
if (flush_delayed)
btrfs_balance_delayed_items(fs_info);
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret > 0) {
balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
}
@@ -4563,21 +4520,11 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
return 0;
}
-static struct btrfs_fs_info *btree_fs_info(void *private_data)
-{
- struct inode *inode = private_data;
- return btrfs_sb(inode->i_sb);
-}
-
static const struct extent_io_ops btree_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btree_submit_bio_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
- /* note we're sharing with inode.c for the merge bio hook */
- .merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btree_io_failed_hook,
- .set_range_writeback = btrfs_set_range_writeback,
- .tree_fs_info = btree_fs_info,
/* optional callbacks */
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 1a3d277b027b..4cccba22640f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -120,8 +120,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
- extent_submit_bio_start_t *submit_bio_start,
- extent_submit_bio_done_t *submit_bio_done);
+ extent_submit_bio_start_t *submit_bio_start);
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+ int mirror_num);
int btrfs_write_tree_block(struct extent_buffer *buf);
void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d9fe58c0080..de6f75f5547b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,24 +52,21 @@ enum {
};
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_node *node, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op);
+ struct btrfs_delayed_ref_node *node, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop,
+ struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 flags,
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
int force);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
@@ -220,9 +217,9 @@ static int add_excluded_extent(struct btrfs_fs_info *fs_info,
return 0;
}
-static void free_excluded_extents(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static void free_excluded_extents(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
u64 start, end;
start = cache->key.objectid;
@@ -234,9 +231,9 @@ static void free_excluded_extents(struct btrfs_fs_info *fs_info,
start, end, EXTENT_UPTODATE);
}
-static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
u64 bytenr;
u64 *logical;
int stripe_len;
@@ -558,7 +555,7 @@ static noinline void caching_thread(struct btrfs_work *work)
caching_ctl->progress = (u64)-1;
up_read(&fs_info->commit_root_sem);
- free_excluded_extents(fs_info, block_group);
+ free_excluded_extents(block_group);
mutex_unlock(&caching_ctl->mutex);
wake_up(&caching_ctl->wait);
@@ -666,7 +663,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
wake_up(&caching_ctl->wait);
if (ret == 1) {
put_caching_control(caching_ctl);
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
return 0;
}
} else {
@@ -758,7 +755,8 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
/*
@@ -870,18 +868,16 @@ search_again:
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- num_refs = btrfs_extent_refs_v0(leaf, ei0);
- /* FIXME: this isn't correct for data */
- extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
-#else
- BUG();
-#endif
+ ret = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
+
+ goto out_free;
}
+
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
@@ -1039,89 +1035,6 @@ out_free:
* tree block info structure.
*/
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- u64 owner, u32 extra_size)
-{
- struct btrfs_root *root = fs_info->extent_root;
- struct btrfs_extent_item *item;
- struct btrfs_extent_item_v0 *ei0;
- struct btrfs_extent_ref_v0 *ref0;
- struct btrfs_tree_block_info *bi;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u32 new_size = sizeof(*item);
- u64 refs;
- int ret;
-
- leaf = path->nodes[0];
- BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- refs = btrfs_extent_refs_v0(leaf, ei0);
-
- if (owner == (u64)-1) {
- while (1) {
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0); /* Corruption */
- leaf = path->nodes[0];
- }
- btrfs_item_key_to_cpu(leaf, &found_key,
- path->slots[0]);
- BUG_ON(key.objectid != found_key.objectid);
- if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
- path->slots[0]++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- owner = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- }
- btrfs_release_path(path);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID)
- new_size += sizeof(*bi);
-
- new_size -= sizeof(*ei0);
- ret = btrfs_search_slot(trans, root, &key, path,
- new_size + extra_size, 1);
- if (ret < 0)
- return ret;
- BUG_ON(ret); /* Corruption */
-
- btrfs_extend_item(fs_info, path, new_size);
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- btrfs_set_extent_refs(leaf, item, refs);
- /* FIXME: get real generation */
- btrfs_set_extent_generation(leaf, item, 0);
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_set_extent_flags(leaf, item,
- BTRFS_EXTENT_FLAG_TREE_BLOCK |
- BTRFS_BLOCK_FLAG_FULL_BACKREF);
- bi = (struct btrfs_tree_block_info *)(item + 1);
- /* FIXME: get first key of the block */
- memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
- btrfs_set_tree_block_level(leaf, bi, (int)owner);
- } else {
- btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
- }
- btrfs_mark_buffer_dirty(leaf);
- return 0;
-}
-#endif
-
/*
* is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
* is_data == BTRFS_REF_TYPE_DATA, data type is requried,
@@ -1216,13 +1129,12 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
}
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid,
u64 owner, u64 offset)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
struct btrfs_extent_data_ref *ref;
struct extent_buffer *leaf;
@@ -1251,17 +1163,6 @@ again:
if (parent) {
if (!ret)
return 0;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
- goto fail;
- }
- if (!ret)
- return 0;
-#endif
goto fail;
}
@@ -1304,13 +1205,12 @@ fail:
}
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid, u64 owner,
u64 offset, int refs_to_add)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
u32 size;
@@ -1384,7 +1284,6 @@ fail:
}
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int refs_to_drop, int *last_ref)
{
@@ -1406,13 +1305,10 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ btrfs_print_v0_err(trans->fs_info);
+ btrfs_abort_transaction(trans, -EINVAL);
+ return -EINVAL;
} else {
BUG();
}
@@ -1421,21 +1317,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
num_refs -= refs_to_drop;
if (num_refs == 0) {
- ret = btrfs_del_item(trans, fs_info->extent_root, path);
+ ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
*last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- else {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- btrfs_set_ref_count_v0(leaf, ref0, num_refs);
- }
-#endif
btrfs_mark_buffer_dirty(leaf);
}
return ret;
@@ -1453,6 +1341,8 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (iref) {
/*
* If type is invalid, we should have bailed out earlier than
@@ -1475,13 +1365,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
} else {
WARN_ON(1);
}
@@ -1489,12 +1372,11 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
}
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
int ret;
@@ -1510,20 +1392,10 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -ENOENT;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ret == -ENOENT && parent) {
- btrfs_release_path(path);
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -ENOENT;
- }
-#endif
return ret;
}
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
@@ -1540,7 +1412,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
key.offset = root_objectid;
}
- ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
path, &key, 0);
btrfs_release_path(path);
return ret;
@@ -1599,13 +1471,13 @@ static int find_next_key(struct btrfs_path *path, int level,
*/
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int insert)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -1635,8 +1507,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
extra_size = -1;
/*
- * Owner is our parent level, so we can just add one to get the level
- * for the block we are interested in.
+ * Owner is our level, so we can just add one to get the level for the
+ * block we are interested in.
*/
if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
key.type = BTRFS_METADATA_ITEM_KEY;
@@ -1684,23 +1556,12 @@ again:
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- if (!insert) {
- err = -ENOENT;
- goto out;
- }
- ret = convert_extent_item_v0(trans, fs_info, path, owner,
- extra_size);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (unlikely(item_size < sizeof(*ei))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_abort_transaction(trans, err);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
@@ -1727,7 +1588,7 @@ again:
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EINVAL;
+ err = -EUCLEAN;
goto out;
}
@@ -1863,7 +1724,6 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1871,9 +1731,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
{
int ret;
- ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 0);
+ ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset, 0);
if (ret != -ENOENT)
return ret;
@@ -1881,12 +1741,11 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
*ref_ret = NULL;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
- parent, root_objectid);
+ ret = lookup_tree_block_ref(trans, path, bytenr, parent,
+ root_objectid);
} else {
- ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
- parent, root_objectid, owner,
- offset);
+ ret = lookup_extent_data_ref(trans, path, bytenr, parent,
+ root_objectid, owner, offset);
}
return ret;
}
@@ -1895,14 +1754,14 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+void update_inline_extent_backref(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
struct btrfs_delayed_extent_op *extent_op,
int *last_ref)
{
- struct extent_buffer *leaf;
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1913,7 +1772,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
int type;
u64 refs;
- leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
@@ -1965,7 +1823,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner,
@@ -1975,15 +1832,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref;
int ret;
- ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 1);
+ ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(fs_info, path, iref,
- refs_to_add, extent_op, NULL);
+ update_inline_extent_backref(path, iref, refs_to_add,
+ extent_op, NULL);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(fs_info, path, iref, parent,
+ setup_inline_extent_backref(trans->fs_info, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1992,7 +1849,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
static int insert_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add)
@@ -2000,18 +1856,17 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
int ret;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
BUG_ON(refs_to_add != 1);
- ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
- parent, root_objectid);
+ ret = insert_tree_block_ref(trans, path, bytenr, parent,
+ root_objectid);
} else {
- ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
- parent, root_objectid,
- owner, offset, refs_to_add);
+ ret = insert_extent_data_ref(trans, path, bytenr, parent,
+ root_objectid, owner, offset,
+ refs_to_add);
}
return ret;
}
static int remove_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data, int *last_ref)
@@ -2020,14 +1875,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- update_inline_extent_backref(fs_info, path, iref,
- -refs_to_drop, NULL, last_ref);
+ update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
+ last_ref);
} else if (is_data) {
- ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
+ ret = remove_extent_data_ref(trans, path, refs_to_drop,
last_ref);
} else {
*last_ref = 1;
- ret = btrfs_del_item(trans, fs_info->extent_root, path);
+ ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
}
return ret;
}
@@ -2185,13 +2040,13 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
owner, offset, BTRFS_ADD_DELAYED_REF);
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_tree_ref(trans, bytenr,
num_bytes, parent,
root_objectid, (int)owner,
BTRFS_ADD_DELAYED_REF, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_data_ref(trans, bytenr,
num_bytes, parent,
root_objectid, owner, offset,
0, BTRFS_ADD_DELAYED_REF,
@@ -2207,8 +2062,41 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * __btrfs_inc_extent_ref - insert backreference for a given extent
+ *
+ * @trans: Handle of transaction
+ *
+ * @node: The delayed ref node used to get the bytenr/length for
+ * extent whose references are incremented.
+ *
+ * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
+ * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
+ * bytenr of the parent block. Since new extents are always
+ * created with indirect references, this will only be the case
+ * when relocating a shared extent. In that case, root_objectid
+ * will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must
+ * be 0
+ *
+ * @root_objectid: The id of the root where this modification has originated,
+ * this can be either one of the well-known metadata trees or
+ * the subvolume id which references this extent.
+ *
+ * @owner: For data extents it is the inode number of the owning file.
+ * For metadata extents this parameter holds the level in the
+ * tree of the extent.
+ *
+ * @offset: For metadata extents the offset is ignored and is currently
+ * always passed as 0. For data extents it is the fileoffset
+ * this extent belongs to.
+ *
+ * @refs_to_add Number of references to add
+ *
+ * @extent_op Pointer to a structure, holding information necessary when
+ * updating a tree block's flags
+ *
+ */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add,
@@ -2230,10 +2118,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
- num_bytes, parent, root_objectid,
- owner, offset,
- refs_to_add, extent_op);
+ ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
+ parent, root_objectid, owner,
+ offset, refs_to_add, extent_op);
if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
@@ -2256,8 +2143,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* now insert the actual backref */
- ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
- root_objectid, owner, offset, refs_to_add);
+ ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
+ owner, offset, refs_to_add);
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -2266,7 +2153,6 @@ out:
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2283,7 +2169,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
ref = btrfs_delayed_node_to_data_ref(node);
- trace_run_delayed_data_ref(fs_info, node, ref, node->action);
+ trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
@@ -2292,17 +2178,16 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
if (extent_op)
flags |= extent_op->flags_to_set;
- ret = alloc_reserved_file_extent(trans, fs_info,
- parent, ref_root, flags,
- ref->objectid, ref->offset,
- &ins, node->ref_mod);
+ ret = alloc_reserved_file_extent(trans, parent, ref_root,
+ flags, ref->objectid,
+ ref->offset, &ins,
+ node->ref_mod);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
- ref_root, ref->objectid,
- ref->offset, node->ref_mod,
- extent_op);
+ ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+ ref->objectid, ref->offset,
+ node->ref_mod, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, fs_info, node, parent,
+ ret = __btrfs_free_extent(trans, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
extent_op);
@@ -2331,10 +2216,10 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
}
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
@@ -2400,18 +2285,14 @@ again:
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+
+ if (unlikely(item_size < sizeof(*ei))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_abort_transaction(trans, err);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
+
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
__run_delayed_extent_op(extent_op, leaf, ei);
@@ -2422,7 +2303,6 @@ out:
}
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2433,14 +2313,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 ref_root = 0;
ref = btrfs_delayed_node_to_tree_ref(node);
- trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
+ trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
ref_root = ref->root;
if (node->ref_mod != 1) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
@@ -2450,13 +2330,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, node, extent_op);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, fs_info, node,
- parent, ref_root,
- ref->level, 0, 1,
- extent_op);
+ ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+ ref->level, 0, 1, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, fs_info, node,
- parent, ref_root,
+ ret = __btrfs_free_extent(trans, node, parent, ref_root,
ref->level, 0, 1, extent_op);
} else {
BUG();
@@ -2466,7 +2343,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2475,18 +2351,18 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
if (trans->aborted) {
if (insert_reserved)
- btrfs_pin_extent(fs_info, node->bytenr,
+ btrfs_pin_extent(trans->fs_info, node->bytenr,
node->num_bytes, 1);
return 0;
}
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
node->type == BTRFS_SHARED_BLOCK_REF_KEY)
- ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
+ ret = run_delayed_tree_ref(trans, node, extent_op,
insert_reserved);
else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
- ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
+ ret = run_delayed_data_ref(trans, node, extent_op,
insert_reserved);
else
BUG();
@@ -2528,7 +2404,6 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
}
static int cleanup_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
@@ -2542,21 +2417,22 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
return 0;
}
spin_unlock(&head->lock);
- ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
+ ret = run_delayed_extent_op(trans, head, extent_op);
btrfs_free_delayed_extent_op(extent_op);
return ret ? ret : 1;
}
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head)
{
+
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_root *delayed_refs;
int ret;
delayed_refs = &trans->transaction->delayed_refs;
- ret = cleanup_extent_op(trans, fs_info, head);
+ ret = cleanup_extent_op(trans, head);
if (ret < 0) {
unselect_delayed_ref_head(delayed_refs, head);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
@@ -2598,8 +2474,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
flags = BTRFS_BLOCK_GROUP_METADATA;
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add(&space_info->total_bytes_pinned,
- -head->num_bytes);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -head->num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
if (head->is_data) {
spin_lock(&delayed_refs->lock);
@@ -2705,7 +2582,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* up and move on to the next ref_head.
*/
if (!ref) {
- ret = cleanup_ref_head(trans, fs_info, locked_ref);
+ ret = cleanup_ref_head(trans, locked_ref);
if (ret > 0 ) {
/* We dropped our lock, we need to loop. */
ret = 0;
@@ -2752,7 +2629,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
locked_ref->extent_op = NULL;
spin_unlock(&locked_ref->lock);
- ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
+ ret = run_one_delayed_ref(trans, ref, extent_op,
must_insert_reserved);
btrfs_free_delayed_extent_op(extent_op);
@@ -3227,12 +3104,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = 1;
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- goto out;
- }
-#endif
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
if (item_size != sizeof(*ei) +
@@ -4060,11 +3931,7 @@ static void update_space_info(struct btrfs_fs_info *info, u64 flags,
struct btrfs_space_info *found;
int factor;
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(flags);
found = __find_space_info(info, flags);
ASSERT(found);
@@ -4289,7 +4156,7 @@ again:
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = do_chunk_alloc(trans, fs_info, alloc_target,
+ ret = do_chunk_alloc(trans, alloc_target,
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
if (ret < 0) {
@@ -4309,9 +4176,10 @@ again:
* allocation, and no removed chunk in current transaction,
* don't bother committing the transaction.
*/
- have_pinned_space = percpu_counter_compare(
+ have_pinned_space = __percpu_counter_compare(
&data_sinfo->total_bytes_pinned,
- used + bytes - data_sinfo->total_bytes);
+ used + bytes - data_sinfo->total_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
@@ -4358,7 +4226,7 @@ commit_trans:
data_sinfo->flags, bytes, 1);
spin_unlock(&data_sinfo->lock);
- return ret;
+ return 0;
}
int btrfs_check_data_free_space(struct inode *inode,
@@ -4511,9 +4379,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
* for allocating a chunk, otherwise if it's false, reserve space necessary for
* removing a chunk.
*/
-void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
u64 thresh;
@@ -4552,7 +4420,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
- ret = btrfs_alloc_chunk(trans, fs_info, flags);
+ ret = btrfs_alloc_chunk(trans, flags);
}
if (!ret) {
@@ -4573,11 +4441,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 flags, int force)
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ int force)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
- int wait_for_alloc = 0;
+ bool wait_for_alloc = false;
+ bool should_alloc = false;
int ret = 0;
/* Don't re-enter if we're already allocating a chunk */
@@ -4587,45 +4457,44 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
-again:
- spin_lock(&space_info->lock);
- if (force < space_info->force_alloc)
- force = space_info->force_alloc;
- if (space_info->full) {
- if (should_alloc_chunk(fs_info, space_info, force))
- ret = -ENOSPC;
- else
- ret = 0;
- spin_unlock(&space_info->lock);
- return ret;
- }
-
- if (!should_alloc_chunk(fs_info, space_info, force)) {
- spin_unlock(&space_info->lock);
- return 0;
- } else if (space_info->chunk_alloc) {
- wait_for_alloc = 1;
- } else {
- space_info->chunk_alloc = 1;
- }
-
- spin_unlock(&space_info->lock);
-
- mutex_lock(&fs_info->chunk_mutex);
+ do {
+ spin_lock(&space_info->lock);
+ if (force < space_info->force_alloc)
+ force = space_info->force_alloc;
+ should_alloc = should_alloc_chunk(fs_info, space_info, force);
+ if (space_info->full) {
+ /* No more free physical space */
+ if (should_alloc)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&space_info->lock);
+ return ret;
+ } else if (!should_alloc) {
+ spin_unlock(&space_info->lock);
+ return 0;
+ } else if (space_info->chunk_alloc) {
+ /*
+ * Someone is already allocating, so we need to block
+ * until this someone is finished and then loop to
+ * recheck if we should continue with our allocation
+ * attempt.
+ */
+ wait_for_alloc = true;
+ spin_unlock(&space_info->lock);
+ mutex_lock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
+ } else {
+ /* Proceed with allocation */
+ space_info->chunk_alloc = 1;
+ wait_for_alloc = false;
+ spin_unlock(&space_info->lock);
+ }
- /*
- * The chunk_mutex is held throughout the entirety of a chunk
- * allocation, so once we've acquired the chunk_mutex we know that the
- * other guy is done and we need to recheck and see if we should
- * allocate.
- */
- if (wait_for_alloc) {
- mutex_unlock(&fs_info->chunk_mutex);
- wait_for_alloc = 0;
cond_resched();
- goto again;
- }
+ } while (wait_for_alloc);
+ mutex_lock(&fs_info->chunk_mutex);
trans->allocating_chunk = true;
/*
@@ -4651,9 +4520,9 @@ again:
* Check if we have enough space in SYSTEM chunk because we may need
* to update devices.
*/
- check_system_chunk(trans, fs_info, flags);
+ check_system_chunk(trans, flags);
- ret = btrfs_alloc_chunk(trans, fs_info, flags);
+ ret = btrfs_alloc_chunk(trans, flags);
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
@@ -4703,6 +4572,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
u64 space_size;
u64 avail;
u64 used;
+ int factor;
/* Don't overcommit when in mixed mode. */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -4737,10 +4607,8 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
* doesn't include the parity drive, so we don't have to
* change the math
*/
- if (profile & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- avail >>= 1;
+ factor = btrfs_bg_type_to_factor(profile);
+ avail = div_u64(avail, factor);
/*
* If we aren't flushing all things, let us overcommit up to
@@ -4912,8 +4780,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
return 0;
/* See if there is enough pinned space to make this reservation */
- if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes) >= 0)
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
goto commit;
/*
@@ -4930,8 +4799,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
bytes -= delayed_rsv->size;
spin_unlock(&delayed_rsv->lock);
- if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes) < 0) {
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
return -ENOSPC;
}
@@ -4984,7 +4854,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = PTR_ERR(trans);
break;
}
- ret = do_chunk_alloc(trans, fs_info,
+ ret = do_chunk_alloc(trans,
btrfs_metadata_alloc_profile(fs_info),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
@@ -5659,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
kfree(rsv);
}
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
-{
- kfree(rsv);
-}
-
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush)
@@ -6019,7 +5884,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@@ -6092,7 +5957,7 @@ out_fail:
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&inode->lock);
@@ -6121,7 +5986,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned num_extents;
spin_lock(&inode->lock);
@@ -6219,12 +6084,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(info, bytenr);
if (!cache)
return -ENOENT;
- if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(cache->flags);
+
/*
* If this block group has free space cache written out, we
* need to make sure to load it if we are removing space. This
@@ -6268,8 +6129,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(info, "pinned",
cache->space_info->flags,
num_bytes, 1);
- percpu_counter_add(&cache->space_info->total_bytes_pinned,
- num_bytes);
+ percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+ num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -6279,7 +6141,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (list_empty(&cache->dirty_list)) {
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
- trans->transaction->num_dirty_bgs++;
+ trans->transaction->num_dirty_bgs++;
btrfs_get_block_group(cache);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -6290,16 +6152,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
* dirty list to avoid races between cleaner kthread and space
* cache writeout.
*/
- if (!alloc && old_val == 0) {
- spin_lock(&info->unused_bgs_lock);
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &info->unused_bgs);
- }
- spin_unlock(&info->unused_bgs_lock);
- }
+ if (!alloc && old_val == 0)
+ btrfs_mark_bg_unused(cache);
btrfs_put_block_group(cache);
total -= num_bytes;
@@ -6347,7 +6201,8 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
- percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
+ percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+ num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
set_extent_dirty(fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
@@ -6711,7 +6566,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
space_info->max_extent_size = 0;
- percpu_counter_add(&space_info->total_bytes_pinned, -len);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
@@ -6815,12 +6671,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
}
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *info,
- struct btrfs_delayed_ref_node *node, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_ref_node *node, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop,
+ struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_root *extent_root = info->extent_root;
@@ -6852,9 +6708,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (is_data)
skinny_metadata = false;
- ret = lookup_extent_backref(trans, info, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner_objectid,
+ ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
+ parent, root_objectid, owner_objectid,
owner_offset);
if (ret == 0) {
extent_slot = path->slots[0];
@@ -6877,14 +6732,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
break;
extent_slot--;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
- if (found_extent && item_size < sizeof(*ei))
- found_extent = 0;
-#endif
+
if (!found_extent) {
BUG_ON(iref);
- ret = remove_extent_backref(trans, info, path, NULL,
+ ret = remove_extent_backref(trans, path, NULL,
refs_to_drop,
is_data, &last_ref);
if (ret) {
@@ -6957,42 +6808,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, extent_slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- BUG_ON(found_extent || extent_slot != path->slots[0]);
- ret = convert_extent_item_v0(trans, info, path, owner_objectid,
- 0);
- if (ret < 0) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
-
- btrfs_release_path(path);
- path->leave_spinning = 1;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
-
- ret = btrfs_search_slot(trans, extent_root, &key, path,
- -1, 1);
- if (ret) {
- btrfs_err(info,
- "umm, got %d back from search, was looking for %llu",
- ret, bytenr);
- btrfs_print_leaf(path->nodes[0]);
- }
- if (ret < 0) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
-
- extent_slot = path->slots[0];
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, extent_slot);
+ if (unlikely(item_size < sizeof(*ei))) {
+ ret = -EINVAL;
+ btrfs_print_v0_err(info);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, extent_slot,
struct btrfs_extent_item);
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
@@ -7028,9 +6849,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
}
if (found_extent) {
- ret = remove_extent_backref(trans, info, path,
- iref, refs_to_drop,
- is_data, &last_ref);
+ ret = remove_extent_backref(trans, path, iref,
+ refs_to_drop, is_data,
+ &last_ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7172,7 +6993,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
root->root_key.objectid,
btrfs_header_level(buf), 0,
BTRFS_DROP_DELAYED_REF);
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
+ ret = btrfs_add_delayed_tree_ref(trans, buf->start,
buf->len, parent,
root->root_key.objectid,
btrfs_header_level(buf),
@@ -7251,13 +7072,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
old_ref_mod = new_ref_mod = 0;
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_tree_ref(trans, bytenr,
num_bytes, parent,
root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_data_ref(trans, bytenr,
num_bytes, parent,
root_objectid, owner, offset,
0, BTRFS_DROP_DELAYED_REF,
@@ -7534,7 +7355,7 @@ search:
* for the proper type.
*/
if (!block_group_bits(block_group, flags)) {
- u64 extra = BTRFS_BLOCK_GROUP_DUP |
+ u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6 |
@@ -7738,7 +7559,7 @@ unclustered_alloc:
goto loop;
}
checks:
- search_start = ALIGN(offset, fs_info->stripesize);
+ search_start = round_up(offset, fs_info->stripesize);
/* move on to the next group */
if (search_start + num_bytes >
@@ -7750,7 +7571,6 @@ checks:
if (offset < search_start)
btrfs_add_free_space(block_group, offset,
search_start - offset);
- BUG_ON(offset > search_start);
ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
num_bytes, delalloc);
@@ -7826,8 +7646,7 @@ loop:
goto out;
}
- ret = do_chunk_alloc(trans, fs_info, flags,
- CHUNK_ALLOC_FORCE);
+ ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
/*
* If we can't allocate a new chunk we've already looped
@@ -8053,11 +7872,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_extent_item *extent_item;
struct btrfs_extent_inline_ref *iref;
@@ -8231,7 +8050,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@@ -8240,7 +8058,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
root->root_key.objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
+ ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
ins->offset, 0,
root->root_key.objectid, owner,
offset, ram_bytes,
@@ -8254,10 +8072,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
* space cache bits as well
*/
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
@@ -8285,15 +8103,15 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid,
- 0, owner, offset, ins, 1);
+ ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
+ offset, ins, 1);
btrfs_put_block_group(block_group);
return ret;
}
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- u64 bytenr, int level)
+ u64 bytenr, int level, u64 owner)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
@@ -8302,7 +8120,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (IS_ERR(buf))
return buf;
- btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(fs_info, buf);
@@ -8311,6 +8128,14 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_lock_blocking(buf);
set_extent_buffer_uptodate(buf);
+ memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_level(buf, level);
+ btrfs_set_header_bytenr(buf, buf->start);
+ btrfs_set_header_generation(buf, trans->transid);
+ btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(buf, owner);
+ write_extent_buffer_fsid(buf, fs_info->fsid);
+ write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
buf->log_index = root->log_transid % 2;
/*
@@ -8419,7 +8244,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (btrfs_is_testing(fs_info)) {
buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
- level);
+ level, root_objectid);
if (!IS_ERR(buf))
root->alloc_bytenr += blocksize;
return buf;
@@ -8435,7 +8260,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
if (ret)
goto out_unuse;
- buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
+ buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
+ root_objectid);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_free_reserved;
@@ -8467,7 +8293,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
root_objectid, level, 0,
BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
+ ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ins.offset, parent,
root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
@@ -8499,7 +8325,6 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
- int for_reloc;
};
#define DROP_REFERENCE 1
@@ -8819,7 +8644,7 @@ skip:
}
if (need_account) {
- ret = btrfs_qgroup_trace_subtree(trans, root, next,
+ ret = btrfs_qgroup_trace_subtree(trans, next,
generation, level - 1);
if (ret) {
btrfs_err_rl(fs_info,
@@ -8919,7 +8744,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb);
+ ret = btrfs_qgroup_trace_leaf_items(trans, eb);
if (ret) {
btrfs_err_rl(fs_info,
"error %d accounting leaf items. Quota is out of sync, rescan required.",
@@ -9136,7 +8961,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
- wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
while (1) {
@@ -9199,7 +9023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (err)
goto out_end_trans;
- ret = btrfs_del_root(trans, fs_info, &root->root_key);
+ ret = btrfs_del_root(trans, &root->root_key);
if (ret) {
btrfs_abort_transaction(trans, ret);
err = ret;
@@ -9302,7 +9126,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
- wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
while (1) {
@@ -9417,10 +9240,10 @@ out:
return ret;
}
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
@@ -9454,7 +9277,7 @@ again:
*/
alloc_flags = update_block_group_flags(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = do_chunk_alloc(trans, fs_info, alloc_flags,
+ ret = do_chunk_alloc(trans, alloc_flags,
CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
@@ -9471,8 +9294,7 @@ again:
if (!ret)
goto out;
alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
- ret = do_chunk_alloc(trans, fs_info, alloc_flags,
- CHUNK_ALLOC_FORCE);
+ ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
@@ -9480,7 +9302,7 @@ out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(fs_info, cache->flags);
mutex_lock(&fs_info->chunk_mutex);
- check_system_chunk(trans, fs_info, alloc_flags);
+ check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
}
mutex_unlock(&fs_info->ro_block_group_mutex);
@@ -9489,12 +9311,11 @@ out:
return ret;
}
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
- u64 alloc_flags = get_alloc_profile(fs_info, type);
+ u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
- return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE);
+ return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
@@ -9520,13 +9341,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
continue;
}
- if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP))
- factor = 2;
- else
- factor = 1;
-
+ factor = btrfs_bg_type_to_factor(block_group->flags);
free_bytes += (block_group->key.offset -
btrfs_block_group_used(&block_group->item)) *
factor;
@@ -9717,6 +9532,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9751,8 +9568,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
"logical %llu len %llu found bg but no related chunk",
found_key.objectid, found_key.offset);
ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
} else {
- ret = 0;
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
}
free_extent_map(em);
goto out;
@@ -9847,7 +9688,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
*/
if (block_group->cached == BTRFS_CACHE_NO ||
block_group->cached == BTRFS_CACHE_ERROR)
- free_excluded_extents(info, block_group);
+ free_excluded_extents(block_group);
btrfs_remove_free_space_cache(block_group);
ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
@@ -10003,6 +9844,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
@@ -10089,13 +9986,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
* info has super bytes accounted for, otherwise we'll think
* we have more space than we actually do.
*/
- ret = exclude_super_stripes(info, cache);
+ ret = exclude_super_stripes(cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
btrfs_put_block_group(cache);
goto error;
}
@@ -10110,14 +10007,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, found_key.objectid,
found_key.objectid +
found_key.offset);
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
}
ret = btrfs_add_block_group_cache(info, cache);
@@ -10140,15 +10037,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (btrfs_chunk_readonly(info, cache->key.objectid)) {
inc_block_group_ro(cache, 1);
} else if (btrfs_block_group_used(&cache->item) == 0) {
- spin_lock(&info->unused_bgs_lock);
- /* Should always be true but just in case. */
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &info->unused_bgs);
- }
- spin_unlock(&info->unused_bgs_lock);
+ ASSERT(list_empty(&cache->bg_list));
+ btrfs_mark_bg_unused(cache);
}
}
@@ -10176,7 +10066,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_add_raid_kobjects(info);
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
@@ -10206,8 +10096,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
sizeof(item));
if (ret)
btrfs_abort_transaction(trans, ret);
- ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid,
- key.offset);
+ ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@@ -10218,10 +10107,10 @@ next:
trans->can_flush_pending_bgs = can_flush_pending_bgs;
}
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytes_used,
+int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache;
int ret;
@@ -10240,20 +10129,20 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->needs_free_space = 1;
- ret = exclude_super_stripes(fs_info, cache);
+ ret = exclude_super_stripes(cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
btrfs_put_block_group(cache);
return ret;
}
add_new_free_space(cache, chunk_offset, chunk_offset + size);
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(cache)) {
@@ -10311,9 +10200,9 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 group_start,
- struct extent_map *em)
+ u64 group_start, struct extent_map *em)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
@@ -10337,18 +10226,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* Free the reserved super bytes from this block group before
* remove it.
*/
- free_excluded_extents(fs_info, block_group);
+ free_excluded_extents(block_group);
btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
block_group->key.offset);
memcpy(&key, &block_group->key, sizeof(key));
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(block_group->flags);
/* make sure this block group isn't part of an allocation cluster */
cluster = &fs_info->data_alloc_cluster;
@@ -10687,7 +10571,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->pinned ||
btrfs_block_group_used(&block_group->item) ||
block_group->ro ||
list_is_singular(&block_group->list)) {
@@ -10764,8 +10648,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
space_info->bytes_pinned -= block_group->pinned;
space_info->bytes_readonly += block_group->pinned;
- percpu_counter_add(&space_info->total_bytes_pinned,
- -block_group->pinned);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -block_group->pinned,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
block_group->pinned = 0;
spin_unlock(&block_group->lock);
@@ -10782,8 +10667,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
- ret = btrfs_remove_chunk(trans, fs_info,
- block_group->key.objectid);
+ ret = btrfs_remove_chunk(trans, block_group->key.objectid);
if (ret) {
if (trimming)
@@ -11066,3 +10950,16 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
!atomic_read(&root->will_be_snapshotted));
}
}
+
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ trace_btrfs_add_unused_block_group(bg);
+ list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b3e45714d28f..628f1aef34b0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -140,14 +140,6 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits,
static void flush_write_bio(struct extent_page_data *epd);
-static inline struct btrfs_fs_info *
-tree_fs_info(struct extent_io_tree *tree)
-{
- if (tree->ops)
- return tree->ops->tree_fs_info(tree->private_data);
- return NULL;
-}
-
int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
@@ -564,8 +556,10 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
- btrfs_panic(tree_fs_info(tree), err,
- "Locking error: Extent tree was modified by another thread while locked.");
+ struct inode *inode = tree->private_data;
+
+ btrfs_panic(btrfs_sb(inode->i_sb), err,
+ "locking error: extent tree was modified by another thread while locked");
}
/*
@@ -1386,14 +1380,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
}
}
-/*
- * helper function to set both pages and extents in the tree writeback
- */
-static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
- tree->ops->set_range_writeback(tree->private_data, start, end);
-}
-
/* find the first state struct with 'bits' set after 'start', and
* return it. tree->lock must be held. NULL will returned if
* nothing was found after 'start'
@@ -2059,7 +2045,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int mirror_num)
{
u64 start = eb->start;
- unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
+ int i, num_pages = num_extent_pages(eb);
int ret = 0;
if (sb_rdonly(fs_info->sb))
@@ -2398,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
start - page_offset(page),
(int)phy_offset, failed_bio->bi_end_io,
NULL);
- bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+ bio->bi_opf = REQ_OP_READ | read_mode;
btrfs_debug(btrfs_sb(inode->i_sb),
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
@@ -2790,8 +2776,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
else
contig = bio_end_sector(bio) == sector;
- if (tree->ops && tree->ops->merge_bio_hook(page, offset,
- page_size, bio, bio_flags))
+ if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
+ bio, bio_flags))
can_merge = false;
if (prev_bio_flags != bio_flags || !contig || !can_merge ||
@@ -3422,7 +3408,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
continue;
}
- set_range_writeback(tree, cur, cur + iosize - 1);
+ btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
"page %lu not writeback, cur %llu end %llu",
@@ -3538,7 +3524,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
struct btrfs_fs_info *fs_info,
struct extent_page_data *epd)
{
- unsigned long i, num_pages;
+ int i, num_pages;
int flush = 0;
int ret = 0;
@@ -3588,7 +3574,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
if (!ret)
return ret;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -3712,13 +3698,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
u32 nritems;
- unsigned long i, num_pages;
+ int i, num_pages;
unsigned long start, end;
unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
atomic_set(&eb->io_pages, num_pages);
/* set btree blocks beyond nritems with 0 to avoid stale content. */
@@ -4643,23 +4629,20 @@ int extent_buffer_under_io(struct extent_buffer *eb)
}
/*
- * Helper for releasing extent buffer page.
+ * Release all pages attached to the extent buffer.
*/
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
+static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
{
- unsigned long index;
- struct page *page;
- int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+ int i;
+ int num_pages;
+ int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
BUG_ON(extent_buffer_under_io(eb));
- index = num_extent_pages(eb->start, eb->len);
- if (index == 0)
- return;
+ num_pages = num_extent_pages(eb);
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = eb->pages[i];
- do {
- index--;
- page = eb->pages[index];
if (!page)
continue;
if (mapped)
@@ -4691,7 +4674,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
/* One for when we allocated the page */
put_page(page);
- } while (index != 0);
+ }
}
/*
@@ -4699,7 +4682,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
*/
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
- btrfs_release_extent_buffer_page(eb);
+ btrfs_release_extent_buffer_pages(eb);
__free_extent_buffer(eb);
}
@@ -4743,10 +4726,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
- unsigned long i;
+ int i;
struct page *p;
struct extent_buffer *new;
- unsigned long num_pages = num_extent_pages(src->start, src->len);
+ int num_pages = num_extent_pages(src);
new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
if (new == NULL)
@@ -4766,7 +4749,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
}
set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
- set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+ set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
return new;
}
@@ -4775,15 +4758,14 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
{
struct extent_buffer *eb;
- unsigned long num_pages;
- unsigned long i;
-
- num_pages = num_extent_pages(start, len);
+ int num_pages;
+ int i;
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return NULL;
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
eb->pages[i] = alloc_page(GFP_NOFS);
if (!eb->pages[i])
@@ -4791,7 +4773,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
}
set_extent_buffer_uptodate(eb);
btrfs_set_header_nritems(eb, 0);
- set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+ set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
return eb;
err:
@@ -4843,11 +4825,11 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
static void mark_extent_buffer_accessed(struct extent_buffer *eb,
struct page *accessed)
{
- unsigned long num_pages, i;
+ int num_pages, i;
check_buffer_tree_ref(eb);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -4944,8 +4926,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
unsigned long len = fs_info->nodesize;
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
+ int num_pages;
+ int i;
unsigned long index = start >> PAGE_SHIFT;
struct extent_buffer *eb;
struct extent_buffer *exists = NULL;
@@ -4967,6 +4949,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (!eb)
return ERR_PTR(-ENOMEM);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
if (!p) {
@@ -5009,8 +4992,11 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
uptodate = 0;
/*
- * see below about how we avoid a nasty race with release page
- * and why we unlock later
+ * We can't unlock the pages just yet since the extent buffer
+ * hasn't been properly inserted in the radix tree, this
+ * opens a race with btree_releasepage which can free a page
+ * while we are still filling in all pages for the buffer and
+ * we could crash.
*/
}
if (uptodate)
@@ -5039,21 +5025,12 @@ again:
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
/*
- * there is a race where release page may have
- * tried to find this extent buffer in the radix
- * but failed. It will tell the VM it is safe to
- * reclaim the, and it will clear the page private bit.
- * We must make sure to set the page private bit properly
- * after the extent buffer is in the radix tree so
- * it doesn't get lost
+ * Now it's safe to unlock the pages because any calls to
+ * btree_releasepage will correctly detect that a page belongs to a
+ * live buffer and won't free them prematurely.
*/
- SetPageChecked(eb->pages[0]);
- for (i = 1; i < num_pages; i++) {
- p = eb->pages[i];
- ClearPageChecked(p);
- unlock_page(p);
- }
- unlock_page(eb->pages[0]);
+ for (i = 0; i < num_pages; i++)
+ unlock_page(eb->pages[i]);
return eb;
free_eb:
@@ -5075,9 +5052,10 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
__free_extent_buffer(eb);
}
-/* Expects to have eb->eb_lock already held */
static int release_extent_buffer(struct extent_buffer *eb)
{
+ lockdep_assert_held(&eb->refs_lock);
+
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
@@ -5094,9 +5072,9 @@ static int release_extent_buffer(struct extent_buffer *eb)
}
/* Should be safe to release our pages at this point */
- btrfs_release_extent_buffer_page(eb);
+ btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
__free_extent_buffer(eb);
return 1;
}
@@ -5127,7 +5105,7 @@ void free_extent_buffer(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
- test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+ test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
atomic_dec(&eb->refs);
if (atomic_read(&eb->refs) == 2 &&
@@ -5159,11 +5137,11 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
void clear_extent_buffer_dirty(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
+ int i;
+ int num_pages;
struct page *page;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
@@ -5189,15 +5167,15 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
+ int i;
+ int num_pages;
int was_dirty = 0;
check_buffer_tree_ref(eb);
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
@@ -5208,12 +5186,12 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
- unsigned long i;
+ int i;
struct page *page;
- unsigned long num_pages;
+ int num_pages;
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
if (page)
@@ -5223,12 +5201,12 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
- unsigned long i;
+ int i;
struct page *page;
- unsigned long num_pages;
+ int num_pages;
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
SetPageUptodate(page);
@@ -5238,13 +5216,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait, int mirror_num)
{
- unsigned long i;
+ int i;
struct page *page;
int err;
int ret = 0;
int locked_pages = 0;
int all_uptodate = 1;
- unsigned long num_pages;
+ int num_pages;
unsigned long num_reads = 0;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
@@ -5252,7 +5230,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
if (wait == WAIT_NONE) {
@@ -5576,11 +5554,11 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
struct extent_buffer *src)
{
int i;
- unsigned num_pages;
+ int num_pages;
ASSERT(dst->len == src->len);
- num_pages = num_extent_pages(dst->start, dst->len);
+ num_pages = num_extent_pages(dst);
for (i = 0; i < num_pages; i++)
copy_page(page_address(dst->pages[i]),
page_address(src->pages[i]));
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0bfd4aeb822d..b4d03e677e1d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -46,7 +46,7 @@
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */
-#define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_UNMAPPED 9
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
@@ -92,9 +92,6 @@ typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
-typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
- struct bio *bio, int mirror_num);
-
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
@@ -104,12 +101,7 @@ struct extent_io_ops {
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
- int (*merge_bio_hook)(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
- struct btrfs_fs_info *(*tree_fs_info)(void *private_data);
- void (*set_range_writeback)(void *private_data, u64 start, u64 end);
/*
* Optional hooks, called if the pointer is not NULL
@@ -440,10 +432,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+static inline int num_extent_pages(const struct extent_buffer *eb)
{
- return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
- (start >> PAGE_SHIFT);
+ return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
+ (eb->start >> PAGE_SHIFT);
}
static inline void extent_buffer_get(struct extent_buffer *eb)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f9dd6d1836a3..ba74827beb32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -922,7 +922,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const bool new_inline,
struct extent_map *em)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
@@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
btrfs_file_extent_num_bytes(leaf, fi);
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, slot, fi);
+ size = btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 51e77d72068a..2be00e873e92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -5,14 +5,11 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
#include <linux/falloc.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
@@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct inode_defrag *defrag;
u64 transid;
int ret;
@@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
if (!__need_auto_defrag(fs_info))
@@ -833,8 +830,7 @@ next_slot:
btrfs_file_extent_num_bytes(leaf, fi);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ btrfs_file_extent_ram_bytes(leaf, fi);
} else {
/* can't happen */
BUG();
@@ -1133,7 +1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 start, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
@@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start_pos;
u64 last_pos;
int i;
@@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
@@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
return ret;
}
-static noinline ssize_t __btrfs_buffered_write(struct file *file,
- struct iov_iter *i,
- loff_t pos)
+static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
+ struct iov_iter *i)
{
+ struct file *file = iocb->ki_filp;
+ loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- loff_t pos = iocb->ki_pos;
+ loff_t pos;
ssize_t written;
ssize_t written_buffered;
loff_t endbyte;
@@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (written < 0 || !iov_iter_count(from))
return written;
- pos += written;
- written_buffered = __btrfs_buffered_write(file, from, pos);
+ pos = iocb->ki_pos;
+ written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
err = written_buffered;
goto out;
@@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from);
} else {
- num_written = __btrfs_buffered_write(file, from, pos);
+ num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
if (clean_page)
@@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
- bool full_sync = false;
u64 len;
/*
@@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
inode_lock(inode);
atomic_inc(&root->log_batch);
- full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+
/*
- * We might have have had more pages made dirty after calling
- * start_ordered_ops and before acquiring the inode's i_mutex.
+ * We have to do this here to avoid the priority inversion of waiting on
+ * IO of a lower priority task while holding a transaciton open.
*/
- if (full_sync) {
- /*
- * For a full sync, we need to make sure any ordered operations
- * start and finish before we start logging the inode, so that
- * all extents are persisted and the respective file extent
- * items are in the fs/subvol btree.
- */
- ret = btrfs_wait_ordered_range(inode, start, len);
- } else {
- /*
- * Start any new ordered operations before starting to log the
- * inode. We will wait for them to finish in btrfs_sync_log().
- *
- * Right before acquiring the inode's mutex, we might have new
- * writes dirtying pages, which won't immediately start the
- * respective ordered operations - that is done through the
- * fill_delalloc callbacks invoked from the writepage and
- * writepages address space operations. So make sure we start
- * all ordered operations before starting to log our inode. Not
- * doing this means that while logging the inode, writeback
- * could start and invoke writepage/writepages, which would call
- * the fill_delalloc callbacks (cow_file_range,
- * submit_compressed_extents). These callbacks add first an
- * extent map to the modified list of extents and then create
- * the respective ordered operation, which means in
- * tree-log.c:btrfs_log_inode() we might capture all existing
- * ordered operations (with btrfs_get_logged_extents()) before
- * the fill_delalloc callback adds its ordered operation, and by
- * the time we visit the modified list of extent maps (with
- * btrfs_log_changed_extents()), we see and process the extent
- * map they created. We then use the extent map to construct a
- * file extent item for logging without waiting for the
- * respective ordered operation to finish - this file extent
- * item points to a disk location that might not have yet been
- * written to, containing random data - so after a crash a log
- * replay will make our inode have file extent items that point
- * to disk locations containing invalid data, as we returned
- * success to userspace without waiting for the respective
- * ordered operation to finish, because it wasn't captured by
- * btrfs_get_logged_extents().
- */
- ret = start_ordered_ops(inode, start, end);
- }
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
inode_unlock(inode);
goto out;
}
atomic_inc(&root->log_batch);
- /*
- * If the last transaction that changed this file was before the current
- * transaction and we have the full sync flag set in our inode, we can
- * bail out now without any syncing.
- *
- * Note that we can't bail out if the full sync flag isn't set. This is
- * because when the full sync flag is set we start all ordered extents
- * and wait for them to fully complete - when they complete they update
- * the inode's last_trans field through:
- *
- * btrfs_finish_ordered_io() ->
- * btrfs_update_inode_fallback() ->
- * btrfs_update_inode() ->
- * btrfs_set_inode_last_trans()
- *
- * So we are sure that last_trans is up to date and can do this check to
- * bail out safely. For the fast path, when the full sync flag is not
- * set in our inode, we can not do it because we start only our ordered
- * extents and don't wait for them to complete (that is when
- * btrfs_finish_ordered_io runs), so here at this point their last_trans
- * value might be less than or equals to fs_info->last_trans_committed,
- * and setting a speculative last_trans for an inode when a buffered
- * write is made (such as fs_info->generation + 1 for example) would not
- * be reliable since after setting the value and before fsync is called
- * any number of transactions can start and commit (transaction kthread
- * commits the current transaction periodically), and a transaction
- * commit does not start nor waits for ordered extents to complete.
- */
smp_mb();
if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
- (full_sync && BTRFS_I(inode)->last_trans <=
- fs_info->last_trans_committed) ||
- (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
- BTRFS_I(inode)->last_trans
- <= fs_info->last_trans_committed)) {
+ BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
}
- if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start, len);
- if (ret) {
- btrfs_end_transaction(trans);
- goto out;
- }
- }
ret = btrfs_commit_transaction(trans);
} else {
ret = btrfs_end_transaction(trans);
@@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path, u64 offset, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d5f80cb300be..0adf38b00fa0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -71,10 +71,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
inode = btrfs_iget(fs_info->sb, &location, root, NULL);
if (IS_ERR(inode))
return inode;
- if (is_bad_inode(inode)) {
- iput(inode);
- return ERR_PTR(-ENOENT);
- }
mapping_set_gfp_mask(inode->i_mapping,
mapping_gfp_constraint(inode->i_mapping,
@@ -300,9 +296,9 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
- /* Make sure we can fit our crcs into the first page */
+ /* Make sure we can fit our crcs and generation into the first page */
if (write && check_crcs &&
- (num_pages * sizeof(u32)) >= PAGE_SIZE)
+ (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
return -ENOSPC;
memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -547,7 +543,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
io_ctl_map_page(io_ctl, 0);
}
- memcpy(io_ctl->cur, bitmap, PAGE_SIZE);
+ copy_page(io_ctl->cur, bitmap);
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
if (io_ctl->index < io_ctl->num_pages)
io_ctl_map_page(io_ctl, 0);
@@ -607,7 +603,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
if (ret)
return ret;
- memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
+ copy_page(entry->bitmap, io_ctl->cur);
io_ctl_unmap_page(io_ctl);
return 0;
@@ -655,7 +651,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct btrfs_io_ctl io_ctl;
@@ -1123,13 +1119,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
{
int ret;
struct inode *inode = io_ctl->inode;
- struct btrfs_fs_info *fs_info;
if (!inode)
return 0;
- fs_info = btrfs_sb(inode->i_sb);
-
/* Flush the dirty pages in the cache file. */
ret = flush_dirty_cache(inode);
if (ret)
@@ -1145,7 +1138,7 @@ out:
BTRFS_I(inode)->generation = 0;
if (block_group) {
#ifdef DEBUG
- btrfs_err(fs_info,
+ btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
block_group->key.objectid);
#endif
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index b5950aacd697..d6736595ec57 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1236,7 +1236,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
if (ret)
goto abort;
- ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key);
+ ret = btrfs_del_root(trans, &free_space_root->root_key);
if (ret)
goto abort;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 12fcd8897c33..ffca2abf13d0 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -3,7 +3,6 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
@@ -244,8 +243,6 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
return;
while (1) {
- bool add_to_ctl = true;
-
spin_lock(rbroot_lock);
n = rb_first(rbroot);
if (!n) {
@@ -257,15 +254,14 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
- add_to_ctl = false;
- else if (info->offset + info->bytes > root->ino_cache_progress)
- count = root->ino_cache_progress - info->offset + 1;
+ count = 0;
else
- count = info->bytes;
+ count = min(root->ino_cache_progress - info->offset + 1,
+ info->bytes);
rb_erase(&info->offset_index, rbroot);
spin_unlock(rbroot_lock);
- if (add_to_ctl)
+ if (count)
__btrfs_add_free_space(root->fs_info, ctl,
info->offset, count);
kmem_cache_free(btrfs_free_space_cachep, info);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bcb9bb3..9357a19d2bff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -14,17 +14,13 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
-#include <linux/mount.h>
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
@@ -1443,8 +1439,7 @@ next_slot:
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else {
@@ -1752,7 +1747,7 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = root->fs_info;
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
@@ -1903,8 +1898,8 @@ static void btrfs_clear_bit_hook(void *private_data,
}
/*
- * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
- * we don't create bios that span stripes or chunks
+ * Merge bio hook, this must check the chunk tree to make sure we don't create
+ * bios that span stripes or chunks
*
* return 1 if page cannot be merged to bio
* return 0 if page can be merged to bio
@@ -1962,7 +1957,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num)
{
struct inode *inode = private_data;
@@ -2035,8 +2030,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
bio_offset, inode,
- btrfs_submit_bio_start,
- btrfs_submit_bio_done);
+ btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -3610,18 +3604,15 @@ static int btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto make_bad;
- }
+ if (!path)
+ return -ENOMEM;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
- if (ret > 0)
- ret = -ENOENT;
- goto make_bad;
+ btrfs_free_path(path);
+ return ret;
}
leaf = path->nodes[0];
@@ -3774,11 +3765,6 @@ cache_acl:
btrfs_sync_inode_flags_to_i_flags(inode);
return 0;
-
-make_bad:
- btrfs_free_path(path);
- make_bad_inode(inode);
- return ret;
}
/*
@@ -3984,7 +3970,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto err;
}
skip_backref:
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+ ret = btrfs_delete_delayed_dir_index(trans, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto err;
@@ -4087,11 +4073,10 @@ out:
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len)
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
@@ -4124,9 +4109,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = btrfs_del_root_ref(trans, fs_info, objectid,
- root->root_key.objectid, dir_ino,
- &index, name, name_len);
+ ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
+ dir_ino, &index, name, name_len);
if (ret < 0) {
if (ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
@@ -4145,12 +4129,11 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
index = key.offset;
}
btrfs_release_path(path);
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
+ ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4243,9 +4226,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ if (objectid < btrfs_ino(entry))
node = node->rb_left;
- else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ else if (objectid > btrfs_ino(entry))
node = node->rb_right;
else
break;
@@ -4253,7 +4236,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+ if (objectid <= btrfs_ino(entry)) {
node = prev;
break;
}
@@ -4262,7 +4245,7 @@ again:
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+ objectid = btrfs_ino(entry) + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
@@ -4343,10 +4326,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
- ret = btrfs_unlink_subvol(trans, root, dir,
- dest->root_key.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
+ dentry->d_name.name, dentry->d_name.len);
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
@@ -4441,7 +4422,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- err = btrfs_unlink_subvol(trans, root, dir,
+ err = btrfs_unlink_subvol(trans, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
dentry->d_name.len);
@@ -4643,8 +4624,8 @@ search_again:
BTRFS_I(inode), leaf, fi,
found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- item_end += btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ item_end += btrfs_file_extent_ram_bytes(leaf,
+ fi);
trace_btrfs_truncate_show_fi_inline(
BTRFS_I(inode), leaf, fi, path->slots[0],
@@ -5615,9 +5596,9 @@ static void inode_tree_add(struct inode *inode)
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
- if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ if (ino < btrfs_ino(entry))
p = &parent->rb_left;
- else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ else if (ino > btrfs_ino(entry))
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
@@ -5708,16 +5689,21 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
int ret;
ret = btrfs_read_locked_inode(inode);
- if (!is_bad_inode(inode)) {
+ if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
if (new)
*new = 1;
} else {
- unlock_new_inode(inode);
- iput(inode);
- ASSERT(ret < 0);
- inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
+ iget_failed(inode);
+ /*
+ * ret > 0 can come from btrfs_search_slot called by
+ * btrfs_read_locked_inode, this means the inode item
+ * was not found.
+ */
+ if (ret > 0)
+ ret = -ENOENT;
+ inode = ERR_PTR(ret);
}
}
@@ -5745,7 +5731,7 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
- BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
@@ -6027,32 +6013,6 @@ err:
return ret;
}
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) &&
- btrfs_is_free_space_inode(BTRFS_I(inode)))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans);
- }
- return ret;
-}
-
/*
* This is somewhat expensive, updating the tree every time the
* inode changes. But, it is most likely to find the inode in cache.
@@ -6335,8 +6295,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
location->type = BTRFS_INODE_ITEM_KEY;
ret = btrfs_insert_inode_locked(inode);
- if (ret < 0)
+ if (ret < 0) {
+ iput(inode);
goto fail;
+ }
path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
@@ -6349,7 +6311,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
- BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -6395,12 +6357,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
return inode;
fail_unlock:
- unlock_new_inode(inode);
+ discard_new_inode(inode);
fail:
if (dir && name)
BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path);
- iput(inode);
return ERR_PTR(ret);
}
@@ -6419,7 +6380,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = parent_inode->root;
@@ -6435,7 +6395,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
}
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_add_root_ref(trans, fs_info, key.objectid,
+ ret = btrfs_add_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
index, name, name_len);
} else if (add_backref) {
@@ -6471,7 +6431,7 @@ fail_dir_item:
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
u64 local_index;
int err;
- err = btrfs_del_root_ref(trans, fs_info, key.objectid,
+ err = btrfs_del_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
&local_index, name, name_len);
@@ -6505,7 +6465,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
int err;
- int drop_inode = 0;
u64 objectid;
u64 index = 0;
@@ -6527,6 +6486,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
@@ -6541,31 +6501,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
- if (err) {
- goto out_unlock_inode;
- } else {
- btrfs_update_inode(trans, root, inode);
- d_instantiate_new(dentry, inode);
- }
+ if (err)
+ goto out_unlock;
+
+ btrfs_update_inode(trans, root, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
- if (drop_inode) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
return err;
-
-out_unlock_inode:
- drop_inode = 1;
- unlock_new_inode(inode);
- goto out_unlock;
-
}
static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -6575,7 +6528,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
- int drop_inode_on_err = 0;
int err;
u64 objectid;
u64 index = 0;
@@ -6598,9 +6550,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
- drop_inode_on_err = 1;
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -6613,33 +6565,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
- if (err && drop_inode_on_err) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_unlock_inode:
- unlock_new_inode(inode);
- goto out_unlock;
-
}
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6748,6 +6695,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_fail;
}
@@ -6758,34 +6706,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_fail_inode;
+ goto out_fail;
btrfs_i_size_write(BTRFS_I(inode), 0);
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_fail_inode;
+ goto out_fail;
err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
dentry->d_name.name,
dentry->d_name.len, 0, index);
if (err)
- goto out_fail_inode;
+ goto out_fail;
d_instantiate_new(dentry, inode);
drop_on_err = 0;
out_fail:
btrfs_end_transaction(trans);
- if (drop_on_err) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_fail_inode:
- unlock_new_inode(inode);
- goto out_fail;
}
static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6847,7 +6791,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
int err = 0;
u64 extent_start = 0;
@@ -6943,7 +6887,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
extent_start);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+
+ size = btrfs_file_extent_ram_bytes(leaf, item);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
@@ -6994,7 +6939,7 @@ next:
if (new_inline)
goto out;
- size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+ size = btrfs_file_extent_ram_bytes(leaf, item);
extent_offset = page_offset(page) + pg_offset - extent_start;
copy_size = min_t(u64, PAGE_SIZE - pg_offset,
size - extent_offset);
@@ -7865,7 +7810,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
isector >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
pgoff, isector, repair_endio, repair_arg);
- bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+ bio->bi_opf = REQ_OP_READ | read_mode;
btrfs_debug(BTRFS_I(inode)->root->fs_info,
"repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
@@ -8299,8 +8244,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
if (write && async_submit) {
ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
file_offset, inode,
- btrfs_submit_bio_start_direct_io,
- btrfs_submit_bio_done);
+ btrfs_submit_bio_start_direct_io);
goto err;
} else if (write) {
/*
@@ -9540,8 +9484,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, root, old_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else { /* src is an inode */
@@ -9560,8 +9503,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, dest, new_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
} else { /* dest is an inode */
@@ -9821,7 +9763,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+ ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else {
@@ -9843,8 +9785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
- ret = btrfs_unlink_subvol(trans, dest, new_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
BUG_ON(new_inode->i_nlink == 0);
@@ -10115,7 +10056,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
struct btrfs_key key;
struct inode *inode = NULL;
int err;
- int drop_inode = 0;
u64 objectid;
u64 index = 0;
int name_len;
@@ -10148,6 +10088,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
objectid, S_IFLNK|S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
@@ -10164,12 +10105,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
- goto out_unlock_inode;
+ goto out_unlock;
}
key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = 0;
@@ -10179,7 +10120,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
datasize);
if (err) {
btrfs_free_path(path);
- goto out_unlock_inode;
+ goto out_unlock;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -10211,26 +10152,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (!err)
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
BTRFS_I(inode), 0, index);
- if (err) {
- drop_inode = 1;
- goto out_unlock_inode;
- }
+ if (err)
+ goto out_unlock;
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
- if (drop_inode) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_unlock_inode:
- drop_inode = 1;
- unlock_new_inode(inode);
- goto out_unlock;
}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -10439,14 +10373,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = btrfs_init_inode_security(trans, inode, dir, NULL);
if (ret)
- goto out_inode;
+ goto out;
ret = btrfs_update_inode(trans, root, inode);
if (ret)
- goto out_inode;
+ goto out;
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
- goto out_inode;
+ goto out;
/*
* We set number of links to 0 in btrfs_new_inode(), and here we set
@@ -10456,21 +10390,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
* d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
*/
set_nlink(inode, 1);
- unlock_new_inode(inode);
d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
mark_inode_dirty(inode);
-
out:
btrfs_end_transaction(trans);
- if (ret)
- iput(inode);
+ if (ret && inode)
+ discard_new_inode(inode);
btrfs_btree_balance_dirty(fs_info);
return ret;
-
-out_inode:
- unlock_new_inode(inode);
- goto out;
-
}
__attribute__((const))
@@ -10479,12 +10407,6 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
return -EAGAIN;
}
-static struct btrfs_fs_info *iotree_fs_info(void *private_data)
-{
- struct inode *inode = private_data;
- return btrfs_sb(inode->i_sb);
-}
-
static void btrfs_check_extent_io_range(void *private_data, const char *caller,
u64 start, u64 end)
{
@@ -10499,9 +10421,9 @@ static void btrfs_check_extent_io_range(void *private_data, const char *caller,
}
}
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end)
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
- struct inode *inode = private_data;
+ struct inode *inode = tree->private_data;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
@@ -10557,10 +10479,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btrfs_submit_bio_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
- .merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
- .tree_fs_info = iotree_fs_info,
- .set_range_writeback = btrfs_set_range_writeback,
/* optional callbacks */
.fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b077544b5232..d3a5d2a41e5f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5,23 +5,18 @@
#include <linux/kernel.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
-#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
-#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/mm.h>
@@ -606,7 +601,7 @@ static noinline int create_subvol(struct inode *dir,
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
+ ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
if (ret)
goto fail;
@@ -616,14 +611,6 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, objectid);
-
- write_extent_buffer_fsid(leaf, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
btrfs_mark_buffer_dirty(leaf);
inode_item = &root_item->inode;
@@ -711,8 +698,7 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- ret = btrfs_add_root_ref(trans, fs_info,
- objectid, root->root_key.objectid,
+ ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
@@ -2507,8 +2493,8 @@ out:
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
void __user *argp)
{
- struct btrfs_ioctl_ino_lookup_args *args;
- struct inode *inode;
+ struct btrfs_ioctl_ino_lookup_args *args;
+ struct inode *inode;
int ret = 0;
args = memdup_user(argp, sizeof(*args));
@@ -2941,8 +2927,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = btrfs_defrag_root(root);
break;
case S_IFREG:
- if (!(file->f_mode & FMODE_WRITE)) {
- ret = -EINVAL;
+ /*
+ * Note that this does not check the file descriptor for write
+ * access. This prevents defragmenting executables that are
+ * running and allows defrag on files open in read-only mode.
+ */
+ if (!capable(CAP_SYS_ADMIN) &&
+ inode_permission(inode, MAY_WRITE)) {
+ ret = -EPERM;
goto out;
}
@@ -3165,10 +3157,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name) {
- struct rcu_string *name;
-
- name = rcu_dereference(dev->name);
- strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
+ strncpy(di_args->path, rcu_str_deref(dev->name),
+ sizeof(di_args->path) - 1);
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
@@ -5118,9 +5108,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_ctl_args *sa;
- struct btrfs_trans_handle *trans = NULL;
int ret;
- int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5136,28 +5124,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
down_write(&fs_info->subvol_sem);
- trans = btrfs_start_transaction(fs_info->tree_root, 2);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
- ret = btrfs_quota_enable(trans, fs_info);
+ ret = btrfs_quota_enable(fs_info);
break;
case BTRFS_QUOTA_CTL_DISABLE:
- ret = btrfs_quota_disable(trans, fs_info);
+ ret = btrfs_quota_disable(fs_info);
break;
default:
ret = -EINVAL;
break;
}
- err = btrfs_commit_transaction(trans);
- if (err && !ret)
- ret = err;
-out:
kfree(sa);
up_write(&fs_info->subvol_sem);
drop_write:
@@ -5195,15 +5174,13 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
}
if (sa->assign) {
- ret = btrfs_add_qgroup_relation(trans, fs_info,
- sa->src, sa->dst);
+ ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
} else {
- ret = btrfs_del_qgroup_relation(trans, fs_info,
- sa->src, sa->dst);
+ ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
}
/* update qgroup status and info */
- err = btrfs_run_qgroups(trans, fs_info);
+ err = btrfs_run_qgroups(trans);
if (err < 0)
btrfs_handle_fs_error(fs_info, err,
"failed to update qgroup status and info");
@@ -5221,7 +5198,6 @@ drop_write:
static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_create_args *sa;
struct btrfs_trans_handle *trans;
@@ -5253,9 +5229,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
}
if (sa->create) {
- ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid);
+ ret = btrfs_create_qgroup(trans, sa->qgroupid);
} else {
- ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid);
+ ret = btrfs_remove_qgroup(trans, sa->qgroupid);
}
err = btrfs_end_transaction(trans);
@@ -5272,7 +5248,6 @@ drop_write:
static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_limit_args *sa;
struct btrfs_trans_handle *trans;
@@ -5305,7 +5280,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
qgroupid = root->root_key.objectid;
}
- ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim);
+ ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
err = btrfs_end_transaction(trans);
if (err && !ret)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2e1a1694a33d..0c4ef208b8b9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,7 +6,6 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
-#include <linux/pagevec.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
@@ -421,129 +420,6 @@ out:
return ret == 0;
}
-/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
- struct list_head *logged_list,
- const loff_t start,
- const loff_t end)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct btrfs_ordered_extent *ordered;
- struct rb_node *n;
- struct rb_node *prev;
-
- tree = &inode->ordered_tree;
- spin_lock_irq(&tree->lock);
- n = __tree_search(&tree->tree, end, &prev);
- if (!n)
- n = prev;
- for (; n; n = rb_prev(n)) {
- ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
- if (ordered->file_offset > end)
- continue;
- if (entry_end(ordered) <= start)
- break;
- if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
- continue;
- list_add(&ordered->log_list, logged_list);
- refcount_inc(&ordered->refs);
- }
- spin_unlock_irq(&tree->lock);
-}
-
-void btrfs_put_logged_extents(struct list_head *logged_list)
-{
- struct btrfs_ordered_extent *ordered;
-
- while (!list_empty(logged_list)) {
- ordered = list_first_entry(logged_list,
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- btrfs_put_ordered_extent(ordered);
- }
-}
-
-void btrfs_submit_logged_extents(struct list_head *logged_list,
- struct btrfs_root *log)
-{
- int index = log->log_transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- list_splice_tail(logged_list, &log->logged_list[index]);
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, u64 transid)
-{
- struct btrfs_ordered_extent *ordered;
- int index = transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- while (!list_empty(&log->logged_list[index])) {
- struct inode *inode;
- ordered = list_first_entry(&log->logged_list[index],
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- inode = ordered->inode;
- spin_unlock_irq(&log->log_extents_lock[index]);
-
- if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
- u64 start = ordered->file_offset;
- u64 end = ordered->file_offset + ordered->len - 1;
-
- WARN_ON(!inode);
- filemap_fdatawrite_range(inode->i_mapping, start, end);
- }
- wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
- &ordered->flags));
-
- /*
- * In order to keep us from losing our ordered extent
- * information when committing the transaction we have to make
- * sure that any logged extents are completed when we go to
- * commit the transaction. To do this we simply increase the
- * current transactions pending_ordered counter and decrement it
- * when the ordered extent completes.
- */
- if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
- struct btrfs_ordered_inode_tree *tree;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock_irq(&tree->lock);
- if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
- set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
- atomic_inc(&trans->transaction->pending_ordered);
- }
- spin_unlock_irq(&tree->lock);
- }
- btrfs_put_ordered_extent(ordered);
- spin_lock_irq(&log->log_extents_lock[index]);
- }
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
-{
- struct btrfs_ordered_extent *ordered;
- int index = transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- while (!list_empty(&log->logged_list[index])) {
- ordered = list_first_entry(&log->logged_list[index],
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- spin_unlock_irq(&log->log_extents_lock[index]);
- btrfs_put_ordered_extent(ordered);
- spin_lock_irq(&log->log_extents_lock[index]);
- }
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
/*
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
@@ -913,20 +789,6 @@ out:
return entry;
}
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
- u64 file_offset,
- u64 len)
-{
- struct btrfs_ordered_extent *oe;
-
- oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len);
- if (oe) {
- btrfs_put_ordered_extent(oe);
- return true;
- }
- return false;
-}
-
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3be443fb3001..02d813aaa261 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -54,15 +54,11 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
* has done its due diligence in updating
* the isize. */
-#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
- ordered extent */
-#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
+#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */
-#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
- * in the logging code. */
-#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to
* complete in the current transaction. */
-#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */
struct btrfs_ordered_extent {
/* logical offset in the file */
@@ -182,9 +178,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
struct btrfs_inode *inode,
u64 file_offset,
u64 len);
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
- u64 file_offset,
- u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
@@ -193,16 +186,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
- struct list_head *logged_list,
- const loff_t start,
- const loff_t end);
-void btrfs_put_logged_extents(struct list_head *logged_list);
-void btrfs_submit_logged_extents(struct list_head *logged_list,
- struct btrfs_root *log);
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, u64 transid);
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void __cold ordered_data_exit(void);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index a4e11cf04671..df49931ffe92 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,17 +52,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
u64 offset;
int ref_index = 0;
- if (item_size < sizeof(*ei)) {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
- pr_info("\t\textent refs %u\n",
- btrfs_extent_refs_v0(eb, ei0));
- return;
-#else
- BUG();
-#endif
+ if (unlikely(item_size < sizeof(*ei))) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -133,20 +125,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
WARN_ON(ptr > end);
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
-{
- struct btrfs_extent_ref_v0 *ref0;
-
- ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
- printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n",
- btrfs_ref_root_v0(eb, ref0),
- btrfs_ref_generation_v0(eb, ref0),
- btrfs_ref_objectid_v0(eb, ref0),
- (unsigned long)btrfs_ref_count_v0(eb, ref0));
-}
-#endif
-
static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
@@ -267,8 +245,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
- pr_info("\t\tinline extent data size %u\n",
- btrfs_file_extent_inline_len(l, i, fi));
+ pr_info("\t\tinline extent data size %llu\n",
+ btrfs_file_extent_ram_bytes(l, fi));
break;
}
pr_info("\t\textent data disk bytenr %llu nr %llu\n",
@@ -280,11 +258,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_EXTENT_REF_V0_KEY:
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- print_extent_ref_v0(l, i);
-#else
- BUG();
-#endif
+ btrfs_print_v0_err(fs_info);
+ btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c25dc47210a3..4353bb69bb86 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -530,11 +530,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
fs_info->qgroup_ulist = NULL;
}
-static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root,
- u64 src, u64 dst)
+static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -554,11 +554,11 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
return ret;
}
-static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root,
- u64 src, u64 dst)
+static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -653,10 +653,10 @@ out:
return ret;
}
-static int del_qgroup_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root, u64 qgroupid)
+static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -700,9 +700,9 @@ out:
}
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_qgroup *qgroup)
{
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -718,7 +718,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -742,9 +742,10 @@ out:
}
static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_qgroup *qgroup)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -752,7 +753,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
int ret;
int slot;
- if (btrfs_is_testing(root->fs_info))
+ if (btrfs_is_testing(fs_info))
return 0;
key.objectid = 0;
@@ -763,7 +764,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -786,10 +787,10 @@ out:
return ret;
}
-static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_root *root)
+static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -805,7 +806,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -875,8 +876,7 @@ out:
return ret;
}
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -886,6 +886,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_qgroup *qgroup = NULL;
+ struct btrfs_trans_handle *trans = NULL;
int ret = 0;
int slot;
@@ -893,9 +894,25 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
if (fs_info->quota_root)
goto out;
+ /*
+ * 1 for quota root item
+ * 1 for BTRFS_QGROUP_STATUS item
+ *
+ * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
+ * per subvolume. However those are not currently reserved since it
+ * would be a lot of overkill.
+ */
+ trans = btrfs_start_transaction(tree_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
+
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -906,12 +923,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
BTRFS_QUOTA_TREE_OBJECTID);
if (IS_ERR(quota_root)) {
ret = PTR_ERR(quota_root);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out_free_root;
}
@@ -921,8 +940,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
leaf = path->nodes[0];
ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -944,9 +965,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
if (ret > 0)
goto out_add_root;
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
-
+ }
while (1) {
slot = path->slots[0];
@@ -956,18 +978,23 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
if (found_key.type == BTRFS_ROOT_REF_KEY) {
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
qgroup = add_qgroup_rb(fs_info, found_key.offset);
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
}
ret = btrfs_next_item(tree_root, path);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
if (ret)
break;
}
@@ -975,18 +1002,28 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
out_add_root:
btrfs_release_path(path);
ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
spin_unlock(&fs_info->qgroup_lock);
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret) {
+ trans = NULL;
+ goto out_free_path;
+ }
+
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
@@ -1006,20 +1043,35 @@ out:
if (ret) {
ulist_free(fs_info->qgroup_ulist);
fs_info->qgroup_ulist = NULL;
+ if (trans)
+ btrfs_end_transaction(trans);
}
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
+ struct btrfs_trans_handle *trans = NULL;
int ret = 0;
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
+
+ /*
+ * 1 For the root item
+ *
+ * We should also reserve enough items for the quota tree deletion in
+ * btrfs_clean_quota_tree but this is not done.
+ */
+ trans = btrfs_start_transaction(fs_info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
@@ -1031,12 +1083,16 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
btrfs_free_qgroup_config(fs_info);
ret = btrfs_clean_quota_tree(trans, quota_root);
- if (ret)
- goto out;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto end_trans;
+ }
- ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
- if (ret)
- goto out;
+ ret = btrfs_del_root(trans, &quota_root->root_key);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto end_trans;
+ }
list_del(&quota_root->dirty_list);
@@ -1048,6 +1104,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
free_extent_buffer(quota_root->node);
free_extent_buffer(quota_root->commit_root);
kfree(quota_root);
+
+end_trans:
+ ret = btrfs_end_transaction(trans);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
@@ -1177,9 +1236,10 @@ out:
return ret;
}
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
@@ -1216,13 +1276,13 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
}
}
- ret = add_qgroup_relation_item(trans, quota_root, src, dst);
+ ret = add_qgroup_relation_item(trans, src, dst);
if (ret)
goto out;
- ret = add_qgroup_relation_item(trans, quota_root, dst, src);
+ ret = add_qgroup_relation_item(trans, dst, src);
if (ret) {
- del_qgroup_relation_item(trans, quota_root, src, dst);
+ del_qgroup_relation_item(trans, src, dst);
goto out;
}
@@ -1240,9 +1300,10 @@ out:
return ret;
}
-static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
@@ -1276,8 +1337,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
ret = -ENOENT;
goto out;
exist:
- ret = del_qgroup_relation_item(trans, quota_root, src, dst);
- err = del_qgroup_relation_item(trans, quota_root, dst, src);
+ ret = del_qgroup_relation_item(trans, src, dst);
+ err = del_qgroup_relation_item(trans, dst, src);
if (err && !ret)
ret = err;
@@ -1290,21 +1351,22 @@ out:
return ret;
}
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
mutex_lock(&fs_info->qgroup_ioctl_lock);
- ret = __del_qgroup_relation(trans, fs_info, src, dst);
+ ret = __del_qgroup_relation(trans, src, dst);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
@@ -1336,9 +1398,9 @@ out:
return ret;
}
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *list;
@@ -1362,16 +1424,15 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
goto out;
}
}
- ret = del_qgroup_item(trans, quota_root, qgroupid);
+ ret = del_qgroup_item(trans, qgroupid);
if (ret && ret != -ENOENT)
goto out;
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list, next_group);
- ret = __del_qgroup_relation(trans, fs_info,
- qgroupid,
- list->group->qgroupid);
+ ret = __del_qgroup_relation(trans, qgroupid,
+ list->group->qgroupid);
if (ret)
goto out;
}
@@ -1384,10 +1445,10 @@ out:
return ret;
}
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
@@ -1451,7 +1512,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+ ret = update_qgroup_limit_item(trans, qgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_info(fs_info, "unable to update quota limit for %llu",
@@ -1519,10 +1580,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
- gfp_t gfp_flag)
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, gfp_t gfp_flag)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup_extent_record *record;
struct btrfs_delayed_ref_root *delayed_refs;
int ret;
@@ -1530,8 +1591,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
|| bytenr == 0 || num_bytes == 0)
return 0;
- if (WARN_ON(trans == NULL))
- return -EINVAL;
record = kmalloc(sizeof(*record), gfp_flag);
if (!record)
return -ENOMEM;
@@ -1552,9 +1611,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int nr = btrfs_header_nritems(eb);
int i, extent_type, ret;
struct btrfs_key key;
@@ -1584,8 +1643,8 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
- ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
- num_bytes, GFP_NOFS);
+ ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
+ GFP_NOFS);
if (ret)
return ret;
}
@@ -1655,11 +1714,10 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
}
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct extent_buffer *root_eb,
u64 root_gen, int root_level)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
int level;
struct extent_buffer *eb = root_eb;
@@ -1678,7 +1736,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
}
if (root_level == 0) {
- ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
+ ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
goto out;
}
@@ -1736,8 +1794,7 @@ walk_down:
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
- ret = btrfs_qgroup_trace_extent(trans, fs_info,
- child_bytenr,
+ ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
fs_info->nodesize,
GFP_NOFS);
if (ret)
@@ -1745,8 +1802,8 @@ walk_down:
}
if (level == 0) {
- ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
- path->nodes[level]);
+ ret = btrfs_qgroup_trace_leaf_items(trans,
+ path->nodes[level]);
if (ret)
goto out;
@@ -1981,12 +2038,11 @@ static int maybe_fs_roots(struct ulist *roots)
return is_fstree(unode->val);
}
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 bytenr, u64 num_bytes,
- struct ulist *old_roots, struct ulist *new_roots)
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, struct ulist *old_roots,
+ struct ulist *new_roots)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct ulist *qgroups = NULL;
struct ulist *tmp = NULL;
u64 seq;
@@ -2116,9 +2172,10 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
ulist_del(record->old_roots, qgroup_to_skip,
0);
}
- ret = btrfs_qgroup_account_extent(trans, fs_info,
- record->bytenr, record->num_bytes,
- record->old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(trans, record->bytenr,
+ record->num_bytes,
+ record->old_roots,
+ new_roots);
record->old_roots = NULL;
new_roots = NULL;
}
@@ -2136,9 +2193,9 @@ cleanup:
/*
* called from commit_transaction. Writes all changed qgroups to disk.
*/
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
@@ -2152,11 +2209,11 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_qgroup, dirty);
list_del_init(&qgroup->dirty);
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_info_item(trans, quota_root, qgroup);
+ ret = update_qgroup_info_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+ ret = update_qgroup_limit_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2168,7 +2225,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_status_item(trans, fs_info, quota_root);
+ ret = update_qgroup_status_item(trans);
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2181,13 +2238,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
* cause a transaction abort so we take extra care here to only error
* when a readonly fs is a reasonable outcome.
*/
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
- struct btrfs_qgroup_inherit *inherit)
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+ u64 objectid, struct btrfs_qgroup_inherit *inherit)
{
int ret = 0;
int i;
u64 *i_qgroups;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
@@ -2229,22 +2286,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- if (srcid) {
- struct btrfs_root *srcroot;
- struct btrfs_key srckey;
-
- srckey.objectid = srcid;
- srckey.type = BTRFS_ROOT_ITEM_KEY;
- srckey.offset = (u64)-1;
- srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
- if (IS_ERR(srcroot)) {
- ret = PTR_ERR(srcroot);
- goto out;
- }
-
- level_size = fs_info->nodesize;
- }
-
/*
* add qgroup to all inherited groups
*/
@@ -2253,12 +2294,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
if (*i_qgroups == 0)
continue;
- ret = add_qgroup_relation_item(trans, quota_root,
- objectid, *i_qgroups);
+ ret = add_qgroup_relation_item(trans, objectid,
+ *i_qgroups);
if (ret && ret != -EEXIST)
goto out;
- ret = add_qgroup_relation_item(trans, quota_root,
- *i_qgroups, objectid);
+ ret = add_qgroup_relation_item(trans, *i_qgroups,
+ objectid);
if (ret && ret != -EEXIST)
goto out;
}
@@ -2281,7 +2322,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+ ret = update_qgroup_limit_item(trans, dstgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_info(fs_info,
@@ -2301,6 +2342,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
* our counts don't go crazy, so at this point the only
* difference between the two roots should be the root node.
*/
+ level_size = fs_info->nodesize;
dstgroup->rfer = srcgroup->rfer;
dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
dstgroup->excl = level_size;
@@ -2598,10 +2640,10 @@ static bool is_last_leaf(struct btrfs_path *path)
* returns < 0 on error, 0 when more leafs are to be scanned.
* returns 1 when done.
*/
-static int
-qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- struct btrfs_trans_handle *trans)
+static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_key found;
struct extent_buffer *scratch_leaf = NULL;
struct ulist *roots = NULL;
@@ -2669,8 +2711,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
- ret = btrfs_qgroup_account_extent(trans, fs_info,
- found.objectid, num_bytes, NULL, roots);
+ ret = btrfs_qgroup_account_extent(trans, found.objectid,
+ num_bytes, NULL, roots);
if (ret < 0)
goto out;
}
@@ -2716,7 +2758,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
err = -EINTR;
} else {
- err = qgroup_rescan_leaf(fs_info, path, trans);
+ err = qgroup_rescan_leaf(trans, path);
}
if (err > 0)
btrfs_commit_transaction(trans);
@@ -2751,7 +2793,7 @@ out:
err);
goto done;
}
- ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+ ret = update_qgroup_status_item(trans);
if (ret < 0) {
err = ret;
btrfs_err(fs_info, "fail to update qgroup status: %d", err);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d60dd06445ce..54b8bb282c0e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -141,24 +141,19 @@ struct btrfs_qgroup {
#define QGROUP_RELEASE (1<<1)
#define QGROUP_FREE (1<<2)
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
bool interruptible);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
@@ -217,9 +212,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
* Return <0 for error, like memory allocation failure or invalid parameter
* (NULL trans)
*/
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
- gfp_t gfp_flag);
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, gfp_t gfp_flag);
/*
* Inform qgroup to trace all leaf items of data
@@ -228,7 +222,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
* Return <0 for error(ENOMEM)
*/
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
/*
* Inform qgroup to trace a whole subtree, including all its child tree
@@ -241,20 +234,15 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
* Return <0 for error(ENOMEM or tree search error)
*/
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct extent_buffer *root_eb,
u64 root_gen, int root_level);
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 bytenr, u64 num_bytes,
- struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, struct ulist *old_roots,
+ struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
- struct btrfs_qgroup_inherit *inherit);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+ u64 objectid, struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes,
enum btrfs_qgroup_rsv_type type);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5e4ad134b9ad..df41d7049936 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -5,32 +5,19 @@
*/
#include <linux/sched.h>
-#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
-#include <linux/ratelimit.h>
-#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
-#include <asm/div64.h>
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
-#include "check-integrity.h"
-#include "rcu-string.h"
/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT 1
@@ -175,8 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
-static void async_read_rebuild(struct btrfs_raid_bio *rbio);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
@@ -185,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check);
-static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+static void scrub_parity_work(struct btrfs_work *work);
+
+static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+{
+ btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+ btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+}
/*
* the stripe hash table is used for locking, and to collect
@@ -260,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
s = kmap(rbio->bio_pages[i]);
d = kmap(rbio->stripe_pages[i]);
- memcpy(d, s, PAGE_SIZE);
+ copy_page(d, s);
kunmap(rbio->bio_pages[i]);
kunmap(rbio->stripe_pages[i]);
@@ -516,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
}
/*
- * returns true if the bio list inside this rbio
- * covers an entire stripe (no rmw required).
- * Must be called with the bio list lock held, or
- * at a time when you know it is impossible to add
- * new bios into the list
+ * Returns true if the bio list inside this rbio covers an entire stripe (no
+ * rmw required).
*/
-static int __rbio_is_full(struct btrfs_raid_bio *rbio)
+static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
+ unsigned long flags;
unsigned long size = rbio->bio_list_bytes;
int ret = 1;
+ spin_lock_irqsave(&rbio->bio_list_lock, flags);
if (size != rbio->nr_data * rbio->stripe_len)
ret = 0;
-
BUG_ON(size > rbio->nr_data * rbio->stripe_len);
- return ret;
-}
-
-static int rbio_is_full(struct btrfs_raid_bio *rbio)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&rbio->bio_list_lock, flags);
- ret = __rbio_is_full(rbio);
spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
+
return ret;
}
@@ -812,16 +792,16 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
spin_unlock_irqrestore(&h->lock, flags);
if (next->operation == BTRFS_RBIO_READ_REBUILD)
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
steal_rbio(rbio, next);
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
- async_rmw_stripe(next);
+ start_async_work(next, rmw_work);
} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
steal_rbio(rbio, next);
- async_scrub_parity(next);
+ start_async_work(next, scrub_parity_work);
}
goto done_nolock;
@@ -1275,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -1343,7 +1323,7 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -1508,20 +1488,6 @@ cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
-static void async_read_rebuild(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- read_rebuild_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
/*
* the stripe must be locked by the caller. It will
* unlock after all the writes are done
@@ -1599,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -1652,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio)
ret = lock_stripe_add(rbio);
if (ret == 0)
- async_rmw_stripe(rbio);
+ start_async_work(rbio, rmw_work);
return 0;
}
@@ -1720,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug)
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
+ int ret;
+
/* we have a full stripe, send it down */
- full_stripe_write(cur);
+ ret = full_stripe_write(cur);
+ BUG_ON(ret);
continue;
}
if (last) {
@@ -1941,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
BUG_ON(failb != -1);
pstripe:
/* Copy parity block into failed block to start with */
- memcpy(pointers[faila],
- pointers[rbio->nr_data],
- PAGE_SIZE);
+ copy_page(pointers[faila], pointers[rbio->nr_data]);
/* rearrange the pointer array */
p = pointers[faila];
@@ -2145,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2448,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -2456,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
parity = kmap(p);
if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
- memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
+ copy_page(parity, pointers[rbio->scrubp]);
else
/* Parity is right, needn't writeback */
bitmap_clear(rbio->dbitmap, pagenr, 1);
@@ -2517,7 +2484,7 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -2699,7 +2666,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2728,18 +2695,10 @@ static void scrub_parity_work(struct btrfs_work *work)
raid56_parity_scrub_stripe(rbio);
}
-static void async_scrub_parity(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- scrub_parity_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_scrub_parity(rbio);
+ start_async_work(rbio, scrub_parity_work);
}
/* The following code is used for dev replace of a missing RAID 5/6 device. */
@@ -2781,5 +2740,5 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_read_rebuild(rbio);
+ start_async_work(rbio, read_rebuild_work);
}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 40f1bcef394d..dec14b739b10 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -7,7 +7,6 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
-#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
@@ -355,7 +354,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
dev = bbio->stripes[nzones].dev;
/* cannot read ahead on missing device. */
- if (!dev->bdev)
+ if (!dev->bdev)
continue;
zone = reada_find_zone(dev, logical, bbio);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 879b76fa881a..8783a1776540 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -586,29 +586,6 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
return btrfs_get_fs_root(fs_info, &key, false);
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static noinline_for_stack
-struct btrfs_root *find_tree_root(struct reloc_control *rc,
- struct extent_buffer *leaf,
- struct btrfs_extent_ref_v0 *ref0)
-{
- struct btrfs_root *root;
- u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
- u64 generation = btrfs_ref_generation_v0(leaf, ref0);
-
- BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
-
- root = read_fs_root(rc->extent_root->fs_info, root_objectid);
- BUG_ON(IS_ERR(root));
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
- generation != btrfs_root_generation(&root->root_item))
- return NULL;
-
- return root;
-}
-#endif
-
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
unsigned long *ptr, unsigned long *end)
@@ -621,12 +598,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
btrfs_item_key_to_cpu(leaf, &key, slot);
item_size = btrfs_item_size_nr(leaf, slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+ btrfs_print_v0_err(leaf->fs_info);
+ btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
return 1;
}
-#endif
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
WARN_ON(!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK));
@@ -792,7 +768,7 @@ again:
type = btrfs_get_extent_inline_ref_type(eb, iref,
BTRFS_REF_TYPE_BLOCK);
if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EINVAL;
+ err = -EUCLEAN;
goto out;
}
key.type = type;
@@ -811,29 +787,7 @@ again:
goto next;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
- if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(eb, path1->slots[0],
- struct btrfs_extent_ref_v0);
- if (key.objectid == key.offset) {
- root = find_tree_root(rc, eb, ref0);
- if (root && !should_ignore_root(root))
- cur->root = root;
- else
- list_add(&cur->list, &useless);
- break;
- }
- if (is_cowonly_root(btrfs_ref_root_v0(eb,
- ref0)))
- cur->cowonly = 1;
- }
-#else
- ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
-#endif
if (key.objectid == key.offset) {
/*
* only root blocks of reloc trees use
@@ -876,6 +830,12 @@ again:
edge->node[UPPER] = upper;
goto next;
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ err = -EINVAL;
+ btrfs_print_v0_err(rc->extent_root->fs_info);
+ btrfs_handle_fs_error(rc->extent_root->fs_info, err,
+ NULL);
+ goto out;
} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
goto next;
}
@@ -1321,18 +1281,19 @@ static void __del_reloc_root(struct btrfs_root *root)
struct mapping_node *node = NULL;
struct reloc_control *rc = fs_info->reloc_ctl;
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct mapping_node, rb_node);
- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ if (rc) {
+ spin_lock(&rc->reloc_root_tree.lock);
+ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+ root->node->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct mapping_node, rb_node);
+ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ }
+ spin_unlock(&rc->reloc_root_tree.lock);
+ if (!node)
+ return;
+ BUG_ON((struct btrfs_root *)node->data != root);
}
- spin_unlock(&rc->reloc_root_tree.lock);
-
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&fs_info->trans_lock);
list_del_init(&root->root_list);
@@ -1918,13 +1879,12 @@ again:
* and tree block numbers, if current trans doesn't free
* data reloc tree inode.
*/
- ret = btrfs_qgroup_trace_subtree(trans, src, parent,
+ ret = btrfs_qgroup_trace_subtree(trans, parent,
btrfs_header_generation(parent),
btrfs_header_level(parent));
if (ret < 0)
break;
- ret = btrfs_qgroup_trace_subtree(trans, dest,
- path->nodes[level],
+ ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
btrfs_header_generation(path->nodes[level]),
btrfs_header_level(path->nodes[level]));
if (ret < 0)
@@ -3333,48 +3293,6 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
return 0;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int get_ref_objectid_v0(struct reloc_control *rc,
- struct btrfs_path *path,
- struct btrfs_key *extent_key,
- u64 *ref_objectid, int *path_change)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_extent_ref_v0 *ref0;
- int ret;
- int slot;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- while (1) {
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0);
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (path_change)
- *path_change = 1;
- }
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != extent_key->objectid)
- return -ENOENT;
-
- if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
- slot++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, slot,
- struct btrfs_extent_ref_v0);
- *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- return 0;
-}
-#endif
-
/*
* helper to add a tree block to the list.
* the major work is getting the generation and level of the block
@@ -3407,23 +3325,12 @@ static int add_tree_block(struct reloc_control *rc,
level = (int)extent_key->offset;
}
generation = btrfs_extent_generation(eb, ei);
+ } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+ return -EINVAL;
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int ret;
-
- BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, extent_key,
- &ref_owner, NULL);
- if (ret < 0)
- return ret;
- BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
- level = (int)ref_owner;
- /* FIXME: get real generation */
- generation = 0;
-#else
BUG();
-#endif
}
btrfs_release_path(path);
@@ -3563,11 +3470,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode) || is_bad_inode(inode)) {
- if (!IS_ERR(inode))
- iput(inode);
+ if (IS_ERR(inode))
return -ENOENT;
- }
truncate:
ret = btrfs_check_trunc_cache_free_space(fs_info,
@@ -3781,12 +3685,7 @@ int add_data_references(struct reloc_control *rc,
eb = path->nodes[0];
ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
- ptr = end;
- else
-#endif
- ptr += sizeof(struct btrfs_extent_item);
+ ptr += sizeof(struct btrfs_extent_item);
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
@@ -3801,7 +3700,7 @@ int add_data_references(struct reloc_control *rc,
ret = find_data_references(rc, extent_key,
eb, dref, blocks);
} else {
- ret = -EINVAL;
+ ret = -EUCLEAN;
btrfs_err(rc->extent_root->fs_info,
"extent %llu slot %d has an invalid inline ref type",
eb->start, path->slots[0]);
@@ -3832,13 +3731,7 @@ int add_data_references(struct reloc_control *rc,
if (key.objectid != extent_key->objectid)
break;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
-#else
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
-#endif
ret = __add_tree_block(rc, key.offset, blocksize,
blocks);
} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -3846,6 +3739,10 @@ int add_data_references(struct reloc_control *rc,
struct btrfs_extent_data_ref);
ret = find_data_references(rc, extent_key,
eb, dref, blocks);
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+ ret = -EINVAL;
} else {
ret = 0;
}
@@ -4084,41 +3981,13 @@ restart:
flags = btrfs_extent_flags(path->nodes[0], ei);
ret = check_extent_flags(flags);
BUG_ON(ret);
-
+ } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(trans->fs_info);
+ btrfs_abort_transaction(trans, err);
+ break;
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int path_change = 0;
-
- BUG_ON(item_size !=
- sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
- &path_change);
- if (ret < 0) {
- err = ret;
- break;
- }
- if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
- flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
- else
- flags = BTRFS_EXTENT_FLAG_DATA;
-
- if (path_change) {
- btrfs_release_path(path);
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root,
- &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- break;
- }
- BUG_ON(ret > 0);
- }
-#else
BUG();
-#endif
}
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
@@ -4169,8 +4038,7 @@ restart:
}
}
if (trans && progress && err == -ENOSPC) {
- ret = btrfs_force_chunk_alloc(trans, fs_info,
- rc->block_group->flags);
+ ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
if (ret == 1) {
err = 0;
progress = 0;
@@ -4284,7 +4152,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
+ BUG_ON(IS_ERR(inode));
BTRFS_I(inode)->index_cnt = group->key.objectid;
err = btrfs_orphan_add(trans, BTRFS_I(inode));
@@ -4375,7 +4243,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
- ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
+ ret = btrfs_inc_block_group_ro(rc->block_group);
if (ret) {
err = ret;
goto out;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index c451285976ac..65bda0682928 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -320,9 +320,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
/* drop the root item for 'key' from the tree root */
int btrfs_del_root(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const struct btrfs_key *key)
+ const struct btrfs_key *key)
{
- struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_root *root = trans->fs_info->tree_root;
struct btrfs_path *path;
int ret;
@@ -341,13 +341,12 @@ out:
return ret;
}
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len)
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+ int name_len)
{
- struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_path *path;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
@@ -413,12 +412,11 @@ out:
*
* Will return 0, -ENOMEM, or anything from the CoW path
*/
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len)
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 sequence, const char *name,
+ int name_len)
{
- struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_key key;
int ret;
struct btrfs_path *path;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6702896cdb8f..3be1456b5116 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,32 +188,6 @@ struct scrub_ctx {
refcount_t refs;
};
-struct scrub_fixup_nodatasum {
- struct scrub_ctx *sctx;
- struct btrfs_device *dev;
- u64 logical;
- struct btrfs_root *root;
- struct btrfs_work work;
- int mirror_num;
-};
-
-struct scrub_nocow_inode {
- u64 inum;
- u64 offset;
- u64 root;
- struct list_head list;
-};
-
-struct scrub_copy_nocow_ctx {
- struct scrub_ctx *sctx;
- u64 logical;
- u64 len;
- int mirror_num;
- u64 physical_for_dev_replace;
- struct list_head inodes;
- struct btrfs_work work;
-};
-
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
@@ -232,8 +206,6 @@ struct full_stripe_lock {
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck);
@@ -277,13 +249,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
-static int write_page_nocow(struct scrub_ctx *sctx,
- u64 physical_for_dev_replace, struct page *page);
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- struct scrub_copy_nocow_ctx *ctx);
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
- int mirror_num, u64 physical_for_dev_replace);
-static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -555,60 +520,6 @@ out:
return ret;
}
-/*
- * used for workers that require transaction commits (i.e., for the
- * NOCOW case)
- */
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- refcount_inc(&sctx->refs);
- /*
- * increment scrubs_running to prevent cancel requests from
- * completing as long as a worker is running. we must also
- * increment scrubs_paused to prevent deadlocking on pause
- * requests used for transactions commits (as the worker uses a
- * transaction context). it is safe to regard the worker
- * as paused for all matters practical. effectively, we only
- * avoid cancellation requests from completing.
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_inc(&fs_info->scrubs_running);
- atomic_inc(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
-
- /*
- * check if @scrubs_running=@scrubs_paused condition
- * inside wait_event() is not an atomic operation.
- * which means we may inc/dec @scrub_running/paused
- * at any time. Let's wake up @scrub_pause_wait as
- * much as we can to let commit transaction blocked less.
- */
- wake_up(&fs_info->scrub_pause_wait);
-
- atomic_inc(&sctx->workers_pending);
-}
-
-/* used for workers that require transaction commits */
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- /*
- * see scrub_pending_trans_workers_inc() why we're pretending
- * to be paused in the scrub counters
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_dec(&fs_info->scrubs_running);
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- atomic_dec(&sctx->workers_pending);
- wake_up(&fs_info->scrub_pause_wait);
- wake_up(&sctx->list_wait);
- scrub_put_ctx(sctx);
-}
-
static void scrub_free_csums(struct scrub_ctx *sctx)
{
while (!list_empty(&sctx->csum_list)) {
@@ -882,194 +793,6 @@ out:
btrfs_free_path(path);
}
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
-{
- struct page *page = NULL;
- unsigned long index;
- struct scrub_fixup_nodatasum *fixup = fixup_ctx;
- int ret;
- int corrected = 0;
- struct btrfs_key key;
- struct inode *inode = NULL;
- struct btrfs_fs_info *fs_info;
- u64 end = offset + PAGE_SIZE - 1;
- struct btrfs_root *local_root;
- int srcu_index;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- fs_info = fixup->root->fs_info;
- srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
- local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return PTR_ERR(local_root);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- index = offset >> PAGE_SHIFT;
-
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- if (PageDirty(page)) {
- /*
- * we need to write the data to the defect sector. the
- * data that was in that sector is not in memory,
- * because the page was modified. we must not write the
- * modified page to that sector.
- *
- * TODO: what could be done here: wait for the delalloc
- * runner to write out that page (might involve
- * COW) and see whether the sector is still
- * referenced afterwards.
- *
- * For the meantime, we'll treat this error
- * incorrectable, although there is a chance that a
- * later scrub will find the bad sector again and that
- * there's no dirty page in memory, then.
- */
- ret = -EIO;
- goto out;
- }
- ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
- fixup->logical, page,
- offset - page_offset(page),
- fixup->mirror_num);
- unlock_page(page);
- corrected = !ret;
- } else {
- /*
- * we need to get good data first. the general readpage path
- * will call repair_io_failure for us, we just have to make
- * sure we read the bad mirror.
- */
- ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- if (ret) {
- /* set_extent_bits should give proper error */
- WARN_ON(ret > 0);
- if (ret > 0)
- ret = -EFAULT;
- goto out;
- }
-
- ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
- btrfs_get_extent,
- fixup->mirror_num);
- wait_on_page_locked(page);
-
- corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
- end, EXTENT_DAMAGED, 0, NULL);
- if (!corrected)
- clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- }
-
-out:
- if (page)
- put_page(page);
-
- iput(inode);
-
- if (ret < 0)
- return ret;
-
- if (ret == 0 && corrected) {
- /*
- * we only need to call readpage for one of the inodes belonging
- * to this extent. so make iterate_extent_inodes stop
- */
- return 1;
- }
-
- return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
- struct btrfs_fs_info *fs_info;
- int ret;
- struct scrub_fixup_nodatasum *fixup;
- struct scrub_ctx *sctx;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- int uncorrectable = 0;
-
- fixup = container_of(work, struct scrub_fixup_nodatasum, work);
- sctx = fixup->sctx;
- fs_info = fixup->root->fs_info;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.malloc_errors;
- spin_unlock(&sctx->stat_lock);
- uncorrectable = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(fixup->root);
- if (IS_ERR(trans)) {
- uncorrectable = 1;
- goto out;
- }
-
- /*
- * the idea is to trigger a regular read through the standard path. we
- * read a page from the (failed) logical address by specifying the
- * corresponding copynum of the failed sector. thus, that readpage is
- * expected to fail.
- * that is the point where on-the-fly error correction will kick in
- * (once it's finished) and rewrite the failed sector if a good copy
- * can be found.
- */
- ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
- scrub_fixup_readpage, fixup, false);
- if (ret < 0) {
- uncorrectable = 1;
- goto out;
- }
- WARN_ON(ret != 1);
-
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.corrected_errors;
- spin_unlock(&sctx->stat_lock);
-
-out:
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (uncorrectable) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.uncorrectable_errors;
- spin_unlock(&sctx->stat_lock);
- btrfs_dev_replace_stats_inc(
- &fs_info->dev_replace.num_uncorrectable_read_errors);
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (nodatasum) error at logical %llu on dev %s",
- fixup->logical, rcu_str_deref(fixup->dev->name));
- }
-
- btrfs_free_path(path);
- kfree(fixup);
-
- scrub_pending_trans_workers_dec(sctx);
-}
-
static inline void scrub_get_recover(struct scrub_recover *recover)
{
refcount_inc(&recover->refs);
@@ -1264,42 +987,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
}
/*
- * NOTE: Even for nodatasum case, it's still possible that it's a
- * compressed data extent, thus scrub_fixup_nodatasum(), which write
- * inode page cache onto disk, could cause serious data corruption.
- *
- * So here we could only read from disk, and hope our recovery could
- * reach disk before the newer write.
- */
- if (0 && !is_metadata && !have_csum) {
- struct scrub_fixup_nodatasum *fixup_nodatasum;
-
- WARN_ON(sctx->is_dev_replace);
-
- /*
- * !is_metadata and !have_csum, this means that the data
- * might not be COWed, that it might be modified
- * concurrently. The general strategy to work on the
- * commit root does not help in the case when COW is not
- * used.
- */
- fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
- if (!fixup_nodatasum)
- goto did_not_correct_error;
- fixup_nodatasum->sctx = sctx;
- fixup_nodatasum->dev = dev;
- fixup_nodatasum->logical = logical;
- fixup_nodatasum->root = fs_info->extent_root;
- fixup_nodatasum->mirror_num = failed_mirror_index + 1;
- scrub_pending_trans_workers_inc(sctx);
- btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
- scrub_fixup_nodatasum, NULL, NULL);
- btrfs_queue_work(fs_info->scrub_workers,
- &fixup_nodatasum->work);
- goto out;
- }
-
- /*
* now build and submit the bios for the other mirrors, check
* checksums.
* First try to pick the mirror which is completely without I/O
@@ -1866,7 +1553,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
bio = btrfs_io_bio_alloc(1);
bio_set_dev(bio, page_bad->dev->bdev);
bio->bi_iter.bi_sector = page_bad->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
@@ -1961,7 +1648,7 @@ again:
bio->bi_end_io = scrub_wr_bio_end_io;
bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
@@ -2361,7 +2048,7 @@ again:
bio->bi_end_io = scrub_bio_end_io;
bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
@@ -2800,17 +2487,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
have_csum = scrub_find_csum(sctx, logical, csum);
if (have_csum == 0)
++sctx->stat.no_csum;
- if (0 && sctx->is_dev_replace && !have_csum) {
- ret = copy_nocow_pages(sctx, logical, l,
- mirror_num,
- physical_for_dev_replace);
- goto behind_scrub_pages;
- }
}
ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
mirror_num, have_csum ? csum : NULL, 0,
physical_for_dev_replace);
-behind_scrub_pages:
if (ret)
return ret;
len -= l;
@@ -3863,7 +3543,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(fs_info, cache);
+ ret = btrfs_inc_block_group_ro(cache);
if (!ret && is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks
@@ -3982,14 +3662,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
btrfs_block_group_used(&cache->item) == 0) {
spin_unlock(&cache->lock);
- spin_lock(&fs_info->unused_bgs_lock);
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &fs_info->unused_bgs);
- }
- spin_unlock(&fs_info->unused_bgs_lock);
+ btrfs_mark_bg_unused(cache);
} else {
spin_unlock(&cache->lock);
}
@@ -4072,10 +3745,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (!fs_info->scrub_wr_completion_workers)
goto fail_scrub_wr_completion_workers;
- fs_info->scrub_nocow_workers =
- btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
- if (!fs_info->scrub_nocow_workers)
- goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
max_active, 2);
@@ -4086,8 +3755,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
return 0;
fail_scrub_parity_workers:
- btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
-fail_scrub_nocow_workers:
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
fail_scrub_wr_completion_workers:
btrfs_destroy_workqueue(fs_info->scrub_workers);
@@ -4100,7 +3767,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (--fs_info->scrub_workers_refcnt == 0) {
btrfs_destroy_workqueue(fs_info->scrub_workers);
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
- btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
@@ -4113,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
- struct rcu_string *name;
if (btrfs_fs_closing(fs_info))
return -EINVAL;
@@ -4167,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- rcu_read_lock();
- name = rcu_dereference(dev->name);
- btrfs_err(fs_info, "scrub: device %s is not writable",
- name->str);
- rcu_read_unlock();
+ btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+ rcu_str_deref(dev->name));
return -EROFS;
}
@@ -4359,330 +4021,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
*extent_dev = bbio->stripes[0].dev;
btrfs_put_bbio(bbio);
}
-
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
- int mirror_num, u64 physical_for_dev_replace)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
- if (!nocow_ctx) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- return -ENOMEM;
- }
-
- scrub_pending_trans_workers_inc(sctx);
-
- nocow_ctx->sctx = sctx;
- nocow_ctx->logical = logical;
- nocow_ctx->len = len;
- nocow_ctx->mirror_num = mirror_num;
- nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
- btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
- copy_nocow_pages_worker, NULL, NULL);
- INIT_LIST_HEAD(&nocow_ctx->inodes);
- btrfs_queue_work(fs_info->scrub_nocow_workers,
- &nocow_ctx->work);
-
- return 0;
-}
-
-static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
- struct scrub_nocow_inode *nocow_inode;
-
- nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
- if (!nocow_inode)
- return -ENOMEM;
- nocow_inode->inum = inum;
- nocow_inode->offset = offset;
- nocow_inode->root = root;
- list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
- return 0;
-}
-
-#define COPY_COMPLETE 1
-
-static void copy_nocow_pages_worker(struct btrfs_work *work)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx =
- container_of(work, struct scrub_copy_nocow_ctx, work);
- struct scrub_ctx *sctx = nocow_ctx->sctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
- u64 logical = nocow_ctx->logical;
- u64 len = nocow_ctx->len;
- int mirror_num = nocow_ctx->mirror_num;
- u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- int ret;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- int not_written = 0;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- not_written = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- not_written = 1;
- goto out;
- }
-
- ret = iterate_inodes_from_logical(logical, fs_info, path,
- record_inode_for_nocow, nocow_ctx, false);
- if (ret != 0 && ret != -ENOENT) {
- btrfs_warn(fs_info,
- "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
- logical, physical_for_dev_replace, len, mirror_num,
- ret);
- not_written = 1;
- goto out;
- }
-
- btrfs_end_transaction(trans);
- trans = NULL;
- while (!list_empty(&nocow_ctx->inodes)) {
- struct scrub_nocow_inode *entry;
- entry = list_first_entry(&nocow_ctx->inodes,
- struct scrub_nocow_inode,
- list);
- list_del_init(&entry->list);
- ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
- entry->root, nocow_ctx);
- kfree(entry);
- if (ret == COPY_COMPLETE) {
- ret = 0;
- break;
- } else if (ret) {
- break;
- }
- }
-out:
- while (!list_empty(&nocow_ctx->inodes)) {
- struct scrub_nocow_inode *entry;
- entry = list_first_entry(&nocow_ctx->inodes,
- struct scrub_nocow_inode,
- list);
- list_del_init(&entry->list);
- kfree(entry);
- }
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (not_written)
- btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
- num_uncorrectable_read_errors);
-
- btrfs_free_path(path);
- kfree(nocow_ctx);
-
- scrub_pending_trans_workers_dec(sctx);
-}
-
-static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
- u64 logical)
-{
- struct extent_state *cached_state = NULL;
- struct btrfs_ordered_extent *ordered;
- struct extent_io_tree *io_tree;
- struct extent_map *em;
- u64 lockstart = start, lockend = start + len - 1;
- int ret = 0;
-
- io_tree = &inode->io_tree;
-
- lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
- ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
- if (ordered) {
- btrfs_put_ordered_extent(ordered);
- ret = 1;
- goto out_unlock;
- }
-
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out_unlock;
- }
-
- /*
- * This extent does not actually cover the logical extent anymore,
- * move on to the next inode.
- */
- if (em->block_start > logical ||
- em->block_start + em->block_len < logical + len ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
- free_extent_map(em);
- ret = 1;
- goto out_unlock;
- }
- free_extent_map(em);
-
-out_unlock:
- unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
- return ret;
-}
-
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- struct scrub_copy_nocow_ctx *nocow_ctx)
-{
- struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
- struct btrfs_key key;
- struct inode *inode;
- struct page *page;
- struct btrfs_root *local_root;
- struct extent_io_tree *io_tree;
- u64 physical_for_dev_replace;
- u64 nocow_ctx_logical;
- u64 len = nocow_ctx->len;
- unsigned long index;
- int srcu_index;
- int ret = 0;
- int err = 0;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
- local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return PTR_ERR(local_root);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- /* Avoid truncate/dio/punch hole.. */
- inode_lock(inode);
- inode_dio_wait(inode);
-
- physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- io_tree = &BTRFS_I(inode)->io_tree;
- nocow_ctx_logical = nocow_ctx->logical;
-
- ret = check_extent_to_block(BTRFS_I(inode), offset, len,
- nocow_ctx_logical);
- if (ret) {
- ret = ret > 0 ? 0 : ret;
- goto out;
- }
-
- while (len >= PAGE_SIZE) {
- index = offset >> PAGE_SHIFT;
-again:
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- btrfs_err(fs_info, "find_or_create_page() failed");
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- if (PageDirty(page))
- goto next_page;
- } else {
- ClearPageError(page);
- err = extent_read_full_page(io_tree, page,
- btrfs_get_extent,
- nocow_ctx->mirror_num);
- if (err) {
- ret = err;
- goto next_page;
- }
-
- lock_page(page);
- /*
- * If the page has been remove from the page cache,
- * the data on it is meaningless, because it may be
- * old one, the new data may be written into the new
- * page in the page cache.
- */
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- put_page(page);
- goto again;
- }
- if (!PageUptodate(page)) {
- ret = -EIO;
- goto next_page;
- }
- }
-
- ret = check_extent_to_block(BTRFS_I(inode), offset, len,
- nocow_ctx_logical);
- if (ret) {
- ret = ret > 0 ? 0 : ret;
- goto next_page;
- }
-
- err = write_page_nocow(nocow_ctx->sctx,
- physical_for_dev_replace, page);
- if (err)
- ret = err;
-next_page:
- unlock_page(page);
- put_page(page);
-
- if (ret)
- break;
-
- offset += PAGE_SIZE;
- physical_for_dev_replace += PAGE_SIZE;
- nocow_ctx_logical += PAGE_SIZE;
- len -= PAGE_SIZE;
- }
- ret = COPY_COMPLETE;
-out:
- inode_unlock(inode);
- iput(inode);
- return ret;
-}
-
-static int write_page_nocow(struct scrub_ctx *sctx,
- u64 physical_for_dev_replace, struct page *page)
-{
- struct bio *bio;
- struct btrfs_device *dev;
-
- dev = sctx->wr_tgtdev;
- if (!dev)
- return -EIO;
- if (!dev->bdev) {
- btrfs_warn_rl(dev->fs_info,
- "scrub write_page_nocow(bdev == NULL) is unexpected");
- return -EIO;
- }
- bio = btrfs_io_bio_alloc(1);
- bio->bi_iter.bi_size = 0;
- bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
- bio_set_dev(bio, dev->bdev);
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
- /* bio_add_page won't fail on a freshly allocated bio */
- bio_add_page(bio, page, PAGE_SIZE, 0);
-
- if (btrfsic_submit_bio_wait(bio)) {
- bio_put(bio);
- btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
- return -EIO;
- }
-
- bio_put(bio);
- return 0;
-}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c47f62b19226..ba8950bfd9c7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
+ bool ignore_cur_inode;
u64 send_progress;
@@ -1500,7 +1501,7 @@ static int read_symlink(struct btrfs_root *root,
BUG_ON(compression);
off = btrfs_file_extent_inline_start(ei);
- len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
+ len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
@@ -5006,6 +5007,15 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 len;
int ret = 0;
+ /*
+ * A hole that starts at EOF or beyond it. Since we do not yet support
+ * fallocate (for extent preallocation and hole punching), sending a
+ * write of zeroes starting at EOF or beyond would later require issuing
+ * a truncate operation which would undo the write and achieve nothing.
+ */
+ if (offset >= sctx->cur_inode_size)
+ return 0;
+
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
return send_update_extent(sctx, offset, end - offset);
@@ -5160,7 +5170,7 @@ static int clone_range(struct send_ctx *sctx,
ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
type = btrfs_file_extent_type(leaf, ei);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
+ ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
ext_len = PAGE_ALIGN(ext_len);
} else {
ext_len = btrfs_file_extent_num_bytes(leaf, ei);
@@ -5236,8 +5246,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], ei);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], ei);
+ len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
/*
* it is possible the inline item won't cover the whole page,
* but there may be items after this page. Make
@@ -5375,7 +5384,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
}
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
- right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+ right_len = btrfs_file_extent_ram_bytes(eb, ei);
right_len = PAGE_ALIGN(right_len);
} else {
right_len = btrfs_file_extent_num_bytes(eb, ei);
@@ -5496,8 +5505,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], fi);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], fi);
+ u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
extent_end = ALIGN(key.offset + size,
sctx->send_root->fs_info->sectorsize);
} else {
@@ -5560,7 +5568,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+ u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(key.offset + size,
root->fs_info->sectorsize);
@@ -5606,8 +5614,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], fi);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], fi);
+ u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
extent_end = ALIGN(key->offset + size,
sctx->send_root->fs_info->sectorsize);
} else {
@@ -5799,6 +5806,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
int pending_move = 0;
int refs_processed = 0;
+ if (sctx->ignore_cur_inode)
+ return 0;
+
ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
&refs_processed);
if (ret < 0)
@@ -5917,6 +5927,93 @@ out:
return ret;
}
+struct parent_paths_ctx {
+ struct list_head *refs;
+ struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+ void *ctx)
+{
+ struct parent_paths_ctx *ppctx = ctx;
+
+ return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+ ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+ LIST_HEAD(deleted_refs);
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct parent_paths_ctx ctx;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ ctx.refs = &deleted_refs;
+ ctx.sctx = sctx;
+
+ while (true) {
+ struct extent_buffer *eb = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(sctx->parent_root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != sctx->cur_ino)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+ record_parent_ref, &ctx);
+ if (ret < 0)
+ goto out;
+
+ path->slots[0]++;
+ }
+
+ while (!list_empty(&deleted_refs)) {
+ struct recorded_ref *ref;
+
+ ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+ ret = send_unlink(sctx, ref->full_path);
+ if (ret < 0)
+ goto out;
+ fs_path_free(ref->full_path);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret)
+ __free_recorded_refs(&deleted_refs);
+ return ret;
+}
+
static int changed_inode(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -5931,6 +6028,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5971,6 +6069,33 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 1;
}
+ /*
+ * Normally we do not find inodes with a link count of zero (orphans)
+ * because the most common case is to create a snapshot and use it
+ * for a send operation. However other less common use cases involve
+ * using a subvolume and send it after turning it to RO mode just
+ * after deleting all hard links of a file while holding an open
+ * file descriptor against it or turning a RO snapshot into RW mode,
+ * keep an open file descriptor against a file, delete it and then
+ * turn the snapshot back to RO mode before using it for a send
+ * operation. So if we find such cases, ignore the inode and all its
+ * items completely if it's a new inode, or if it's a changed inode
+ * make sure all its previous paths (from the parent snapshot) are all
+ * unlinked and all other the inode items are ignored.
+ */
+ if (result == BTRFS_COMPARE_TREE_NEW ||
+ result == BTRFS_COMPARE_TREE_CHANGED) {
+ u32 nlinks;
+
+ nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+ if (nlinks == 0) {
+ sctx->ignore_cur_inode = true;
+ if (result == BTRFS_COMPARE_TREE_CHANGED)
+ ret = btrfs_unlink_all_paths(sctx);
+ goto out;
+ }
+ }
+
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
@@ -6309,15 +6434,17 @@ static int changed_cb(struct btrfs_path *left_path,
key->objectid == BTRFS_FREE_SPACE_OBJECTID)
goto out;
- if (key->type == BTRFS_INODE_ITEM_KEY)
+ if (key->type == BTRFS_INODE_ITEM_KEY) {
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY ||
- key->type == BTRFS_INODE_EXTREF_KEY)
- ret = changed_ref(sctx, result);
- else if (key->type == BTRFS_XATTR_ITEM_KEY)
- ret = changed_xattr(sctx, result);
- else if (key->type == BTRFS_EXTENT_DATA_KEY)
- ret = changed_extent(sctx, result);
+ } else if (!sctx->ignore_cur_inode) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = changed_ref(sctx, result);
+ else if (key->type == BTRFS_XATTR_ITEM_KEY)
+ ret = changed_xattr(sctx, result);
+ else if (key->type == BTRFS_EXTENT_DATA_KEY)
+ ret = changed_extent(sctx, result);
+ }
out:
return ret;
@@ -6328,7 +6455,6 @@ static int full_send_tree(struct send_ctx *sctx)
int ret;
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_key key;
- struct btrfs_key found_key;
struct btrfs_path *path;
struct extent_buffer *eb;
int slot;
@@ -6350,17 +6476,13 @@ static int full_send_tree(struct send_ctx *sctx)
while (1) {
eb = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(eb, &found_key, slot);
+ btrfs_item_key_to_cpu(eb, &key, slot);
- ret = changed_cb(path, NULL, &found_key,
+ ret = changed_cb(path, NULL, &key,
BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0)
goto out;
- key.objectid = found_key.objectid;
- key.type = found_key.type;
- key.offset = found_key.offset + 1;
-
ret = btrfs_next_item(send_root, path);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b7b4acb12833..4c13b737f568 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -3,7 +3,6 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <linux/highmem.h>
#include <asm/unaligned.h>
#include "ctree.h"
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..6601c9aa5e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -5,7 +5,6 @@
#include <linux/blkdev.h>
#include <linux/module.h>
-#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
@@ -15,8 +14,6 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
@@ -468,9 +465,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid:
case Opt_device:
/*
- * These are parsed by btrfs_parse_subvol_options
- * and btrfs_parse_early_options
- * and can be happily ignored here.
+ * These are parsed by btrfs_parse_subvol_options or
+ * btrfs_parse_device_options and can be ignored here.
*/
break;
case Opt_nodatasum:
@@ -760,6 +756,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_recovery:
btrfs_warn(info,
"'recovery' is deprecated, use 'usebackuproot' instead");
+ /* fall through */
case Opt_usebackuproot:
btrfs_info(info,
"trying to use backup root at mount time");
@@ -885,13 +882,16 @@ out:
* All other options will be parsed on much later in the mount process and
* only when we need to allocate a new super block.
*/
-static int btrfs_parse_early_options(const char *options, fmode_t flags,
- void *holder, struct btrfs_fs_devices **fs_devices)
+static int btrfs_parse_device_options(const char *options, fmode_t flags,
+ void *holder)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+ struct btrfs_device *device = NULL;
int error = 0;
+ lockdep_assert_held(&uuid_mutex);
+
if (!options)
return 0;
@@ -917,11 +917,13 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
error = -ENOMEM;
goto out;
}
- error = btrfs_scan_one_device(device_name,
- flags, holder, fs_devices);
+ device = btrfs_scan_one_device(device_name, flags,
+ holder);
kfree(device_name);
- if (error)
+ if (IS_ERR(device)) {
+ error = PTR_ERR(device);
goto out;
+ }
}
}
@@ -935,8 +937,8 @@ out:
*
* The value is later passed to mount_subvol()
*/
-static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
- char **subvol_name, u64 *subvol_objectid)
+static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
+ u64 *subvol_objectid)
{
substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p;
@@ -948,7 +950,7 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
/*
* strsep changes the string, duplicate it because
- * btrfs_parse_early_options gets called later
+ * btrfs_parse_device_options gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@@ -1517,6 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
{
struct block_device *bdev = NULL;
struct super_block *s;
+ struct btrfs_device *device = NULL;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
@@ -1526,12 +1529,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- error = btrfs_parse_early_options(data, mode, fs_type,
- &fs_devices);
- if (error) {
- return ERR_PTR(error);
- }
-
security_init_mnt_opts(&new_sec_opts);
if (data) {
error = parse_security_options(data, &new_sec_opts);
@@ -1539,10 +1536,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
return ERR_PTR(error);
}
- error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
- if (error)
- goto error_sec_opts;
-
/*
* Setup a dummy root and fs_info for test/set super. This is because
* we don't actually fill this stuff out until open_ctree, but we need
@@ -1555,8 +1548,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
goto error_sec_opts;
}
- fs_info->fs_devices = fs_devices;
-
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
security_init_mnt_opts(&fs_info->security_opts);
@@ -1565,7 +1556,25 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
goto error_fs_info;
}
+ mutex_lock(&uuid_mutex);
+ error = btrfs_parse_device_options(data, mode, fs_type);
+ if (error) {
+ mutex_unlock(&uuid_mutex);
+ goto error_fs_info;
+ }
+
+ device = btrfs_scan_one_device(device_name, mode, fs_type);
+ if (IS_ERR(device)) {
+ mutex_unlock(&uuid_mutex);
+ error = PTR_ERR(device);
+ goto error_fs_info;
+ }
+
+ fs_devices = device->fs_devices;
+ fs_info->fs_devices = fs_devices;
+
error = btrfs_open_devices(fs_devices, mode, fs_type);
+ mutex_unlock(&uuid_mutex);
if (error)
goto error_fs_info;
@@ -1650,8 +1659,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- error = btrfs_parse_subvol_options(data, mode,
- &subvol_name, &subvol_objectid);
+ error = btrfs_parse_subvol_options(data, &subvol_name,
+ &subvol_objectid);
if (error) {
kfree(subvol_name);
return ERR_PTR(error);
@@ -2098,14 +2107,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
btrfs_account_ro_block_groups_free_space(found);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
- if (!list_empty(&found->block_groups[i])) {
- switch (i) {
- case BTRFS_RAID_DUP:
- case BTRFS_RAID_RAID1:
- case BTRFS_RAID_RAID10:
- factor = 2;
- }
- }
+ if (!list_empty(&found->block_groups[i]))
+ factor = btrfs_bg_type_to_factor(
+ btrfs_raid_array[i].bg_flag);
}
}
@@ -2222,7 +2226,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct btrfs_ioctl_vol_args *vol;
- struct btrfs_fs_devices *fs_devices;
+ struct btrfs_device *device = NULL;
int ret = -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
@@ -2234,15 +2238,24 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
- ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_root_fs_type, &fs_devices);
+ mutex_lock(&uuid_mutex);
+ device = btrfs_scan_one_device(vol->name, FMODE_READ,
+ &btrfs_root_fs_type);
+ ret = PTR_ERR_OR_ZERO(device);
+ mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_DEVICES_READY:
- ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_root_fs_type, &fs_devices);
- if (ret)
+ mutex_lock(&uuid_mutex);
+ device = btrfs_scan_one_device(vol->name, FMODE_READ,
+ &btrfs_root_fs_type);
+ if (IS_ERR(device)) {
+ mutex_unlock(&uuid_mutex);
+ ret = PTR_ERR(device);
break;
- ret = !(fs_devices->num_devices == fs_devices->total_devices);
+ }
+ ret = !(device->fs_devices->num_devices ==
+ device->fs_devices->total_devices);
+ mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
ret = btrfs_ioctl_get_supported_features((void __user*)arg);
@@ -2290,7 +2303,6 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
struct btrfs_fs_devices *cur_devices;
struct btrfs_device *dev, *first_dev = NULL;
struct list_head *head;
- struct rcu_string *name;
/*
* Lightweight locking of the devices. We should not need
@@ -2314,12 +2326,10 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
cur_devices = cur_devices->seed;
}
- if (first_dev) {
- name = rcu_dereference(first_dev->name);
- seq_escape(m, name->str, " \t\n\\");
- } else {
+ if (first_dev)
+ seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
+ else
WARN_ON(1);
- }
rcu_read_unlock();
return 0;
}
@@ -2331,7 +2341,6 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
- .write_inode = btrfs_write_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
@@ -2369,7 +2378,7 @@ static __cold void btrfs_interface_exit(void)
static void __init btrfs_print_mod_info(void)
{
- pr_info("Btrfs loaded, crc32c=%s"
+ static const char options[] = ""
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
@@ -2382,8 +2391,8 @@ static void __init btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
", ref-verify=on"
#endif
- "\n",
- crc32c_impl());
+ ;
+ pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}
static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4a4e960c7c66..3717c864ba23 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -7,10 +7,8 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
-#include <linux/buffer_head.h>
#include <linux/kobject.h>
#include <linux/bug.h>
-#include <linux/genhd.h>
#include <linux/debugfs.h>
#include "ctree.h"
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ace94db09d29..412b910b04cc 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -216,7 +216,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
btrfs_init_dummy_trans(&trans, fs_info);
test_msg("qgroup basic add");
- ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
+ ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
if (ret) {
test_err("couldn't create a qgroup %d", ret);
return ret;
@@ -249,8 +249,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -285,8 +285,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return -EINVAL;
@@ -322,7 +322,7 @@ static int test_multiple_refs(struct btrfs_root *root,
* We have BTRFS_FS_TREE_OBJECTID created already from the
* previous test.
*/
- ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
+ ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
test_err("couldn't create a qgroup %d", ret);
return ret;
@@ -350,8 +350,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -385,8 +385,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -426,8 +426,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ff5f6c719976..3b84f5015029 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -241,7 +241,7 @@ loop:
refcount_set(&cur_trans->use_count, 2);
atomic_set(&cur_trans->pending_ordered, 0);
cur_trans->flags = 0;
- cur_trans->start_time = get_seconds();
+ cur_trans->start_time = ktime_get_seconds();
memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
@@ -680,7 +680,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
trans = start_transaction(root, 0, TRANS_ATTACH,
BTRFS_RESERVE_NO_FLUSH, true);
- if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
+ if (trans == ERR_PTR(-ENOENT))
btrfs_wait_for_commit(root->fs_info, 0);
return trans;
@@ -1152,7 +1152,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
ret = btrfs_run_dev_replace(trans, fs_info);
if (ret)
return ret;
- ret = btrfs_run_qgroups(trans, fs_info);
+ ret = btrfs_run_qgroups(trans);
if (ret)
return ret;
@@ -1355,8 +1355,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
goto out;
/* Now qgroup are all updated, we can inherit it to new qgroups */
- ret = btrfs_qgroup_inherit(trans, fs_info,
- src->root_key.objectid, dst_objectid,
+ ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
inherit);
if (ret < 0)
goto out;
@@ -1574,7 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/*
* insert root back/forward references
*/
- ret = btrfs_add_root_ref(trans, fs_info, objectid,
+ ret = btrfs_add_root_ref(trans, objectid,
parent_root->root_key.objectid,
btrfs_ino(BTRFS_I(parent_inode)), index,
dentry->d_name.name, dentry->d_name.len);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94439482a0ec..4cbb1b55387d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -48,7 +48,7 @@ struct btrfs_transaction {
int aborted;
struct list_head list;
struct extent_io_tree dirty_pages;
- unsigned long start_time;
+ time64_t start_time;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
wait_queue_head_t pending_wait;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8d40e7dd8c30..db835635372f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -19,6 +19,7 @@
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
+#include "volumes.h"
/*
* Error message should follow the following format:
@@ -353,6 +354,102 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
return 0;
}
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, key.offset, &vaf);
+ va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_block_group_item bgi;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 flags;
+ u64 type;
+
+ /*
+ * Here we don't really care about alignment since extent allocator can
+ * handle it. We care more about the size, as if one block group is
+ * larger than maximum size, it's must be some obvious corruption.
+ */
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group size, have %llu expect (0, %llu]",
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ return -EUCLEAN;
+ }
+
+ if (item_size != sizeof(bgi)) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect %zu",
+ item_size, sizeof(bgi));
+ return -EUCLEAN;
+ }
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group chunk objectid, have %llu expect %llu",
+ btrfs_block_group_chunk_objectid(&bgi),
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ return -EUCLEAN;
+ }
+
+ if (btrfs_block_group_used(&bgi) > key->offset) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group used, have %llu expect [0, %llu)",
+ btrfs_block_group_used(&bgi), key->offset);
+ return -EUCLEAN;
+ }
+
+ flags = btrfs_block_group_flags(&bgi);
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+ block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+ return -EUCLEAN;
+ }
+
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
+ type != BTRFS_BLOCK_GROUP_METADATA &&
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
+ type != (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA)) {
+ block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+ type, hweight64(type),
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
/*
* Common point to switch the item-specific validation.
*/
@@ -374,6 +471,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info,
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(fs_info, leaf, key, slot);
break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(fs_info, leaf, key, slot);
+ break;
}
return ret;
}
@@ -396,9 +496,22 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
* skip this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+ u64 owner = btrfs_header_owner(leaf);
struct btrfs_root *check_root;
- key.objectid = btrfs_header_owner(leaf);
+ /* These trees must never be empty */
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
+ owner == BTRFS_DEV_TREE_OBJECTID ||
+ owner == BTRFS_FS_TREE_OBJECTID ||
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ generic_err(fs_info, leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+ key.objectid = owner;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f8220ec02036..1650dc44a5e3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
inode = NULL;
- } else if (is_bad_inode(inode)) {
- iput(inode);
- inode = NULL;
- }
return inode;
}
@@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
nbytes = 0;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size = btrfs_file_extent_inline_len(eb, slot, item);
+ size = btrfs_file_extent_ram_bytes(eb, item);
nbytes = btrfs_file_extent_ram_bytes(eb, item);
extent_end = ALIGN(start + size,
fs_info->sectorsize);
@@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* as the owner of the file extent changed from log tree
* (doesn't affect qgroup) to fs/file tree(affects qgroup)
*/
- ret = btrfs_qgroup_trace_extent(trans, fs_info,
+ ret = btrfs_qgroup_trace_extent(trans,
btrfs_file_extent_disk_bytenr(eb, item),
btrfs_file_extent_disk_num_bytes(eb, item),
GFP_NOFS);
@@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* allocation tree
*/
ret = btrfs_alloc_logged_file_extent(trans,
- fs_info,
root->root_key.objectid,
key->objectid, offset, &ins);
if (ret)
@@ -1291,6 +1286,46 @@ again:
return ret;
}
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+ const u8 ref_type, const char *name,
+ const int namelen)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = ref_type;
+ if (key.type == BTRFS_INODE_REF_KEY)
+ key.offset = parent_id;
+ else
+ key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0], parent_id,
+ name, namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, namelen, NULL);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * If a reference item already exists for this inode
+ * with the same parent and name, but different index,
+ * drop it and the corresponding directory index entries
+ * from the parent before adding the new reference item
+ * and dir index entries, otherwise we would fail with
+ * -EEXIST returned from btrfs_add_link() below.
+ */
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+ ret = btrfs_unlink_inode(trans, root,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+ /*
+ * If we dropped the link count to 0, bump it so
+ * that later the iput() on the inode will not
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+ inc_nlink(inode);
+ }
+ if (ret < 0)
+ goto out;
+
/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir),
BTRFS_I(inode),
@@ -2120,7 +2181,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
+ if (!log_di || log_di == ERR_PTR(-ENOENT)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* bail out if we need to do a full commit */
if (btrfs_need_log_full_commit(fs_info, trans)) {
ret = -EAGAIN;
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
blk_finish_plug(&plug);
btrfs_abort_transaction(trans, ret);
- btrfs_free_logged_extents(log, log_transid);
btrfs_set_log_full_commit(fs_info, trans);
mutex_unlock(&root->log_mutex);
goto out;
@@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_wait_tree_log_extents(log, mark);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
@@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (atomic_read(&log_root_tree->log_commit[index2])) {
blk_finish_plug(&plug);
ret = btrfs_wait_tree_log_extents(log, mark);
- btrfs_wait_logged_extents(trans, log, log_transid);
wait_log_commit(log_root_tree,
root_log_ctx.log_transid);
mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (btrfs_need_log_full_commit(fs_info, trans)) {
blk_finish_plug(&plug);
btrfs_wait_tree_log_extents(log, mark);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
@@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
btrfs_abort_transaction(trans, ret);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_NEW | EXTENT_DIRTY);
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
- btrfs_wait_logged_extents(trans, log, log_transid);
btrfs_set_super_log_root(fs_info->super_for_commit,
log_root_tree->node->start);
@@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
}
- /*
- * We may have short-circuited the log tree with the full commit logic
- * and left ordered extents on our list, so clear these out to keep us
- * from leaking inodes and memory.
- */
- btrfs_free_logged_extents(log, 0);
- btrfs_free_logged_extents(log, 1);
-
free_extent_buffer(log->node);
kfree(log);
}
@@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
unsigned long src_offset;
unsigned long dst_offset;
struct btrfs_root *log = inode->root->log_root;
@@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(src, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(src,
- src_path->slots[0],
- extent);
+ len = btrfs_file_extent_ram_bytes(src, extent);
*last_extent = ALIGN(key.offset + len,
fs_info->sectorsize);
} else {
@@ -4004,7 +4047,7 @@ fill_holes:
extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(src, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(src, i, extent);
+ len = btrfs_file_extent_ram_bytes(src, extent);
extent_end = ALIGN(key.offset + len,
fs_info->sectorsize);
} else {
@@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
return 0;
}
-static int wait_ordered_extents(struct btrfs_trans_handle *trans,
- struct inode *inode,
- struct btrfs_root *root,
- const struct extent_map *em,
- const struct list_head *logged_list,
- bool *ordered_io_error)
+static int log_extent_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_root *log_root,
+ const struct extent_map *em)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *log = root->log_root;
- u64 mod_start = em->mod_start;
- u64 mod_len = em->mod_len;
- const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
u64 csum_offset;
u64 csum_len;
LIST_HEAD(ordered_sums);
int ret = 0;
- *ordered_io_error = false;
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+ if (inode->flags & BTRFS_INODE_NODATASUM ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
em->block_start == EXTENT_MAP_HOLE)
return 0;
- /*
- * Wait far any ordered extent that covers our extent map. If it
- * finishes without an error, first check and see if our csums are on
- * our outstanding ordered extents.
- */
- list_for_each_entry(ordered, logged_list, log_list) {
- struct btrfs_ordered_sum *sum;
-
- if (!mod_len)
- break;
-
- if (ordered->file_offset + ordered->len <= mod_start ||
- mod_start + mod_len <= ordered->file_offset)
- continue;
-
- if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
- const u64 start = ordered->file_offset;
- const u64 end = ordered->file_offset + ordered->len - 1;
-
- WARN_ON(ordered->inode != inode);
- filemap_fdatawrite_range(inode->i_mapping, start, end);
- }
-
- wait_event(ordered->wait,
- (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) ||
- test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)));
-
- if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
- /*
- * Clear the AS_EIO/AS_ENOSPC flags from the inode's
- * i_mapping flags, so that the next fsync won't get
- * an outdated io error too.
- */
- filemap_check_errors(inode->i_mapping);
- *ordered_io_error = true;
- break;
- }
- /*
- * We are going to copy all the csums on this ordered extent, so
- * go ahead and adjust mod_start and mod_len in case this
- * ordered extent has already been logged.
- */
- if (ordered->file_offset > mod_start) {
- if (ordered->file_offset + ordered->len >=
- mod_start + mod_len)
- mod_len = ordered->file_offset - mod_start;
- /*
- * If we have this case
- *
- * |--------- logged extent ---------|
- * |----- ordered extent ----|
- *
- * Just don't mess with mod_start and mod_len, we'll
- * just end up logging more csums than we need and it
- * will be ok.
- */
- } else {
- if (ordered->file_offset + ordered->len <
- mod_start + mod_len) {
- mod_len = (mod_start + mod_len) -
- (ordered->file_offset + ordered->len);
- mod_start = ordered->file_offset +
- ordered->len;
- } else {
- mod_len = 0;
- }
- }
-
- if (skip_csum)
- continue;
-
- /*
- * To keep us from looping for the above case of an ordered
- * extent that falls inside of the logged extent.
- */
- if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
- &ordered->flags))
- continue;
-
- list_for_each_entry(sum, &ordered->list, list) {
- ret = btrfs_csum_file_blocks(trans, log, sum);
- if (ret)
- break;
- }
- }
-
- if (*ordered_io_error || !mod_len || ret || skip_csum)
- return ret;
-
+ /* If we're compressed we have to save the entire range of csums. */
if (em->compress_type) {
csum_offset = 0;
csum_len = max(em->block_len, em->orig_block_len);
} else {
- csum_offset = mod_start - em->start;
- csum_len = mod_len;
+ csum_offset = em->mod_start - em->start;
+ csum_len = em->mod_len;
}
/* block start is already adjusted for the file extent offset. */
- ret = btrfs_lookup_csums_range(fs_info->csum_root,
+ ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
em->block_start + csum_offset,
em->block_start + csum_offset +
csum_len - 1, &ordered_sums, 0);
@@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log, sums);
+ ret = btrfs_csum_file_blocks(trans, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_root *root,
const struct extent_map *em,
struct btrfs_path *path,
- const struct list_head *logged_list,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *log = root->log_root;
@@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
u64 block_len;
int ret;
int extent_inserted = 0;
- bool ordered_io_err = false;
- ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
- logged_list, &ordered_io_err);
+ ret = log_extent_csums(trans, inode, log, em);
if (ret)
return ret;
- if (ordered_io_err) {
- ctx->io_err = -EIO;
- return ctx->io_err;
- }
-
btrfs_init_map_token(&token);
ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
@@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
struct btrfs_path *path,
- struct list_head *logged_list,
struct btrfs_log_ctx *ctx,
const u64 start,
const u64 end)
@@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
- btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
- /*
- * Some ordered extents started by fsync might have completed
- * before we could collect them into the list logged_list, which
- * means they're gone, not in our logged_list nor in the inode's
- * ordered tree. We want the application/user space to know an
- * error happened while attempting to persist file data so that
- * it can take proper action. If such error happened, we leave
- * without writing to the log tree and the fsync must report the
- * file data write error and not commit the current transaction.
- */
- ret = filemap_check_errors(inode->vfs_inode.i_mapping);
- if (ret)
- ctx->io_err = ret;
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@@ -4512,8 +4433,7 @@ process:
write_unlock(&tree->lock);
- ret = log_one_extent(trans, inode, root, em, path, logged_list,
- ctx);
+ ret = log_one_extent(trans, inode, root, em, path, ctx);
write_lock(&tree->lock);
clear_em_logging(tree, em);
free_extent_map(em);
@@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
if (btrfs_file_extent_type(leaf, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(leaf,
- path->slots[0],
- extent);
+ len = btrfs_file_extent_ram_bytes(leaf, extent);
ASSERT(len == i_size ||
(len == fs_info->sectorsize &&
btrfs_file_extent_compression(leaf, extent) !=
@@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
- LIST_HEAD(logged_list);
u64 last_extent = 0;
int err = 0;
int ret;
@@ -5094,8 +5011,7 @@ again:
* we don't need to do more work nor fallback to
* a transaction commit.
*/
- if (IS_ERR(other_inode) &&
- PTR_ERR(other_inode) == -ENOENT) {
+ if (other_inode == ERR_PTR(-ENOENT)) {
goto next_key;
} else if (IS_ERR(other_inode)) {
err = PTR_ERR(other_inode);
@@ -5235,7 +5151,7 @@ log_extents:
}
if (fast_search) {
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
- &logged_list, ctx, start, end);
+ ctx, start, end);
if (ret) {
err = ret;
goto out_unlock;
@@ -5286,10 +5202,6 @@ log_extents:
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
out_unlock:
- if (unlikely(err))
- btrfs_put_logged_extents(&logged_list);
- else
- btrfs_submit_logged_extents(&logged_list, log);
mutex_unlock(&inode->log_mutex);
btrfs_free_path(path);
@@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
/*
* this will force the logging code to walk the dentry chain
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1da162928d1a..da86706123ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8,15 +8,12 @@
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
-#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -634,44 +631,48 @@ static void pending_bios_fn(struct btrfs_work *work)
* devices.
*/
static void btrfs_free_stale_devices(const char *path,
- struct btrfs_device *skip_dev)
+ struct btrfs_device *skip_device)
{
- struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
- struct btrfs_device *dev, *tmp_dev;
+ struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
+ struct btrfs_device *device, *tmp_device;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
-
- if (fs_devs->opened)
+ list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
+ mutex_lock(&fs_devices->device_list_mutex);
+ if (fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
continue;
+ }
- list_for_each_entry_safe(dev, tmp_dev,
- &fs_devs->devices, dev_list) {
+ list_for_each_entry_safe(device, tmp_device,
+ &fs_devices->devices, dev_list) {
int not_found = 0;
- if (skip_dev && skip_dev == dev)
+ if (skip_device && skip_device == device)
continue;
- if (path && !dev->name)
+ if (path && !device->name)
continue;
rcu_read_lock();
if (path)
- not_found = strcmp(rcu_str_deref(dev->name),
+ not_found = strcmp(rcu_str_deref(device->name),
path);
rcu_read_unlock();
if (not_found)
continue;
/* delete the stale device */
- if (fs_devs->num_devices == 1) {
- btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->fs_list);
- free_fs_devices(fs_devs);
+ fs_devices->num_devices--;
+ list_del(&device->dev_list);
+ btrfs_free_device(device);
+
+ if (fs_devices->num_devices == 0)
break;
- } else {
- fs_devs->num_devices--;
- list_del(&dev->dev_list);
- btrfs_free_device(dev);
- }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ if (fs_devices->num_devices == 0) {
+ btrfs_sysfs_remove_fsid(fs_devices);
+ list_del(&fs_devices->fs_list);
+ free_fs_devices(fs_devices);
}
}
}
@@ -750,7 +751,8 @@ error_brelse:
* error pointer when failed
*/
static noinline struct btrfs_device *device_list_add(const char *path,
- struct btrfs_super_block *disk_super)
+ struct btrfs_super_block *disk_super,
+ bool *new_device_added)
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices;
@@ -764,21 +766,26 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
+ mutex_lock(&fs_devices->device_list_mutex);
list_add(&fs_devices->fs_list, &fs_uuids);
device = NULL;
} else {
+ mutex_lock(&fs_devices->device_list_mutex);
device = find_device(fs_devices, devid,
disk_super->dev_item.uuid);
}
if (!device) {
- if (fs_devices->opened)
+ if (fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
+ }
device = btrfs_alloc_device(NULL, &devid,
disk_super->dev_item.uuid);
if (IS_ERR(device)) {
+ mutex_unlock(&fs_devices->device_list_mutex);
/* we can safely leave the fs_devices entry around */
return device;
}
@@ -786,17 +793,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
btrfs_free_device(device);
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
- mutex_lock(&fs_devices->device_list_mutex);
list_add_rcu(&device->dev_list, &fs_devices->devices);
fs_devices->num_devices++;
- mutex_unlock(&fs_devices->device_list_mutex);
device->fs_devices = fs_devices;
- btrfs_free_stale_devices(path, device);
+ *new_device_added = true;
if (disk_super->label[0])
pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
@@ -840,12 +846,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* with larger generation number or the last-in if
* generation are equal.
*/
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EEXIST);
}
name = rcu_string_strdup(path, GFP_NOFS);
- if (!name)
+ if (!name) {
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-ENOMEM);
+ }
rcu_string_free(device->name);
rcu_assign_pointer(device->name, name);
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
@@ -865,6 +874,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
fs_devices->total_devices = btrfs_super_num_devices(disk_super);
+ mutex_unlock(&fs_devices->device_list_mutex);
return device;
}
@@ -1004,7 +1014,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
blkdev_put(device->bdev, device->mode);
}
-static void btrfs_prepare_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
struct btrfs_device *new_device;
@@ -1022,6 +1032,8 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
+ btrfs_close_bdev(device);
+
new_device = btrfs_alloc_device(NULL, &device->devid,
device->uuid);
BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
@@ -1035,39 +1047,23 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
list_replace_rcu(&device->dev_list, &new_device->dev_list);
new_device->fs_devices = device->fs_devices;
+
+ call_rcu(&device->rcu, free_device_rcu);
}
static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
- struct list_head pending_put;
-
- INIT_LIST_HEAD(&pending_put);
if (--fs_devices->opened > 0)
return 0;
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
- btrfs_prepare_close_one_device(device);
- list_add(&device->dev_list, &pending_put);
+ btrfs_close_one_device(device);
}
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * btrfs_show_devname() is using the device_list_mutex,
- * sometimes call to blkdev_put() leads vfs calling
- * into this func. So do put outside of device_list_mutex,
- * as of now.
- */
- while (!list_empty(&pending_put)) {
- device = list_first_entry(&pending_put,
- struct btrfs_device, dev_list);
- list_del(&device->dev_list);
- btrfs_close_bdev(device);
- call_rcu(&device->rcu, free_device_rcu);
- }
-
WARN_ON(fs_devices->open_devices);
WARN_ON(fs_devices->rw_devices);
fs_devices->opened = 0;
@@ -1146,7 +1142,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
{
int ret;
- mutex_lock(&uuid_mutex);
+ lockdep_assert_held(&uuid_mutex);
+
mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
@@ -1156,7 +1153,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
ret = open_fs_devices(fs_devices, flags, holder);
}
mutex_unlock(&fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
return ret;
}
@@ -1217,16 +1213,18 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
* and we are not allowed to call set_blocksize during the scan. The superblock
* is read via pagecache
*/
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret)
+struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+ void *holder)
{
struct btrfs_super_block *disk_super;
- struct btrfs_device *device;
+ bool new_device_added = false;
+ struct btrfs_device *device = NULL;
struct block_device *bdev;
struct page *page;
- int ret = 0;
u64 bytenr;
+ lockdep_assert_held(&uuid_mutex);
+
/*
* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
@@ -1238,112 +1236,25 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
- ret = -EINVAL;
+ device = ERR_PTR(-EINVAL);
goto error_bdev_put;
}
- mutex_lock(&uuid_mutex);
- device = device_list_add(path, disk_super);
- if (IS_ERR(device))
- ret = PTR_ERR(device);
- else
- *fs_devices_ret = device->fs_devices;
- mutex_unlock(&uuid_mutex);
+ device = device_list_add(path, disk_super, &new_device_added);
+ if (!IS_ERR(device)) {
+ if (new_device_added)
+ btrfs_free_stale_devices(path, device);
+ }
btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
- return ret;
-}
-
-/* helper to account the used device space in the range */
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length)
-{
- struct btrfs_key key;
- struct btrfs_root *root = device->fs_info->dev_root;
- struct btrfs_dev_extent *dev_extent;
- struct btrfs_path *path;
- u64 extent_end;
- int ret;
- int slot;
- struct extent_buffer *l;
-
- *length = 0;
-
- if (start >= device->total_bytes ||
- test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = READA_FORWARD;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
-
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.objectid < device->devid)
- goto next;
-
- if (key.objectid > device->devid)
- break;
-
- if (key.type != BTRFS_DEV_EXTENT_KEY)
- goto next;
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- extent_end = key.offset + btrfs_dev_extent_length(l,
- dev_extent);
- if (key.offset <= start && extent_end > end) {
- *length = end - start + 1;
- break;
- } else if (key.offset <= start && extent_end > start)
- *length += extent_end - start;
- else if (key.offset > start && extent_end <= end)
- *length += extent_end - key.offset;
- else if (key.offset > start && key.offset <= end) {
- *length += end - key.offset + 1;
- break;
- } else if (key.offset > end)
- break;
-
-next:
- path->slots[0]++;
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return device;
}
static int contains_pending_extent(struct btrfs_transaction *transaction,
@@ -1755,10 +1666,8 @@ error:
* the btrfs_device struct should be fully filled in
*/
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
- struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_dev_item *dev_item;
@@ -1774,8 +1683,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*dev_item));
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
+ &key, sizeof(*dev_item));
if (ret)
goto out;
@@ -1800,7 +1709,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
ptr = btrfs_device_uuid(dev_item);
write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
ptr = btrfs_device_fsid(dev_item);
- write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
+ write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
@@ -1924,9 +1833,10 @@ static struct btrfs_device * btrfs_find_next_active_device(
* where this function called, there should be always be another device (or
* this_dev) which is active.
*/
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device, struct btrfs_device *this_dev)
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *this_dev)
{
+ struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_device *next_device;
if (this_dev)
@@ -2029,11 +1939,14 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
cur_devices->num_devices--;
cur_devices->total_devices--;
+ /* Update total_devices of the parent fs_devices if it's seed */
+ if (cur_devices != fs_devices)
+ fs_devices->total_devices--;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
cur_devices->missing_devices--;
- btrfs_assign_next_active_device(fs_info, device, NULL);
+ btrfs_assign_next_active_device(device, NULL);
if (device->bdev) {
cur_devices->open_devices--;
@@ -2084,12 +1997,11 @@ error_undo:
goto out;
}
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
struct btrfs_fs_devices *fs_devices;
- lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
+ lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
/*
* in case of fs with no seed, srcdev->fs_devices will point
@@ -2151,10 +2063,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
}
}
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *tgtdev)
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
WARN_ON(!tgtdev);
mutex_lock(&fs_devices->device_list_mutex);
@@ -2166,7 +2077,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
fs_devices->num_devices--;
- btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
+ btrfs_assign_next_active_device(tgtdev, NULL);
list_del_rcu(&tgtdev->dev_list);
@@ -2297,7 +2208,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&seed_devices->alloc_list);
mutex_init(&seed_devices->device_list_mutex);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2317,7 +2228,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
generate_random_uuid(fs_devices->fsid);
memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
@@ -2407,15 +2318,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct block_device *bdev;
- struct list_head *devices;
struct super_block *sb = fs_info->sb;
struct rcu_string *name;
- u64 tmp;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u64 orig_super_total_bytes;
+ u64 orig_super_num_devices;
int seeding_dev = 0;
int ret = 0;
bool unlocked = false;
- if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
+ if (sb_rdonly(sb) && !fs_devices->seeding)
return -EROFS;
bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
@@ -2423,7 +2335,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- if (fs_info->fs_devices->seeding) {
+ if (fs_devices->seeding) {
seeding_dev = 1;
down_write(&sb->s_umount);
mutex_lock(&uuid_mutex);
@@ -2431,18 +2343,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
filemap_write_and_wait(bdev->bd_inode->i_mapping);
- devices = &fs_info->fs_devices->devices;
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- list_for_each_entry(device, devices, dev_list) {
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev == bdev) {
ret = -EEXIST;
mutex_unlock(
- &fs_info->fs_devices->device_list_mutex);
+ &fs_devices->device_list_mutex);
goto error;
}
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
device = btrfs_alloc_device(fs_info, NULL, NULL);
if (IS_ERR(device)) {
@@ -2491,33 +2401,34 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
}
- device->fs_devices = fs_info->fs_devices;
+ device->fs_devices = fs_devices;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
- list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices);
- list_add(&device->dev_alloc_list,
- &fs_info->fs_devices->alloc_list);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- fs_info->fs_devices->rw_devices++;
- fs_info->fs_devices->total_devices++;
- fs_info->fs_devices->total_rw_bytes += device->total_bytes;
+ list_add_rcu(&device->dev_list, &fs_devices->devices);
+ list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+ fs_devices->num_devices++;
+ fs_devices->open_devices++;
+ fs_devices->rw_devices++;
+ fs_devices->total_devices++;
+ fs_devices->total_rw_bytes += device->total_bytes;
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
if (!blk_queue_nonrot(q))
- fs_info->fs_devices->rotating = 1;
+ fs_devices->rotating = 1;
- tmp = btrfs_super_total_bytes(fs_info->super_copy);
+ orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
btrfs_set_super_total_bytes(fs_info->super_copy,
- round_down(tmp + device->total_bytes, fs_info->sectorsize));
+ round_down(orig_super_total_bytes + device->total_bytes,
+ fs_info->sectorsize));
- tmp = btrfs_super_num_devices(fs_info->super_copy);
- btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
+ orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
+ btrfs_set_super_num_devices(fs_info->super_copy,
+ orig_super_num_devices + 1);
/* add sysfs device entry */
- btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_add_device_link(fs_devices, device);
/*
* we've got more storage, clear any full flags on the space
@@ -2526,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
if (seeding_dev) {
mutex_lock(&fs_info->chunk_mutex);
@@ -2538,7 +2449,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
}
- ret = btrfs_add_dev_item(trans, fs_info, device);
+ ret = btrfs_add_dev_item(trans, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
@@ -2558,7 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
*/
snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
fs_info->fsid);
- if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf))
+ if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf))
btrfs_warn(fs_info,
"sysfs: failed to create fsid for sprout");
}
@@ -2593,7 +2504,23 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
return ret;
error_sysfs:
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_rm_device_link(fs_devices, device);
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_info->chunk_mutex);
+ list_del_rcu(&device->dev_list);
+ list_del(&device->dev_alloc_list);
+ fs_info->fs_devices->num_devices--;
+ fs_info->fs_devices->open_devices--;
+ fs_info->fs_devices->rw_devices--;
+ fs_info->fs_devices->total_devices--;
+ fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
+ atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
+ btrfs_set_super_total_bytes(fs_info->super_copy,
+ orig_super_total_bytes);
+ btrfs_set_super_num_devices(fs_info->super_copy,
+ orig_super_num_devices);
+ mutex_unlock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
if (seeding_dev)
sb->s_flags |= SB_RDONLY;
@@ -2697,9 +2624,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
return btrfs_update_device(trans, device);
}
-static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
@@ -2808,9 +2735,9 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
return em;
}
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_map *em;
struct map_lookup *map;
u64 dev_extent_len = 0;
@@ -2829,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
map = em->map_lookup;
mutex_lock(&fs_info->chunk_mutex);
- check_system_chunk(trans, fs_info, map->type);
+ check_system_chunk(trans, map->type);
mutex_unlock(&fs_info->chunk_mutex);
/*
@@ -2869,7 +2796,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
mutex_unlock(&fs_devices->device_list_mutex);
- ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_free_chunk(trans, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -2885,7 +2812,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
}
- ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em);
+ ret = btrfs_remove_block_group(trans, chunk_offset, em);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -2950,7 +2877,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
* step two, delete the device extents and the
* chunk tree entries
*/
- ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_remove_chunk(trans, chunk_offset);
btrfs_end_transaction(trans);
return ret;
}
@@ -3059,7 +2986,7 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_force_chunk_alloc(trans, fs_info,
+ ret = btrfs_force_chunk_alloc(trans,
BTRFS_BLOCK_GROUP_DATA);
btrfs_end_transaction(trans);
if (ret < 0)
@@ -4692,7 +4619,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
- max_chunk_size = 10 * max_stripe_size;
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -4900,7 +4827,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
refcount_inc(&em->refs);
write_unlock(&em_tree->lock);
- ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
+ ret = btrfs_make_block_group(trans, 0, type, start, num_bytes);
if (ret)
goto error_del_extent;
@@ -4934,9 +4861,9 @@ error:
}
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 chunk_offset, u64 chunk_size)
+ u64 chunk_offset, u64 chunk_size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_root *chunk_root = fs_info->chunk_root;
struct btrfs_key key;
@@ -5038,13 +4965,12 @@ out:
* require modifying the chunk tree. This division is important for the
* bootstrap process of adding storage to a seed btrfs.
*/
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
{
u64 chunk_offset;
- lockdep_assert_held(&fs_info->chunk_mutex);
- chunk_offset = find_next_chunk(fs_info);
+ lockdep_assert_held(&trans->fs_info->chunk_mutex);
+ chunk_offset = find_next_chunk(trans->fs_info);
return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
@@ -5175,7 +5101,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
/*
* There could be two corrupted data stripes, we need
* to loop retry in order to rebuild the correct data.
- *
+ *
* Fail a stripe at a time on every retry except the
* stripe under reconstruction.
*/
@@ -6187,21 +6113,11 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
-#ifdef DEBUG
- {
- struct rcu_string *name;
-
- rcu_read_lock();
- name = rcu_dereference(dev->name);
- btrfs_debug(fs_info,
- "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
- bio_op(bio), bio->bi_opf,
- (u64)bio->bi_iter.bi_sector,
- (u_long)dev->bdev->bd_dev, name->str, dev->devid,
- bio->bi_iter.bi_size);
- rcu_read_unlock();
- }
-#endif
+ btrfs_debug_in_rcu(fs_info,
+ "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+ bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+ (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
+ bio->bi_iter.bi_size);
bio_set_dev(bio, dev->bdev);
btrfs_bio_counter_inc_noblocked(fs_info);
@@ -6403,6 +6319,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
u16 num_stripes;
u16 sub_stripes;
u64 type;
+ u64 features;
+ bool mixed = false;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6441,6 +6359,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
@@ -6527,6 +6471,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
map->type = btrfs_chunk_type(leaf, chunk);
map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ map->verified_stripes = 0;
for (i = 0; i < num_stripes; i++) {
map->stripes[i].physical =
btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -6563,10 +6508,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em, 0);
write_unlock(&map_tree->map_tree.lock);
- BUG_ON(ret); /* Tree corruption */
+ if (ret < 0) {
+ btrfs_err(fs_info,
+ "failed to add chunk map, start=%llu len=%llu: %d",
+ em->start, em->len, ret);
+ }
free_extent_map(em);
- return 0;
+ return ret;
}
static void fill_device_from_item(struct extent_buffer *leaf,
@@ -7108,9 +7057,9 @@ out:
}
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -7203,7 +7152,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
*/
smp_rmb();
- ret = update_dev_stat_item(trans, fs_info, device);
+ ret = update_dev_stat_item(trans, device);
if (!ret)
atomic_sub(stats_cnt, &device->dev_stats_ccnt);
}
@@ -7382,3 +7331,197 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
fs_devices = fs_devices->seed;
}
}
+
+/*
+ * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
+ */
+int btrfs_bg_type_to_factor(u64 flags)
+{
+ if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ return 2;
+ return 1;
+}
+
+
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+ int index = btrfs_bg_flags_to_raid_index(type);
+ int ncopies = btrfs_raid_array[index].ncopies;
+ int data_stripes;
+
+ switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case BTRFS_BLOCK_GROUP_RAID5:
+ data_stripes = num_stripes - 1;
+ break;
+ case BTRFS_BLOCK_GROUP_RAID6:
+ data_stripes = num_stripes - 2;
+ break;
+ default:
+ data_stripes = num_stripes / ncopies;
+ break;
+ }
+ return div_u64(chunk_len, data_stripes);
+}
+
+static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+ u64 chunk_offset, u64 devid,
+ u64 physical_offset, u64 physical_len)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct map_lookup *map;
+ u64 stripe_len;
+ bool found = false;
+ int ret = 0;
+ int i;
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+ read_unlock(&em_tree->lock);
+
+ if (!em) {
+ btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
+ physical_offset, devid);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ map = em->map_lookup;
+ stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
+ if (physical_len != stripe_len) {
+ btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
+ physical_offset, devid, em->start, physical_len,
+ stripe_len);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ for (i = 0; i < map->num_stripes; i++) {
+ if (map->stripes[i].dev->devid == devid &&
+ map->stripes[i].physical == physical_offset) {
+ found = true;
+ if (map->verified_stripes >= map->num_stripes) {
+ btrfs_err(fs_info,
+ "too many dev extents for chunk %llu found",
+ em->start);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ map->verified_stripes++;
+ break;
+ }
+ }
+ if (!found) {
+ btrfs_err(fs_info,
+ "dev extent physical offset %llu devid %llu has no corresponding chunk",
+ physical_offset, devid);
+ ret = -EUCLEAN;
+ }
+out:
+ free_extent_map(em);
+ return ret;
+}
+
+static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct rb_node *node;
+ int ret = 0;
+
+ read_lock(&em_tree->lock);
+ for (node = rb_first(&em_tree->map); node; node = rb_next(node)) {
+ em = rb_entry(node, struct extent_map, rb_node);
+ if (em->map_lookup->num_stripes !=
+ em->map_lookup->verified_stripes) {
+ btrfs_err(fs_info,
+ "chunk %llu has missing dev extent, have %d expect %d",
+ em->start, em->map_lookup->verified_stripes,
+ em->map_lookup->num_stripes);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+out:
+ read_unlock(&em_tree->lock);
+ return ret;
+}
+
+/*
+ * Ensure that all dev extents are mapped to correct chunk, otherwise
+ * later chunk allocation/free would cause unexpected behavior.
+ *
+ * NOTE: This will iterate through the whole device tree, which should be of
+ * the same size level as the chunk tree. This slightly increases mount time.
+ */
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = fs_info->dev_root;
+ struct btrfs_key key;
+ int ret = 0;
+
+ key.objectid = 1;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->reada = READA_FORWARD;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ goto out;
+ /* No dev extents at all? Not good */
+ if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ while (1) {
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_dev_extent *dext;
+ int slot = path->slots[0];
+ u64 chunk_offset;
+ u64 physical_offset;
+ u64 physical_len;
+ u64 devid;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.type != BTRFS_DEV_EXTENT_KEY)
+ break;
+ devid = key.objectid;
+ physical_offset = key.offset;
+
+ dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
+ chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
+ physical_len = btrfs_dev_extent_length(leaf, dext);
+
+ ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
+ physical_offset, physical_len);
+ if (ret < 0)
+ goto out;
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* Ensure all chunks have corresponding dev extents */
+ ret = verify_chunk_dev_extent_mapping(fs_info);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139ec8daf4c..23e9285d88de 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -11,6 +11,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
@@ -343,6 +345,7 @@ struct map_lookup {
u64 stripe_len;
int num_stripes;
int sub_stripes;
+ int verified_stripes; /* For mount time dev extent verification */
struct btrfs_bio_stripe stripes[];
};
@@ -382,8 +385,6 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
}
}
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length);
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
@@ -396,20 +397,19 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret);
+struct btrfs_device *btrfs_scan_one_device(const char *path,
+ fmode_t flags, void *holder);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device, struct btrfs_device *this_dev);
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *this_dev);
int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
const char *device_path,
struct btrfs_device **device);
@@ -453,22 +453,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev);
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev);
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *tgtdev);
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
u64 logical);
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 chunk_offset, u64 chunk_size);
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset);
+ u64 chunk_offset, u64 chunk_size);
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
@@ -560,4 +556,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
+int btrfs_bg_type_to_factor(u64 flags);
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
+
#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index cabc045f483d..c8c2b7d8b8d6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1900,15 +1900,16 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
break;
case IOMAP_UNWRITTEN:
/*
- * For unwritten regions, we always need to ensure that
- * sub-block writes cause the regions in the block we are not
- * writing to are zeroed. Set the buffer as new to ensure this.
+ * For unwritten regions, we always need to ensure that regions
+ * in the block we are not writing to are zeroed. Mark the
+ * buffer as new to ensure this.
*/
set_buffer_new(bh);
set_buffer_unwritten(bh);
/* FALLTHRU */
case IOMAP_MAPPED:
- if (offset >= i_size_read(inode))
+ if ((iomap->flags & IOMAP_F_NEW) ||
+ offset >= i_size_read(inode))
set_buffer_new(bh);
bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
inode->i_blkbits;
@@ -2076,6 +2077,40 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+ struct page *page)
+{
+ loff_t old_size = inode->i_size;
+ bool i_size_changed = false;
+
+ /*
+ * No need to use i_size_read() here, the i_size cannot change under us
+ * because we hold i_rwsem.
+ *
+ * But it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+ if (pos + copied > inode->i_size) {
+ i_size_write(inode, pos + copied);
+ i_size_changed = true;
+ }
+
+ unlock_page(page);
+ put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+ /*
+ * Don't mark the inode dirty under page lock. First, it unnecessarily
+ * makes the holding time of page lock longer. Second, it forces lock
+ * ordering of page lock and transaction start for journaling
+ * filesystems.
+ */
+ if (i_size_changed)
+ mark_inode_dirty(inode);
+ return copied;
+}
+
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -2116,39 +2151,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- struct inode *inode = mapping->host;
- loff_t old_size = inode->i_size;
- int i_size_changed = 0;
-
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
- /*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hold i_mutex.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos+copied > inode->i_size) {
- i_size_write(inode, pos+copied);
- i_size_changed = 1;
- }
-
- unlock_page(page);
- put_page(page);
-
- if (old_size < pos)
- pagecache_isize_extended(inode, old_size, pos);
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
- mark_inode_dirty(inode);
-
- return copied;
+ return __generic_write_end(mapping->host, pos, copied, page);
}
EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..e2679e8a2535 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -429,8 +429,7 @@ out:
* file or symlink, return 1 so the VFS can retry.
*/
int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened)
+ struct file *file, unsigned flags, umode_t mode)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -507,9 +506,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
dout("atomic_open finish_open on dn %p\n", dn);
if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
ceph_init_inode_acls(d_inode(dentry), &acls);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
}
- err = finish_open(file, dentry, ceph_open, opened);
+ err = finish_open(file, dentry, ceph_open);
}
out_req:
if (!req->r_err && req->r_target_inode)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a7077a0c989f..971328b99ede 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops;
extern int ceph_renew_caps(struct inode *inode);
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened);
+ struct file *file, unsigned flags, umode_t mode);
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5f0231803431..f3a78efc3109 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
extern int cifs_create(struct inode *, struct dentry *, umode_t,
bool excl);
extern int cifs_atomic_open(struct inode *, struct dentry *,
- struct file *, unsigned, umode_t,
- int *);
+ struct file *, unsigned, umode_t);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
unsigned int);
extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ddae52bd1993..3713d22b95a7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -465,8 +465,7 @@ out_err:
int
cifs_atomic_open(struct inode *inode, struct dentry *direntry,
- struct file *file, unsigned oflags, umode_t mode,
- int *opened)
+ struct file *file, unsigned oflags, umode_t mode)
{
int rc;
unsigned int xid;
@@ -539,9 +538,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
}
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
- rc = finish_open(file, direntry, generic_file_open, opened);
+ rc = finish_open(file, direntry, generic_file_open);
if (rc) {
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..8d2ec4898c2b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
- bool hashed = !d_unhashed(dentry);
- if (hashed)
- raw_write_seqcount_begin(&dentry->d_seq);
+ raw_write_seqcount_begin(&dentry->d_seq);
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- if (hashed)
- raw_write_seqcount_end(&dentry->d_seq);
+ raw_write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -732,16 +729,16 @@ static inline bool fast_dput(struct dentry *dentry)
if (dentry->d_lockref.count > 1) {
dentry->d_lockref.count--;
spin_unlock(&dentry->d_lock);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
* If we weren't the last ref, we're done.
*/
if (ret)
- return 1;
+ return true;
/*
* Careful, careful. The reference count went down
@@ -770,7 +767,7 @@ static inline bool fast_dput(struct dentry *dentry)
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
- return 1;
+ return true;
/*
* Not the fast normal case? Get the lock. We've already decremented
@@ -787,7 +784,7 @@ static inline bool fast_dput(struct dentry *dentry)
*/
if (dentry->d_lockref.count) {
spin_unlock(&dentry->d_lock);
- return 1;
+ return true;
}
/*
@@ -796,7 +793,7 @@ static inline bool fast_dput(struct dentry *dentry)
* set it to 1.
*/
dentry->d_lockref.count = 1;
- return 0;
+ return false;
}
@@ -1892,50 +1889,25 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
+ inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry. If a directory alias is found, then
- * return an error (and drop inode). Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
- BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
- security_d_instantiate(entry, inode);
- spin_lock(&inode->i_lock);
- if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
- spin_unlock(&inode->i_lock);
- iput(inode);
- return -EBUSY;
- }
- __d_instantiate(entry, inode);
- spin_unlock(&inode->i_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
struct dentry *d_make_root(struct inode *root_inode)
{
struct dentry *res = NULL;
if (root_inode) {
res = d_alloc_anon(root_inode->i_sb);
- if (res)
+ if (res) {
+ res->d_flags |= DCACHE_RCUACCESS;
d_instantiate(res, root_inode);
- else
+ } else {
iput(root_inode);
+ }
}
return res;
}
@@ -2676,33 +2648,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
}
EXPORT_SYMBOL(d_exact_alias);
-/**
- * dentry_update_name_case - update case insensitive dentry with a new name
- * @dentry: dentry to be updated
- * @name: new name
- *
- * Update a case insensitive dentry with new case of name.
- *
- * dentry must have been returned by d_lookup with name @name. Old and new
- * name lengths must match (ie. no d_compare which allows mismatched name
- * lengths).
- *
- * Parent inode i_mutex must be held over d_lookup and into this call (to
- * keep renames and concurrent inserts, and readdir(2) away).
- */
-void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
-{
- BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
- BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
-
- spin_lock(&dentry->d_lock);
- write_seqcount_begin(&dentry->d_seq);
- memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
- write_seqcount_end(&dentry->d_seq);
- spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_update_name_case);
-
static void swap_names(struct dentry *dentry, struct dentry *target)
{
if (unlikely(dname_external(target))) {
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 71fccccf317e..8c6ab6c95727 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
/* length of the variable name itself: remove GUID and separator */
namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
- uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+ err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+ if (err)
+ goto out;
if (efivar_variable_is_removable(var->var.VendorGuid,
dentry->d_name.name, namelen))
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 6484199b35d1..5c3d7b7e4975 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -611,8 +611,7 @@ fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
clear_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return ERR_PTR(err);
fail:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 152453a91877..0c26dcc5d850 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
return 0;
}
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -192,8 +191,7 @@ out:
out_fail:
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput (inode);
+ discard_new_inode(inode);
goto out;
}
@@ -261,8 +259,7 @@ out:
out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
out_dir:
inode_dec_link_count(dir);
goto out;
diff --git a/fs/file_table.c b/fs/file_table.c
index 7ec0b3e5f05d..d6eccd04d703 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
+ security_file_free(f);
percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
@@ -100,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write,
* done, you will imbalance int the mount's writer count
* and a warning at __fput() time.
*/
-struct file *get_empty_filp(void)
+struct file *alloc_empty_file(int flags, const struct cred *cred)
{
- const struct cred *cred = current_cred();
static long old_max;
struct file *f;
int error;
@@ -123,11 +123,10 @@ struct file *get_empty_filp(void)
if (unlikely(!f))
return ERR_PTR(-ENOMEM);
- percpu_counter_inc(&nr_files);
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
- file_free(f);
+ file_free_rcu(&f->f_u.fu_rcuhead);
return ERR_PTR(error);
}
@@ -136,7 +135,10 @@ struct file *get_empty_filp(void)
spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
+ f->f_flags = flags;
+ f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */
+ percpu_counter_inc(&nr_files);
return f;
over:
@@ -152,15 +154,15 @@ over:
* alloc_file - allocate and initialize a 'struct file'
*
* @path: the (dentry, vfsmount) pair for the new file
- * @mode: the mode with which the new file will be opened
+ * @flags: O_... flags with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*/
-struct file *alloc_file(const struct path *path, fmode_t mode,
+static struct file *alloc_file(const struct path *path, int flags,
const struct file_operations *fop)
{
struct file *file;
- file = get_empty_filp();
+ file = alloc_empty_file(flags, current_cred());
if (IS_ERR(file))
return file;
@@ -168,19 +170,56 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
- if ((mode & FMODE_READ) &&
+ if ((file->f_mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
- mode |= FMODE_CAN_READ;
- if ((mode & FMODE_WRITE) &&
+ file->f_mode |= FMODE_CAN_READ;
+ if ((file->f_mode & FMODE_WRITE) &&
likely(fop->write || fop->write_iter))
- mode |= FMODE_CAN_WRITE;
- file->f_mode = mode;
+ file->f_mode |= FMODE_CAN_WRITE;
+ file->f_mode |= FMODE_OPENED;
file->f_op = fop;
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path->dentry->d_inode);
return file;
}
-EXPORT_SYMBOL(alloc_file);
+
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+ const char *name, int flags,
+ const struct file_operations *fops)
+{
+ static const struct dentry_operations anon_ops = {
+ .d_dname = simple_dname
+ };
+ struct qstr this = QSTR_INIT(name, strlen(name));
+ struct path path;
+ struct file *file;
+
+ path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+ if (!path.dentry)
+ return ERR_PTR(-ENOMEM);
+ if (!mnt->mnt_sb->s_d_op)
+ d_set_d_op(path.dentry, &anon_ops);
+ path.mnt = mntget(mnt);
+ d_instantiate(path.dentry, inode);
+ file = alloc_file(&path, flags, fops);
+ if (IS_ERR(file)) {
+ ihold(inode);
+ path_put(&path);
+ }
+ return file;
+}
+EXPORT_SYMBOL(alloc_file_pseudo);
+
+struct file *alloc_file_clone(struct file *base, int flags,
+ const struct file_operations *fops)
+{
+ struct file *f = alloc_file(&base->f_path, flags, fops);
+ if (!IS_ERR(f)) {
+ path_get(&f->f_path);
+ f->f_mapping = base->f_mapping;
+ }
+ return f;
+}
/* the real guts of fput() - releasing the last reference to file
*/
@@ -190,6 +229,9 @@ static void __fput(struct file *file)
struct vfsmount *mnt = file->f_path.mnt;
struct inode *inode = file->f_inode;
+ if (unlikely(!(file->f_mode & FMODE_OPENED)))
+ goto out;
+
might_sleep();
fsnotify_close(file);
@@ -207,7 +249,6 @@ static void __fput(struct file *file)
}
if (file->f_op->release)
file->f_op->release(inode, file);
- security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
@@ -220,12 +261,10 @@ static void __fput(struct file *file)
put_write_access(inode);
__mnt_drop_write(mnt);
}
- file->f_path.dentry = NULL;
- file->f_path.mnt = NULL;
- file->f_inode = NULL;
- file_free(file);
dput(dentry);
mntput(mnt);
+out:
+ file_free(file);
}
static LLIST_HEAD(delayed_fput_list);
@@ -300,14 +339,6 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
-void put_filp(struct file *file)
-{
- if (atomic_long_dec_and_test(&file->f_count)) {
- security_file_free(file);
- file_free(file);
- }
-}
-
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..d80aab0d5982 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -399,7 +399,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
*/
static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
int err;
struct inode *inode;
@@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
- err = finish_open(file, entry, generic_file_open, opened);
+ err = finish_open(file, entry, generic_file_open);
if (err) {
fuse_sync_release(ff, flags);
} else {
@@ -489,7 +489,7 @@ out_err:
static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
@@ -508,12 +508,12 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
goto no_open;
/* Only creates */
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
if (fc->no_create)
goto mknod;
- err = fuse_create_open(dir, entry, file, flags, mode, opened);
+ err = fuse_create_open(dir, entry, file, flags, mode);
if (err == -ENOSYS) {
fc->no_create = 1;
goto mknod;
@@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
{
struct fuse_entry_out outarg;
struct inode *inode;
+ struct dentry *d;
int err;
struct fuse_forget_link *forget;
@@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
}
kfree(forget);
- err = d_instantiate_no_diralias(entry, inode);
- if (err)
- return err;
+ d_drop(entry);
+ d = d_splice_alias(inode, entry);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
- fuse_change_entry_timeout(entry, &outarg);
+ if (d) {
+ fuse_change_entry_timeout(d, &outarg);
+ dput(d);
+ } else {
+ fuse_change_entry_timeout(entry, &outarg);
+ }
fuse_invalidate_attr(dir);
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index feda55f67050..648f0ca1ad57 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -580,7 +580,7 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct file *file,
umode_t mode, dev_t dev, const char *symname,
- unsigned int size, int excl, int *opened)
+ unsigned int size, int excl)
{
const struct qstr *name = &dentry->d_name;
struct posix_acl *default_acl, *acl;
@@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = 0;
if (file) {
if (S_ISREG(inode->i_mode))
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ error = finish_open(file, dentry, gfs2_open_common);
else
error = finish_no_open(file, NULL);
}
@@ -767,8 +767,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
d_instantiate(dentry, inode);
if (file) {
- *opened |= FILE_CREATED;
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ file->f_mode |= FMODE_CREATED;
+ error = finish_open(file, dentry, gfs2_open_common);
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
@@ -822,7 +822,7 @@ fail:
static int gfs2_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
}
/**
@@ -830,14 +830,13 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
* @dir: The directory inode
* @dentry: The dentry of the new inode
* @file: File to be opened
- * @opened: atomic_open flags
*
*
* Returns: errno
*/
static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
- struct file *file, int *opened)
+ struct file *file)
{
struct inode *inode;
struct dentry *d;
@@ -866,7 +865,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
return d;
}
if (file && S_ISREG(inode->i_mode))
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ error = finish_open(file, dentry, gfs2_open_common);
gfs2_glock_dq_uninit(&gh);
if (error) {
@@ -879,7 +878,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
unsigned flags)
{
- return __gfs2_lookup(dir, dentry, NULL, NULL);
+ return __gfs2_lookup(dir, dentry, NULL);
}
/**
@@ -1189,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
if (size >= gfs2_max_stuffed_size(GFS2_I(dir)))
return -ENAMETOOLONG;
- return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
}
/**
@@ -1204,7 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
- return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
}
/**
@@ -1219,7 +1218,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t dev)
{
- return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
}
/**
@@ -1229,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
* @file: The proposed new struct file
* @flags: open flags
* @mode: File mode
- * @opened: Flag to say whether the file has been opened or not
*
* Returns: error code or 0 for success
*/
static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
struct dentry *d;
bool excl = !!(flags & O_EXCL);
@@ -1244,13 +1242,13 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (!d_in_lookup(dentry))
goto skip_lookup;
- d = __gfs2_lookup(dir, dentry, file, opened);
+ d = __gfs2_lookup(dir, dentry, file);
if (IS_ERR(d))
return PTR_ERR(d);
if (d != NULL)
dentry = d;
if (d_really_is_positive(dentry)) {
- if (!(*opened & FILE_OPENED))
+ if (!(file->f_mode & FMODE_OPENED))
return finish_no_open(file, d);
dput(d);
return 0;
@@ -1262,7 +1260,7 @@ skip_lookup:
if (!(flags & O_CREAT))
return -ENOENT;
- return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened);
+ return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl);
}
/*
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a16111d312f..a2dfa1b2a89c 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
HFS_I(inode)->rsrc_inode = dir;
HFS_I(dir)->rsrc_inode = inode;
igrab(dir);
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
mark_inode_dirty(inode);
dont_mount(dentry);
out:
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2597b290c2a5..444c7b170359 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -610,33 +610,21 @@ static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
int err;
inode = hostfs_iget(ino->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out;
- }
err = -ENOMEM;
name = dentry_name(dentry);
- if (name == NULL)
- goto out_put;
-
- err = read_name(inode, name);
-
- __putname(name);
- if (err == -ENOENT) {
+ if (name) {
+ err = read_name(inode, name);
+ __putname(name);
+ }
+ if (err) {
iput(inode);
- inode = NULL;
+ inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
}
- else if (err)
- goto out_put;
-
- d_add(dentry, inode);
- return NULL;
-
- out_put:
- iput(inode);
out:
- return ERR_PTR(err);
+ return d_splice_alias(inode, dentry);
}
static int hostfs_link(struct dentry *to, struct inode *ino,
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index c83ece7facc5..d85230c84ef2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -244,6 +244,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
result = iget_locked(dir->i_sb, ino);
if (!result) {
hpfs_error(dir->i_sb, "hpfs_lookup: can't get inode");
+ result = ERR_PTR(-ENOMEM);
goto bail1;
}
if (result->i_state & I_NEW) {
@@ -266,6 +267,8 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) {
hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
+ iput(result);
+ result = ERR_PTR(-EINVAL);
goto bail1;
}
@@ -301,29 +304,17 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
}
}
+bail1:
hpfs_brelse4(&qbh);
/*
* Made it.
*/
- end:
- end_add:
+end:
+end_add:
hpfs_unlock(dir->i_sb);
- d_add(dentry, result);
- return NULL;
-
- /*
- * Didn't.
- */
- bail1:
-
- hpfs_brelse4(&qbh);
-
- /*bail:*/
-
- hpfs_unlock(dir->i_sb);
- return ERR_PTR(-ENOENT);
+ return d_splice_alias(result, dentry);
}
const struct file_operations hpfs_dir_ops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 40d4c66c7751..346a146c7617 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1310,10 +1310,6 @@ static int get_hstate_idx(int page_size_log)
return h - hstates;
}
-static const struct dentry_operations anon_ops = {
- .d_dname = simple_dname
-};
-
/*
* Note that size should be aligned to proper hugepage size in caller side,
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
@@ -1322,19 +1318,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
vm_flags_t acctflag, struct user_struct **user,
int creat_flags, int page_size_log)
{
- struct file *file = ERR_PTR(-ENOMEM);
struct inode *inode;
- struct path path;
- struct super_block *sb;
- struct qstr quick_string;
+ struct vfsmount *mnt;
int hstate_idx;
+ struct file *file;
hstate_idx = get_hstate_idx(page_size_log);
if (hstate_idx < 0)
return ERR_PTR(-ENODEV);
*user = NULL;
- if (!hugetlbfs_vfsmount[hstate_idx])
+ mnt = hugetlbfs_vfsmount[hstate_idx];
+ if (!mnt)
return ERR_PTR(-ENOENT);
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
@@ -1350,45 +1345,28 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
}
}
- sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
- quick_string.name = name;
- quick_string.len = strlen(quick_string.name);
- quick_string.hash = 0;
- path.dentry = d_alloc_pseudo(sb, &quick_string);
- if (!path.dentry)
- goto out_shm_unlock;
-
- d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
file = ERR_PTR(-ENOSPC);
- inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
if (!inode)
- goto out_dentry;
+ goto out;
if (creat_flags == HUGETLB_SHMFS_INODE)
inode->i_flags |= S_PRIVATE;
- file = ERR_PTR(-ENOMEM);
- if (hugetlb_reserve_pages(inode, 0,
- size >> huge_page_shift(hstate_inode(inode)), NULL,
- acctflag))
- goto out_inode;
-
- d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode);
- file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
- &hugetlbfs_file_operations);
- if (IS_ERR(file))
- goto out_dentry; /* inode is already attached */
-
- return file;
+ if (hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode)), NULL,
+ acctflag))
+ file = ERR_PTR(-ENOMEM);
+ else
+ file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+ &hugetlbfs_file_operations);
+ if (!IS_ERR(file))
+ return file;
-out_inode:
iput(inode);
-out_dentry:
- path_put(&path);
-out_shm_unlock:
+out:
if (*user) {
user_shm_unlock(size, *user);
*user = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 8c86c809ca17..a06de4454232 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -831,6 +835,10 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
+ inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
+void discard_new_inode(struct inode *inode)
+{
+ lockdep_annotate_inode_mutex_key(inode);
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
*
@@ -1029,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
struct inode *old;
+ bool creating = inode->i_state & I_CREATING;
again:
spin_lock(&inode_hash_lock);
@@ -1039,6 +1061,8 @@ again:
* Use the old inode instead of the preallocated one.
*/
spin_unlock(&inode_hash_lock);
+ if (IS_ERR(old))
+ return NULL;
wait_on_inode(old);
if (unlikely(inode_unhashed(old))) {
iput(old);
@@ -1060,6 +1084,8 @@ again:
inode->i_state |= I_NEW;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
+ if (!creating)
+ inode_sb_list_add(inode);
unlock:
spin_unlock(&inode_hash_lock);
@@ -1094,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
struct inode *inode = ilookup5(sb, hashval, test, data);
if (!inode) {
- struct inode *new = new_inode(sb);
+ struct inode *new = alloc_inode(sb);
if (new) {
+ new->i_state = 0;
inode = inode_insert5(new, hashval, test, set, data);
if (unlikely(inode != new))
- iput(new);
+ destroy_inode(new);
}
}
return inode;
@@ -1128,6 +1155,8 @@ again:
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1165,6 +1194,8 @@ again:
*/
spin_unlock(&inode_hash_lock);
destroy_inode(inode);
+ if (IS_ERR(old))
+ return NULL;
inode = old;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1313,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
inode = find_inode(sb, head, test, data);
spin_unlock(&inode_hash_lock);
- return inode;
+ return IS_ERR(inode) ? NULL : inode;
}
EXPORT_SYMBOL(ilookup5_nowait);
@@ -1338,6 +1369,8 @@ again:
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1421,12 +1454,17 @@ int insert_inode_locked(struct inode *inode)
}
if (likely(!old)) {
spin_lock(&inode->i_lock);
- inode->i_state |= I_NEW;
+ inode->i_state |= I_NEW | I_CREATING;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
return 0;
}
+ if (unlikely(old->i_state & I_CREATING)) {
+ spin_unlock(&old->i_lock);
+ spin_unlock(&inode_hash_lock);
+ return -EBUSY;
+ }
__iget(old);
spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock);
@@ -1443,7 +1481,10 @@ EXPORT_SYMBOL(insert_inode_locked);
int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+ struct inode *old;
+
+ inode->i_state |= I_CREATING;
+ old = inode_insert5(inode, hashval, test, NULL, data);
if (old != inode) {
iput(old);
diff --git a/fs/internal.h b/fs/internal.h
index 5645b4ebf494..50a28fc71300 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -43,6 +43,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+ struct page *page);
/*
* char_dev.c
@@ -93,7 +95,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
-extern struct file *get_empty_filp(void);
+extern struct file *alloc_empty_file(int, const struct cred *);
/*
* super.c
@@ -125,8 +127,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
-extern int open_check_o_direct(struct file *f);
-extern int vfs_open(const struct path *, struct file *, const struct cred *);
+extern int vfs_open(const struct path *, struct file *);
/*
* inode.c
diff --git a/fs/iomap.c b/fs/iomap.c
index 0d0bd8845586..530670608fc8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
@@ -104,6 +105,243 @@ iomap_sector(struct iomap *iomap, loff_t pos)
}
static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+ struct iomap *iomap)
+{
+ size_t size = i_size_read(inode);
+ void *addr;
+
+ if (PageUptodate(page))
+ return;
+
+ BUG_ON(page->index);
+ BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+ addr = kmap_atomic(page);
+ memcpy(addr, iomap->inline_data, size);
+ memset(addr + size, 0, PAGE_SIZE - size);
+ kunmap_atomic(addr);
+ SetPageUptodate(page);
+}
+
+static void
+iomap_read_end_io(struct bio *bio)
+{
+ int error = blk_status_to_errno(bio->bi_status);
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i)
+ page_endio(bvec->bv_page, false, error);
+ bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+ struct page *cur_page;
+ bool cur_page_in_bio;
+ bool is_readahead;
+ struct bio *bio;
+ struct list_head *pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+ struct iomap *iomap)
+{
+ struct iomap_readpage_ctx *ctx = data;
+ struct page *page = ctx->cur_page;
+ unsigned poff = pos & (PAGE_SIZE - 1);
+ unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+ bool is_contig = false;
+ sector_t sector;
+
+ if (iomap->type == IOMAP_INLINE) {
+ WARN_ON_ONCE(poff);
+ iomap_read_inline_data(inode, page, iomap);
+ return PAGE_SIZE;
+ }
+
+ /* we don't support blocksize < PAGE_SIZE quite yet. */
+ WARN_ON_ONCE(pos != page_offset(page));
+ WARN_ON_ONCE(plen != PAGE_SIZE);
+
+ if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+ zero_user(page, poff, plen);
+ SetPageUptodate(page);
+ goto done;
+ }
+
+ ctx->cur_page_in_bio = true;
+
+ /*
+ * Try to merge into a previous segment if we can.
+ */
+ sector = iomap_sector(iomap, pos);
+ if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+ if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+ goto done;
+ is_contig = true;
+ }
+
+ if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+ gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+ int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ if (ctx->bio)
+ submit_bio(ctx->bio);
+
+ if (ctx->is_readahead) /* same as readahead_gfp_mask */
+ gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+ ctx->bio->bi_opf = REQ_OP_READ;
+ if (ctx->is_readahead)
+ ctx->bio->bi_opf |= REQ_RAHEAD;
+ ctx->bio->bi_iter.bi_sector = sector;
+ bio_set_dev(ctx->bio, iomap->bdev);
+ ctx->bio->bi_end_io = iomap_read_end_io;
+ }
+
+ __bio_add_page(ctx->bio, page, plen, poff);
+done:
+ return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+ struct iomap_readpage_ctx ctx = { .cur_page = page };
+ struct inode *inode = page->mapping->host;
+ unsigned poff;
+ loff_t ret;
+
+ WARN_ON_ONCE(page_has_buffers(page));
+
+ for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+ ret = iomap_apply(inode, page_offset(page) + poff,
+ PAGE_SIZE - poff, 0, ops, &ctx,
+ iomap_readpage_actor);
+ if (ret <= 0) {
+ WARN_ON_ONCE(ret == 0);
+ SetPageError(page);
+ break;
+ }
+ }
+
+ if (ctx.bio) {
+ submit_bio(ctx.bio);
+ WARN_ON_ONCE(!ctx.cur_page_in_bio);
+ } else {
+ WARN_ON_ONCE(ctx.cur_page_in_bio);
+ unlock_page(page);
+ }
+
+ /*
+ * Just like mpage_readpages and block_read_full_page we always
+ * return 0 and just mark the page as PageError on errors. This
+ * should be cleaned up all through the stack eventually.
+ */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+ loff_t length, loff_t *done)
+{
+ while (!list_empty(pages)) {
+ struct page *page = lru_to_page(pages);
+
+ if (page_offset(page) >= (u64)pos + length)
+ break;
+
+ list_del(&page->lru);
+ if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+ GFP_NOFS))
+ return page;
+
+ /*
+ * If we already have a page in the page cache at index we are
+ * done. Upper layers don't care if it is uptodate after the
+ * readpages call itself as every page gets checked again once
+ * actually needed.
+ */
+ *done += PAGE_SIZE;
+ put_page(page);
+ }
+
+ return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+ void *data, struct iomap *iomap)
+{
+ struct iomap_readpage_ctx *ctx = data;
+ loff_t done, ret;
+
+ for (done = 0; done < length; done += ret) {
+ if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+ if (!ctx->cur_page_in_bio)
+ unlock_page(ctx->cur_page);
+ put_page(ctx->cur_page);
+ ctx->cur_page = NULL;
+ }
+ if (!ctx->cur_page) {
+ ctx->cur_page = iomap_next_page(inode, ctx->pages,
+ pos, length, &done);
+ if (!ctx->cur_page)
+ break;
+ ctx->cur_page_in_bio = false;
+ }
+ ret = iomap_readpage_actor(inode, pos + done, length - done,
+ ctx, iomap);
+ }
+
+ return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+ unsigned nr_pages, const struct iomap_ops *ops)
+{
+ struct iomap_readpage_ctx ctx = {
+ .pages = pages,
+ .is_readahead = true,
+ };
+ loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+ loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+ loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+ while (length > 0) {
+ ret = iomap_apply(mapping->host, pos, length, 0, ops,
+ &ctx, iomap_readpages_actor);
+ if (ret <= 0) {
+ WARN_ON_ONCE(ret == 0);
+ goto done;
+ }
+ pos += ret;
+ length -= ret;
+ }
+ ret = 0;
+done:
+ if (ctx.bio)
+ submit_bio(ctx.bio);
+ if (ctx.cur_page) {
+ if (!ctx.cur_page_in_bio)
+ unlock_page(ctx.cur_page);
+ put_page(ctx.cur_page);
+ }
+
+ /*
+ * Check that we didn't lose a page due to the arcance calling
+ * conventions..
+ */
+ WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
+static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
loff_t i_size = i_size_read(inode);
@@ -117,6 +355,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
}
static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+ unsigned poff, unsigned plen, unsigned from, unsigned to,
+ struct iomap *iomap)
+{
+ struct bio_vec bvec;
+ struct bio bio;
+
+ if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+ zero_user_segments(page, poff, from, to, poff + plen);
+ return 0;
+ }
+
+ bio_init(&bio, &bvec, 1);
+ bio.bi_opf = REQ_OP_READ;
+ bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+ bio_set_dev(&bio, iomap->bdev);
+ __bio_add_page(&bio, page, plen, poff);
+ return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+ struct page *page, struct iomap *iomap)
+{
+ loff_t block_size = i_blocksize(inode);
+ loff_t block_start = pos & ~(block_size - 1);
+ loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+ unsigned poff = block_start & (PAGE_SIZE - 1);
+ unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+ unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+ WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+ if (PageUptodate(page))
+ return 0;
+ if (from <= poff && to >= poff + plen)
+ return 0;
+ return iomap_read_page_sync(inode, block_start, page,
+ poff, plen, from, to, iomap);
+}
+
+static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, struct iomap *iomap)
{
@@ -133,7 +413,12 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
if (!page)
return -ENOMEM;
- status = __block_write_begin_int(page, pos, len, NULL, iomap);
+ if (iomap->type == IOMAP_INLINE)
+ iomap_read_inline_data(inode, page, iomap);
+ else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
+ status = __block_write_begin_int(page, pos, len, NULL, iomap);
+ else
+ status = __iomap_write_begin(inode, pos, len, page, iomap);
if (unlikely(status)) {
unlock_page(page);
put_page(page);
@@ -146,14 +431,93 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
return status;
}
+int
+iomap_set_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+ int newly_dirty;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ /*
+ * Lock out page->mem_cgroup migration to keep PageDirty
+ * synchronized with per-memcg dirty page counters.
+ */
+ lock_page_memcg(page);
+ newly_dirty = !TestSetPageDirty(page);
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 0);
+ unlock_page_memcg(page);
+
+ if (newly_dirty)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+ unsigned copied, struct page *page, struct iomap *iomap)
+{
+ flush_dcache_page(page);
+
+ /*
+ * The blocks that were entirely written will now be uptodate, so we
+ * don't have to worry about a readpage reading them and overwriting a
+ * partial write. However if we have encountered a short write and only
+ * partially written into a block, it will not be marked uptodate, so a
+ * readpage might come in and destroy our partial write.
+ *
+ * Do the simplest thing, and just treat any short write to a non
+ * uptodate page as a zero-length write, and force the caller to redo
+ * the whole thing.
+ */
+ if (unlikely(copied < len && !PageUptodate(page))) {
+ copied = 0;
+ } else {
+ SetPageUptodate(page);
+ iomap_set_page_dirty(page);
+ }
+ return __generic_write_end(inode, pos, copied, page);
+}
+
+static int
+iomap_write_end_inline(struct inode *inode, struct page *page,
+ struct iomap *iomap, loff_t pos, unsigned copied)
+{
+ void *addr;
+
+ WARN_ON_ONCE(!PageUptodate(page));
+ BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+ addr = kmap_atomic(page);
+ memcpy(iomap->inline_data + pos, addr + pos, copied);
+ kunmap_atomic(addr);
+
+ mark_inode_dirty(inode);
+ __generic_write_end(inode, pos, copied, page);
+ return copied;
+}
+
static int
iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
- unsigned copied, struct page *page)
+ unsigned copied, struct page *page, struct iomap *iomap)
{
int ret;
- ret = generic_write_end(NULL, inode->i_mapping, pos, len,
- copied, page, NULL);
+ if (iomap->type == IOMAP_INLINE) {
+ ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+ } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+ ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+ copied, page, NULL);
+ } else {
+ ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
+ }
+
+ if (iomap->page_done)
+ iomap->page_done(inode, pos, copied, page, iomap);
+
if (ret < len)
iomap_write_failed(inode, pos, len);
return ret;
@@ -208,7 +572,8 @@ again:
flush_dcache_page(page);
- status = iomap_write_end(inode, pos, bytes, copied, page);
+ status = iomap_write_end(inode, pos, bytes, copied, page,
+ iomap);
if (unlikely(status < 0))
break;
copied = status;
@@ -302,7 +667,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
WARN_ON_ONCE(!PageUptodate(page));
- status = iomap_write_end(inode, pos, bytes, bytes, page);
+ status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
if (unlikely(status <= 0)) {
if (WARN_ON_ONCE(status == 0))
return -EIO;
@@ -354,7 +719,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
zero_user(page, offset, bytes);
mark_page_accessed(page);
- return iomap_write_end(inode, pos, bytes, bytes, page);
+ return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
}
static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@ -440,11 +805,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
struct page *page = data;
int ret;
- ret = __block_write_begin_int(page, pos, length, NULL, iomap);
- if (ret)
- return ret;
+ if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+ ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+ if (ret)
+ return ret;
+ block_commit_write(page, 0, length);
+ } else {
+ WARN_ON_ONCE(!PageUptodate(page));
+ WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+ }
- block_commit_write(page, 0, length);
return length;
}
@@ -811,6 +1181,7 @@ struct iomap_dio {
atomic_t ref;
unsigned flags;
int error;
+ bool wait_for_completion;
union {
/* used during submission and for synchronous completion: */
@@ -914,9 +1285,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) {
- if (is_sync_kiocb(dio->iocb)) {
+ if (dio->wait_for_completion) {
struct task_struct *waiter = dio->submit.waiter;
-
WRITE_ONCE(dio->submit.waiter, NULL);
wake_up_process(waiter);
} else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -963,10 +1333,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
}
static loff_t
-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
- void *data, struct iomap *iomap)
+iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap_dio *dio, struct iomap *iomap)
{
- struct iomap_dio *dio = data;
unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
unsigned int fs_block_size = i_blocksize(inode), pad;
unsigned int align = iov_iter_alignment(dio->submit.iter);
@@ -980,41 +1349,27 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
if ((pos | length | align) & ((1 << blkbits) - 1))
return -EINVAL;
- switch (iomap->type) {
- case IOMAP_HOLE:
- if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
- return -EIO;
- /*FALLTHRU*/
- case IOMAP_UNWRITTEN:
- if (!(dio->flags & IOMAP_DIO_WRITE)) {
- length = iov_iter_zero(length, dio->submit.iter);
- dio->size += length;
- return length;
- }
+ if (iomap->type == IOMAP_UNWRITTEN) {
dio->flags |= IOMAP_DIO_UNWRITTEN;
need_zeroout = true;
- break;
- case IOMAP_MAPPED:
- if (iomap->flags & IOMAP_F_SHARED)
- dio->flags |= IOMAP_DIO_COW;
- if (iomap->flags & IOMAP_F_NEW) {
- need_zeroout = true;
- } else {
- /*
- * Use a FUA write if we need datasync semantics, this
- * is a pure data IO that doesn't require any metadata
- * updates and the underlying device supports FUA. This
- * allows us to avoid cache flushes on IO completion.
- */
- if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
- (dio->flags & IOMAP_DIO_WRITE_FUA) &&
- blk_queue_fua(bdev_get_queue(iomap->bdev)))
- use_fua = true;
- }
- break;
- default:
- WARN_ON_ONCE(1);
- return -EIO;
+ }
+
+ if (iomap->flags & IOMAP_F_SHARED)
+ dio->flags |= IOMAP_DIO_COW;
+
+ if (iomap->flags & IOMAP_F_NEW) {
+ need_zeroout = true;
+ } else {
+ /*
+ * Use a FUA write if we need datasync semantics, this
+ * is a pure data IO that doesn't require any metadata
+ * updates and the underlying device supports FUA. This
+ * allows us to avoid cache flushes on IO completion.
+ */
+ if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+ (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+ blk_queue_fua(bdev_get_queue(iomap->bdev)))
+ use_fua = true;
}
/*
@@ -1093,6 +1448,66 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
return copied;
}
+static loff_t
+iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
+{
+ length = iov_iter_zero(length, dio->submit.iter);
+ dio->size += length;
+ return length;
+}
+
+static loff_t
+iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap_dio *dio, struct iomap *iomap)
+{
+ struct iov_iter *iter = dio->submit.iter;
+ size_t copied;
+
+ BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+ if (dio->flags & IOMAP_DIO_WRITE) {
+ loff_t size = inode->i_size;
+
+ if (pos > size)
+ memset(iomap->inline_data + size, 0, pos - size);
+ copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+ if (copied) {
+ if (pos + copied > size)
+ i_size_write(inode, pos + copied);
+ mark_inode_dirty(inode);
+ }
+ } else {
+ copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+ }
+ dio->size += copied;
+ return copied;
+}
+
+static loff_t
+iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+ void *data, struct iomap *iomap)
+{
+ struct iomap_dio *dio = data;
+
+ switch (iomap->type) {
+ case IOMAP_HOLE:
+ if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+ return -EIO;
+ return iomap_dio_hole_actor(length, dio);
+ case IOMAP_UNWRITTEN:
+ if (!(dio->flags & IOMAP_DIO_WRITE))
+ return iomap_dio_hole_actor(length, dio);
+ return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+ case IOMAP_MAPPED:
+ return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+ case IOMAP_INLINE:
+ return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+ default:
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+}
+
/*
* iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
* is being issued as AIO or not. This allows us to optimise pure data writes
@@ -1131,13 +1546,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->end_io = end_io;
dio->error = 0;
dio->flags = 0;
+ dio->wait_for_completion = is_sync_kiocb(iocb);
dio->submit.iter = iter;
- if (is_sync_kiocb(iocb)) {
- dio->submit.waiter = current;
- dio->submit.cookie = BLK_QC_T_NONE;
- dio->submit.last_queue = NULL;
- }
+ dio->submit.waiter = current;
+ dio->submit.cookie = BLK_QC_T_NONE;
+ dio->submit.last_queue = NULL;
if (iov_iter_rw(iter) == READ) {
if (pos >= dio->i_size)
@@ -1187,7 +1601,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio_warn_stale_pagecache(iocb->ki_filp);
ret = 0;
- if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+ if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
!inode->i_sb->s_dio_done_wq) {
ret = sb_init_dio_done_wq(inode->i_sb);
if (ret < 0)
@@ -1202,8 +1616,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
iomap_dio_actor);
if (ret <= 0) {
/* magic error code to fall back to buffered I/O */
- if (ret == -ENOTBLK)
+ if (ret == -ENOTBLK) {
+ dio->wait_for_completion = true;
ret = 0;
+ }
break;
}
pos += ret;
@@ -1224,7 +1640,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->flags &= ~IOMAP_DIO_NEED_SYNC;
if (!atomic_dec_and_test(&dio->ref)) {
- if (!is_sync_kiocb(iocb))
+ if (!dio->wait_for_completion)
return -EIOCBQUEUED;
for (;;) {
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f36ef68905a7..93e8c590ff5c 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
/* release the page */
release_metapage(mp);
- /*
- * __mark_inode_dirty expects inodes to be hashed. Since we don't
- * want special inodes in the fileset inode space, we make them
- * appear hashed, but do not put on any lists. hlist_del()
- * will work fine and require no locking.
- */
- hlist_add_fake(&ip->i_hash);
+ inode_fake_hash(ip);
return (ip);
}
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 5e9b7bb3aabf..4572b7cf183d 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
inode = new_inode(sb);
if (!inode) {
jfs_warn("ialloc: new_inode returned NULL!");
- rc = -ENOMEM;
- goto fail;
+ return ERR_PTR(-ENOMEM);
}
jfs_inode = JFS_IP(inode);
@@ -70,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
rc = diAlloc(parent, S_ISDIR(mode), inode);
if (rc) {
jfs_warn("ialloc: diAlloc returned %d!", rc);
- if (rc == -EIO)
- make_bad_inode(inode);
goto fail_put;
}
@@ -141,9 +138,10 @@ fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
clear_nlink(inode);
- unlock_new_inode(inode);
+ discard_new_inode(inode);
+ return ERR_PTR(rc);
+
fail_put:
iput(inode);
-fail:
return ERR_PTR(rc);
}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 56c3fcbfe80e..14528c0ffe63 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index f08571433aba..09da5cf14e27 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
inode->i_ino = 0;
inode->i_size = i_size_read(sb->s_bdev->bd_inode);
inode->i_mapping->a_ops = &jfs_metapage_aops;
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->direct_inode = inode;
diff --git a/fs/locks.c b/fs/locks.c
index db7b6917d9c5..bc047a7edc47 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -202,10 +202,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
* we often hold the flc_lock as well. In certain cases, when reading the fields
* protected by this lock, we can skip acquiring it iff we already hold the
* flc_lock.
- *
- * In particular, adding an entry to the fl_block list requires that you hold
- * both the flc_lock and the blocked_lock_lock (acquired in that order).
- * Deleting an entry from the list however only requires the file_lock_lock.
*/
static DEFINE_SPINLOCK(blocked_lock_lock);
@@ -990,6 +986,7 @@ out:
if (new_fl)
locks_free_lock(new_fl);
locks_dispose_list(&dispose);
+ trace_flock_lock_inode(inode, request, error);
return error;
}
@@ -2072,6 +2069,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
return -1;
if (IS_REMOTELCK(fl))
return fl->fl_pid;
+ /*
+ * If the flock owner process is dead and its pid has been already
+ * freed, the translation below won't work, but we still want to show
+ * flock owner pid number in init pidns.
+ */
+ if (ns == &init_pid_ns)
+ return (pid_t)fl->fl_pid;
rcu_read_lock();
pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
@@ -2626,12 +2630,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
fl_pid = locks_translate_pid(fl, proc_pidns);
/*
- * If there isn't a fl_pid don't display who is waiting on
- * the lock if we are called from locks_show, or if we are
- * called from __show_fd_info - skip lock entirely
+ * If lock owner is dead (and pid is freed) or not visible in current
+ * pidns, zero is shown as a pid value. Check lock info from
+ * init_pid_ns to get saved lock pid value.
*/
- if (fl_pid == 0)
- return;
if (fl->fl_file != NULL)
inode = locks_inode(fl->fl_file);
diff --git a/fs/namei.c b/fs/namei.c
index 734cef54fdf8..3cd396277cd3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2028,6 +2028,8 @@ static int link_path_walk(const char *name, struct nameidata *nd)
{
int err;
+ if (IS_ERR(name))
+ return PTR_ERR(name);
while (*name=='/')
name++;
if (!*name)
@@ -2125,12 +2127,15 @@ OK:
}
}
+/* must be paired with terminate_walk() */
static const char *path_init(struct nameidata *nd, unsigned flags)
{
const char *s = nd->name->name;
if (!*s)
flags &= ~LOOKUP_RCU;
+ if (flags & LOOKUP_RCU)
+ rcu_read_lock();
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -2143,7 +2148,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
nd->root_seq = nd->seq;
nd->m_seq = read_seqbegin(&mount_lock);
@@ -2159,21 +2163,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->m_seq = read_seqbegin(&mount_lock);
if (*s == '/') {
- if (flags & LOOKUP_RCU)
- rcu_read_lock();
set_root(nd);
if (likely(!nd_jump_root(nd)))
return s;
- nd->root.mnt = NULL;
- rcu_read_unlock();
return ERR_PTR(-ECHILD);
} else if (nd->dfd == AT_FDCWD) {
if (flags & LOOKUP_RCU) {
struct fs_struct *fs = current->fs;
unsigned seq;
- rcu_read_lock();
-
do {
seq = read_seqcount_begin(&fs->seq);
nd->path = fs->pwd;
@@ -2195,16 +2193,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
dentry = f.file->f_path.dentry;
- if (*s) {
- if (!d_can_lookup(dentry)) {
- fdput(f);
- return ERR_PTR(-ENOTDIR);
- }
+ if (*s && unlikely(!d_can_lookup(dentry))) {
+ fdput(f);
+ return ERR_PTR(-ENOTDIR);
}
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
nd->inode = nd->path.dentry->d_inode;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
} else {
@@ -2272,24 +2267,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
const char *s = path_init(nd, flags);
int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
-
- if (unlikely(flags & LOOKUP_DOWN)) {
+ if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
err = handle_lookup_down(nd);
- if (unlikely(err < 0)) {
- terminate_walk(nd);
- return err;
- }
+ if (unlikely(err < 0))
+ s = ERR_PTR(err);
}
while (!(err = link_path_walk(s, nd))
&& ((err = lookup_last(nd)) > 0)) {
s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- break;
- }
}
if (!err)
err = complete_walk(nd);
@@ -2336,10 +2322,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
struct path *parent)
{
const char *s = path_init(nd, flags);
- int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
- err = link_path_walk(s, nd);
+ int err = link_path_walk(s, nd);
if (!err)
err = complete_walk(nd);
if (!err) {
@@ -2666,15 +2649,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
{
const char *s = path_init(nd, flags);
int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
+
while (!(err = link_path_walk(s, nd)) &&
(err = mountpoint_last(nd)) > 0) {
s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- break;
- }
}
if (!err) {
*path = nd->path;
@@ -3027,17 +3005,16 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
* Returns 0 if successful. The file will have been created and attached to
* @file by the filesystem calling finish_open().
*
- * Returns 1 if the file was looked up only or didn't need creating. The
- * caller will need to perform the open themselves. @path will have been
- * updated to point to the new dentry. This may be negative.
+ * If the file was looked up only or didn't need creating, FMODE_OPENED won't
+ * be set. The caller will need to perform the open themselves. @path will
+ * have been updated to point to the new dentry. This may be negative.
*
* Returns an error code otherwise.
*/
static int atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file,
const struct open_flags *op,
- int open_flag, umode_t mode,
- int *opened)
+ int open_flag, umode_t mode)
{
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
struct inode *dir = nd->path.dentry->d_inode;
@@ -3052,39 +3029,38 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
file->f_path.dentry = DENTRY_NOT_SET;
file->f_path.mnt = nd->path.mnt;
error = dir->i_op->atomic_open(dir, dentry, file,
- open_to_namei_flags(open_flag),
- mode, opened);
+ open_to_namei_flags(open_flag), mode);
d_lookup_done(dentry);
if (!error) {
- /*
- * We didn't have the inode before the open, so check open
- * permission here.
- */
- int acc_mode = op->acc_mode;
- if (*opened & FILE_CREATED) {
- WARN_ON(!(open_flag & O_CREAT));
- fsnotify_create(dir, dentry);
- acc_mode = 0;
- }
- error = may_open(&file->f_path, acc_mode, open_flag);
- if (WARN_ON(error > 0))
- error = -EINVAL;
- } else if (error > 0) {
- if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
+ if (file->f_mode & FMODE_OPENED) {
+ /*
+ * We didn't have the inode before the open, so check open
+ * permission here.
+ */
+ int acc_mode = op->acc_mode;
+ if (file->f_mode & FMODE_CREATED) {
+ WARN_ON(!(open_flag & O_CREAT));
+ fsnotify_create(dir, dentry);
+ acc_mode = 0;
+ }
+ error = may_open(&file->f_path, acc_mode, open_flag);
+ if (WARN_ON(error > 0))
+ error = -EINVAL;
+ } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO;
} else {
if (file->f_path.dentry) {
dput(dentry);
dentry = file->f_path.dentry;
}
- if (*opened & FILE_CREATED)
+ if (file->f_mode & FMODE_CREATED)
fsnotify_create(dir, dentry);
if (unlikely(d_is_negative(dentry))) {
error = -ENOENT;
} else {
path->dentry = dentry;
path->mnt = nd->path.mnt;
- return 1;
+ return 0;
}
}
}
@@ -3095,25 +3071,22 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
/*
* Look up and maybe create and open the last component.
*
- * Must be called with i_mutex held on parent.
- *
- * Returns 0 if the file was successfully atomically created (if necessary) and
- * opened. In this case the file will be returned attached to @file.
- *
- * Returns 1 if the file was not completely opened at this time, though lookups
- * and creations will have been performed and the dentry returned in @path will
- * be positive upon return if O_CREAT was specified. If O_CREAT wasn't
- * specified then a negative dentry may be returned.
+ * Must be called with parent locked (exclusive in O_CREAT case).
*
- * An error code is returned otherwise.
+ * Returns 0 on success, that is, if
+ * the file was successfully atomically created (if necessary) and opened, or
+ * the file was not completely opened at this time, though lookups and
+ * creations were performed.
+ * These case are distinguished by presence of FMODE_OPENED on file->f_mode.
+ * In the latter case dentry returned in @path might be negative if O_CREAT
+ * hadn't been specified.
*
- * FILE_CREATE will be set in @*opened if the dentry was created and will be
- * cleared otherwise prior to returning.
+ * An error code is returned on failure.
*/
static int lookup_open(struct nameidata *nd, struct path *path,
struct file *file,
const struct open_flags *op,
- bool got_write, int *opened)
+ bool got_write)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
@@ -3126,7 +3099,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT;
- *opened &= ~FILE_CREATED;
+ file->f_mode &= ~FMODE_CREATED;
dentry = d_lookup(dir, &nd->last);
for (;;) {
if (!dentry) {
@@ -3188,7 +3161,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (dir_inode->i_op->atomic_open) {
error = atomic_open(nd, dentry, path, file, op, open_flag,
- mode, opened);
+ mode);
if (unlikely(error == -ENOENT) && create_error)
error = create_error;
return error;
@@ -3211,7 +3184,7 @@ no_open:
/* Negative dentry, just create the file */
if (!dentry->d_inode && (open_flag & O_CREAT)) {
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
if (!dir_inode->i_op->create) {
error = -EACCES;
@@ -3230,7 +3203,7 @@ no_open:
out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
- return 1;
+ return 0;
out_dput:
dput(dentry);
@@ -3241,8 +3214,7 @@ out_dput:
* Handle the last step of open()
*/
static int do_last(struct nameidata *nd,
- struct file *file, const struct open_flags *op,
- int *opened)
+ struct file *file, const struct open_flags *op)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
@@ -3308,17 +3280,17 @@ static int do_last(struct nameidata *nd,
inode_lock(dir->d_inode);
else
inode_lock_shared(dir->d_inode);
- error = lookup_open(nd, &path, file, op, got_write, opened);
+ error = lookup_open(nd, &path, file, op, got_write);
if (open_flag & O_CREAT)
inode_unlock(dir->d_inode);
else
inode_unlock_shared(dir->d_inode);
- if (error <= 0) {
- if (error)
- goto out;
+ if (error)
+ goto out;
- if ((*opened & FILE_CREATED) ||
+ if (file->f_mode & FMODE_OPENED) {
+ if ((file->f_mode & FMODE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
@@ -3326,7 +3298,7 @@ static int do_last(struct nameidata *nd,
goto opened;
}
- if (*opened & FILE_CREATED) {
+ if (file->f_mode & FMODE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
@@ -3395,20 +3367,15 @@ finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
- BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
- error = vfs_open(&nd->path, file, current_cred());
+ BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
+ error = vfs_open(&nd->path, file);
if (error)
goto out;
- *opened |= FILE_OPENED;
opened:
- error = open_check_o_direct(file);
- if (!error)
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = ima_file_check(file, op->acc_mode);
if (!error && will_truncate)
error = handle_truncate(file);
out:
- if (unlikely(error) && (*opened & FILE_OPENED))
- fput(file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
@@ -3458,7 +3425,7 @@ EXPORT_SYMBOL(vfs_tmpfile);
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
- struct file *file, int *opened)
+ struct file *file)
{
struct dentry *child;
struct path path;
@@ -3480,12 +3447,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
if (error)
goto out2;
file->f_path.mnt = path.mnt;
- error = finish_open(file, child, NULL, opened);
- if (error)
- goto out2;
- error = open_check_o_direct(file);
- if (error)
- fput(file);
+ error = finish_open(file, child, NULL);
out2:
mnt_drop_write(path.mnt);
out:
@@ -3499,7 +3461,7 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
int error = path_lookupat(nd, flags, &path);
if (!error) {
audit_inode(nd->name, path.dentry, 0);
- error = vfs_open(&path, file, current_cred());
+ error = vfs_open(&path, file);
path_put(&path);
}
return error;
@@ -3508,59 +3470,40 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
static struct file *path_openat(struct nameidata *nd,
const struct open_flags *op, unsigned flags)
{
- const char *s;
struct file *file;
- int opened = 0;
int error;
- file = get_empty_filp();
+ file = alloc_empty_file(op->open_flag, current_cred());
if (IS_ERR(file))
return file;
- file->f_flags = op->open_flag;
-
if (unlikely(file->f_flags & __O_TMPFILE)) {
- error = do_tmpfile(nd, flags, op, file, &opened);
- goto out2;
- }
-
- if (unlikely(file->f_flags & O_PATH)) {
+ error = do_tmpfile(nd, flags, op, file);
+ } else if (unlikely(file->f_flags & O_PATH)) {
error = do_o_path(nd, flags, file);
- if (!error)
- opened |= FILE_OPENED;
- goto out2;
- }
-
- s = path_init(nd, flags);
- if (IS_ERR(s)) {
- put_filp(file);
- return ERR_CAST(s);
- }
- while (!(error = link_path_walk(s, nd)) &&
- (error = do_last(nd, file, op, &opened)) > 0) {
- nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
- s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- break;
+ } else {
+ const char *s = path_init(nd, flags);
+ while (!(error = link_path_walk(s, nd)) &&
+ (error = do_last(nd, file, op)) > 0) {
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ s = trailing_symlink(nd);
}
+ terminate_walk(nd);
}
- terminate_walk(nd);
-out2:
- if (!(opened & FILE_OPENED)) {
- BUG_ON(!error);
- put_filp(file);
+ if (likely(!error)) {
+ if (likely(file->f_mode & FMODE_OPENED))
+ return file;
+ WARN_ON(1);
+ error = -EINVAL;
}
- if (unlikely(error)) {
- if (error == -EOPENSTALE) {
- if (flags & LOOKUP_RCU)
- error = -ECHILD;
- else
- error = -ESTALE;
- }
- file = ERR_PTR(error);
+ fput(file);
+ if (error == -EOPENSTALE) {
+ if (flags & LOOKUP_RCU)
+ error = -ECHILD;
+ else
+ error = -ESTALE;
}
- return file;
+ return ERR_PTR(error);
}
struct file *do_filp_open(int dfd, struct filename *pathname,
@@ -4712,29 +4655,6 @@ out:
return len;
}
-/*
- * A helper for ->readlink(). This should be used *ONLY* for symlinks that
- * have ->get_link() not calling nd_jump_link(). Using (or not using) it
- * for any given inode is up to filesystem.
- */
-static int generic_readlink(struct dentry *dentry, char __user *buffer,
- int buflen)
-{
- DEFINE_DELAYED_CALL(done);
- struct inode *inode = d_inode(dentry);
- const char *link = inode->i_link;
- int res;
-
- if (!link) {
- link = inode->i_op->get_link(dentry, inode, &done);
- if (IS_ERR(link))
- return PTR_ERR(link);
- }
- res = readlink_copy(buffer, buflen, link);
- do_delayed_call(&done);
- return res;
-}
-
/**
* vfs_readlink - copy symlink body into userspace buffer
* @dentry: dentry on which to get symbolic link
@@ -4748,6 +4668,9 @@ static int generic_readlink(struct dentry *dentry, char __user *buffer,
int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
struct inode *inode = d_inode(dentry);
+ DEFINE_DELAYED_CALL(done);
+ const char *link;
+ int res;
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
if (unlikely(inode->i_op->readlink))
@@ -4761,7 +4684,15 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
spin_unlock(&inode->i_lock);
}
- return generic_readlink(dentry, buffer, buflen);
+ link = inode->i_link;
+ if (!link) {
+ link = inode->i_op->get_link(dentry, inode, &done);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ res = readlink_copy(buffer, buflen, link);
+ do_delayed_call(&done);
+ return res;
}
EXPORT_SYMBOL(vfs_readlink);
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799..bd2f4c68506a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
+ smp_mb(); // see mntput_no_expire()
if (likely(!read_seqretry(&mount_lock, seq)))
return 0;
if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
mnt_add_count(mnt, -1);
return 1;
}
+ lock_mount_hash();
+ if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+ mnt_add_count(mnt, -1);
+ unlock_mount_hash();
+ return 1;
+ }
+ unlock_mount_hash();
+ /* caller will mntput() */
return -1;
}
@@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
- mnt_add_count(mnt, -1);
- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
rcu_read_unlock();
return;
}
lock_mount_hash();
+ /*
+ * make sure that if __legitimize_mnt() has not seen us grab
+ * mount_lock, we'll see their refcount increment here.
+ */
+ smp_mb();
+ mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
rcu_read_unlock();
unlock_mount_hash();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..d7f158c3efc8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1434,12 +1434,11 @@ static int do_open(struct inode *inode, struct file *filp)
static int nfs_finish_open(struct nfs_open_context *ctx,
struct dentry *dentry,
- struct file *file, unsigned open_flags,
- int *opened)
+ struct file *file, unsigned open_flags)
{
int err;
- err = finish_open(file, dentry, do_open, opened);
+ err = finish_open(file, dentry, do_open);
if (err)
goto out;
if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
@@ -1452,7 +1451,7 @@ out:
int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
@@ -1461,6 +1460,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct inode *inode;
unsigned int lookup_flags = 0;
bool switched = false;
+ int created = 0;
int err;
/* Expect a negative dentry */
@@ -1521,7 +1521,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto out;
trace_nfs_atomic_open_enter(dir, ctx, open_flags);
- inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
+ if (created)
+ file->f_mode |= FMODE_CREATED;
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
@@ -1546,7 +1548,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto out;
}
- err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened);
+ err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
out:
@@ -1641,6 +1643,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
struct dentry *parent = dget_parent(dentry);
struct inode *dir = d_inode(parent);
struct inode *inode;
+ struct dentry *d;
int error = -EACCES;
d_drop(dentry);
@@ -1662,10 +1665,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
goto out_error;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
- error = PTR_ERR(inode);
- if (IS_ERR(inode))
+ d = d_splice_alias(inode, dentry);
+ if (IS_ERR(d)) {
+ error = PTR_ERR(d);
goto out_error;
- d_add(dentry, inode);
+ }
+ dput(d);
out:
dput(parent);
return 0;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 137e18abb7e7..51beb6e38c90 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
- unsigned, umode_t, int *);
+ unsigned, umode_t);
/* super.c */
extern struct file_system_type nfs4_fs_type;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f6c4ccd693f4..b790976d3913 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir,
}
}
if (opened && opendata->file_created)
- *opened |= FILE_CREATED;
+ *opened = 1;
if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
*ctx_th = opendata->f_attr.mdsthreshold;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b0555d7d8200..55a099e47ba2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out_nfserr;
}
- host_err = ima_file_check(file, may_flags, 0);
+ host_err = ima_file_check(file, may_flags);
if (host_err) {
fput(file);
goto out_nfserr;
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..d98e19239bb7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,27 +724,13 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
-int open_check_o_direct(struct file *f)
-{
- /* NB: we're sure to have correct a_ops only after f_op->open */
- if (f->f_flags & O_DIRECT) {
- if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
- return 0;
-}
-
static int do_dentry_open(struct file *f,
struct inode *inode,
- int (*open)(struct inode *, struct file *),
- const struct cred *cred)
+ int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
int error;
- f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
- FMODE_PREAD | FMODE_PWRITE;
-
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
@@ -753,7 +739,7 @@ static int do_dentry_open(struct file *f,
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
if (unlikely(f->f_flags & O_PATH)) {
- f->f_mode = FMODE_PATH;
+ f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
return 0;
}
@@ -780,7 +766,7 @@ static int do_dentry_open(struct file *f,
goto cleanup_all;
}
- error = security_file_open(f, cred);
+ error = security_file_open(f);
if (error)
goto cleanup_all;
@@ -788,6 +774,8 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
+ /* normally all 3 are set; ->open() can clear them if needed */
+ f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
if (!open)
open = f->f_op->open;
if (open) {
@@ -795,6 +783,7 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
}
+ f->f_mode |= FMODE_OPENED;
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
@@ -809,9 +798,16 @@ static int do_dentry_open(struct file *f,
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ if (f->f_flags & O_DIRECT) {
+ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
return 0;
cleanup_all:
+ if (WARN_ON_ONCE(error > 0))
+ error = -EINVAL;
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
@@ -847,19 +843,12 @@ cleanup_file:
* Returns zero on success or -errno if the open failed.
*/
int finish_open(struct file *file, struct dentry *dentry,
- int (*open)(struct inode *, struct file *),
- int *opened)
+ int (*open)(struct inode *, struct file *))
{
- int error;
- BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+ BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
- error = do_dentry_open(file, d_backing_inode(dentry), open,
- current_cred());
- if (!error)
- *opened |= FILE_OPENED;
-
- return error;
+ return do_dentry_open(file, d_backing_inode(dentry), open);
}
EXPORT_SYMBOL(finish_open);
@@ -874,13 +863,13 @@ EXPORT_SYMBOL(finish_open);
* NB: unlike finish_open() this function does consume the dentry reference and
* the caller need not dput() it.
*
- * Returns "1" which must be the return value of ->atomic_open() after having
+ * Returns "0" which must be the return value of ->atomic_open() after having
* called this function.
*/
int finish_no_open(struct file *file, struct dentry *dentry)
{
file->f_path.dentry = dentry;
- return 1;
+ return 0;
}
EXPORT_SYMBOL(finish_no_open);
@@ -896,8 +885,7 @@ EXPORT_SYMBOL(file_path);
* @file: newly allocated file with f_flag initialized
* @cred: credentials to use
*/
-int vfs_open(const struct path *path, struct file *file,
- const struct cred *cred)
+int vfs_open(const struct path *path, struct file *file)
{
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
@@ -905,7 +893,7 @@ int vfs_open(const struct path *path, struct file *file,
return PTR_ERR(dentry);
file->f_path = *path;
- return do_dentry_open(file, d_backing_inode(dentry), NULL, cred);
+ return do_dentry_open(file, d_backing_inode(dentry), NULL);
}
struct file *dentry_open(const struct path *path, int flags,
@@ -919,19 +907,11 @@ struct file *dentry_open(const struct path *path, int flags,
/* We must always pass in a valid mount pointer. */
BUG_ON(!path->mnt);
- f = get_empty_filp();
+ f = alloc_empty_file(flags, cred);
if (!IS_ERR(f)) {
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (!error) {
- /* from now on we need fput() to dispose of f */
- error = open_check_o_direct(f);
- if (error) {
- fput(f);
- f = ERR_PTR(error);
- }
- } else {
- put_filp(f);
+ error = vfs_open(path, f);
+ if (error) {
+ fput(f);
f = ERR_PTR(error);
}
}
@@ -1063,26 +1043,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(file_open_root);
-struct file *filp_clone_open(struct file *oldfile)
-{
- struct file *file;
- int retval;
-
- file = get_empty_filp();
- if (IS_ERR(file))
- return file;
-
- file->f_flags = oldfile->f_flags;
- retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
- if (retval) {
- put_filp(file);
- return ERR_PTR(retval);
- }
-
- return file;
-}
-EXPORT_SYMBOL(filp_clone_open);
-
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_flags op;
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f431da83..bdc5d3c0977d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -741,54 +741,33 @@ fail_inode:
int create_pipe_files(struct file **res, int flags)
{
- int err;
struct inode *inode = get_pipe_inode();
struct file *f;
- struct path path;
if (!inode)
return -ENFILE;
- err = -ENOMEM;
- path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
- if (!path.dentry)
- goto err_inode;
- path.mnt = mntget(pipe_mnt);
-
- d_instantiate(path.dentry, inode);
-
- f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+ f = alloc_file_pseudo(inode, pipe_mnt, "",
+ O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+ &pipefifo_fops);
if (IS_ERR(f)) {
- err = PTR_ERR(f);
- goto err_dentry;
+ free_pipe_info(inode->i_pipe);
+ iput(inode);
+ return PTR_ERR(f);
}
- f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->private_data = inode->i_pipe;
- res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+ res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+ &pipefifo_fops);
if (IS_ERR(res[0])) {
- err = PTR_ERR(res[0]);
- goto err_file;
+ put_pipe_info(inode, inode->i_pipe);
+ fput(f);
+ return PTR_ERR(res[0]);
}
-
- path_get(&path);
res[0]->private_data = inode->i_pipe;
- res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
res[1] = f;
return 0;
-
-err_file:
- put_filp(f);
-err_dentry:
- free_pipe_info(inode->i_pipe);
- path_put(&path);
- return err;
-
-err_inode:
- free_pipe_info(inode->i_pipe);
- iput(inode);
- return err;
}
static int __do_pipe_flags(int *fd, struct file **files, int flags)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..d69ad801eb80 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -66,7 +66,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx)
spin_lock_irqsave(&ctx->wqh.lock, flags);
ctx->expired = 1;
ctx->ticks++;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}
@@ -107,7 +107,7 @@ void timerfd_clock_was_set(void)
if (ctx->moffs != moffs) {
ctx->moffs = KTIME_MAX;
ctx->ticks++;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
}
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}
@@ -345,7 +345,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
spin_lock_irq(&ctx->wqh.lock);
if (!timerfd_canceled(ctx)) {
ctx->ticks = ticks;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
} else
ret = -ECANCELED;
spin_unlock_irq(&ctx->wqh.lock);
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
}
SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
- const struct itimerspec __user *, utmr,
- struct itimerspec __user *, otmr)
+ const struct __kernel_itimerspec __user *, utmr,
+ struct __kernel_itimerspec __user *, otmr)
{
struct itimerspec64 new, old;
int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
return ret;
}
-SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
+SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
{
struct itimerspec64 kotmr;
int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
const struct compat_itimerspec __user *, utmr,
struct compat_itimerspec __user *, otmr)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 06f37ddd2997..58cc2414992b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -602,8 +602,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (unlikely(!fi)) {
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -694,8 +693,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
if (!fi) {
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
set_nlink(inode, 2);
@@ -713,8 +711,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!fi) {
clear_nlink(inode);
mark_inode_dirty(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -1041,8 +1038,7 @@ out:
out_no_entry:
up_write(&iinfo->i_data_sem);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index e1ef0f0a1353..02c0a4be4212 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -343,8 +343,7 @@ cg_found:
fail_remove_inode:
mutex_unlock(&sbi->s_lock);
clear_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
UFSD("EXIT (FAILED): err %d\n", err);
return ERR_PTR(err);
failed:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index d5f43ba76c59..9ef40f100415 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
return 0;
}
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
out_fail:
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput (inode);
+ discard_new_inode(inode);
out_dir:
inode_dec_link_count(dir);
return err;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 55876dd02f0c..e08a84d9ee72 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
- iomap->flags = IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1032,6 +1032,8 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
@@ -1132,7 +1134,7 @@ xfs_file_iomap_begin(
if (error)
return error;
- iomap->flags = IOMAP_F_NEW;
+ iomap->flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
out_finish:
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0fa29f39d658..3a75de777843 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1253,7 +1253,7 @@ xfs_setup_inode(
inode_sb_list_add(inode);
/* make the inode look hashed for the writeback code */
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid);
inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid);