diff options
author | Steve French <sfrench@us.ibm.com> | 2009-07-16 04:21:39 +0000 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2009-07-16 04:21:39 +0000 |
commit | f6c43385435640e056424034caac0d765c45e370 (patch) | |
tree | 54c2ba0a7d3abb156bc51f6b9f02e00f0a260f85 /fs | |
parent | 65bc98b0059360e458aebd208587be44641227c1 (diff) | |
parent | 35b5c55fee08e6e4001ba98060a2d0b82f70b5f4 (diff) | |
download | lwn-f6c43385435640e056424034caac0d765c45e370.tar.gz lwn-f6c43385435640e056424034caac0d765c45e370.zip |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
87 files changed, 432 insertions, 559 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index aad92f0a1048..6910a98bd73c 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -13,6 +13,7 @@ #include <linux/parser.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include <linux/statfs.h> #include "adfs.h" #include "dir_f.h" diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9bd757774c9e..88067f36e5e7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct afs_vnode *vnode, *dir; - struct afs_fid fid; + struct afs_fid uninitialized_var(fid); struct dentry *parent; struct key *key; void *dir_version; diff --git a/fs/afs/super.c b/fs/afs/super.c index ad0514d0115f..e1ea1c240b6a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/parser.h> diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index f3da2eb51f56..00bf8fcb245f 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -19,7 +19,6 @@ #include <linux/sched.h> #include <linux/compat.h> #include <linux/syscalls.h> -#include <linux/smp_lock.h> #include <linux/magic.h> #include <linux/dcache.h> #include <linux/uaccess.h> diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 54bd07d44e68..1e41aadb1068 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -8,7 +8,6 @@ #include <linux/time.h> #include <linux/string.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/sched.h> #include "bfs.h" diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 6a021265f018..88b9a3ff44e4 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -11,7 +11,6 @@ #include <linux/fs.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include "bfs.h" #undef DEBUG @@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy, - int do_free_page) + struct sg_iovec *iov, int iov_count, + int to_user, int from_user, int do_free_page) { int ret = 0, i; struct bio_vec *bvec; int iov_idx = 0; unsigned int iov_off = 0; - int read = bio_data_dir(bio) == READ; __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); @@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, iov_addr = iov[iov_idx].iov_base + iov_off; if (!ret) { - if (!read && !uncopy) - ret = copy_from_user(bv_addr, iov_addr, - bytes); - if (read && uncopy) + if (to_user) ret = copy_to_user(iov_addr, bv_addr, bytes); + if (from_user) + ret = copy_from_user(bv_addr, iov_addr, + bytes); + if (ret) ret = -EFAULT; } @@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, 1, bmd->is_our_pages); + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; @@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (!write_to_vm && (!map_data || !map_data->null_mapped)) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); + if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || + (map_data && map_data->from_user)) { + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0); if (ret) goto cleanup; } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index de1e2fd32080..9d8ba4d54a37 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -26,7 +26,6 @@ #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/mpage.h> #include <linux/swap.h> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c3cd248d8d6..4b833972273a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -22,7 +22,6 @@ #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/mpage.h> #include <linux/swap.h> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ffa3d34ea19..791eab19e330 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -26,7 +26,6 @@ #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/mpage.h> #include <linux/swap.h> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9f4db848db10..bd88f25889f7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -27,7 +27,6 @@ #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/mpage.h> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9f179d4832d5..6d6d06cb6dfc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,7 +26,6 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/mpage.h> diff --git a/fs/char_dev.c b/fs/char_dev.c index b7c9d5187a75..a173551e19d7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -13,7 +13,6 @@ #include <linux/major.h> #include <linux/errno.h> #include <linux/module.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/kobject.h> diff --git a/fs/compat.c b/fs/compat.c index fbadb947727b..94502dab972a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -32,7 +32,6 @@ #include <linux/smb_mount.h> #include <linux/ncp_mount.h> #include <linux/nfs4_mount.h> -#include <linux/smp_lock.h> #include <linux/syscalls.h> #include <linux/ctype.h> #include <linux/module.h> diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 626c7483b4de..f28f070a60fc 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -19,6 +19,7 @@ #include <linux/compiler.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/smp_lock.h> #include <linux/ioctl.h> #include <linux/if.h> #include <linux/if_bridge.h> diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 205ec95b347e..eb507c453c5f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -435,7 +435,7 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, static int find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret) { - struct dlm_rsb *r, *tmp; + struct dlm_rsb *r = NULL, *tmp; uint32_t hash, bucket; int error = -EINVAL; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index cdb580a9c7a2..618a60f03886 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -902,7 +902,7 @@ static void tcp_connect_to_sock(struct connection *con) int result = -EHOSTUNREACH; struct sockaddr_storage saddr, src_addr; int addr_len; - struct socket *sock; + struct socket *sock = NULL; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -962,6 +962,8 @@ out_err: if (con->sock) { sock_release(con->sock); con->sock = NULL; + } else if (sock) { + sock_release(sock); } /* * Some errors are fatal and this list might need adjusting. For other diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 894a32d438d5..16f682e26c07 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -353,7 +353,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, { struct dlm_plock_info info; struct plock_op *op; - int found = 0; + int found = 0, do_callback = 0; if (count != sizeof(info)) return -EINVAL; @@ -366,21 +366,24 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, spin_lock(&ops_lock); list_for_each_entry(op, &recv_list, list) { - if (op->info.fsid == info.fsid && op->info.number == info.number && + if (op->info.fsid == info.fsid && + op->info.number == info.number && op->info.owner == info.owner) { + struct plock_xop *xop = (struct plock_xop *)op; list_del_init(&op->list); - found = 1; - op->done = 1; memcpy(&op->info, &info, sizeof(info)); + if (xop->callback) + do_callback = 1; + else + op->done = 1; + found = 1; break; } } spin_unlock(&ops_lock); if (found) { - struct plock_xop *xop; - xop = (struct plock_xop *)op; - if (xop->callback) + if (do_callback) dlm_plock_callback(op); else wake_up(&recv_wq); diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 24667eedc023..c6718e4817fe 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -2,9 +2,7 @@ * common.h - Common definitions for both Kernel and user-mode utilities * * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 65b0c8c776a1..4cfab1cc75c0 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0fd4c7859679..5ec72e020b22 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -156,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child); int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, struct inode *); +/* super.c */ +int exofs_sync_fs(struct super_block *sb, int wait); + /********************* * operation vectors * *********************/ diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 6ed7fe484752..839b9dc1e70f 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -47,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, { int ret; struct address_space *mapping = filp->f_mapping; + struct inode *inode = dentry->d_inode; + struct super_block *sb; ret = filemap_write_and_wait(mapping); if (ret) return ret; - /*Note: file_fsync below also calles sync_blockdev, which is a no-op - * for exofs, but other then that it does sync_inode and - * sync_superblock which is what we need here. - */ - return file_fsync(filp, dentry, datasync); + /* sync the inode attributes */ + ret = write_inode_now(inode, 1); + + /* This is a good place to write the sb */ + /* TODO: Sechedule an sb-sync on create */ + sb = inode->i_sb; + if (sb->s_dirt) + exofs_sync_fs(sb, 1); + + return ret; } static int exofs_flush(struct file *file, fl_owner_t id) diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 77d0a295eb1c..6c10f7476699 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -295,6 +293,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync) err: if (!is_sync) _unlock_pcol_pages(pcol, ret, READ); + else /* Pages unlocked by caller in sync mode only free bio */ + pcol_free(pcol); + kfree(pcol_copy); if (or) osd_end_request(or); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 77fdd765e76d..b7dd0c236863 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b3d2ccb87aaa..4372542df284 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8216c5b77b53..5ab10c3bbebe 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * @@ -33,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/parser.h> #include <linux/vfs.h> @@ -200,7 +199,7 @@ static const struct export_operations exofs_export_ops; /* * Write the superblock to the OSD */ -static int exofs_sync_fs(struct super_block *sb, int wait) +int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 36e2d7bc7f7b..4dd687c3e747 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh <bharrosh@panasas.com> * diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 7cb4badef927..e7431309bdca 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/compat.h> #include <linux/mount.h> -#include <linux/smp_lock.h> #include <asm/current.h> #include <asm/uaccess.h> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0ddf7e55abe1..9714db393efe 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t; struct ext4_allocation_request { /* target inode for block we're allocating */ struct inode *inode; + /* how many blocks we want to allocate */ + unsigned int len; /* logical block in target inode */ ext4_lblk_t logical; - /* phys. target (a hint) */ - ext4_fsblk_t goal; /* the closest logical allocated block to the left */ ext4_lblk_t lleft; - /* phys. block for ^^^ */ - ext4_fsblk_t pleft; /* the closest logical allocated block to the right */ ext4_lblk_t lright; - /* phys. block for ^^^ */ + /* phys. target (a hint) */ + ext4_fsblk_t goal; + /* phys. block for the closest logical allocated block to the left */ + ext4_fsblk_t pleft; + /* phys. block for the closest logical allocated block to the right */ ext4_fsblk_t pright; - /* how many blocks we want to allocate */ - unsigned int len; /* flags. see above EXT4_MB_HINT_* */ unsigned int flags; }; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index ad13a84644e1..eb27fd0f2ee8 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle, ext4_journal_abort_handle(where, __func__, bh, handle, err); } + else + brelse(bh); return err; } @@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, ext4_journal_abort_handle(where, __func__, bh, handle, err); } + else + brelse(bh); return err; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index be2f426f6805..139fb8cb87e4 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, int __ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh); +/* When called with an invalid handle, this will still do a put on the BH */ int __ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh); +/* When called with an invalid handle, this will still do a put on the BH */ int __ext4_journal_revoke(const char *where, handle_t *handle, ext4_fsblk_t blocknr, struct buffer_head *bh); @@ -281,10 +283,10 @@ static inline int ext4_should_order_data(struct inode *inode) static inline int ext4_should_writeback_data(struct inode *inode) { - if (EXT4_JOURNAL(inode) == NULL) - return 0; if (!S_ISREG(inode->i_mode)) return 0; + if (EXT4_JOURNAL(inode) == NULL) + return 1; if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 50322a09bd01..73ebfb44ad75 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, */ /* 1 bitmap, 1 block group descriptor */ ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); + return ret; } } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2f645732e3b7..29e6dc7299b8 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -833,7 +833,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, if (!goal) goal = sbi->s_inode_goal; - if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) { + if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) { group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); ret2 = 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60a26f3a6f8b..f9c642b22efa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) * but there may still be a record of it in the journal, and that record * still needs to be revoked. * - * If the handle isn't valid we're not journaling so there's nothing to do. + * If the handle isn't valid we're not journaling, but we still need to + * call into ext4_journal_revoke() to put the buffer head. */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; - if (!ext4_handle_valid(handle)) - return 0; - might_sleep(); BUFFER_TRACE(bh, "enter"); @@ -1513,14 +1511,14 @@ retry: * Add inode to orphan list in case we crash before * truncate finishes */ - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext4_can_truncate(inode)) ext4_orphan_add(handle, inode); ext4_journal_stop(handle); if (pos + len > inode->i_size) { - vmtruncate(inode, inode->i_size); + ext4_truncate(inode); /* - * If vmtruncate failed early the inode might + * If truncate failed early the inode might * still be on the orphan list; we need to * make sure the inode is removed from the * orphan list in that case. @@ -1614,7 +1612,7 @@ static int ext4_ordered_write_end(struct file *file, ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1628,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - vmtruncate(inode, inode->i_size); + ext4_truncate(inode); /* - * If vmtruncate failed early the inode might still be + * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. */ @@ -1655,7 +1653,7 @@ static int ext4_writeback_write_end(struct file *file, ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1670,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - vmtruncate(inode, inode->i_size); + ext4_truncate(inode); /* - * If vmtruncate failed early the inode might still be + * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. */ @@ -1722,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file, unlock_page(page); page_cache_release(page); - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1733,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file, if (!ret) ret = ret2; if (pos + len > inode->i_size) { - vmtruncate(inode, inode->i_size); + ext4_truncate(inode); /* - * If vmtruncate failed early the inode might still be + * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode * is removed from the orphan list in that case. */ @@ -2305,15 +2303,9 @@ flush_it: return; } -static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) +static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) { - /* - * unmapped buffer is possible for holes. - * delay buffer is possible with delayed allocation. - * We also need to consider unwritten buffer as unmapped. - */ - return (!buffer_mapped(bh) || buffer_delay(bh) || - buffer_unwritten(bh)) && buffer_dirty(bh); + return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); } /* @@ -2398,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page, * We need to try to allocate * unmapped blocks in the same page. * Otherwise we won't make progress - * with the page in ext4_da_writepage + * with the page in ext4_writepage */ - if (ext4_bh_unmapped_or_delay(NULL, bh)) { + if (ext4_bh_delay_or_unwritten(NULL, bh)) { mpage_add_bh_to_extent(mpd, logical, bh->b_size, bh->b_state); @@ -2517,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, * so call get_block_wrap with create = 0 */ ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); - BUG_ON(create && ret == 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -2525,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, return ret; } +static int bget_one(handle_t *handle, struct buffer_head *bh) +{ + get_bh(bh); + return 0; +} + +static int bput_one(handle_t *handle, struct buffer_head *bh) +{ + put_bh(bh); + return 0; +} + +static int __ext4_journalled_writepage(struct page *page, + struct writeback_control *wbc, + unsigned int len) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct buffer_head *page_bufs; + handle_t *handle = NULL; + int ret = 0; + int err; + + page_bufs = page_buffers(page); + BUG_ON(!page_bufs); + walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); + /* As soon as we unlock the page, it can go away, but we have + * references to buffers so we are safe */ + unlock_page(page); + + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + + ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, + do_journal_get_write_access); + + err = walk_page_buffers(handle, page_bufs, 0, len, NULL, + write_end_fn); + if (ret == 0) + ret = err; + err = ext4_journal_stop(handle); + if (!ret) + ret = err; + + walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); + EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; +out: + return ret; +} + /* + * Note that we don't need to start a transaction unless we're journaling data + * because we should have holes filled from ext4_page_mkwrite(). We even don't + * need to file the inode to the transaction's list in ordered mode because if + * we are writing back data added by write(), the inode is already there and if + * we are writing back data modified via mmap(), noone guarantees in which + * transaction the data will hit the disk. In case we are journaling data, we + * cannot start transaction directly because transaction start ranks above page + * lock so we have to do some magic. + * * This function can get called via... * - ext4_da_writepages after taking page lock (have journal handle) * - journal_submit_inode_data_buffers (no journal handle) * - shrink_page_list via pdflush (no journal handle) * - grab_page_cache when doing write_begin (have journal handle) + * + * We don't do any block allocation in this function. If we have page with + * multiple blocks we need to write those buffer_heads that are mapped. This + * is important for mmaped based write. So if we do with blocksize 1K + * truncate(f, 1024); + * a = mmap(f, 0, 4096); + * a[0] = 'a'; + * truncate(f, 4096); + * we have in the page first buffer_head mapped via page_mkwrite call back + * but other bufer_heads would be unmapped but dirty(dirty done via the + * do_wp_page). So writepage should write the first block. If we modify + * the mmap area beyond 1024 we will again get a page_fault and the + * page_mkwrite callback will do the block allocation and mark the + * buffer_heads mapped. + * + * We redirty the page if we have any buffer_heads that is either delay or + * unwritten in the page. + * + * We can get recursively called as show below. + * + * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> + * ext4_writepage() + * + * But since we don't do any block allocation we should not deadlock. + * Page also have the dirty flag cleared so we don't get recurive page_lock. */ -static int ext4_da_writepage(struct page *page, - struct writeback_control *wbc) +static int ext4_writepage(struct page *page, + struct writeback_control *wbc) { int ret = 0; loff_t size; @@ -2541,7 +2619,7 @@ static int ext4_da_writepage(struct page *page, struct buffer_head *page_bufs; struct inode *inode = page->mapping->host; - trace_ext4_da_writepage(inode, page); + trace_ext4_writepage(inode, page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2551,7 +2629,7 @@ static int ext4_da_writepage(struct page *page, if (page_has_buffers(page)) { page_bufs = page_buffers(page); if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, - ext4_bh_unmapped_or_delay)) { + ext4_bh_delay_or_unwritten)) { /* * We don't want to do block allocation * So redirty the page and return @@ -2578,13 +2656,13 @@ static int ext4_da_writepage(struct page *page, * all are mapped and non delay. We don't want to * do block allocation here. */ - ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, + ret = block_prepare_write(page, 0, len, noalloc_get_block_write); if (!ret) { page_bufs = page_buffers(page); /* check whether all are mapped and non delay */ if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, - ext4_bh_unmapped_or_delay)) { + ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; @@ -2600,7 +2678,16 @@ static int ext4_da_writepage(struct page *page, return 0; } /* now mark the buffer_heads as dirty and uptodate */ - block_commit_write(page, 0, PAGE_CACHE_SIZE); + block_commit_write(page, 0, len); + } + + if (PageChecked(page) && ext4_should_journal_data(inode)) { + /* + * It's mmapped pagecache. Add buffers and journal it. There + * doesn't seem much point in redirtying the page here. + */ + ClearPageChecked(page); + return __ext4_journalled_writepage(page, wbc, len); } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) @@ -2907,7 +2994,7 @@ retry: * i_size_read because we hold i_mutex. */ if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + ext4_truncate(inode); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -3130,222 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, ext4_get_block); } -static int bget_one(handle_t *handle, struct buffer_head *bh) -{ - get_bh(bh); - return 0; -} - -static int bput_one(handle_t *handle, struct buffer_head *bh) -{ - put_bh(bh); - return 0; -} - -/* - * Note that we don't need to start a transaction unless we're journaling data - * because we should have holes filled from ext4_page_mkwrite(). We even don't - * need to file the inode to the transaction's list in ordered mode because if - * we are writing back data added by write(), the inode is already there and if - * we are writing back data modified via mmap(), noone guarantees in which - * transaction the data will hit the disk. In case we are journaling data, we - * cannot start transaction directly because transaction start ranks above page - * lock so we have to do some magic. - * - * In all journaling modes block_write_full_page() will start the I/O. - * - * Problem: - * - * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> - * ext4_writepage() - * - * Similar for: - * - * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... - * - * Same applies to ext4_get_block(). We will deadlock on various things like - * lock_journal and i_data_sem - * - * Setting PF_MEMALLOC here doesn't work - too many internal memory - * allocations fail. - * - * 16May01: If we're reentered then journal_current_handle() will be - * non-zero. We simply *return*. - * - * 1 July 2001: @@@ FIXME: - * In journalled data mode, a data buffer may be metadata against the - * current transaction. But the same file is part of a shared mapping - * and someone does a writepage() on it. - * - * We will move the buffer onto the async_data list, but *after* it has - * been dirtied. So there's a small window where we have dirty data on - * BJ_Metadata. - * - * Note that this only applies to the last partial page in the file. The - * bit which block_write_full_page() uses prepare/commit for. (That's - * broken code anyway: it's wrong for msync()). - * - * It's a rare case: affects the final partial page, for journalled data - * where the file is subject to bith write() and writepage() in the same - * transction. To fix it we'll need a custom block_write_full_page(). - * We'll probably need that anyway for journalling writepage() output. - * - * We don't honour synchronous mounts for writepage(). That would be - * disastrous. Any write() or metadata operation will sync the fs for - * us. - * - */ -static int __ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - - if (test_opt(inode->i_sb, NOBH)) - return nobh_writepage(page, noalloc_get_block_write, wbc); - else - return block_write_full_page(page, noalloc_get_block_write, - wbc); -} - -static int ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - loff_t size = i_size_read(inode); - loff_t len; - - trace_ext4_normal_writepage(inode, page); - J_ASSERT(PageLocked(page)); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - - if (page_has_buffers(page)) { - /* if page has buffers it should all be mapped - * and allocated. If there are not buffers attached - * to the page we know the page is dirty but it lost - * buffers. That means that at some moment in time - * after write_begin() / write_end() has been called - * all buffers have been clean and thus they must have been - * written at least once. So they are all mapped and we can - * happily proceed with mapping them and writing the page. - */ - BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, - ext4_bh_unmapped_or_delay)); - } - - if (!ext4_journal_current_handle()) - return __ext4_normal_writepage(page, wbc); - - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - -static int __ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - struct buffer_head *page_bufs; - handle_t *handle = NULL; - int ret = 0; - int err; - - ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - noalloc_get_block_write); - if (ret != 0) - goto out_unlock; - - page_bufs = page_buffers(page); - walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, - bget_one); - /* As soon as we unlock the page, it can go away, but we have - * references to buffers so we are safe */ - unlock_page(page); - - handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - - ret = walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); - - err = walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, write_end_fn); - if (ret == 0) - ret = err; - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bput_one); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - goto out; - -out_unlock: - unlock_page(page); -out: - return ret; -} - -static int ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - loff_t size = i_size_read(inode); - loff_t len; - - trace_ext4_journalled_writepage(inode, page); - J_ASSERT(PageLocked(page)); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - - if (page_has_buffers(page)) { - /* if page has buffers it should all be mapped - * and allocated. If there are not buffers attached - * to the page we know the page is dirty but it lost - * buffers. That means that at some moment in time - * after write_begin() / write_end() has been called - * all buffers have been clean and thus they must have been - * written at least once. So they are all mapped and we can - * happily proceed with mapping them and writing the page. - */ - BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, - ext4_bh_unmapped_or_delay)); - } - - if (ext4_journal_current_handle()) - goto no_write; - - if (PageChecked(page)) { - /* - * It's mmapped pagecache. Add buffers and journal it. There - * doesn't seem much point in redirtying the page here. - */ - ClearPageChecked(page); - return __ext4_journalled_writepage(page, wbc); - } else { - /* - * It may be a page full of checkpoint-mode buffers. We don't - * really know unless we go poke around in the buffer_heads. - * But block_write_full_page will do the right thing. - */ - return block_write_full_page(page, noalloc_get_block_write, - wbc); - } -no_write: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - static int ext4_readpage(struct file *file, struct page *page) { return mpage_readpage(page, ext4_get_block); @@ -3492,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) static const struct address_space_operations ext4_ordered_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_ordered_write_end, @@ -3507,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = { static const struct address_space_operations ext4_writeback_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_writeback_write_end, @@ -3522,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = { static const struct address_space_operations ext4_journalled_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_journalled_write_end, @@ -3536,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = { static const struct address_space_operations ext4_da_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_da_writepage, + .writepage = ext4_writepage, .writepages = ext4_da_writepages, .sync_page = block_sync_page, .write_begin = ext4_da_write_begin, @@ -3583,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page; int err = 0; - page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); + page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, + mapping_gfp_mask(mapping) & ~__GFP_FS); if (!page) return -EINVAL; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bb415408fdb6..7050a9cd04a4 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -12,7 +12,6 @@ #include <linux/capability.h> #include <linux/time.h> #include <linux/compat.h> -#include <linux/smp_lock.h> #include <linux/mount.h> #include <linux/file.h> #include <asm/uaccess.h> @@ -192,7 +191,7 @@ setversion_out: case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; - int err, err2; + int err, err2=0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -205,9 +204,11 @@ setversion_out: return err; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (EXT4_SB(sb)->s_journal) { + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); @@ -252,7 +253,7 @@ setversion_out: case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; - int err, err2; + int err, err2=0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -266,9 +267,11 @@ setversion_out: return err; err = ext4_group_add(sb, &input); - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (EXT4_SB(sb)->s_journal) { + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 519a0a686d94..cd258463e2a9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, } } -static void ext4_mb_generate_buddy(struct super_block *sb, +static noinline_for_stack +void ext4_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, ext4_mb_check_limits(ac, e4b, 0); } -static int ext4_mb_try_best_found(struct ext4_allocation_context *ac, +static noinline_for_stack +int ext4_mb_try_best_found(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct ext4_free_extent ex = ac->ac_b_ex; @@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return 0; } -static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, +static noinline_for_stack +int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { ext4_group_t group = ac->ac_g_ex.fe_group; @@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, * The routine scans buddy structures (not bitmap!) from given order * to max order and tries to find big enough chunk to satisfy the req */ -static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, +static noinline_for_stack +void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; @@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, * In order to optimize scanning, caller must pass number of * free blocks in the group, so the routine can know upper limit. */ -static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, +static noinline_for_stack +void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; @@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * we try to find stripe-aligned chunks for stripe-size requests * XXX should do so at least for multiples of stripe size as well */ -static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, +static noinline_for_stack +void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; @@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb, } -static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) +static noinline_for_stack +int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) { int ret; @@ -2902,7 +2909,11 @@ int __init init_ext4_mballoc(void) void exit_ext4_mballoc(void) { - /* XXX: synchronize_rcu(); */ + /* + * Wait for completion of call_rcu()'s on ext4_pspace_cachep + * before destroying the slab cache. + */ + rcu_barrier(); kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_free_ext_cachep); @@ -3457,7 +3468,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, * used in in-core bitmap. buddy must be generated from this bitmap * Need to be called with ext4 group lock held */ -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, +static noinline_for_stack +void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -4215,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ + memset(ac, 0, sizeof(struct ext4_allocation_context)); ac->ac_b_ex.fe_logical = ar->logical; - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; ac->ac_status = AC_STATUS_CONTINUE; - ac->ac_groups_scanned = 0; - ac->ac_ex_scanned = 0; - ac->ac_found = 0; ac->ac_sb = sb; ac->ac_inode = ar->inode; ac->ac_o_ex.fe_logical = ar->logical; @@ -4233,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ac->ac_g_ex.fe_group = group; ac->ac_g_ex.fe_start = block; ac->ac_g_ex.fe_len = len; - ac->ac_f_ex.fe_len = 0; ac->ac_flags = ar->flags; - ac->ac_2order = 0; - ac->ac_criteria = 0; - ac->ac_pa = NULL; - ac->ac_bitmap_page = NULL; - ac->ac_buddy_page = NULL; - ac->alloc_semp = NULL; - ac->ac_lg = NULL; /* we have to define context: we'll we work with a file or * locality group. this is a policy, actually */ @@ -4509,10 +4508,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = ar->inode; - } else { + if (!ac) { ar->len = 0; *errp = -ENOMEM; goto out1; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 38ff75a0fe22..530b4ca01510 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,7 +16,6 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/time.h> -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/compat.h> #include <asm/uaccess.h> diff --git a/fs/fat/file.c b/fs/fat/file.c index b28ea646ff60..f042b965c95c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } return 0; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 82f88733b681..bbc94ae4fd77 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/time.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 73471b7ecc8c..cb6e83557112 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,7 +19,6 @@ #include <linux/jiffies.h> #include <linux/ctype.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/namei.h> #include "fat.h" diff --git a/fs/fcntl.c b/fs/fcntl.c index a040b764f8e3..ae413086db97 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,7 +19,6 @@ #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> -#include <linux/smp_lock.h> #include <asm/poll.h> #include <asm/siginfo.h> diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index cdbd1654e4cd..1e8af939b3e4 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -38,6 +38,7 @@ #include <linux/buffer_head.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/stat.h> #include <linux/vfs.h> #include <linux/mount.h> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..6484eb75acd6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -286,8 +286,8 @@ __releases(&fc->lock) } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, READ); - clear_bdi_congested(&fc->bdi, WRITE); + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->blocked = 1; if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); + set_bdi_congested(&fc->bdi, BLK_RW_SYNC); + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 98d6ef1c1dc0..148d55c14171 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -1,12 +1,11 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gfs2 + #if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_GFS2_H #include <linux/tracepoint.h> -#undef TRACE_SYSTEM -#define TRACE_SYSTEM gfs2 -#define TRACE_INCLUDE_FILE trace_gfs2 - #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/dlmconstants.h> @@ -403,5 +402,6 @@ TRACE_EVENT(gfs2_block_alloc, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_gfs2 #include <trace/define_trace.h> diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6f833dc8e910..f7fcbe49da72 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -19,6 +19,7 @@ #include <linux/nls.h> #include <linux/parser.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include <linux/vfs.h> #include "hfs_fs.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9fc3af0c0dab..c0759fe0855b 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -12,6 +12,7 @@ #include <linux/pagemap.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/vfs.h> #include <linux/nls.h> diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 6916c41d7017..8865c94f55f6 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -6,6 +6,7 @@ * directory VFS functions */ +#include <linux/smp_lock.h> #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 64ab52259204..3efabff00367 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -6,6 +6,7 @@ * file VFS functions */ +#include <linux/smp_lock.h> #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c2ea31bae313..701ca54c0867 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -13,7 +13,6 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include "hpfs.h" diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 39a1bfbea312..fe703ae46bc7 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,6 +6,7 @@ * inode VFS functions */ +#include <linux/smp_lock.h> #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b649232dde97..82b9c4ba9ed0 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,6 +6,7 @@ * adding & removing files & directories */ #include <linux/sched.h> +#include <linux/smp_lock.h> #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 58a7963e168a..85f96bc651c7 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = { struct iso9660_options{ unsigned int rock:1; + unsigned int joliet:1; unsigned int cruft:1; unsigned int hide:1; unsigned int showassoc:1; @@ -151,7 +152,6 @@ struct iso9660_options{ unsigned int gid_set:1; unsigned int utf8:1; unsigned char map; - char joliet; unsigned char check; unsigned int blocksize; mode_t fmode; @@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { sec = (struct iso_supplementary_descriptor *)vdp; if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { - if (opt.joliet == 'y') { + if (opt.joliet) { if (sec->escape[2] == 0x40) joliet_level = 1; else if (sec->escape[2] == 0x43) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 18bfd5dab642..e378cb383979 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); struct jbd2_buffer_trigger_type *triggers; + journal_t *journal = transaction->t_journal; /* * The buffer really shouldn't be locked: only the current committing @@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + /* keep subsequent assertions sane */ + new_bh->b_state = 0; + init_buffer(new_bh, NULL, NULL); + atomic_set(&new_bh->b_count, 1); + new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ /* * If a new transaction has already done a buffer copy-out, then @@ -388,14 +394,6 @@ repeat: kunmap_atomic(mapped_data, KM_USER0); } - /* keep subsequent assertions sane */ - new_bh->b_state = 0; - init_buffer(new_bh, NULL, NULL); - atomic_set(&new_bh->b_count, 1); - jbd_unlock_bh_state(bh_in); - - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ - set_bh_page(new_bh, new_page, new_offset); new_jh->b_transaction = NULL; new_bh->b_size = jh2bh(jh_in)->b_size; @@ -412,7 +410,11 @@ repeat: * copying is moved to the transaction's shadow queue. */ JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_lock(&journal->j_list_lock); + __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh_in); + JBUFFER_TRACE(new_jh, "file as BJ_IO"); jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); @@ -2410,6 +2412,7 @@ const char *jbd2_dev_to_name(dev_t device) int i = hash_32(device, CACHE_SIZE_BITS); char *ret; struct block_device *bd; + static struct devname_cache *new_dev; rcu_read_lock(); if (devcache[i] && devcache[i]->device == device) { @@ -2419,20 +2422,20 @@ const char *jbd2_dev_to_name(dev_t device) } rcu_read_unlock(); + new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); + if (!new_dev) + return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ spin_lock(&devname_cache_lock); if (devcache[i]) { if (devcache[i]->device == device) { + kfree(new_dev); ret = devcache[i]->devname; spin_unlock(&devname_cache_lock); return ret; } call_rcu(&devcache[i]->rcu, free_devcache); } - devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); - if (!devcache[i]) { - spin_unlock(&devname_cache_lock); - return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ - } + devcache[i] = new_dev; devcache[i]->device = device; bd = bdget(device); if (bd) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 494501edba6b..6213ac728f30 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal) wake_up(&journal->j_wait_transaction_locked); } -/* - * Report any unexpected dirty buffers which turn up. Normally those - * indicate an error, but they can occur if the user is running (say) - * tune2fs to modify the live filesystem, so we need the option of - * continuing as gracefully as possible. # - * - * The caller should already hold the journal lock and - * j_list_lock spinlock: most callers will need those anyway - * in order to probe the buffer's journaling state safely. - */ -static void jbd_unexpected_dirty_buffer(struct journal_head *jh) +static void warn_dirty_buffer(struct buffer_head *bh) { - int jlist; - - /* If this buffer is one which might reasonably be dirty - * --- ie. data, or not part of this journal --- then - * we're OK to leave it alone, but otherwise we need to - * move the dirty bit to the journal's own internal - * JBDDirty bit. */ - jlist = jh->b_jlist; + char b[BDEVNAME_SIZE]; - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - struct buffer_head *bh = jh2bh(jh); - - if (test_clear_buffer_dirty(bh)) - set_buffer_jbddirty(bh); - } + printk(KERN_WARNING + "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " + "There's a risk of filesystem corruption in case of system " + "crash.\n", + bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } /* @@ -593,14 +574,16 @@ repeat: if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); + warn_dirty_buffer(bh); } /* * In any case we need to clean the dirty flag and we must * do it under the buffer lock to be sure we don't race * with running write-out. */ - JBUFFER_TRACE(jh, "Unexpected dirty buffer"); - jbd_unexpected_dirty_buffer(jh); + JBUFFER_TRACE(jh, "Journalling dirty buffer"); + clear_buffer_dirty(bh); + set_buffer_jbddirty(bh); } unlock_buffer(bh); @@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); if (jh->b_transaction == NULL) { + /* + * Previous jbd2_journal_forget() could have left the buffer + * with jbddirty bit set because it was being committed. When + * the commit finished, we've filed the buffer for + * checkpointing and marked it dirty. Now we are reallocating + * the buffer so the transaction freeing it must have + * committed and so it's safe to clear the dirty bit. + */ + clear_buffer_dirty(jh2bh(jh)); jh->b_transaction = transaction; /* first access by this transaction */ @@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + /* + * We don't want to write the buffer anymore, clear the + * bit so that we don't confuse checks in + * __journal_file_buffer + */ + clear_buffer_dirty(bh); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - clear_buffer_jbddirty(bh); may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); @@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, if (jh->b_transaction && jh->b_jlist == jlist) return; - /* The following list of buffer states needs to be consistent - * with __jbd_unexpected_dirty_buffer()'s handling of dirty - * state. */ - if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { + /* + * For metadata buffers, we track dirty bit in buffer_jbddirty + * instead of buffer_dirty. We should not see a dirty bit set + * here because we clear it in do_get_write_access but e.g. + * tune2fs can modify the sb and set the dirty bit at any time + * so we try to gracefully handle that. + */ + if (buffer_dirty(bh)) + warn_dirty_buffer(bh); if (test_clear_buffer_dirty(bh) || test_clear_buffer_jbddirty(bh)) was_dirty = 1; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 07a22caf2687..0035c021395a 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/list.h> #include <linux/fs.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index f2fdcbce143e..4336adba952a 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -7,6 +7,7 @@ */ #include <linux/module.h> +#include <linux/smp_lock.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1725037374c5..bd173a6ca3b1 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/in.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3688e55901fc..e1d28ddd2169 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/in.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index af05b918cb5b..6dd48a4405b4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include <linux/kthread.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/nfs4.h> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 89f98e9a024b..38d42c29fb92 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -29,7 +29,6 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0055b813ec2c..05062329b678 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/aio.h> #include <asm/uaccess.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 64f87194d390..bd7938eda6a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -30,7 +30,6 @@ #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/nfs_idmap.h> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92ce43517814..ff0c080db59b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -45,7 +45,6 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> -#include <linux/smp_lock.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/module.h> diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 96c4ebfa46f4..73ea5e8d66ce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,7 +18,6 @@ #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> -#include <linux/smp_lock.h> #include <asm/system.h> diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce728829f79a..0a0a2ff767c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) - set_bdi_congested(&nfss->backing_dev_info, WRITE); + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } } return ret; } @@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page) end_page_writeback(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, WRITE); + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1250fb978ac1..6d0847562d87 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,7 +25,6 @@ #include <linux/init.h> #include <linux/inet.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/ctype.h> #include <linux/nfs.h> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..492c79b7800b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -18,7 +18,6 @@ #include <linux/unistd.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/freezer.h> #include <linux/fs_struct.h> #include <linux/kthread.h> diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 36df60b6d8a4..99d58a028b94 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap, } static struct lock_class_key nilfs_bmap_dat_lock_key; +static struct lock_class_key nilfs_bmap_mdt_lock_key; /** * nilfs_bmap_read - read a bmap from an inode @@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_ptr_type = NILFS_BMAP_PTR_VS; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); break; + case NILFS_IFILE_INO: + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); + /* Fall through */ default: bmap->b_ptr_type = NILFS_BMAP_PTR_VM; bmap->b_last_allocated_key = 0; diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 7d49813f66d6..aec942cf79e3 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_header; + break; /* skip hole */ ret = 0; continue; @@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_header; + break; } } @@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, kunmap_atomic(kaddr, KM_USER0); } - out_header: brelse(header_bh); out_sem: diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0b2710e2d565..8927ca27e6f7 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); - if (entry->de_blocknr != cpu_to_le64(0) || - entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) { - printk(KERN_CRIT - "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n", - __func__, (unsigned long long)req->pr_entry_nr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end), - (unsigned long long)le64_to_cpu(entry->de_blocknr)); - } entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 54100acc1102..1a4fa04cf071 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -43,7 +43,6 @@ */ #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include "nilfs.h" #include "page.h" diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index aa977549919e..8b5e4778cf28 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1829,26 +1829,13 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, err = nilfs_segbuf_write(segbuf, &wi); res = nilfs_segbuf_wait(segbuf, &wi); - err = unlikely(err) ? : res; - if (unlikely(err)) + err = err ? : res; + if (err) return err; } return 0; } -static int nilfs_page_has_uncleared_buffer(struct page *page) -{ - struct buffer_head *head, *bh; - - head = bh = page_buffers(page); - do { - if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) - return 1; - bh = bh->b_this_page; - } while (bh != head); - return 0; -} - static void __nilfs_end_page_io(struct page *page, int err) { if (!err) { @@ -1872,12 +1859,11 @@ static void nilfs_end_page_io(struct page *page, int err) if (!page) return; - if (buffer_nilfs_node(page_buffers(page)) && - nilfs_page_has_uncleared_buffer(page)) - /* For b-tree node pages, this function may be called twice - or more because they might be split in a segment. - This check assures that cleanup has been done for all - buffers in a split btnode page. */ + if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) + /* + * For b-tree node pages, this function may be called twice + * or more because they might be split in a segment. + */ return; __nilfs_end_page_io(page, err); @@ -1940,7 +1926,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, } if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); - if (unlikely(fs_page == failed_page)) + if (fs_page && fs_page == failed_page) goto done; fs_page = bh->b_page; } diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 9fcd36dcc9a0..467b413bec21 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -7,7 +7,6 @@ #include <linux/fs.h> #include <linux/mount.h> -#include <linux/smp_lock.h> #define MLOG_MASK_PREFIX ML_INODE #include <cluster/masklog.h> diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 1a9c7878f864..ea4e6cb29e13 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -436,7 +436,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ - if (!dev_get_uevent_suppress(pdev)) + if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); return p; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 77f5bb746bf0..90622200b39c 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); return 0; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f3d47d856848..6925b835a43b 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -46,7 +46,6 @@ #include <linux/reiserfs_acl.h> #include <asm/uaccess.h> #include <net/checksum.h> -#include <linux/smp_lock.h> #include <linux/stat.h> #include <linux/quotaops.h> diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3b52770f46ff..cb5fc57e370b 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -30,6 +30,7 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> +#include <linux/smp_lock.h> #include <linux/mutex.h> #include <linux/pagemap.h> #include <linux/init.h> diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index bc5857199ec2..762a7d6cec73 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -297,6 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); + dbg_io("jhead %d", wbuf->jhead); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); @@ -311,8 +312,12 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { ubifs_assert(!hrtimer_active(&wbuf->timer)); - if (!ktime_to_ns(wbuf->softlimit)) + if (wbuf->no_timer) return; + dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, + div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), + div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, + USEC_PER_SEC)); hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, HRTIMER_MODE_REL); } @@ -323,11 +328,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) */ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { - /* - * If the syncer is waiting for the lock (from the background thread's - * context) and another task is changing write-buffer then the syncing - * should be canceled. - */ + if (wbuf->no_timer) + return; wbuf->need_sync = 0; hrtimer_cancel(&wbuf->timer); } @@ -349,8 +351,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) /* Write-buffer is empty or not seeked */ return 0; - dbg_io("LEB %d:%d, %d bytes", - wbuf->lnum, wbuf->offs, wbuf->used); + dbg_io("LEB %d:%d, %d bytes, jhead %d", + wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ubifs_assert(!(wbuf->avail & 7)); ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); @@ -390,7 +392,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) * @offs: logical eraseblock offset to seek to * @dtype: data type * - * This function targets the write buffer to logical eraseblock @lnum:@offs. + * This function targets the write-buffer to logical eraseblock @lnum:@offs. * The write-buffer is synchronized if it is not empty. Returns zero in case of * success and a negative error code in case of failure. */ @@ -399,7 +401,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, { const struct ubifs_info *c = wbuf->c; - dbg_io("LEB %d:%d", lnum, offs); + dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); @@ -506,9 +508,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) struct ubifs_info *c = wbuf->c; int err, written, n, aligned_len = ALIGN(len, 8), offs; - dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, - dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, - wbuf->offs + wbuf->used); + dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, + wbuf->lnum, wbuf->offs + wbuf->used); ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); @@ -533,8 +535,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) memcpy(wbuf->buf + wbuf->used, buf, len); if (aligned_len == wbuf->avail) { - dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, - wbuf->offs); + dbg_io("flush jhead %d wbuf to LEB %d:%d", + wbuf->jhead, wbuf->lnum, wbuf->offs); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -562,7 +564,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * minimal I/O unit. We have to fill and flush write-buffer and switch * to the next min. I/O unit. */ - dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); + dbg_io("flush jhead %d wbuf to LEB %d:%d", + wbuf->jhead, wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -695,7 +698,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int err, rlen, overlap; struct ubifs_ch *ch = buf; - dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, + dbg_ntype(type), len, wbuf->jhead); ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); @@ -819,13 +823,12 @@ out: * @c: UBIFS file-system description object * @wbuf: write-buffer to initialize * - * This function initializes write buffer. Returns zero in case of success + * This function initializes write-buffer. Returns zero in case of success * %-ENOMEM in case of failure. */ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; - ktime_t hardlimit; wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); if (!wbuf->buf) @@ -851,22 +854,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wbuf->timer.function = wbuf_timer_callback_nolock; - /* - * Make write-buffer soft limit to be 20% of the hard limit. The - * write-buffer timer is allowed to expire any time between the soft - * and hard limits. - */ - hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0); - wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10; - wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta); - hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit, - wbuf->delta); + wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0); + wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT; + wbuf->delta *= 1000000000ULL; + ubifs_assert(wbuf->delta <= ULONG_MAX); return 0; } /** * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. - * @wbuf: the write-buffer whereto add + * @wbuf: the write-buffer where to add * @inum: the inode number * * This function adds an inode number to the inode array of the write-buffer. diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6db7a6be6c97..8aacd64957a2 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -25,7 +25,6 @@ /* This file implements EXT2-compatible extended attribute ioctl() calls */ #include <linux/compat.h> -#include <linux/smp_lock.h> #include <linux/mount.h> #include "ubifs.h" diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 805605250f12..e5f6cf8a1155 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -53,6 +53,25 @@ static int is_empty(void *buf, int len) } /** + * first_non_ff - find offset of the first non-0xff byte. + * @buf: buffer to search in + * @len: length of buffer + * + * This function returns offset of the first non-0xff byte in @buf or %-1 if + * the buffer contains only 0xff bytes. + */ +static int first_non_ff(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return i; + return -1; +} + +/** * get_master_node - get the last valid master node allowing for corruption. * @c: UBIFS file-system description object * @lnum: LEB number @@ -357,11 +376,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) empty_offs = ALIGN(offs + 1, c->min_io_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; - - for (; check_len > 0; check_len--) - if (*p++ != 0xff) - return 0; - return 1; + return is_empty(p, check_len); } /** @@ -543,8 +558,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * - * This function returns %0 on success and a negative error code on failure. + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is + * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) @@ -643,7 +658,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } } @@ -652,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, clean_buf(c, &buf, lnum, &offs, &len); need_clean = 1; } else { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + int corruption = first_non_ff(buf, len); + + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ + offs = corruption; + buf += corruption; goto corrupted; } } @@ -813,7 +834,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, static int recover_head(const struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err, need_clean = 0; + int len, err; if (c->min_io_size > 1) len = c->min_io_size; @@ -827,19 +848,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, /* Read at the head location and check it is empty flash */ err = ubi_read(c->ubi, lnum, sbuf, offs, len); - if (err) - need_clean = 1; - else { - uint8_t *p = sbuf; - - while (len--) - if (*p++ != 0xff) { - need_clean = 1; - break; - } - } - - if (need_clean) { + if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 11cc80125a49..2970500f32df 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) dbg_mnt("replay log LEB %d:%d", lnum, offs); sleb = ubifs_scan(c, lnum, offs, sbuf); - if (IS_ERR(sleb)) { - if (c->need_recovery) - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb) ) { + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) + return PTR_ERR(sleb); + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return PTR_ERR(sleb); } @@ -957,7 +958,7 @@ out: return err; out_dump: - ubifs_err("log error detected while replying the log at LEB %d:%d", + ubifs_err("log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); dbg_dump_node(c, snod->node); ubifs_scan_destroy(sleb); diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 0ed82479b44b..892ebfee4fe5 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + ubifs_err("corruption at LEB %d:%d", lnum, offs); if (dbg_failure_mode) return; len = c->leb_size - offs; - if (len > 4096) - len = 4096; + if (len > 8192) + len = 8192; dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @sbuf: scan buffer (must be c->leb_size) * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns an error code in case of failure. + * its contents. Returns the scaned information in case of success and, + * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case + * of failure. */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, int offs, void *sbuf) @@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); - if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } err = ubifs_add_snod(c, sleb, buf, offs); @@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, len -= node_len; } - if (offs % c->min_io_size) - goto corrupted; + if (offs % c->min_io_size) { + ubifs_err("empty space starts at non-aligned offset %d", offs); + goto corrupted;; + } ubifs_end_scan(c, sleb, lnum, offs); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 79fad43f3c57..26d2e0d80465 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -797,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c) * does not need to be synchronized by timer. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; - c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); + c->jheads[GCHD].wbuf.no_timer = 1; return 0; } @@ -986,7 +986,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, switch (token) { /* * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. - * We accepte them in order to be backware-compatible. But this + * We accept them in order to be backward-compatible. But this * should be removed at some point. */ case Opt_fast_unmount: @@ -1287,6 +1287,9 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_journal; + /* Calculate 'min_idx_lebs' after journal replay */ + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); if (err) goto out_orphans; @@ -1754,10 +1757,8 @@ static void ubifs_put_super(struct super_block *sb) /* Synchronize write-buffers */ if (c->jheads) - for (i = 0; i < c->jhead_cnt; i++) { + for (i = 0; i < c->jhead_cnt; i++) ubifs_wbuf_sync(&c->jheads[i].wbuf); - hrtimer_cancel(&c->jheads[i].wbuf.timer); - } /* * On fatal errors c->ro_media is set to 1, in which case we do @@ -1975,7 +1976,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) err = bdi_init(&c->bdi); if (err) goto out_close; - err = bdi_register(&c->bdi, NULL, "ubifs"); + err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", + c->vi.ubi_num, c->vi.vol_id); if (err) goto out_bdi; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1bf01d820066..a29349094422 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -95,8 +95,9 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout in seconds */ -#define DEFAULT_WBUF_TIMEOUT_SECS 5 +/* Write-buffer synchronization timeout interval in seconds */ +#define WBUF_TIMEOUT_SOFTLIMIT 3 +#define WBUF_TIMEOUT_HARDLIMIT 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -654,7 +655,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit * and @softlimit + @delta) * @timer: write-buffer timer - * @need_sync: it is set if its timer expired and needs sync + * @no_timer: non-zero if this write-buffer does not have a timer + * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf * @@ -683,7 +685,8 @@ struct ubifs_wbuf { ktime_t softlimit; unsigned long long delta; struct hrtimer timer; - int need_sync; + unsigned int no_timer:1; + unsigned int need_sync:1; int next_ino; ino_t *inodes; }; diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 1cd3b55ee3d2..2d3f90afe5f1 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 1418b916fc27..0c93c7ef3d18 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -412,7 +412,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4e255441574..0542fd507649 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -41,7 +41,6 @@ #include "xfs_ioctl.h" #include <linux/dcache.h> -#include <linux/smp_lock.h> static struct vm_operations_struct xfs_file_vm_ops; |