summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDeepak Saxena <dsaxena@laptop.org>2008-08-10 14:06:08 -0700
committerDeepak Saxena <dsaxena@laptop.org>2008-08-10 14:06:08 -0700
commit0bc186239fb1afa2dbc83f02f745e6290d664d70 (patch)
tree0b0e40752e0597d9aa0d3b63ae1497b405d1977d /fs
parentf5931acac40cff4ec59a12db54570d0c2cb3cd91 (diff)
parent3563ed56ad1ddc4691d082cb6af5ab7d2618ffa0 (diff)
downloadlwn-0bc186239fb1afa2dbc83f02f745e6290d664d70.tar.gz
lwn-0bc186239fb1afa2dbc83f02f745e6290d664d70.zip
Merge commit 'v2.6.25.15' into olpc-testing
Diffstat (limited to 'fs')
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/jbd/commit.c19
-rw-r--r--fs/jbd/transaction.c91
-rw-r--r--fs/namei.c31
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3acl.c9
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/partitions/msdos.c20
-rw-r--r--fs/romfs/inode.c37
10 files changed, 168 insertions, 52 deletions
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 53f3cf62b7c1..98f0e874c5e1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1208,7 +1208,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
*/
media = b->media;
- if (!FAT_VALID_MEDIA(media)) {
+ if (!fat_valid_media(media)) {
if (!silent)
printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
media);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a38c7186c570..cd931ef1f000 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -407,22 +407,6 @@ void journal_commit_transaction(journal_t *journal)
jbd_debug (3, "JBD: commit phase 2\n");
/*
- * First, drop modified flag: all accesses to the buffers
- * will be tracked for a new trasaction only -bzzz
- */
- spin_lock(&journal->j_list_lock);
- if (commit_transaction->t_buffers) {
- new_jh = jh = commit_transaction->t_buffers->b_tnext;
- do {
- J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
- new_jh->b_modified == 0);
- new_jh->b_modified = 0;
- new_jh = new_jh->b_tnext;
- } while (new_jh != jh);
- }
- spin_unlock(&journal->j_list_lock);
-
- /*
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
@@ -488,6 +472,9 @@ void journal_commit_transaction(journal_t *journal)
*/
commit_transaction->t_state = T_COMMIT;
+ J_ASSERT(commit_transaction->t_nr_buffers <=
+ commit_transaction->t_outstanding_credits);
+
descriptor = NULL;
bufs = 0;
while (commit_transaction->t_buffers) {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 2c9e8f5d13aa..7665e01c2dcf 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -609,6 +609,12 @@ repeat:
goto done;
/*
+ * this is the first time this transaction is touching this buffer,
+ * reset the modified flag
+ */
+ jh->b_modified = 0;
+
+ /*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
*/
@@ -820,9 +826,16 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
if (jh->b_transaction == NULL) {
jh->b_transaction = transaction;
+
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "file as BJ_Reserved");
__journal_file_buffer(jh, transaction, BJ_Reserved);
} else if (jh->b_transaction == journal->j_committing_transaction) {
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "set next transaction");
jh->b_next_transaction = transaction;
}
@@ -1222,6 +1235,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
int drop_reserve = 0;
int err = 0;
+ int was_modified = 0;
BUFFER_TRACE(bh, "entry");
@@ -1240,6 +1254,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
goto not_jbd;
}
+ /* keep track of wether or not this transaction modified us */
+ was_modified = jh->b_modified;
+
/*
* The buffer's going from the transaction, we must drop
* all references -bzzz
@@ -1257,7 +1274,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
- drop_reserve = 1;
+ /*
+ * we only want to drop a reference if this transaction
+ * modified the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
/*
* We are no longer going to journal this buffer.
@@ -1297,7 +1319,13 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction == transaction);
jh->b_next_transaction = NULL;
- drop_reserve = 1;
+
+ /*
+ * only drop a reference if this transaction modified
+ * the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
}
}
@@ -1620,12 +1648,42 @@ out:
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
*
*
* For all the buffers on this page,
@@ -1654,9 +1712,11 @@ out:
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -1685,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(), the later still
+ * holds the reference to the buffers to free while processing them.
+ * try_to_free_buffers() failed to free those buffers. Some of the
+ * caller of releasepage() request page buffers to be dropped, otherwise
+ * treat the fail-to-free as errors (such as generic_file_direct_IO())
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour(i.e make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flush of
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}
@@ -2069,7 +2150,7 @@ void __journal_refile_buffer(struct journal_head *jh)
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
__journal_file_buffer(jh, jh->b_transaction,
- was_dirty ? BJ_Metadata : BJ_Reserved);
+ jh->b_modified ? BJ_Metadata : BJ_Reserved);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
if (was_dirty)
diff --git a/fs/namei.c b/fs/namei.c
index e421fb9d25d1..149b167f7831 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -515,7 +515,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
*/
result = d_lookup(parent, name);
if (!result) {
- struct dentry * dentry = d_alloc(parent, name);
+ struct dentry *dentry;
+
+ /* Don't create child dentry for a dead directory. */
+ result = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(dir))
+ goto out_unlock;
+
+ dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
result = dir->i_op->lookup(dir, dentry, nd);
@@ -524,6 +531,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
else
result = dentry;
}
+out_unlock:
mutex_unlock(&dir->i_mutex);
return result;
}
@@ -1314,7 +1322,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
dentry = cached_lookup(base, name, nd);
if (!dentry) {
- struct dentry *new = d_alloc(base, name);
+ struct dentry *new;
+
+ /* Don't create child dentry for a dead directory. */
+ dentry = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(inode))
+ goto out;
+
+ new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
if (!new)
goto out;
@@ -1905,18 +1920,22 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
if (IS_ERR(dentry))
goto fail;
+ if (dentry->d_inode)
+ goto eexist;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
- if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
- goto enoent;
+ if (unlikely(!is_dir && nd->last.name[nd->last.len])) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOENT);
+ }
return dentry;
-enoent:
+eexist:
dput(dentry);
- dentry = ERR_PTR(-ENOENT);
+ dentry = ERR_PTR(-EEXIST);
fail:
return dentry;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f88d7c77ac9..f83fb735916c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
static void nfs_invalidate_inode(struct inode *);
static int nfs_update_inode(struct inode *, struct nfs_fattr *);
-static void nfs_zap_acl_cache(struct inode *);
-
static struct kmem_cache * nfs_inode_cachep;
static inline unsigned long
@@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
}
}
-static void nfs_zap_acl_cache(struct inode *inode)
+void nfs_zap_acl_cache(struct inode *inode)
{
void (*clear_acl_cache)(struct inode *);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 931992763e68..4c62be1db24f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -153,6 +153,7 @@ extern void nfs_clear_inode(struct inode *);
#ifdef CONFIG_NFS_V4
extern void nfs4_clear_inode(struct inode *);
#endif
+void nfs_zap_acl_cache(struct inode *inode);
/* super.c */
extern struct file_system_type nfs_xdev_fs_type;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9b7362565c0c..423842f51ac9 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -5,6 +5,8 @@
#include <linux/posix_acl_xattr.h>
#include <linux/nfsacl.h>
+#include "internal.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
status = nfs_revalidate_inode(server, inode);
if (status < 0)
return ERR_PTR(status);
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+ nfs_zap_acl_cache(inode);
acl = nfs3_get_cached_acl(inode, type);
if (acl != ERR_PTR(-EAGAIN))
return acl;
@@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS call setacl\n");
msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
status = rpc_call_sync(server->client_acl, &msg, 0);
- spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
- spin_unlock(&inode->i_lock);
+ nfs_access_zap_cache(inode);
+ nfs_zap_acl_cache(inode);
dprintk("NFS reply setacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ce07862c2fb..3a2ff77071e2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,6 +52,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
#include "iostat.h"
+#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -2707,6 +2708,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
ret = nfs_revalidate_inode(server, inode);
if (ret < 0)
return ret;
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+ nfs_zap_acl_cache(inode);
ret = nfs4_read_cached_acl(inode, buf, buflen);
if (ret != -ENOENT)
return ret;
@@ -2734,7 +2737,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
nfs_inode_return_delegation(inode);
buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
- nfs_zap_caches(inode);
+ nfs_access_zap_cache(inode);
+ nfs_zap_acl_cache(inode);
return ret;
}
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 5567ec0d03a3..796511886f28 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -18,7 +18,7 @@
*
* Re-organised Feb 1998 Russell King
*/
-
+#include <linux/msdos_fs.h>
#include "check.h"
#include "msdos.h"
@@ -419,6 +419,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
Sector sect;
unsigned char *data;
struct partition *p;
+ struct fat_boot_sector *fb;
int slot;
data = read_dev_sector(bdev, 0, &sect);
@@ -444,8 +445,21 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
p = (struct partition *) (data + 0x1be);
for (slot = 1; slot <= 4; slot++, p++) {
if (p->boot_ind != 0 && p->boot_ind != 0x80) {
- put_dev_sector(sect);
- return 0;
+ /*
+ * Even without a valid boot inidicator value
+ * its still possible this is valid FAT filesystem
+ * without a partition table.
+ */
+ fb = (struct fat_boot_sector *) data;
+ if (slot == 1 && fb->reserved && fb->fats
+ && fat_valid_media(fb->media)) {
+ printk("\n");
+ put_dev_sector(sect);
+ return 1;
+ } else {
+ put_dev_sector(sect);
+ return 0;
+ }
}
}
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c7..35e5c6e26bca 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
romfs_readpage(struct file *file, struct page * page)
{
struct inode *inode = page->mapping->host;
- loff_t offset, avail, readlen;
+ loff_t offset, size;
+ unsigned long filled;
void *buf;
int result = -EIO;
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
/* 32 bit warning -- but not for us :) */
offset = page_offset(page);
- if (offset < i_size_read(inode)) {
- avail = inode->i_size-offset;
- readlen = min_t(unsigned long, avail, PAGE_SIZE);
- if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
- if (readlen < PAGE_SIZE) {
- memset(buf + readlen,0,PAGE_SIZE-readlen);
- }
- SetPageUptodate(page);
- result = 0;
+ size = i_size_read(inode);
+ filled = 0;
+ result = 0;
+ if (offset < size) {
+ unsigned long readlen;
+
+ size -= offset;
+ readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
+
+ filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
+
+ if (filled != readlen) {
+ SetPageError(page);
+ filled = 0;
+ result = -EIO;
}
}
- if (result) {
- memset(buf, 0, PAGE_SIZE);
- SetPageError(page);
- }
+
+ if (filled < PAGE_SIZE)
+ memset(buf + filled, 0, PAGE_SIZE-filled);
+
+ if (!result)
+ SetPageUptodate(page);
flush_dcache_page(page);
unlock_page(page);