/*
* fs/logfs/super.c
*
* As should be obvious for Linux kernel code, license is GPLv2
*
* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
*
* Generally contains mount/umount code and also serves as a dump area for
* any functions that don't fit elsewhere and don't justify a file of their
* own either.
*/
#include "logfs.h"
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mtd/mtd.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
/*
 * Fallback page for emergency_read_begin().  It is allocated once in
 * logfs_init() and freed in logfs_exit().  emergency_mutex is taken by
 * emergency_read_begin() before it fills emergency_page and is released
 * either on its error path or by emergency_read_end() when the caller hands
 * the page back.
 */
static DEFINE_MUTEX(emergency_mutex);
static struct page *emergency_page;
/*
 * emergency_read_begin - read a page, falling back to the emergency page
 * @mapping:	address space to read from; its ->readpage is used as filler
 * @index:	page index within @mapping
 *
 * Returns whatever read_cache_page() returned if that is non-NULL.
 * Otherwise the emergency page is filled under emergency_mutex and
 * returned with the mutex still held; the caller must pass the result to
 * emergency_read_end().  If filling the emergency page fails, the mutex is
 * dropped again and ERR_PTR(error) is returned.
 *
 * NOTE(review): read_cache_page() appears to report failure through
 * ERR_PTR() rather than NULL.  If so, the fallback below is never reached
 * and callers get the error pointer back directly - confirm.
 */
struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
{
	filler_t *read_fn = (filler_t *)mapping->a_ops->readpage;
	struct page *cached;
	int ret;

	cached = read_cache_page(mapping, index, read_fn, NULL);
	if (cached != NULL)
		return cached;

	/* No more pages available, switch to emergency page */
	printk(KERN_INFO"Logfs: Using emergency page\n");
	mutex_lock(&emergency_mutex);
	ret = read_fn(NULL, emergency_page);
	if (!ret) {
		/* emergency_mutex stays held until emergency_read_end() */
		return emergency_page;
	}

	mutex_unlock(&emergency_mutex);
	printk(KERN_EMERG"Logfs: Error reading emergency page\n");
	return ERR_PTR(ret);
}
/*
 * emergency_read_end - hand back a page obtained from emergency_read_begin()
 * @page:	page returned by emergency_read_begin()
 *
 * A regular page has its reference dropped.  The emergency page is shared,
 * so instead of releasing it the mutex protecting it is unlocked.
 */
void emergency_read_end(struct page *page)
{
	if (page != emergency_page) {
		page_cache_release(page);
		return;
	}

	mutex_unlock(&emergency_mutex);
}
static void dump_segfile(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_segment_entry se;
u32 segno;
for (segno = 0; segno < super->s_no_segs; segno++) {
logfs_get_segment_entry(sb, segno, &se);
printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
be32_to_cpu(se.valid));
if (++segno < super->s_no_segs) {
logfs_get_segment_entry(sb, segno, &se);
printk(" %6x %8x", be32_to_cpu(se.ec_level),
be32_to_cpu(se.valid));
}
if (++segno < super->s_no_segs) {
logfs_get_segment_entry(sb, segno, &se);
printk(" %6x %8x", be32_to_cpu(se.ec_level),
be32_to_cpu(se.valid));
}
if (++segno < super->s_no_segs) {
logfs_get_segment_entry(sb, segno, &se);
printk(" %6x %8x", be32_to_cpu(se.ec_level),
be32_to_cpu(se.valid));
}
printk("\n");
}
}
/*
* logfs_crash_dump - dump debug information
* @sb: superblock of the filesystem to dump
*
* The LogFS superblock only occupies part of a segment, and the stated intent
* of this function is to write as much debug information as it can gather
* into that spare space.  As written here it only calls dump_segfile(), which
* prints the segment file through printk(); nothing is written to the device
* by this function itself.
*/
void logfs_crash_dump(struct super_block *sb)
{
dump_segfile(sb);
}
/*
* TODO: move to lib/string.c
*/
/**
 * memchr_inv - Find the first byte in an area of memory that differs from @c.
 * @s: The memory area
 * @c: The byte every position is compared against (converted to
 *     unsigned char before comparing)
 * @n: The size of the area in bytes.
 *
 * returns the address of the first byte other than @c, or %NULL
 * if the whole buffer contains just @c (which includes the case @n == 0).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *bytes = s;
	const unsigned char wanted = (unsigned char)c;
	size_t pos;

	for (pos = 0; pos < n; pos++) {
		if (bytes[pos] != wanted)
			return (void *)(bytes + pos);
	}
	return NULL;
}
/*
 * logfs_statfs - fill in filesystem statistics
 * @dentry:	any dentry of the filesystem; only its superblock is used
 * @stats:	result buffer
 *
 * Free and available blocks are both derived from s_free_bytes, and the
 * file counts are reported as 0.  Always returns 0.
 *
 * FIXME: There should be a reserve for root, similar to ext2.
 */
int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
{
	struct super_block *sb = dentry->d_sb;
	struct logfs_super *super = logfs_super(sb);

	/* identification and geometry */
	stats->f_type = LOGFS_MAGIC_U32;
	stats->f_namelen = LOGFS_MAX_NAMELEN;
	stats->f_bsize = sb->s_blocksize;
	stats->f_blocks = super->s_size >> LOGFS_BLOCK_BITS >> 3;

	/* no separate reserve: available space equals free space */
	stats->f_bfree = super->s_free_bytes >> sb->s_blocksize_bits;
	stats->f_bavail = stats->f_bfree;

	/* inode counts are not reported */
	stats->f_files = 0;
	stats->f_ffree = 0;

	return 0;
}
static int logfs_sb_set(struct super_block *sb, void *_super)
{
struct logfs_super *super = _super;
sb->s_fs_info = super;
sb->s_mtd = super->s_mtd;
sb->s_bdev = super->s_bdev;
if (sb->s_bdev)
sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
if (sb->s_mtd)
sb->s_bdi = sb->s_mtd->backing_dev_info;
return 0;
}
/*
 * logfs_sb_test - sget() callback comparing an existing superblock
 * @sb:		existing superblock
 * @_super:	the struct logfs_super describing the device being mounted
 *
 * Returns 1 if @sb sits on the same MTD device or the same block device as
 * @_super, 0 otherwise.  A NULL device pointer in @_super never matches.
 */
static int logfs_sb_test(struct super_block *sb, void *_super)
{
	struct logfs_super *wanted = _super;
	int same_mtd = wanted->s_mtd && sb->s_mtd == wanted->s_mtd;
	int same_bdev = wanted->s_bdev && sb->s_bdev == wanted->s_bdev;

	return same_mtd || same_bdev;
}
/*
 * set_segment_header - fill in a segment header and checksum it
 * @sh:		header to initialise
 * @type:	segment type
 * @level:	segment level
 * @segno:	segment number
 * @ec:		erase count
 *
 * All multi-byte fields are stored big-endian.  The crc is computed last so
 * that it is calculated over the already populated header.
 *
 * NOTE(review): gec is filled from @segno, not from a separate counter -
 * confirm that this is intended.
 */
static void set_segment_header(struct logfs_segment_header *sh, u8 type,
		u8 level, u32 segno, u32 ec)
{
	/* single-byte fields */
	sh->type = type;
	sh->level = level;
	sh->pad = 0;

	/* big-endian fields */
	sh->ec = cpu_to_be32(ec);
	sh->segno = cpu_to_be32(segno);
	sh->gec = cpu_to_be64(segno);

	/* must stay last: checksums the fields written above */
	sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
}
/*
 * logfs_write_ds - build the on-medium superblock image in memory
 * @sb:		superblock supplying the values
 * @ds:		buffer receiving the image; it is zeroed first
 * @segno:	segment the image will be written to
 * @ec:		erase count to record in the segment header
 *
 * Multi-byte fields are converted to big-endian.  The crc is computed last,
 * once every other field has been written.
 */
static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
		u32 segno, u32 ec)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	memset(ds, 0, sizeof(*ds));
	set_segment_header(&ds->ds_sh, SEG_SUPER, 0, segno, ec);

	/* tree depths */
	ds->ds_ifile_levels	= super->s_ifile_levels;
	ds->ds_iblock_levels	= super->s_iblock_levels;
	ds->ds_data_levels	= super->s_data_levels; /* XXX: Remove */

	/* geometry */
	ds->ds_segment_shift	= super->s_segshift;
	ds->ds_block_shift	= sb->s_blocksize_bits;
	ds->ds_write_shift	= super->s_writeshift;
	ds->ds_filesystem_size	= cpu_to_be64(super->s_size);
	ds->ds_segment_size	= cpu_to_be32(super->s_segsize);

	/* reserves */
	ds->ds_bad_seg_reserve	= cpu_to_be32(super->s_bad_seg_reserve);
	ds->ds_root_reserve	= cpu_to_be64(super->s_root_reserve);
	ds->ds_speed_reserve	= cpu_to_be64(super->s_speed_reserve);

	/* feature bits */
	ds->ds_feature_incompat	 = cpu_to_be64(super->s_feature_incompat);
	ds->ds_feature_ro_compat = cpu_to_be64(super->s_feature_ro_compat);
	ds->ds_feature_compat	 = cpu_to_be64(super->s_feature_compat);
	ds->ds_feature_flags	 = cpu_to_be64(super->s_feature_flags);

	journal_for_each(i)
		ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);

	ds->ds_magic = cpu_to_be64(LOGFS_MAGIC);

	/* must stay last: checksums the fields written above */
	ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
			LOGFS_SEGMENT_HEADERSIZE + 12);
}
/*
 * write_one_sb - rewrite one copy of the superblock
 * @sb:		superblock to write
 * @find_sb:	device callback returning the page that holds the copy and
 *		storing the copy's device offset in *ofs
 *
 * Looks up the segment the copy lives in, increments the erase count taken
 * from its segment entry, records the segment as erased with the new count,
 * builds a fresh superblock image in the page and writes it out through the
 * device's write_sb operation.
 *
 * Returns the result of write_sb, or -EIO if @find_sb yields no usable page.
 */
static int write_one_sb(struct super_block *sb,
		struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_disk_super *ds;
	struct logfs_segment_entry se;
	struct page *page;
	u64 ofs;
	u32 ec, segno;
	int err;

	page = find_sb(sb, &ofs);
	/*
	 * find_super_block() treats these callbacks as able to return either
	 * NULL or an ERR_PTR(); reject both here as well so an error pointer
	 * is never passed to page_address().
	 */
	if (!page || IS_ERR(page))
		return -EIO;

	ds = page_address(page);
	segno = seg_no(sb, ofs);

	/* the erase count is kept above the low four bits of ec_level */
	logfs_get_segment_entry(sb, segno, &se);
	ec = be32_to_cpu(se.ec_level) >> 4;
	ec++;
	logfs_set_segment_erased(sb, segno, ec, 0);

	logfs_write_ds(sb, ds, segno, ec);
	err = super->s_devops->write_sb(sb, page);
	page_cache_release(page);
	return err;
}
int logfs_write_sb(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
int err;
/* First superblock */
err = write_one_sb(sb, super->s_devops->find_first_sb);
if (err)
return err;
/* Last superblock */
err = write_one_sb(sb, super->s_devops->find_last_sb);
if (err)
return err;
return 0;
}
/*
 * ds_cmp - compare two on-medium superblocks, ignoring their segment headers
 * @ds0:	first struct logfs_disk_super
 * @ds1:	second struct logfs_disk_super
 *
 * Returns the memcmp() result over everything that follows the leading
 * LOGFS_SEGMENT_HEADERSIZE bytes, i.e. 0 if the two payloads are identical.
 */
static int ds_cmp(const void *ds0, const void *ds1)
{
	/* We know the segment headers differ, so ignore them */
	const char *payload0 = (const char *)ds0 + LOGFS_SEGMENT_HEADERSIZE;
	const char *payload1 = (const char *)ds1 + LOGFS_SEGMENT_HEADERSIZE;
	size_t payload_len = sizeof(struct logfs_disk_super)
				- LOGFS_SEGMENT_HEADERSIZE;

	return memcmp(payload0, payload1, payload_len);
}
static int logfs_recover_sb(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_disk_super _ds0, *ds0 = &_ds0;
struct logfs_disk_super _ds1, *ds1 = &_ds1;
int err, valid0, valid1;
/* read first superblock */
err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
if (err)
return err;
/* read last superblock */
err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
if (err)
return err;
valid0 = logfs_check_ds(ds0) == 0;
valid1 = logfs_check_ds(ds1) == 0;
if (!valid0 && valid1) {
printk(KERN_INFO"First superblock is invalid - fixing.\n");
return write_one_sb(sb, super->s_devops->find_first_sb);
}
if (valid0 && !valid1) {
printk(KERN_INFO"Last superblock is invalid - fixing.\n");
return write_one_sb(sb, super->s_devops->find_last_sb);
}
if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
printk(KERN_INFO"Superblocks don't match - fixing.\n");
return logfs_write_sb(sb);
}
/* If neither is valid now, something's wrong. Didn't we properly
* check them before?!? */
BUG_ON(!valid0 && !valid1);
return 0;
}
/*
 * logfs_make_writeable - bring a mounted filesystem into a writable state
 * @sb:	superblock being mounted
 *
 * Runs the remaining mount steps in order and stops at the first failure:
 * open the segment file, repair the superblock copies, check the areas,
 * do one garbage collection pass and finally replay the journal.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int err = logfs_open_segfile(sb);

	/* Repair any broken superblock copies */
	if (!err)
		err = logfs_recover_sb(sb);

	/* Check areas for trailing unaccounted data */
	if (!err)
		err = logfs_check_areas(sb);

	if (err)
		return err;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	return logfs_replay_journal(sb);
}
static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
{
struct logfs_super *super = logfs_super(sb);
struct inode *rootdir;
int err;
/* root dir */
rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
if (IS_ERR(rootdir))
goto fail;
sb->s_root = d_alloc_root(rootdir);
if (!sb->s_root)
goto fail2;
super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
if (!super->s_erase_page)
goto fail2;
memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
/* FIXME: check for read-only mounts */
err = logfs_make_writeable(sb);
if (err)
goto fail3;
log_super("LogFS: Finished mounting\n");
simple_set_mnt(mnt, sb);
return 0;
fail3:
__free_page(super->s_erase_page);
fail2:
iput(rootdir);
fail:
iput(logfs_super(sb)->s_master_inode);
return -EIO;
}
/*
 * logfs_check_ds - validate an on-medium superblock
 * @ds:	superblock image to check
 *
 * The image is accepted only if the magic number matches, the segment
 * header crc is correct and the superblock crc is correct.  The checks are
 * made in that order and stop at the first mismatch.
 *
 * Returns 0 if the image is valid, -EINVAL otherwise.
 */
int logfs_check_ds(struct logfs_disk_super *ds)
{
	struct logfs_segment_header *sh = &ds->ds_sh;
	int bad;

	bad = ds->ds_magic != cpu_to_be64(LOGFS_MAGIC)
		|| sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4)
		|| ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
					LOGFS_SEGMENT_HEADERSIZE + 12);

	return bad ? -EINVAL : 0;
}
/*
 * find_super_block - locate a valid copy of the on-medium superblock
 * @sb:	superblock being mounted
 *
 * Asks the device for the pages holding the first and the last superblock
 * copy, recording their offsets in s_sb_ofs[0] and s_sb_ofs[1].  The first
 * copy is preferred; the last one is used only if the first fails
 * logfs_check_ds().
 *
 * Returns the page holding the chosen copy, with the reference obtained
 * from the device still held, or NULL if either page could not be obtained
 * or neither copy is valid.  The reference on the page that is not returned
 * is dropped.
 */
static struct page *find_super_block(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct page *first, *last;
	struct page *chosen, *unused;

	first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
	if (!first || IS_ERR(first))
		return NULL;

	last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
	if (!last || IS_ERR(last)) {
		page_cache_release(first);
		return NULL;
	}

	if (!logfs_check_ds(page_address(first))) {
		chosen = first;
		unused = last;
	} else if (!logfs_check_ds(page_address(last))) {
		/* First one didn't work, try the second superblock */
		chosen = last;
		unused = first;
	} else {
		/* Neither worked, sorry folks */
		page_cache_release(first);
		page_cache_release(last);
		return NULL;
	}

	page_cache_release(unused);
	return chosen;
}
/*
 * __logfs_read_sb - load the on-medium superblock into the in-memory one
 * @sb:	superblock being mounted
 *
 * Picks a valid superblock copy via find_super_block() and copies its
 * fields into @sb and the logfs private data, converting big-endian fields
 * to CPU order and deriving sizes, masks and counts from the stored shifts.
 *
 * Returns 0 on success, -EINVAL if no valid superblock copy was found.
 *
 * NOTE(review): the shift values are used as read from the medium, without
 * a range check - confirm that this is acceptable for images that pass the
 * crc check.
 */
static int __logfs_read_sb(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_disk_super *ds;
	struct page *page;
	int i;

	page = find_super_block(sb);
	if (!page)
		return -EINVAL;
	ds = page_address(page);

	/* overall size and reserves */
	super->s_size = be64_to_cpu(ds->ds_filesystem_size);
	super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
	super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
	super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);

	/* segment geometry */
	super->s_segshift = ds->ds_segment_shift;
	super->s_segsize = 1 << ds->ds_segment_shift;
	super->s_segmask = (1 << ds->ds_segment_shift) - 1;

	/* block geometry */
	sb->s_blocksize_bits = ds->ds_block_shift;
	sb->s_blocksize = 1 << ds->ds_block_shift;

	/* write granularity */
	super->s_writeshift = ds->ds_write_shift;
	super->s_writesize = 1 << ds->ds_write_shift;

	/* counts derived from the values set above */
	super->s_no_segs = super->s_size >> super->s_segshift;
	super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;

	/* feature bits */
	super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
	super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
	super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
	super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);

	journal_for_each(i)
		super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);

	/* tree depths */
	super->s_ifile_levels = ds->ds_ifile_levels;
	super->s_iblock_levels = ds->ds_iblock_levels;
	super->s_data_levels = ds->ds_data_levels;
	super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
		+ super->s_data_levels;

	page_cache_release(page);
	return 0;
}
/*
 * logfs_read_sb - read the superblock and initialise the in-memory state
 * @sb:		superblock being mounted
 * @read_only:	non-zero for a read-only mount
 *
 * Sets up the btree mempool and shadow trees, initialises the mapping,
 * loads the on-medium superblock, checks the feature bits and then
 * initialises the rw, areas, gc and journal parts in that order.
 *
 * Returns 0 on success, -ENOMEM if the mempool cannot be created, -EIO for
 * unsupported features, or the error of the failing init step.  Nothing is
 * undone here on failure.
 */
static int logfs_read_sb(struct super_block *sb, int read_only)
{
	struct logfs_super *super = logfs_super(sb);
	int ret;

	super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
	if (!super->s_btree_pool)
		return -ENOMEM;

	/* all three shadow trees share the same pool */
	btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
	btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
	btree_init_mempool32(&super->s_shadow_tree.segment_map,
			super->s_btree_pool);

	ret = logfs_init_mapping(sb);
	if (!ret)
		ret = __logfs_read_sb(sb);
	if (ret)
		return ret;

	/* unknown incompat bits always refuse the mount */
	if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
		return -EIO;
	/* unknown ro_compat bits only refuse a read-write mount */
	if (!read_only &&
	    (super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT))
		return -EIO;

	mutex_init(&super->s_dirop_mutex);
	mutex_init(&super->s_object_alias_mutex);
	INIT_LIST_HEAD(&super->s_freeing_list);

	ret = logfs_init_rw(sb);
	if (!ret)
		ret = logfs_init_areas(sb);
	if (!ret)
		ret = logfs_init_gc(sb);
	if (!ret)
		ret = logfs_init_journal(sb);
	return ret;
}
/*
* logfs_kill_sb - tear down a logfs superblock
* @sb: superblock being destroyed
*
* Installed as the ->kill_sb hook of logfs_fs_type.  Syncs the filesystem and
* writes the anchor, marks the filesystem as shutting down, lets the VFS shut
* the superblock down, and then releases the logfs resources: gc, journal,
* areas and rw state, the erase page (if one is set), the device, both
* mempools and finally the logfs_super structure itself.
*/
static void logfs_kill_sb(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
log_super("LogFS: Start unmounting\n");
/* Alias entries slow down mount, so evict as many as possible */
sync_filesystem(sb);
logfs_write_anchor(sb);
/*
* From this point on alias entries are simply dropped - and any
* writes to the object store are considered bugs.
*/
super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
log_super("LogFS: Now in shutdown\n");
generic_shutdown_super(sb);
/* no dirty byte accounting may be left over after shutdown */
BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
logfs_cleanup_gc(sb);
logfs_cleanup_journal(sb);
logfs_cleanup_areas(sb);
logfs_cleanup_rw(sb);
/* s_erase_page is only freed when it is set */
if (super->s_erase_page)
__free_page(super->s_erase_page);
super->s_devops->put_device(sb);
logfs_mempool_destroy(super->s_btree_pool);
logfs_mempool_destroy(super->s_alias_pool);
/* super must not be touched after this point */
kfree(super);
log_super("LogFS: Finished unmounting\n");
}
/*
 * logfs_get_sb_device - mount logfs from an already opened device
 * @type:	filesystem type, passed on to sget()
 * @flags:	mount flags; MS_NOATIME is always added
 * @mtd:	MTD device to mount from, or NULL
 * @bdev:	block device to mount from, or NULL
 * @devops:	device operations matching the device
 * @mnt:	vfsmount to attach the superblock to
 *
 * Allocates the logfs private data and obtains a superblock through sget().
 * If the device already has a mounted superblock, that one is attached to
 * @mnt and the freshly allocated private data is discarded.  Otherwise the
 * on-medium superblock is read and the mount is completed.
 *
 * Returns 0 on success, -ENOMEM if the private data cannot be allocated,
 * the sget() error if no superblock could be obtained, or the error from
 * reading/finishing the mount.
 *
 * NOTE(review): on the paths that only kfree() the private data, the device
 * passed in by the caller is not released here (put_device() takes a
 * super_block) - confirm that the callers take care of it.
 */
int logfs_get_sb_device(struct file_system_type *type, int flags,
		struct mtd_info *mtd, struct block_device *bdev,
		const struct logfs_device_ops *devops, struct vfsmount *mnt)
{
	struct logfs_super *super;
	struct super_block *sb;
	int err = -ENOMEM;
	static int mount_count;

	log_super("LogFS: Start mount %x\n", mount_count++);
	super = kzalloc(sizeof(*super), GFP_KERNEL);
	if (!super)
		goto err0;

	super->s_mtd	= mtd;
	super->s_bdev	= bdev;

	sb = sget(type, logfs_sb_test, logfs_sb_set, super);
	if (IS_ERR(sb)) {
		/* report the real cause instead of a blanket -EINVAL */
		err = PTR_ERR(sb);
		goto err0;
	}

	if (sb->s_root) {
		/*
		 * Device is already in use: sget() returned the existing
		 * superblock, so our own private data is not needed.
		 */
		err = 0;
		simple_set_mnt(mnt, sb);
		goto err0;
	}

	super->s_devops = devops;

	/*
	 * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
	 * only covers 16TB and the upper 8TB are used for indirect blocks.
	 * On 64bit system we could bump up the limit, but that would make
	 * the filesystem incompatible with 32bit systems.
	 */
	sb->s_maxbytes	= (1ull << 43) - 1;
	sb->s_op	= &logfs_super_operations;
	sb->s_flags	= flags | MS_NOATIME;

	err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
	if (err)
		goto err1;

	sb->s_flags |= MS_ACTIVE;
	err = logfs_get_sb_final(sb, mnt);
	if (err)
		goto err1;
	return 0;

err1:
	/* the superblock owns super by now; tearing it down releases it */
	deactivate_locked_super(sb);
	return err;
err0:
	kfree(super);
	return err;
}
/*
 * logfs_get_sb - ->get_sb hook, dispatches on the device name
 * @type:	filesystem type
 * @flags:	mount flags
 * @devname:	device name given to mount, may be NULL
 * @data:	mount options (unused)
 * @mnt:	vfsmount to fill in
 *
 * A name starting with "mtd" is parsed as "mtd<number>" and mounted from
 * that MTD device; trailing characters after the number yield -EINVAL.
 * Every other name, including a missing one, is handed to the block device
 * path.
 */
static int logfs_get_sb(struct file_system_type *type, int flags,
		const char *devname, void *data, struct vfsmount *mnt)
{
	char *end;
	ulong mtdnr;

	if (!devname || strncmp(devname, "mtd", 3) != 0)
		return logfs_get_sb_bdev(type, flags, devname, mnt);

	/* base 0: the number may be decimal, octal or hexadecimal */
	mtdnr = simple_strtoul(devname + 3, &end, 0);
	if (*end != '\0')
		return -EINVAL;

	return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
}
/*
 * Filesystem type registered by logfs_init() and unregistered by
 * logfs_exit().  Mounting goes through logfs_get_sb(), teardown through
 * logfs_kill_sb().
 */
static struct file_system_type logfs_fs_type = {
	.name		= "logfs",
	.owner		= THIS_MODULE,
	.fs_flags	= FS_REQUIRES_DEV,
	.get_sb		= logfs_get_sb,
	.kill_sb	= logfs_kill_sb,
};
/*
 * logfs_init - module initialisation
 *
 * Allocates the emergency page, initialises the compression code and the
 * inode cache, and registers the filesystem type.  Every step that fails
 * undoes the steps completed before it, including a failing
 * register_filesystem(), so nothing stays allocated when the module fails
 * to load.
 *
 * Returns 0 on success or a negative error code.
 */
static int __init logfs_init(void)
{
	int ret;

	emergency_page = alloc_pages(GFP_KERNEL, 0);
	if (!emergency_page)
		return -ENOMEM;

	ret = logfs_compr_init();
	if (ret)
		goto out1;

	ret = logfs_init_inode_cache();
	if (ret)
		goto out2;

	ret = register_filesystem(&logfs_fs_type);
	if (!ret)
		return 0;

	/* registration failed: unwind in the same order as logfs_exit() */
	logfs_destroy_inode_cache();
out2:
	logfs_compr_exit();
out1:
	__free_pages(emergency_page, 0);
	return ret;
}
/*
* logfs_exit - module teardown
*
* Unregisters the filesystem type first and then releases what logfs_init()
* set up, in the reverse order of its setup: inode cache, compression code and
* the emergency page.
*/
static void __exit logfs_exit(void)
{
unregister_filesystem(&logfs_fs_type);
logfs_destroy_inode_cache();
logfs_compr_exit();
__free_pages(emergency_page, 0);
}
/* Module entry/exit points and module metadata */
module_init(logfs_init);
module_exit(logfs_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
MODULE_DESCRIPTION("scalable flash filesystem");