/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file implements UBIFS shrinker which evicts clean znodes from the TNC
* tree when Linux VM needs more RAM.
*
* We do not implement any LRU lists to find oldest znodes to free because it
* would add additional overhead to the file system fast paths. So the shrinker
* just walks the TNC tree when searching for znodes to free.
*
* If the root of a TNC sub-tree is clean and old enough, then the children are
* also clean and old enough. So the shrinker walks the TNC in level order and
* dumps entire sub-trees.
*
* The age of znodes is just the time-stamp when they were last looked at.
* The current shrinker first tries to evict old znodes, then young ones.
*
* Since the shrinker is global, it has to protect against races with FS
* un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
*/
#include "ubifs.h"
/* List of all UBIFS file-system instances */
LIST_HEAD(ubifs_infos);
/*
* We number each shrinker run and record the number on the ubifs_info structure
* so that we can easily work out which ubifs_info structures have already been
* done by the current run.
*/
static unsigned int shrinker_run_no;
/* Protects 'ubifs_infos' list */
DEFINE_SPINLOCK(ubifs_infos_lock);
/* Global clean znode counter (for all mounted UBIFS instances) */
atomic_long_t ubifs_clean_zn_cnt;
/**
* shrink_tnc - shrink TNC tree.
* @c: UBIFS file-system description object
* @nr: number of znodes to free
* @age: the age of znodes to free
* @contention: if any contention, this is set to %1
*
* This function traverses TNC tree and frees clean znodes. It does not free
* clean znodes which younger then @age. Returns number of freed znodes.
*/
static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
{
int total_freed = 0;
struct ubifs_znode *znode, *zprev;
int time = get_seconds();
ubifs_assert(mutex_is_locked(&c->umount_mutex));
ubifs_assert(mutex_is_locked(&c->tnc_mutex));
if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
return 0;
/*
* Traverse the TNC tree in levelorder manner, so that it is possible
* to destroy large sub-trees. Indeed, if a znode is old, then all its
* children are older or of the same age.
*
* Note, we are holding 'c->tnc_mutex', so we do not have to lock the
* 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
* changed only when the 'c->tnc_mutex' is held.
*/
zprev = NULL;
znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
while (znode && total_freed < nr &&
atomic_long_read(&c->clean_zn_cnt) > 0) {
int freed;
/*
* If the znode is clean, but it is in the 'c->cnext' list, this
* means that this znode has just been written to flash as a
* part of commit and was marked clean. They will be removed
* from the list at end commit. We cannot change the list,
* because it is not protected by any mutex (design decision to
* make commit really independent and parallel to main I/O). So
* we just skip these znodes.
*
* Note, the 'clean_zn_cnt' counters are not updated until
* after the commit, so the UBIFS shrinker does not report
* the znodes which are in the 'c->cnext' list as freeable.
*
* Also note, if the root of a sub-tree is not in 'c->cnext',
* then the whole sub-tree is not in 'c->cnext' as well, so it
* is safe to dump whole sub-tree.
*/
if (znode->cnext) {
/*
* Very soon these znodes will be removed from the list
* and become freeable.
*/
*contention = 1;
} else if (!ubifs_zn_dirty(znode) &&
abs(time - znode->time) >= age) {
if (znode->parent)
znode->parent->zbranch[znode->iip].znode = NULL;
else
c->zroot.znode = NULL;
freed = ubifs_destroy_tnc_subtree(znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
total_freed += freed;
znode = zprev;
}
if (unlikely(!c->zroot.znode))
break;
zprev = znode;
znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
cond_resched();
}
return total_freed;
}
/**
* shrink_tnc_trees - shrink UBIFS TNC trees.
* @nr: number of znodes to free
* @age: the age of znodes to free
* @contention: if any contention, this is set to %1
*
* This function walks the list of mounted UBIFS file-systems and frees clean
* znodes which are older than @age, until at least @nr znodes are freed.
* Returns the number of freed znodes.
*/
static int shrink_tnc_trees(int nr, int age, int *contention)
{
struct ubifs_info *c;
struct list_head *p;
unsigned int run_no;
int freed = 0;
spin_lock(&ubifs_infos_lock);
do {
run_no = ++shrinker_run_no;
} while (run_no == 0);
/* Iterate over all mounted UBIFS file-systems and try to shrink them */
p = ubifs_infos.next;
while (p != &ubifs_infos) {
c = list_entry(p, struct ubifs_info, infos_list);
/*
* We move the ones we do to the end of the list, so we stop
* when we see one we have already done.
*/
if (c->shrinker_run_no == run_no)
break;
if (!mutex_trylock(&c->umount_mutex)) {
/* Some un-mount is in progress, try next FS */
*contention = 1;
p = p->next;
continue;
}
/*
* We're holding 'c->umount_mutex', so the file-system won't go
* away.
*/
if (!mutex_trylock(&c->tnc_mutex)) {
mutex_unlock(&c->umount_mutex);
*contention = 1;
p = p->next;
continue;
}
spin_unlock(&ubifs_infos_lock);
/*
* OK, now we have TNC locked, the file-system cannot go away -
* it is safe to reap the cache.
*/
c->shrinker_run_no = run_no;
freed += shrink_tnc(c, nr, age, contention);
mutex_unlock(&c->tnc_mutex);
spin_lock(&ubifs_infos_lock);
/* Get the next list element before we move this one */
p = p->next;
/*
* Move this one to the end of the list to provide some
* fairness.
*/
list_move_tail(&c->infos_list, &ubifs_infos);
mutex_unlock(&c->umount_mutex);
if (freed >= nr)
break;
}
spin_unlock(&ubifs_infos_lock);
return freed;
}
/**
* kick_a_thread - kick a background thread to start commit.
*
* This function kicks a background thread to start background commit. Returns
* %-1 if a thread was kicked or there is another reason to assume the memory
* will soon be freed or become freeable. If there are no dirty znodes, returns
* %0.
*/
static int kick_a_thread(void)
{
int i;
struct ubifs_info *c;
/*
* Iterate over all mounted UBIFS file-systems and find out if there is
* already an ongoing commit operation there. If no, then iterate for
* the second time and initiate background commit.
*/
spin_lock(&ubifs_infos_lock);
for (i = 0; i < 2; i++) {
list_for_each_entry(c, &ubifs_infos, infos_list) {
long dirty_zn_cnt;
if (!mutex_trylock(&c->umount_mutex)) {
/*
* Some un-mount is in progress, it will
* certainly free memory, so just return.
*/
spin_unlock(&ubifs_infos_lock);
return -1;
}
dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
c->ro_media || c->ro_error) {
mutex_unlock(&c->umount_mutex);
continue;
}
if (c->cmt_state != COMMIT_RESTING) {
spin_unlock(&ubifs_infos_lock);
mutex_unlock(&c->umount_mutex);
return -1;
}
if (i == 1) {
list_move_tail(&c->infos_list, &ubifs_infos);
spin_unlock(&ubifs_infos_lock);
ubifs_request_bg_commit(c);
mutex_unlock(&c->umount_mutex);
return -1;
}
mutex_unlock(&c->umount_mutex);
}
}
spin_unlock(&ubifs_infos_lock);
return 0;
}
int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
if (nr == 0)
return clean_zn_cnt;
if (!clean_zn_cnt) {
/*
* No clean znodes, nothing to reap. All we can do in this case
* is to kick background threads to start commit, which will
* probably make clean znodes which, in turn, will be freeable.
* And we return -1 which means will make VM call us again
* later.
*/
dbg_tnc("no clean znodes, kick a thread");
return kick_a_thread();
}
freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
if (freed >= nr)
goto out;
dbg_tnc("not enough old znodes, try to free young ones");
freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
if (freed >= nr)
goto out;
dbg_tnc("not enough young znodes, free all");
freed += shrink_tnc_trees(nr - freed, 0, &contention);
if (!freed && contention) {
dbg_tnc("freed nothing, but contention");
return -1;
}
out:
dbg_tnc("%d znodes were freed, requested %d", freed, nr);
return freed;
}