cf5eba5334
We currently get fairly poor behaviour with files which get many short writes, such as system logs. This is because we end up with many tiny data nodes, and the rbtree gets massive. None of these nodes are actually obsolete, so they are counted as 'clean' space. Eraseblocks can be entirely full of these nodes (which are REF_NORMAL instead of REF_PRISTINE), and still they count entirely towards 'used_size' and the eraseblocks can sit on the clean_list for a long time without being picked for GC. One way to alleviate this in the long term is to account REF_NORMAL space separately from REF_PRISTINE space, rather than counting them both towards used_size. Then these eraseblocks can be picked for GC and the offending nodes will be garbage collected. The short-term fix, though -- which probably makes sense even if we do eventually implement the above -- is to merge these nodes as they're written. When we write the last byte in a page, write the _whole_ page. This obsoletes the earlier nodes in the page _immediately_ and we don't even need to wait for the garbage collection to do it. Original implementation from Ferenc Havasi <havasi@inf.u-szeged.hu> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
293 lines
8.4 KiB
C
293 lines
8.4 KiB
C
/*
|
|
* JFFS2 -- Journalling Flash File System, Version 2.
|
|
*
|
|
* Copyright (C) 2001-2003 Red Hat, Inc.
|
|
*
|
|
* Created by David Woodhouse <dwmw2@infradead.org>
|
|
*
|
|
* For licensing information, see the file 'LICENCE' in this directory.
|
|
*
|
|
* $Id: file.c,v 1.104 2005/10/18 23:29:35 tpoynor Exp $
|
|
*
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/time.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/crc32.h>
|
|
#include <linux/jffs2.h>
|
|
#include "nodelist.h"
|
|
|
|
/* Prototypes for the address_space callbacks that are defined further down. */
static int jffs2_readpage(struct file *filp, struct page *pg);
static int jffs2_prepare_write(struct file *filp, struct page *pg,
			       unsigned start, unsigned end);
static int jffs2_commit_write(struct file *filp, struct page *pg,
			      unsigned start, unsigned end);
|
|
|
|
int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
|
|
{
|
|
struct inode *inode = dentry->d_inode;
|
|
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
|
|
|
|
/* Trigger GC to flush any pending writes for this inode */
|
|
jffs2_flush_wbuf_gc(c, inode->i_ino);
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct file_operations jffs2_file_operations =
|
|
{
|
|
.llseek = generic_file_llseek,
|
|
.open = generic_file_open,
|
|
.read = generic_file_read,
|
|
.write = generic_file_write,
|
|
.ioctl = jffs2_ioctl,
|
|
.mmap = generic_file_readonly_mmap,
|
|
.fsync = jffs2_fsync,
|
|
.sendfile = generic_file_sendfile
|
|
};
|
|
|
|
/* jffs2_file_inode_operations */
|
|
|
|
struct inode_operations jffs2_file_inode_operations =
|
|
{
|
|
.setattr = jffs2_setattr
|
|
};
|
|
|
|
struct address_space_operations jffs2_file_address_operations =
|
|
{
|
|
.readpage = jffs2_readpage,
|
|
.prepare_write =jffs2_prepare_write,
|
|
.commit_write = jffs2_commit_write
|
|
};
|
|
|
|
static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
|
|
{
|
|
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
|
|
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
|
|
unsigned char *pg_buf;
|
|
int ret;
|
|
|
|
D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT));
|
|
|
|
BUG_ON(!PageLocked(pg));
|
|
|
|
pg_buf = kmap(pg);
|
|
/* FIXME: Can kmap fail? */
|
|
|
|
ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
|
|
|
|
if (ret) {
|
|
ClearPageUptodate(pg);
|
|
SetPageError(pg);
|
|
} else {
|
|
SetPageUptodate(pg);
|
|
ClearPageError(pg);
|
|
}
|
|
|
|
flush_dcache_page(pg);
|
|
kunmap(pg);
|
|
|
|
D2(printk(KERN_DEBUG "readpage finished\n"));
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read a locked page via jffs2_do_readpage_nolock() and then unlock it,
 * whatever the outcome. Returns what the read helper returned.
 */
int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
{
	int err;

	err = jffs2_do_readpage_nolock(inode, pg);
	unlock_page(pg);

	return err;
}
|
|
|
|
|
|
static int jffs2_readpage (struct file *filp, struct page *pg)
|
|
{
|
|
struct jffs2_inode_info *f = JFFS2_INODE_INFO(pg->mapping->host);
|
|
int ret;
|
|
|
|
down(&f->sem);
|
|
ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);
|
|
up(&f->sem);
|
|
return ret;
|
|
}
|
|
|
|
static int jffs2_prepare_write (struct file *filp, struct page *pg,
|
|
unsigned start, unsigned end)
|
|
{
|
|
struct inode *inode = pg->mapping->host;
|
|
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
|
|
uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT;
|
|
int ret = 0;
|
|
|
|
D1(printk(KERN_DEBUG "jffs2_prepare_write()\n"));
|
|
|
|
if (pageofs > inode->i_size) {
|
|
/* Make new hole frag from old EOF to new page */
|
|
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
|
|
struct jffs2_raw_inode ri;
|
|
struct jffs2_full_dnode *fn;
|
|
uint32_t phys_ofs, alloc_len;
|
|
|
|
D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
|
|
(unsigned int)inode->i_size, pageofs));
|
|
|
|
ret = jffs2_reserve_space(c, sizeof(ri), &phys_ofs, &alloc_len,
|
|
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
|
|
if (ret)
|
|
return ret;
|
|
|
|
down(&f->sem);
|
|
memset(&ri, 0, sizeof(ri));
|
|
|
|
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
|
|
ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
|
|
ri.totlen = cpu_to_je32(sizeof(ri));
|
|
ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
|
|
|
|
ri.ino = cpu_to_je32(f->inocache->ino);
|
|
ri.version = cpu_to_je32(++f->highest_version);
|
|
ri.mode = cpu_to_jemode(inode->i_mode);
|
|
ri.uid = cpu_to_je16(inode->i_uid);
|
|
ri.gid = cpu_to_je16(inode->i_gid);
|
|
ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
|
|
ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
|
|
ri.offset = cpu_to_je32(inode->i_size);
|
|
ri.dsize = cpu_to_je32(pageofs - inode->i_size);
|
|
ri.csize = cpu_to_je32(0);
|
|
ri.compr = JFFS2_COMPR_ZERO;
|
|
ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
|
|
ri.data_crc = cpu_to_je32(0);
|
|
|
|
fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_NORMAL);
|
|
|
|
if (IS_ERR(fn)) {
|
|
ret = PTR_ERR(fn);
|
|
jffs2_complete_reservation(c);
|
|
up(&f->sem);
|
|
return ret;
|
|
}
|
|
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
|
|
if (f->metadata) {
|
|
jffs2_mark_node_obsolete(c, f->metadata->raw);
|
|
jffs2_free_full_dnode(f->metadata);
|
|
f->metadata = NULL;
|
|
}
|
|
if (ret) {
|
|
D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret));
|
|
jffs2_mark_node_obsolete(c, fn->raw);
|
|
jffs2_free_full_dnode(fn);
|
|
jffs2_complete_reservation(c);
|
|
up(&f->sem);
|
|
return ret;
|
|
}
|
|
jffs2_complete_reservation(c);
|
|
inode->i_size = pageofs;
|
|
up(&f->sem);
|
|
}
|
|
|
|
/* Read in the page if it wasn't already present, unless it's a whole page */
|
|
if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) {
|
|
down(&f->sem);
|
|
ret = jffs2_do_readpage_nolock(inode, pg);
|
|
up(&f->sem);
|
|
}
|
|
D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags));
|
|
return ret;
|
|
}
|
|
|
|
static int jffs2_commit_write (struct file *filp, struct page *pg,
|
|
unsigned start, unsigned end)
|
|
{
|
|
/* Actually commit the write from the page cache page we're looking at.
|
|
* For now, we write the full page out each time. It sucks, but it's simple
|
|
*/
|
|
struct inode *inode = pg->mapping->host;
|
|
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
|
|
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
|
|
struct jffs2_raw_inode *ri;
|
|
unsigned aligned_start = start & ~3;
|
|
int ret = 0;
|
|
uint32_t writtenlen = 0;
|
|
|
|
D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
|
|
inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));
|
|
|
|
if (end == PAGE_CACHE_SIZE) {
|
|
if (!start) {
|
|
/* We need to avoid deadlock with page_cache_read() in
|
|
jffs2_garbage_collect_pass(). So we have to mark the
|
|
page up to date, to prevent page_cache_read() from
|
|
trying to re-lock it. */
|
|
SetPageUptodate(pg);
|
|
} else {
|
|
/* When writing out the end of a page, write out the
|
|
_whole_ page. This helps to reduce the number of
|
|
nodes in files which have many short writes, like
|
|
syslog files. */
|
|
start = aligned_start = 0;
|
|
}
|
|
}
|
|
|
|
ri = jffs2_alloc_raw_inode();
|
|
|
|
if (!ri) {
|
|
D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n"));
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* Set the fields that the generic jffs2_write_inode_range() code can't find */
|
|
ri->ino = cpu_to_je32(inode->i_ino);
|
|
ri->mode = cpu_to_jemode(inode->i_mode);
|
|
ri->uid = cpu_to_je16(inode->i_uid);
|
|
ri->gid = cpu_to_je16(inode->i_gid);
|
|
ri->isize = cpu_to_je32((uint32_t)inode->i_size);
|
|
ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());
|
|
|
|
/* In 2.4, it was already kmapped by generic_file_write(). Doesn't
|
|
hurt to do it again. The alternative is ifdefs, which are ugly. */
|
|
kmap(pg);
|
|
|
|
ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
|
|
(pg->index << PAGE_CACHE_SHIFT) + aligned_start,
|
|
end - aligned_start, &writtenlen);
|
|
|
|
kunmap(pg);
|
|
|
|
if (ret) {
|
|
/* There was an error writing. */
|
|
SetPageError(pg);
|
|
}
|
|
|
|
/* Adjust writtenlen for the padding we did, so we don't confuse our caller */
|
|
if (writtenlen < (start&3))
|
|
writtenlen = 0;
|
|
else
|
|
writtenlen -= (start&3);
|
|
|
|
if (writtenlen) {
|
|
if (inode->i_size < (pg->index << PAGE_CACHE_SHIFT) + start + writtenlen) {
|
|
inode->i_size = (pg->index << PAGE_CACHE_SHIFT) + start + writtenlen;
|
|
inode->i_blocks = (inode->i_size + 511) >> 9;
|
|
|
|
inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
|
|
}
|
|
}
|
|
|
|
jffs2_free_raw_inode(ri);
|
|
|
|
if (start+writtenlen < end) {
|
|
/* generic_file_write has written more to the page cache than we've
|
|
actually written to the medium. Mark the page !Uptodate so that
|
|
it gets reread */
|
|
D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n"));
|
|
SetPageError(pg);
|
|
ClearPageUptodate(pg);
|
|
}
|
|
|
|
D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret));
|
|
return start+writtenlen==end?0:ret;
|
|
}
|