android_kernel_xiaomi_sm8350/fs/mpage.c
Greg Kroah-Hartman b0b02162a4 This is the 5.4.13 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl4iAaQACgkQONu9yGCS
 aT5vIg/+Lj4wdF3UuUWonHdWBhnfG2FKCWFTYJKPpFXFRMltAa27XKns/CvR8CBW
 9ztOH928CR8K9BS7HbfGtsgOEOVzILb4+akco5UhrTH93dc2T6RwSDiMpaULgeIF
 x/n834yNlsHs1NSmjjuimBe1j4NcZwPnnNVGKmFojkv04QPsFjP6HCp7PR2/PMXP
 CVO5JBXqMYtMRprY0xkpAGCStqVZPF6uwfTPrKRgaOCTpkKsqBEFJbwqOoqGQWou
 fQPOmEFjw+e9rIKzJgou6k4YGrWITcpNnUMdxavCszcQFTeUnY1vpLTiVxyZC1E3
 R+7ulfe+/zoQvWIer9H85ySLuOjSmmXb5CM9Fc0WLSsvKmTKfUNe/g5Cce+rngPY
 x/+tIBvXgFSoGR4oO5dEHhXn9Hzqr0OHbZy1dLKY1RU4NzxLsAtR2DH4ps25I4ux
 Ty2P0kYwm5Sz43MspnFAPTaU5kC3qHVNMjanbb5I7xGF2m0HZmh0zRHBC50DqP4Y
 nmLUklpX4EGVAYGb94YZMa3ugksSvie2SLgk838UQG+lGqaQoxAyAeRmDdyR1zE7
 GHlkNxWj8cbkBsPDSYt6Wvrt+7+e8Bbk5Y/fM5+j02h6ehs9wqOaQ985CfvrrYix
 RyGc7pWt1FPL7Kqv/CtbDieglS/P0BMPPGYX2rfidk6i+0knWaE=
 =53PP
 -----END PGP SIGNATURE-----

Merge 5.4.13 into android-5.4

Changes in 5.4.13
	HID: hidraw, uhid: Always report EPOLLOUT
	rtc: mt6397: fix alarm register overwrite
	phy: mapphone-mdm6600: Fix uninitialized status value regression
	RDMA/bnxt_re: Avoid freeing MR resources if dereg fails
	RDMA/bnxt_re: Fix Send Work Entry state check while polling completions
	IB/hfi1: Don't cancel unused work item
	mtd: rawnand: stm32_fmc2: avoid to lock the CPU bus
	i2c: bcm2835: Store pointer to bus clock
	ASoC: SOF: imx8: fix memory allocation failure check on priv->pd_dev
	ASoC: soc-core: Set dpcm_playback / dpcm_capture
	ASoC: stm32: spdifrx: fix inconsistent lock state
	ASoC: stm32: spdifrx: fix race condition in irq handler
	ASoC: stm32: spdifrx: fix input pin state management
	pinctrl: lochnagar: select GPIOLIB
	netfilter: nft_flow_offload: fix underflow in flowtable reference counter
	ASoC: SOF: imx8: Fix dsp_box offset
	mtd: onenand: omap2: Pass correct flags for prep_dma_memcpy
	gpio: zynq: Fix for bug in zynq_gpio_restore_context API
	pinctrl: meson: Fix wrong shift value when get drive-strength
	selftests: loopback.sh: skip this test if the driver does not support
	iommu/vt-d: Unlink device if failed to add to group
	iommu: Remove device link to group on failure
	bpf: cgroup: prevent out-of-order release of cgroup bpf
	fs: move guard_bio_eod() after bio_set_op_attrs
	scsi: mpt3sas: Fix double free in attach error handling
	gpio: Fix error message on out-of-range GPIO in lookup table
	PM / devfreq: tegra: Add COMMON_CLK dependency
	PCI: amlogic: Fix probed clock names
	drm/tegra: Fix ordering of cleanup code
	hsr: add hsr root debugfs directory
	hsr: rename debugfs file when interface name is changed
	hsr: reset network header when supervision frame is created
	s390/qeth: fix qdio teardown after early init error
	s390/qeth: fix false reporting of VNIC CHAR config failure
	s390/qeth: Fix vnicc_is_in_use if rx_bcast not set
	s390/qeth: vnicc Fix init to default
	s390/qeth: fix initialization on old HW
	cifs: Adjust indentation in smb2_open_file
	scsi: smartpqi: Update attribute name to `driver_version`
	MAINTAINERS: Append missed file to the database
	ath9k: use iowrite32 over __raw_writel
	can: j1939: fix address claim code example
	dt-bindings: reset: Fix brcmstb-reset example
	reset: brcmstb: Remove resource checks
	afs: Fix missing cell comparison in afs_test_super()
	perf vendor events s390: Remove name from L1D_RO_EXCL_WRITES description
	syscalls/x86: Wire up COMPAT_SYSCALL_DEFINE0
	syscalls/x86: Use COMPAT_SYSCALL_DEFINE0 for IA32 (rt_)sigreturn
	syscalls/x86: Use the correct function type for sys_ni_syscall
	syscalls/x86: Fix function types in COND_SYSCALL
	hsr: fix slab-out-of-bounds Read in hsr_debugfs_rename()
	btrfs: simplify inode locking for RWF_NOWAIT
	netfilter: nf_tables_offload: release flow_rule on error from commit path
	netfilter: nft_meta: use 64-bit time arithmetic
	ASoC: dt-bindings: mt8183: add missing update
	ASoC: simple_card_utils.h: Add missing include
	ASoC: fsl_esai: Add spin lock to protect reset, stop and start
	ASoC: SOF: Intel: Broadwell: clarify mutual exclusion with legacy driver
	ASoC: core: Fix compile warning with CONFIG_DEBUG_FS=n
	ASoC: rsnd: fix DALIGN register for SSIU
	RDMA/hns: Prevent undefined behavior in hns_roce_set_user_sq_size()
	RDMA/hns: remove a redundant le16_to_cpu
	RDMA/hns: Modify return value of restrack functions
	RDMA/counter: Prevent QP counter manual binding in auto mode
	RDMA/siw: Fix port number endianness in a debug message
	RDMA/hns: Fix build error again
	RDMA/hns: Release qp resources when failed to destroy qp
	xprtrdma: Add unique trace points for posting Local Invalidate WRs
	xprtrdma: Connection becomes unstable after a reconnect
	xprtrdma: Fix MR list handling
	xprtrdma: Close window between waking RPC senders and posting Receives
	RDMA/hns: Fix to support 64K page for srq
	RDMA/hns: Bugfix for qpc/cqc timer configuration
	rdma: Remove nes ABI header
	RDMA/mlx5: Return proper error value
	RDMA/srpt: Report the SCSI residual to the initiator
	uaccess: Add non-pagefault user-space write function
	bpf: Make use of probe_user_write in probe write helper
	bpf: skmsg, fix potential psock NULL pointer dereference
	bpf: Support pre-2.25-binutils objcopy for vmlinux BTF
	libbpf: Fix Makefile' libbpf symbol mismatch diagnostic
	afs: Fix use-after-loss-of-ref
	afs: Fix afs_lookup() to not clobber the version on a new dentry
	keys: Fix request_key() cache
	scsi: enclosure: Fix stale device oops with hot replug
	scsi: sd: Clear sdkp->protection_type if disk is reformatted without PI
	platform/mellanox: fix potential deadlock in the tmfifo driver
	platform/x86: asus-wmi: Fix keyboard brightness cannot be set to 0
	platform/x86: GPD pocket fan: Use default values when wrong modparams are given
	asm-generic/nds32: don't redefine cacheflush primitives
	Documentation/ABI: Fix documentation inconsistency for mlxreg-io sysfs interfaces
	Documentation/ABI: Add missed attribute for mlxreg-io sysfs interfaces
	xprtrdma: Fix create_qp crash on device unload
	xprtrdma: Fix completion wait during device removal
	xprtrdma: Fix oops in Receive handler after device removal
	dm: add dm-clone to the documentation index
	scsi: ufs: Give an unique ID to each ufs-bsg
	crypto: cavium/nitrox - fix firmware assignment to AE cores
	crypto: hisilicon - select NEED_SG_DMA_LENGTH in qm Kconfig
	crypto: arm64/aes-neonbs - add return value of skcipher_walk_done() in __xts_crypt()
	crypto: virtio - implement missing support for output IVs
	crypto: algif_skcipher - Use chunksize instead of blocksize
	crypto: geode-aes - convert to skcipher API and make thread-safe
	NFSv2: Fix a typo in encode_sattr()
	nfsd: Fix cld_net->cn_tfm initialization
	nfsd: v4 support requires CRYPTO_SHA256
	NFSv4.x: Handle bad/dead sessions correctly in nfs41_sequence_process()
	NFSv4.x: Drop the slot if nfs4_delegreturn_prepare waits for layoutreturn
	iio: imu: st_lsm6dsx: fix gyro gain definitions for LSM9DS1
	iio: imu: adis16480: assign bias value only if operation succeeded
	mei: fix modalias documentation
	clk: meson: axg-audio: fix regmap last register
	clk: samsung: exynos5420: Preserve CPU clocks configuration during suspend/resume
	clk: Fix memory leak in clk_unregister()
	dmaengine: dw: platform: Mark 'hclk' clock optional
	clk: imx: pll14xx: Fix quick switch of S/K parameter
	rsi: fix potential null dereference in rsi_probe()
	affs: fix a memory leak in affs_remount
	pinctl: ti: iodelay: fix error checking on pinctrl_count_index_with_args call
	pinctrl: sh-pfc: Fix PINMUX_IPSR_PHYS() to set GPSR
	pinctrl: sh-pfc: Do not use platform_get_irq() to count interrupts
	pinctrl: lewisburg: Update pin list according to v1.1v6
	PCI: pciehp: Do not disable interrupt twice on suspend
	Revert "drm/virtio: switch virtio_gpu_wait_ioctl() to gem helper."
	drm/amdgpu: cleanup creating BOs at fixed location (v2)
	drm/amdgpu/discovery: reserve discovery data at the top of VRAM
	scsi: sd: enable compat ioctls for sed-opal
	arm64: dts: apq8096-db820c: Increase load on l21 for SDCARD
	gfs2: add compat_ioctl support
	af_unix: add compat_ioctl support
	compat_ioctl: handle SIOCOUTQNSD
	PCI: aardvark: Use LTSSM state to build link training flag
	PCI: aardvark: Fix PCI_EXP_RTCTL register configuration
	PCI: dwc: Fix find_next_bit() usage
	PCI: Fix missing bridge dma_ranges resource list cleanup
	PCI/PM: Clear PCIe PME Status even for legacy power management
	tools: PCI: Fix fd leakage
	PCI/PTM: Remove spurious "d" from granularity message
	powerpc/powernv: Disable native PCIe port management
	MIPS: PCI: remember nasid changed by set interrupt affinity
	MIPS: Loongson: Fix return value of loongson_hwmon_init
	MIPS: SGI-IP27: Fix crash, when CPUs are disabled via nr_cpus parameter
	tty: serial: imx: use the sg count from dma_map_sg
	tty: serial: pch_uart: correct usage of dma_unmap_sg
	ARM: 8943/1: Fix topology setup in case of CPU hotplug for CONFIG_SCHED_MC
	media: ov6650: Fix incorrect use of JPEG colorspace
	media: ov6650: Fix some format attributes not under control
	media: ov6650: Fix .get_fmt() V4L2_SUBDEV_FORMAT_TRY support
	media: ov6650: Fix default format not applied on device probe
	media: rcar-vin: Fix incorrect return statement in rvin_try_format()
	media: hantro: h264: Fix the frame_num wraparound case
	media: v4l: cadence: Fix how unsued lanes are handled in 'csi2rx_start()'
	media: exynos4-is: Fix recursive locking in isp_video_release()
	media: coda: fix deadlock between decoder picture run and start command
	media: cedrus: Use correct H264 8x8 scaling list
	media: hantro: Do not reorder H264 scaling list
	media: aspeed-video: Fix memory leaks in aspeed_video_probe
	media: hantro: Set H264 FIELDPIC_FLAG_E flag correctly
	iommu/mediatek: Correct the flush_iotlb_all callback
	iommu/mediatek: Add a new tlb_lock for tlb_flush
	memory: mtk-smi: Add PM suspend and resume ops
	Revert "ubifs: Fix memory leak bug in alloc_ubifs_info() error path"
	ubifs: Fixed missed le64_to_cpu() in journal
	ubifs: do_kill_orphans: Fix a memory leak bug
	spi: sprd: Fix the incorrect SPI register
	mtd: spi-nor: fix silent truncation in spi_nor_read()
	mtd: spi-nor: fix silent truncation in spi_nor_read_raw()
	spi: pxa2xx: Set controller->max_transfer_size in dma mode
	spi: atmel: fix handling of cs_change set on non-last xfer
	spi: rspi: Use platform_get_irq_byname_optional() for optional irqs
	spi: lpspi: fix memory leak in fsl_lpspi_probe
	iwlwifi: mvm: consider ieee80211 station max amsdu value
	rtlwifi: Remove unnecessary NULL check in rtl_regd_init
	iwlwifi: mvm: fix support for single antenna diversity
	sch_cake: Add missing NLA policy entry TCA_CAKE_SPLIT_GSO
	f2fs: fix potential overflow
	NFSD fixing possible null pointer derefering in copy offload
	rtc: msm6242: Fix reading of 10-hour digit
	rtc: brcmstb-waketimer: add missed clk_disable_unprepare
	rtc: bd70528: Add MODULE ALIAS to autoload module
	gpio: mpc8xxx: Add platform device to gpiochip->parent
	scsi: libcxgbi: fix NULL pointer dereference in cxgbi_device_destroy()
	scsi: target/iblock: Fix protection error with blocks greater than 512B
	selftests: firmware: Fix it to do root uid check and skip
	rseq/selftests: Turn off timeout setting
	riscv: export flush_icache_all to modules
	mips: cacheinfo: report shared CPU map
	mips: Fix gettimeofday() in the vdso library
	tomoyo: Suppress RCU warning at list_for_each_entry_rcu().
	MIPS: Prevent link failure with kcov instrumentation
	drm/arm/mali: make malidp_mw_connector_helper_funcs static
	rxrpc: Unlock new call in rxrpc_new_incoming_call() rather than the caller
	rxrpc: Don't take call->user_mutex in rxrpc_new_incoming_call()
	rxrpc: Fix missing security check on incoming calls
	dmaengine: k3dma: Avoid null pointer traversal
	s390/qeth: lock the card while changing its hsuid
	ioat: ioat_alloc_ring() failure handling.
	drm/amdgpu: enable gfxoff for raven1 refresh
	media: intel-ipu3: Align struct ipu3_uapi_awb_fr_config_s to 32 bytes
	kbuild/deb-pkg: annotate libelf-dev dependency as :native
	hexagon: parenthesize registers in asm predicates
	hexagon: work around compiler crash
	ocfs2: call journal flush to mark journal as empty after journal recovery when mount
	Linux 5.4.13

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I90734cd9d80f000e05a8109a529916ae641cdede
2020-01-17 23:38:39 +01:00

796 lines
22 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* fs/mpage.c
*
* Copyright (C) 2002, Linus Torvalds.
*
* Contains functions related to preparing and submitting BIOs which contain
* multiple pagecache pages.
*
* 15May2002 Andrew Morton
* Initial version
* 27Jun2002 axboe@suse.de
* use bio_add_page() to build bio's just the right size
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/kdev_t.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/mm_inline.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/cleancache.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/android_fs.h>
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
/*
* I/O completion handler for multipage BIOs.
*
* The mpage code never puts partial pages into a BIO (except for end-of-file).
* If a page does not map to a contiguous run of blocks then it simply falls
* back to block_read_full_page().
*
* Why is this? If a page's completion depends on a number of different BIOs
* which can complete in any order (or at the same time) then determining the
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
static void mpage_end_io(struct bio *bio)
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
if (trace_android_fs_dataread_end_enabled() &&
(bio_data_dir(bio) == READ)) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL)
trace_android_fs_dataread_end(first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size);
}
bio_for_each_segment_all(bv, bio, iter_all) {
struct page *page = bv->bv_page;
page_endio(page, bio_op(bio),
blk_status_to_errno(bio->bi_status));
}
bio_put(bio);
}
static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
{
if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
first_page->mapping->host);
trace_android_fs_dataread_start(
first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size,
current->pid,
path,
current->comm);
}
}
bio->bi_end_io = mpage_end_io;
bio_set_op_attrs(bio, op, op_flags);
guard_bio_eod(bio);
submit_bio(bio);
return NULL;
}
static struct bio *
mpage_alloc(struct block_device *bdev,
sector_t first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;
/* Restrict the given (page cache) mask for slab allocations */
gfp_flags &= GFP_KERNEL;
bio = bio_alloc(gfp_flags, nr_vecs);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
while (!bio && (nr_vecs /= 2))
bio = bio_alloc(gfp_flags, nr_vecs);
}
if (bio) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_sector;
}
return bio;
}
/*
* support function for mpage_readpages. The fs supplied get_block might
* return an up to date buffer. This is used to map that buffer into
* the page, which allows readpage to avoid triggering a duplicate call
* to get_block.
*
* The idea is to avoid adding buffers to pages that don't already have
* them. So when the buffer is up to date and the page size == block size,
* this marks the page up to date instead of adding new buffers.
*/
static void
map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
{
struct inode *inode = page->mapping->host;
struct buffer_head *page_bh, *head;
int block = 0;
if (!page_has_buffers(page)) {
/*
* don't make any buffers if there is only one buffer on
* the page and the page just needs to be set up to date
*/
if (inode->i_blkbits == PAGE_SHIFT &&
buffer_uptodate(bh)) {
SetPageUptodate(page);
return;
}
create_empty_buffers(page, i_blocksize(inode), 0);
}
head = page_buffers(page);
page_bh = head;
do {
if (block == page_block) {
page_bh->b_state = bh->b_state;
page_bh->b_bdev = bh->b_bdev;
page_bh->b_blocknr = bh->b_blocknr;
break;
}
page_bh = page_bh->b_this_page;
block++;
} while (page_bh != head);
}
struct mpage_readpage_args {
struct bio *bio;
struct page *page;
unsigned int nr_pages;
bool is_readahead;
sector_t last_block_in_bio;
struct buffer_head map_bh;
unsigned long first_logical_block;
get_block_t *get_block;
};
/*
* This is the worker routine which does all the work of mapping the disk
* blocks and constructs largest possible bios, submits them for IO if the
* blocks are not contiguous on the disk.
*
* We pass a buffer_head back and forth and use its buffer_mapped() flag to
* represent the validity of its disk mapping and to decide when to do the next
* get_block() call.
*/
static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
{
struct page *page = args->page;
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
struct buffer_head *map_bh = &args->map_bh;
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
sector_t blocks[MAX_BUF_PER_PAGE];
unsigned page_block;
unsigned first_hole = blocks_per_page;
struct block_device *bdev = NULL;
int length;
int fully_mapped = 1;
int op_flags;
unsigned nblocks;
unsigned relative_block;
gfp_t gfp;
if (args->is_readahead) {
op_flags = REQ_RAHEAD;
gfp = readahead_gfp_mask(page->mapping);
} else {
op_flags = 0;
gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
}
if (page_has_buffers(page))
goto confused;
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + args->nr_pages * blocks_per_page;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
page_block = 0;
/*
* Map blocks using the result from the previous get_blocks call first.
*/
nblocks = map_bh->b_size >> blkbits;
if (buffer_mapped(map_bh) &&
block_in_file > args->first_logical_block &&
block_in_file < (args->first_logical_block + nblocks)) {
unsigned map_offset = block_in_file - args->first_logical_block;
unsigned last = nblocks - map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
clear_buffer_mapped(map_bh);
break;
}
if (page_block == blocks_per_page)
break;
blocks[page_block] = map_bh->b_blocknr + map_offset +
relative_block;
page_block++;
block_in_file++;
}
bdev = map_bh->b_bdev;
}
/*
* Then do more get_blocks calls until we are done with this page.
*/
map_bh->b_page = page;
while (page_block < blocks_per_page) {
map_bh->b_state = 0;
map_bh->b_size = 0;
if (block_in_file < last_block) {
map_bh->b_size = (last_block-block_in_file) << blkbits;
if (args->get_block(inode, block_in_file, map_bh, 0))
goto confused;
args->first_logical_block = block_in_file;
}
if (!buffer_mapped(map_bh)) {
fully_mapped = 0;
if (first_hole == blocks_per_page)
first_hole = page_block;
page_block++;
block_in_file++;
continue;
}
/* some filesystems will copy data into the page during
* the get_block call, in which case we don't want to
* read it again. map_buffer_to_page copies the data
* we just collected from get_block into the page's buffers
* so readpage doesn't have to repeat the get_block call
*/
if (buffer_uptodate(map_bh)) {
map_buffer_to_page(page, map_bh, page_block);
goto confused;
}
if (first_hole != blocks_per_page)
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
goto confused;
nblocks = map_bh->b_size >> blkbits;
for (relative_block = 0; ; relative_block++) {
if (relative_block == nblocks) {
clear_buffer_mapped(map_bh);
break;
} else if (page_block == blocks_per_page)
break;
blocks[page_block] = map_bh->b_blocknr+relative_block;
page_block++;
block_in_file++;
}
bdev = map_bh->b_bdev;
}
if (first_hole != blocks_per_page) {
zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
if (first_hole == 0) {
SetPageUptodate(page);
unlock_page(page);
goto out;
}
} else if (fully_mapped) {
SetPageMappedToDisk(page);
}
if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
cleancache_get_page(page) == 0) {
SetPageUptodate(page);
goto confused;
}
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
alloc_new:
if (args->bio == NULL) {
if (first_hole == blocks_per_page) {
if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
page))
goto out;
}
args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
min_t(int, args->nr_pages,
BIO_MAX_PAGES),
gfp);
if (args->bio == NULL)
goto confused;
}
length = first_hole << blkbits;
if (bio_add_page(args->bio, page, length, 0) < length) {
args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
goto alloc_new;
}
relative_block = block_in_file - args->first_logical_block;
nblocks = map_bh->b_size >> blkbits;
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
(first_hole != blocks_per_page))
args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
else
args->last_block_in_bio = blocks[blocks_per_page - 1];
out:
return args->bio;
confused:
if (args->bio)
args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
if (!PageUptodate(page))
block_read_full_page(page, args->get_block);
else
unlock_page(page);
goto out;
}
/**
* mpage_readpages - populate an address space with some pages & start reads against them
* @mapping: the address_space
* @pages: The address of a list_head which contains the target pages. These
* pages have their ->index populated and are otherwise uninitialised.
* The page at @pages->prev has the lowest file offset, and reads should be
* issued in @pages->prev to @pages->next order.
* @nr_pages: The number of pages at *@pages
* @get_block: The filesystem's block mapper function.
*
* This function walks the pages and the blocks within each page, building and
* emitting large BIOs.
*
* If anything unusual happens, such as:
*
* - encountering a page which has buffers
* - encountering a page which has a non-hole after a hole
* - encountering a page with non-contiguous blocks
*
* then this code just gives up and calls the buffer_head-based read function.
* It does handle a page which has holes at the end - that is a common case:
* the end-of-file on blocksize < PAGE_SIZE setups.
*
* BH_Boundary explanation:
*
* There is a problem. The mpage read code assembles several pages, gets all
* their disk mappings, and then submits them all. That's fine, but obtaining
* the disk mappings may require I/O. Reads of indirect blocks, for example.
*
* So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
* submitted in the following order:
*
* 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
*
* because the indirect block has to be read to get the mappings of blocks
* 13,14,15,16. Obviously, this impacts performance.
*
* So what we do it to allow the filesystem's get_block() function to set
* BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block
* after this one will require I/O against a block which is probably close to
* this one. So you should push what I/O you have currently accumulated.
*
* This all causes the disk requests to be issued in the correct order.
*/
int
mpage_readpages(struct address_space *mapping, struct list_head *pages,
unsigned nr_pages, get_block_t get_block)
{
struct mpage_readpage_args args = {
.get_block = get_block,
.is_readahead = true,
};
unsigned page_idx;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = lru_to_page(pages);
prefetchw(&page->flags);
list_del(&page->lru);
if (!add_to_page_cache_lru(page, mapping,
page->index,
readahead_gfp_mask(mapping))) {
args.page = page;
args.nr_pages = nr_pages - page_idx;
args.bio = do_mpage_readpage(&args);
}
put_page(page);
}
BUG_ON(!list_empty(pages));
if (args.bio)
mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
return 0;
}
EXPORT_SYMBOL(mpage_readpages);
/*
* This isn't called much at all
*/
int mpage_readpage(struct page *page, get_block_t get_block)
{
struct mpage_readpage_args args = {
.page = page,
.nr_pages = 1,
.get_block = get_block,
};
args.bio = do_mpage_readpage(&args);
if (args.bio)
mpage_bio_submit(REQ_OP_READ, 0, args.bio);
return 0;
}
EXPORT_SYMBOL(mpage_readpage);
/*
* Writing is not so simple.
*
* If the page has buffers then they will be used for obtaining the disk
* mapping. We only support pages which are fully mapped-and-dirty, with a
* special case for pages which are unmapped at the end: end-of-file.
*
* If the page has no buffers (preferred) then the page is mapped here.
*
* If all blocks are found to be contiguous then the page can go into the
* BIO. Otherwise fall back to the mapping's writepage().
*
* FIXME: This code wants an estimate of how many pages are still to be
* written, so it can intelligently allocate a suitably-sized BIO. For now,
* just allocate full-size (16-page) BIOs.
*/
struct mpage_data {
struct bio *bio;
sector_t last_block_in_bio;
get_block_t *get_block;
unsigned use_writepage;
};
/*
* We have our BIO, so we can now mark the buffers clean. Make
* sure to only clean buffers which we know we'll be writing.
*/
static void clean_buffers(struct page *page, unsigned first_unmapped)
{
unsigned buffer_counter = 0;
struct buffer_head *bh, *head;
if (!page_has_buffers(page))
return;
head = page_buffers(page);
bh = head;
do {
if (buffer_counter++ == first_unmapped)
break;
clear_buffer_dirty(bh);
bh = bh->b_this_page;
} while (bh != head);
/*
* we cannot drop the bh if the page is not uptodate or a concurrent
* readpage would fail to serialize with the bh and it would read from
* disk before we reach the platter.
*/
if (buffer_heads_over_limit && PageUptodate(page))
try_to_free_buffers(page);
}
/*
* For situations where we want to clean all buffers attached to a page.
* We don't need to calculate how many buffers are attached to the page,
* we just need to specify a number larger than the maximum number of buffers.
*/
void clean_page_buffers(struct page *page)
{
clean_buffers(page, ~0U);
}
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
struct address_space *mapping = page->mapping;
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
unsigned long end_index;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
sector_t last_block;
sector_t block_in_file;
sector_t blocks[MAX_BUF_PER_PAGE];
unsigned page_block;
unsigned first_unmapped = blocks_per_page;
struct block_device *bdev = NULL;
int boundary = 0;
sector_t boundary_block = 0;
struct block_device *boundary_bdev = NULL;
int length;
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
int op_flags = wbc_to_write_flags(wbc);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
/* If they're all mapped and dirty, do it */
page_block = 0;
do {
BUG_ON(buffer_locked(bh));
if (!buffer_mapped(bh)) {
/*
* unmapped dirty buffers are created by
* __set_page_dirty_buffers -> mmapped data
*/
if (buffer_dirty(bh))
goto confused;
if (first_unmapped == blocks_per_page)
first_unmapped = page_block;
continue;
}
if (first_unmapped != blocks_per_page)
goto confused; /* hole -> non-hole */
if (!buffer_dirty(bh) || !buffer_uptodate(bh))
goto confused;
if (page_block) {
if (bh->b_blocknr != blocks[page_block-1] + 1)
goto confused;
}
blocks[page_block++] = bh->b_blocknr;
boundary = buffer_boundary(bh);
if (boundary) {
boundary_block = bh->b_blocknr;
boundary_bdev = bh->b_bdev;
}
bdev = bh->b_bdev;
} while ((bh = bh->b_this_page) != head);
if (first_unmapped)
goto page_is_mapped;
/*
* Page has buffers, but they are all unmapped. The page was
* created by pagein or read over a hole which was handled by
* block_read_full_page(). If this address_space is also
* using mpage_readpages then this can rarely happen.
*/
goto confused;
}
/*
* The page has no buffers: map it to disk
*/
BUG_ON(!PageUptodate(page));
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = (i_size - 1) >> blkbits;
map_bh.b_page = page;
for (page_block = 0; page_block < blocks_per_page; ) {
map_bh.b_state = 0;
map_bh.b_size = 1 << blkbits;
if (mpd->get_block(inode, block_in_file, &map_bh, 1))
goto confused;
if (buffer_new(&map_bh))
clean_bdev_bh_alias(&map_bh);
if (buffer_boundary(&map_bh)) {
boundary_block = map_bh.b_blocknr;
boundary_bdev = map_bh.b_bdev;
}
if (page_block) {
if (map_bh.b_blocknr != blocks[page_block-1] + 1)
goto confused;
}
blocks[page_block++] = map_bh.b_blocknr;
boundary = buffer_boundary(&map_bh);
bdev = map_bh.b_bdev;
if (block_in_file == last_block)
break;
block_in_file++;
}
BUG_ON(page_block == 0);
first_unmapped = page_block;
page_is_mapped:
end_index = i_size >> PAGE_SHIFT;
if (page->index >= end_index) {
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
* "A file is mapped in multiples of the page size. For a file
* that is not a multiple of the page size, the remaining memory
* is zeroed when mapped, and writes to that region are not
* written out to the file."
*/
unsigned offset = i_size & (PAGE_SIZE - 1);
if (page->index > end_index || !offset)
goto confused;
zero_user_segment(page, offset, PAGE_SIZE);
}
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
alloc_new:
if (bio == NULL) {
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
page, wbc))
goto out;
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
goto confused;
wbc_init_bio(wbc, bio);
bio->bi_write_hint = inode->i_write_hint;
}
/*
* Must try to add the page before marking the buffer clean or
* the confused fail path above (OOM) will be very confused when
* it finds all bh marked clean (i.e. it will not write anything)
*/
wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
goto alloc_new;
}
clean_buffers(page, first_unmapped);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
}
} else {
mpd->last_block_in_bio = blocks[blocks_per_page - 1];
}
goto out;
confused:
if (bio)
bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
} else {
ret = -EAGAIN;
goto out;
}
/*
* The caller has a ref on the inode, so *mapping is stable
*/
mapping_set_error(mapping, ret);
out:
mpd->bio = bio;
return ret;
}
/**
* mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @get_block: the filesystem's block mapper function.
* If this is NULL then use a_ops->writepage. Otherwise, go
* direct-to-BIO.
*
* This is a library function, which implements the writepages()
* address_space_operation.
*
* If a page is already under I/O, generic_writepages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback,
* but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
* and msync() need to guarantee that all the data which was dirty at the time
* the call was made get new I/O started against them. If wbc->sync_mode is
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
int
mpage_writepages(struct address_space *mapping,
struct writeback_control *wbc, get_block_t get_block)
{
struct blk_plug plug;
int ret;
blk_start_plug(&plug);
if (!get_block)
ret = generic_writepages(mapping, wbc);
else {
struct mpage_data mpd = {
.bio = NULL,
.last_block_in_bio = 0,
.get_block = get_block,
.use_writepage = 1,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
if (mpd.bio) {
int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0);
mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
}
blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL(mpage_writepages);
int mpage_writepage(struct page *page, get_block_t get_block,
struct writeback_control *wbc)
{
struct mpage_data mpd = {
.bio = NULL,
.last_block_in_bio = 0,
.get_block = get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
if (mpd.bio) {
int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0);
mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
return ret;
}
EXPORT_SYMBOL(mpage_writepage);