c4f92aff87
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmFLBPMACgkQONu9yGCS aT6BIQ//Wb4ZQJtEVvaKnda7vFwe8BoZzPGYZA4Imn9KERDRgHuavEuRfMQtKc2y YHwe/PD2JreuDHcd+Wz32xsdMe045xNvgiE1oGcxq0jNBvhJqANSmVTWpdqAquON cTmwsK3roa7ELC2g1WjrYZDv6CrCggqvbuM9AJ/cLITtd8zerhLdZo+CCDG/28cH EosrWvkBcaGmX+r/IBC86Rt6K2OFQ/3LLbb79L4vjKi5lopsm5CTAmfOfIk8p1gB mGB3PkQZnIqphBfqGXLGuljl4e+zb1SONrugUh78Egom393Ex34oo+RjWEGe9dV2 Stkuqo0GTi85X7JA7SGCA/xgF8A8yvaaLjQBsJsL9+2ji+GW+J7hfn4mE5h8H3Di UBjeLMFJA8Mge8Ng9xUSttvjRdwSTm0jWTS9SOl07w24b0pKYbMrQdWt2eI6CT+/ ytq3nCxNJZKeVcAVH+OJNrbSLYvMy/PgYvGTbzASkNmpAeyNiHOyBz1sRcoiAM9U QCWDdZyaqDKktqEyKHxK3opqPzbnHfZFFlCxR7Gw7vvR+itIGJEh/50RNv2F6vnu wzowrVxe+Bf1h7JiNEqLLVHdiuygRqjH1ygepGM4+3TVF4jYHzDISyrqlA/Se3Pg Hhvlzsbv7PH+KiApwBFjSeHTs5WOrokGMFQ7ZYFDpPkleWiywS0= =50Hk -----END PGP SIGNATURE----- Merge 5.4.148 into android11-5.4-lts Changes in 5.4.148 rtc: tps65910: Correct driver module alias btrfs: wake up async_delalloc_pages waiters after submit btrfs: reset replace target device to allocation state on close blk-zoned: allow zone management send operations without CAP_SYS_ADMIN blk-zoned: allow BLKREPORTZONE without CAP_SYS_ADMIN PCI/MSI: Skip masking MSI-X on Xen PV powerpc/perf/hv-gpci: Fix counter value parsing xen: fix setting of max_pfn in shared_info include/linux/list.h: add a macro to test if entry is pointing to the head 9p/xen: Fix end of loop tests for list_for_each_entry tools/thermal/tmon: Add cross compiling support pinctrl: stmfx: Fix hazardous u8[] to unsigned long cast pinctrl: ingenic: Fix incorrect pull up/down info soc: qcom: aoss: Fix the out of bound usage of cooling_devs soc: aspeed: lpc-ctrl: Fix boundary check for mmap soc: aspeed: p2a-ctrl: Fix boundary check for mmap arm64: head: avoid over-mapping in map_memory crypto: public_key: fix overflow during implicit conversion block: bfq: fix bfq_set_next_ioprio_data() power: supply: max17042: handle fails of reading status register dm crypt: Avoid percpu_counter 
spinlock contention in crypt_page_alloc() VMCI: fix NULL pointer dereference when unmapping queue pair media: uvc: don't do DMA on stack media: rc-loopback: return number of emitters rather than error Revert "dmaengine: imx-sdma: refine to load context only once" dmaengine: imx-sdma: remove duplicated sdma_load_context libata: add ATA_HORKAGE_NO_NCQ_TRIM for Samsung 860 and 870 SSDs ARM: 9105/1: atags_to_fdt: don't warn about stack size PCI/portdrv: Enable Bandwidth Notification only if port supports it PCI: Restrict ASMedia ASM1062 SATA Max Payload Size Supported PCI: Return ~0 data on pciconfig_read() CAP_SYS_ADMIN failure PCI: xilinx-nwl: Enable the clock through CCF PCI: aardvark: Fix checking for PIO status PCI: aardvark: Increase polling delay to 1.5s while waiting for PIO response PCI: aardvark: Fix masking and unmasking legacy INTx interrupts HID: input: do not report stylus battery state as "full" f2fs: quota: fix potential deadlock scsi: bsg: Remove support for SCSI_IOCTL_SEND_COMMAND IB/hfi1: Adjust pkey entry in index 0 RDMA/iwcm: Release resources if iw_cm module initialization fails docs: Fix infiniband uverbs minor number pinctrl: samsung: Fix pinctrl bank pin count vfio: Use config not menuconfig for VFIO_NOIOMMU powerpc/stacktrace: Include linux/delay.h RDMA/efa: Remove double QP type assignment f2fs: show f2fs instance in printk_ratelimited f2fs: reduce the scope of setting fsck tag when de->name_len is zero openrisc: don't printk() unconditionally dma-debug: fix debugfs initialization order SUNRPC: Fix potential memory corruption scsi: fdomain: Fix error return code in fdomain_probe() pinctrl: single: Fix error return code in pcs_parse_bits_in_pinctrl_entry() scsi: smartpqi: Fix an error code in pqi_get_raid_map() scsi: qedi: Fix error codes in qedi_alloc_global_queues() scsi: qedf: Fix error codes in qedf_alloc_global_queues() powerpc/config: Renable MTD_PHYSMAP_OF scsi: target: avoid per-loop XCOPY buffer allocations HID: i2c-hid: Fix Elan 
touchpad regression KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live platform/x86: dell-smbios-wmi: Add missing kfree in error-exit from run_smbios_call fscache: Fix cookie key hashing clk: at91: sam9x60: Don't use audio PLL clk: at91: clk-generated: pass the id of changeable parent at registration clk: at91: clk-generated: Limit the requested rate to our range KVM: PPC: Fix clearing never mapped TCEs in realmode f2fs: fix to account missing .skipped_gc_rwsem f2fs: fix unexpected ENOENT comes from f2fs_map_blocks() f2fs: fix to unmap pages from userspace process in punch_hole() MIPS: Malta: fix alignment of the devicetree buffer kbuild: Fix 'no symbols' warning when CONFIG_TRIM_UNUSD_KSYMS=y userfaultfd: prevent concurrent API initialization drm/amdgpu: Fix amdgpu_ras_eeprom_init() ASoC: atmel: ATMEL drivers don't need HAS_DMA media: dib8000: rewrite the init prbs logic crypto: mxs-dcp - Use sg_mapping_iter to copy data PCI: Use pci_update_current_state() in pci_enable_device_flags() tipc: keep the skb in rcv queue until the whole data is read iio: dac: ad5624r: Fix incorrect handling of an optional regulator. 
iavf: do not override the adapter state in the watchdog task iavf: fix locking of critical sections ARM: dts: qcom: apq8064: correct clock names video: fbdev: kyro: fix a DoS bug by restricting user input netlink: Deal with ESRCH error in nlmsg_notify() Smack: Fix wrong semantics in smk_access_entry() drm: avoid blocking in drm_clients_info's rcu section igc: Check if num of q_vectors is smaller than max before array access usb: host: fotg210: fix the endpoint's transactional opportunities calculation usb: host: fotg210: fix the actual_length of an iso packet usb: gadget: u_ether: fix a potential null pointer dereference USB: EHCI: ehci-mv: improve error handling in mv_ehci_enable() usb: gadget: composite: Allow bMaxPower=0 if self-powered staging: board: Fix uninitialized spinlock when attaching genpd tty: serial: jsm: hold port lock when reporting modem line changes drm/amd/display: Fix timer_per_pixel unit error drm/amd/amdgpu: Update debugfs link_settings output link_rate field in hex bpf/tests: Fix copy-and-paste error in double word test bpf/tests: Do not PASS tests without actually testing the result video: fbdev: asiliantfb: Error out if 'pixclock' equals zero video: fbdev: kyro: Error out if 'pixclock' equals zero video: fbdev: riva: Error out if 'pixclock' equals zero ipv4: ip_output.c: Fix out-of-bounds warning in ip_copy_addrs() flow_dissector: Fix out-of-bounds warnings s390/jump_label: print real address in a case of a jump label bug s390: make PCI mio support a machine flag serial: 8250: Define RX trigger levels for OxSemi 950 devices xtensa: ISS: don't panic in rs_init hvsi: don't panic on tty_register_driver failure serial: 8250_pci: make setup_port() parameters explicitly unsigned staging: ks7010: Fix the initialization of the 'sleep_status' structure samples: bpf: Fix tracex7 error raised on the missing argument ata: sata_dwc_460ex: No need to call phy_exit() befre phy_init() Bluetooth: skip invalid hci_sync_conn_complete_evt workqueue: Fix 
possible memory leaks in wq_numa_init() bonding: 3ad: fix the concurrency between __bond_release_one() and bond_3ad_state_machine_handler() arm64: tegra: Fix Tegra194 PCIe EP compatible string ASoC: Intel: bytcr_rt5640: Move "Platform Clock" routes to the maps for the matching in-/output media: imx258: Rectify mismatch of VTS value media: imx258: Limit the max analogue gain to 480 media: v4l2-dv-timings.c: fix wrong condition in two for-loops media: TDA1997x: fix tda1997x_query_dv_timings() return value media: tegra-cec: Handle errors of clk_prepare_enable() ARM: dts: imx53-ppd: Fix ACHC entry arm64: dts: qcom: sdm660: use reg value for memory node net: ethernet: stmmac: Do not use unreachable() in ipq806x_gmac_probe() drm/msm: mdp4: drop vblank get/put from prepare/complete_commit selftests/bpf: Fix xdp_tx.c prog section name Bluetooth: schedule SCO timeouts with delayed_work Bluetooth: avoid circular locks in sco_sock_connect net/mlx5: Fix variable type to match 64bit gpu: drm: amd: amdgpu: amdgpu_i2c: fix possible uninitialized-variable access in amdgpu_i2c_router_select_ddc_port() drm/display: fix possible null-pointer dereference in dcn10_set_clock() mac80211: Fix monitor MTU limit so that A-MSDUs get through ARM: tegra: tamonten: Fix UART pad setting arm64: tegra: Fix compatible string for Tegra132 CPUs arm64: dts: ls1046a: fix eeprom entries nvme-tcp: don't check blk_mq_tag_to_rq when receiving pdu data Bluetooth: Fix handling of LE Enhanced Connection Complete opp: Don't print an error if required-opps is missing serial: sh-sci: fix break handling for sysrq tcp: enable data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD rpc: fix gss_svc_init cleanup on failure staging: rts5208: Fix get_ms_information() heap buffer size gfs2: Don't call dlm after protocol is unmounted usb: chipidea: host: fix port index underflow and UBSAN complains lockd: lockd server-side shouldn't set fl_ops drm/exynos: Always initialize mapping in exynos_drm_register_dma() 
m68knommu: only set CONFIG_ISA_DMA_API for ColdFire sub-arch btrfs: tree-log: check btrfs_lookup_data_extent return value ASoC: Intel: Skylake: Fix module configuration for KPB and MIXER ASoC: Intel: Skylake: Fix passing loadable flag for module of: Don't allow __of_attached_node_sysfs() without CONFIG_SYSFS mmc: sdhci-of-arasan: Check return value of non-void funtions mmc: rtsx_pci: Fix long reads when clock is prescaled selftests/bpf: Enlarge select() timeout for test_maps mmc: core: Return correct emmc response in case of ioctl error cifs: fix wrong release in sess_alloc_buffer() failed path Revert "USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set" usb: musb: musb_dsps: request_irq() after initializing musb usbip: give back URBs for unsent unlink requests during cleanup usbip:vhci_hcd USB port can get stuck in the disabled state ASoC: rockchip: i2s: Fix regmap_ops hang ASoC: rockchip: i2s: Fixup config for DAIFMT_DSP_A/B drm/amdkfd: Account for SH/SE count when setting up cu masks. 
iwlwifi: mvm: fix a memory leak in iwl_mvm_mac_ctxt_beacon_changed iwlwifi: mvm: avoid static queue number aliasing iwlwifi: mvm: fix access to BSS elements net/mlx5: DR, Enable QP retransmission parport: remove non-zero check on count ath9k: fix OOB read ar9300_eeprom_restore_internal ath9k: fix sleeping in atomic context net: fix NULL pointer reference in cipso_v4_doi_free fix array-index-out-of-bounds in taprio_change net: w5100: check return value after calling platform_get_resource() parisc: fix crash with signals and alloca ovl: fix BUG_ON() in may_delete() when called from ovl_cleanup() scsi: BusLogic: Fix missing pr_cont() use scsi: qla2xxx: Changes to support kdump kernel scsi: qla2xxx: Sync queue idx with queue_pair_map idx cpufreq: powernv: Fix init_chip_info initialization in numa=off s390/pv: fix the forcing of the swiotlb mm/hugetlb: initialize hugetlb_usage in mm_init mm,vmscan: fix divide by zero in get_scan_count memcg: enable accounting for pids in nested pid namespaces platform/chrome: cros_ec_proto: Send command again when timeout occurs lib/test_stackinit: Fix static initializer test net: dsa: lantiq_gswip: fix maximum frame length drm/msi/mdp4: populate priv->kms in mdp4_kms_init drm/amdgpu: Fix BUG_ON assert drm/panfrost: Simplify lock_region calculation drm/panfrost: Use u64 for size in lock_region drm/panfrost: Clamp lock region to Bifrost minimum btrfs: fix upper limit for max_inline for page size 64K xen: reset legacy rtc flag for PV domU bnx2x: Fix enabling network interfaces without VFs arm64/sve: Use correct size when reinitialising SVE state PM: base: power: don't try to use non-existing RTC for storing data PCI: Add AMD GPU multi-function power dependencies drm/amd/amdgpu: Increase HWIP_MAX_INSTANCE to 10 drm/etnaviv: return context from etnaviv_iommu_context_get drm/etnaviv: put submit prev MMU context when it exists drm/etnaviv: stop abusing mmu_context as FE running marker drm/etnaviv: keep MMU context across runtime 
suspend/resume drm/etnaviv: exec and MMU state is lost when resetting the GPU drm/etnaviv: fix MMU context leak on GPU reset drm/etnaviv: reference MMU context when setting up hardware state drm/etnaviv: add missing MMU context put when reaping MMU mapping s390/sclp: fix Secure-IPL facility detection x86/mm: Fix kern_addr_valid() to cope with existing but not present entries tipc: fix an use-after-free issue in tipc_recvmsg net-caif: avoid user-triggerable WARN_ON(1) ptp: dp83640: don't define PAGE0 dccp: don't duplicate ccid when cloning dccp sock net/l2tp: Fix reference count leak in l2tp_udp_recv_core r6040: Restore MDIO clock frequency after MAC reset tipc: increase timeout in tipc_sk_enqueue() perf machine: Initialize srcline string member in add_location struct net/mlx5: FWTrace, cancel work on alloc pd error flow net/mlx5: Fix potential sleeping in atomic context events: Reuse value read using READ_ONCE instead of re-reading it vhost_net: fix OoB on sendmsg() failure. net/af_unix: fix a data-race in unix_dgram_poll net: dsa: destroy the phylink instance on any error in dsa_slave_phy_setup tcp: fix tp->undo_retrans accounting in tcp_sacktag_one() qed: Handle management FW error dt-bindings: arm: Fix Toradex compatible typo ibmvnic: check failover_pending in login response KVM: PPC: Book3S HV: Tolerate treclaim. in fake-suspend mode changing registers net: hns3: pad the short tunnel frame before sending to hardware net: hns3: change affinity_mask to numa node range net: hns3: disable mac in flr process net: hns3: fix the timing issue of VF clearing interrupt sources mm/memory_hotplug: use "unsigned long" for PFN in zone_for_pfn_range() dt-bindings: mtd: gpmc: Fix the ECC bytes vs. 
OOB bytes equation mfd: db8500-prcmu: Adjust map to reality PCI: Add ACS quirks for NXP LX2xx0 and LX2xx2 platforms fuse: fix use after free in fuse_read_interrupt() mfd: Don't use irq_create_mapping() to resolve a mapping tracing/probes: Reject events which have the same name of existing one PCI: Add ACS quirks for Cavium multi-function devices Set fc_nlinfo in nh_create_ipv4, nh_create_ipv6 net: usb: cdc_mbim: avoid altsetting toggling for Telit LN920 block, bfq: honor already-setup queue merges PCI: ibmphp: Fix double unmap of io_mem ethtool: Fix an error code in cxgb2.c NTB: Fix an error code in ntb_msit_probe() NTB: perf: Fix an error code in perf_setup_inbuf() mfd: axp20x: Update AXP288 volatile ranges PCI: Fix pci_dev_str_match_path() alloc while atomic bug mfd: tqmx86: Clear GPIO IRQ resource when no IRQ is set KVM: arm64: Handle PSCI resets before userspace touches vCPU state PCI: Sync __pci_register_driver() stub for CONFIG_PCI=n mtd: rawnand: cafe: Fix a resource leak in the error handling path of 'cafe_nand_probe()' ARC: export clear_user_page() for modules perf unwind: Do not overwrite FEATURE_CHECK_LDFLAGS-libunwind-{x86,aarch64} net: dsa: b53: Fix calculating number of switch ports netfilter: socket: icmp6: fix use-after-scope fq_codel: reject silly quantum parameters qlcnic: Remove redundant unlock in qlcnic_pinit_from_rom ip_gre: validate csum_start only on pull net: renesas: sh_eth: Fix freeing wrong tx descriptor s390/bpf: Fix optimizing out zero-extensions s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant Linux 5.4.148 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I8613b511cb543a7ce0d1623663fc1306aaa45af1
475 lines
12 KiB
C
475 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Zoned block device handling
|
|
*
|
|
* Copyright (c) 2015, Hannes Reinecke
|
|
* Copyright (c) 2015, SUSE Linux GmbH
|
|
*
|
|
* Copyright (c) 2016, Damien Le Moal
|
|
* Copyright (c) 2016, Western Digital
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/sched/mm.h>
|
|
|
|
#include "blk.h"
|
|
|
|
/*
 * Round @sector down to a multiple of the zone size of @q.
 *
 * The zone size returned by blk_queue_zone_sectors() is used to build a bit
 * mask, so this assumes the zone size is a power of two.
 */
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_sectors = blk_queue_zone_sectors(q);

	return sector & ~(zone_sectors - 1);
}
|
|
|
|
/*
|
|
* Return true if a request is a write requests that needs zone write locking.
|
|
*/
|
|
bool blk_req_needs_zone_write_lock(struct request *rq)
|
|
{
|
|
if (!rq->q->seq_zones_wlock)
|
|
return false;
|
|
|
|
if (blk_rq_is_passthrough(rq))
|
|
return false;
|
|
|
|
switch (req_op(rq)) {
|
|
case REQ_OP_WRITE_ZEROES:
|
|
case REQ_OP_WRITE_SAME:
|
|
case REQ_OP_WRITE:
|
|
return blk_rq_zone_is_seq(rq);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
|
|
|
|
void __blk_req_zone_write_lock(struct request *rq)
|
|
{
|
|
if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
|
|
rq->q->seq_zones_wlock)))
|
|
return;
|
|
|
|
WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
|
|
rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
|
|
|
|
void __blk_req_zone_write_unlock(struct request *rq)
|
|
{
|
|
rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
|
|
if (rq->q->seq_zones_wlock)
|
|
WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
|
|
rq->q->seq_zones_wlock));
|
|
}
|
|
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
|
|
|
|
/**
|
|
* blkdev_nr_zones - Get number of zones
|
|
* @disk: Target gendisk
|
|
*
|
|
* Return the total number of zones of a zoned block device. For a block
|
|
* device without zone capabilities, the number of zones is always 0.
|
|
*/
|
|
unsigned int blkdev_nr_zones(struct gendisk *disk)
|
|
{
|
|
sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);
|
|
|
|
if (!blk_queue_is_zoned(disk->queue))
|
|
return 0;
|
|
return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
|
|
|
|
/**
|
|
* blkdev_report_zones - Get zones information
|
|
* @bdev: Target block device
|
|
* @sector: Sector from which to report zones
|
|
* @nr_zones: Maximum number of zones to report
|
|
* @cb: Callback function called for each reported zone
|
|
* @data: Private data for the callback
|
|
*
|
|
* Description:
|
|
* Get zone information starting from the zone containing @sector for at most
|
|
* @nr_zones, and call @cb for each zone reported by the device.
|
|
* To report all zones in a device starting from @sector, the BLK_ALL_ZONES
|
|
* constant can be passed to @nr_zones.
|
|
* Returns the number of zones reported by the device, or a negative errno
|
|
* value in case of failure.
|
|
*
|
|
* Note: The caller must use memalloc_noXX_save/restore() calls to control
|
|
* memory allocations done within this function.
|
|
*/
|
|
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
|
|
unsigned int nr_zones, report_zones_cb cb, void *data)
|
|
{
|
|
struct gendisk *disk = bdev->bd_disk;
|
|
sector_t capacity = get_capacity(disk);
|
|
|
|
if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
|
|
WARN_ON_ONCE(!disk->fops->report_zones))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!nr_zones || sector >= capacity)
|
|
return 0;
|
|
|
|
return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
|
|
}
|
|
EXPORT_SYMBOL_GPL(blkdev_report_zones);
|
|
|
|
static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
|
|
sector_t sector,
|
|
sector_t nr_sectors)
|
|
{
|
|
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
|
|
return false;
|
|
|
|
/*
|
|
* REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
|
|
* of the applicable zone range is the entire disk.
|
|
*/
|
|
return !sector && nr_sectors == get_capacity(bdev->bd_disk);
|
|
}
|
|
|
|
/**
|
|
* blkdev_zone_mgmt - Execute a zone management operation on a range of zones
|
|
* @bdev: Target block device
|
|
* @op: Operation to be performed on the zones
|
|
* @sector: Start sector of the first zone to operate on
|
|
* @nr_sectors: Number of sectors, should be at least the length of one zone and
|
|
* must be zone size aligned.
|
|
* @gfp_mask: Memory allocation flags (for bio_alloc)
|
|
*
|
|
* Description:
|
|
* Perform the specified operation on the range of zones specified by
|
|
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
|
|
* is valid, but the specified range should not contain conventional zones.
|
|
* The operation to execute on each zone can be a zone reset, open, close
|
|
* or finish request.
|
|
*/
|
|
int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
|
|
sector_t sector, sector_t nr_sectors,
|
|
gfp_t gfp_mask)
|
|
{
|
|
struct request_queue *q = bdev_get_queue(bdev);
|
|
sector_t zone_sectors = blk_queue_zone_sectors(q);
|
|
sector_t capacity = get_capacity(bdev->bd_disk);
|
|
sector_t end_sector = sector + nr_sectors;
|
|
struct bio *bio = NULL;
|
|
int ret;
|
|
|
|
if (!blk_queue_is_zoned(q))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (bdev_read_only(bdev))
|
|
return -EPERM;
|
|
|
|
if (!op_is_zone_mgmt(op))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!nr_sectors || end_sector > capacity)
|
|
/* Out of range */
|
|
return -EINVAL;
|
|
|
|
/* Check alignment (handle eventual smaller last zone) */
|
|
if (sector & (zone_sectors - 1))
|
|
return -EINVAL;
|
|
|
|
if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
|
|
return -EINVAL;
|
|
|
|
while (sector < end_sector) {
|
|
bio = blk_next_bio(bio, 0, gfp_mask);
|
|
bio_set_dev(bio, bdev);
|
|
|
|
/*
|
|
* Special case for the zone reset operation that reset all
|
|
* zones, this is useful for applications like mkfs.
|
|
*/
|
|
if (op == REQ_OP_ZONE_RESET &&
|
|
blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
|
|
bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
|
|
break;
|
|
}
|
|
|
|
bio->bi_opf = op;
|
|
bio->bi_iter.bi_sector = sector;
|
|
sector += zone_sectors;
|
|
|
|
/* This may take a while, so be nice to others */
|
|
cond_resched();
|
|
}
|
|
|
|
ret = submit_bio_wait(bio);
|
|
bio_put(bio);
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
|
|
|
|
struct zone_report_args {
|
|
struct blk_zone __user *zones;
|
|
};
|
|
|
|
static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
|
|
void *data)
|
|
{
|
|
struct zone_report_args *args = data;
|
|
|
|
if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
|
|
return -EFAULT;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* BLKREPORTZONE ioctl processing.
|
|
* Called from blkdev_ioctl.
|
|
*/
|
|
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
|
unsigned int cmd, unsigned long arg)
|
|
{
|
|
void __user *argp = (void __user *)arg;
|
|
struct zone_report_args args;
|
|
struct request_queue *q;
|
|
struct blk_zone_report rep;
|
|
int ret;
|
|
|
|
if (!argp)
|
|
return -EINVAL;
|
|
|
|
q = bdev_get_queue(bdev);
|
|
if (!q)
|
|
return -ENXIO;
|
|
|
|
if (!blk_queue_is_zoned(q))
|
|
return -ENOTTY;
|
|
|
|
if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
|
|
return -EFAULT;
|
|
|
|
if (!rep.nr_zones)
|
|
return -EINVAL;
|
|
|
|
args.zones = argp + sizeof(struct blk_zone_report);
|
|
ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
|
|
blkdev_copy_zone_to_user, &args);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
rep.nr_zones = ret;
|
|
if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
|
|
return -EFAULT;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
|
|
* Called from blkdev_ioctl.
|
|
*/
|
|
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
|
|
unsigned int cmd, unsigned long arg)
|
|
{
|
|
void __user *argp = (void __user *)arg;
|
|
struct request_queue *q;
|
|
struct blk_zone_range zrange;
|
|
enum req_opf op;
|
|
|
|
if (!argp)
|
|
return -EINVAL;
|
|
|
|
q = bdev_get_queue(bdev);
|
|
if (!q)
|
|
return -ENXIO;
|
|
|
|
if (!blk_queue_is_zoned(q))
|
|
return -ENOTTY;
|
|
|
|
if (!(mode & FMODE_WRITE))
|
|
return -EBADF;
|
|
|
|
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
|
|
return -EFAULT;
|
|
|
|
switch (cmd) {
|
|
case BLKRESETZONE:
|
|
op = REQ_OP_ZONE_RESET;
|
|
break;
|
|
case BLKOPENZONE:
|
|
op = REQ_OP_ZONE_OPEN;
|
|
break;
|
|
case BLKCLOSEZONE:
|
|
op = REQ_OP_ZONE_CLOSE;
|
|
break;
|
|
case BLKFINISHZONE:
|
|
op = REQ_OP_ZONE_FINISH;
|
|
break;
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
|
|
return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
|
|
GFP_KERNEL);
|
|
}
|
|
|
|
static inline unsigned long *blk_alloc_zone_bitmap(int node,
|
|
unsigned int nr_zones)
|
|
{
|
|
return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
|
|
GFP_NOIO, node);
|
|
}
|
|
|
|
void blk_queue_free_zone_bitmaps(struct request_queue *q)
|
|
{
|
|
kfree(q->conv_zones_bitmap);
|
|
q->conv_zones_bitmap = NULL;
|
|
kfree(q->seq_zones_wlock);
|
|
q->seq_zones_wlock = NULL;
|
|
}
|
|
|
|
struct blk_revalidate_zone_args {
|
|
struct gendisk *disk;
|
|
unsigned long *conv_zones_bitmap;
|
|
unsigned long *seq_zones_wlock;
|
|
unsigned int nr_zones;
|
|
sector_t zone_sectors;
|
|
sector_t sector;
|
|
};
|
|
|
|
/*
|
|
* Helper function to check the validity of zones of a zoned block device.
|
|
*/
|
|
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
|
void *data)
|
|
{
|
|
struct blk_revalidate_zone_args *args = data;
|
|
struct gendisk *disk = args->disk;
|
|
struct request_queue *q = disk->queue;
|
|
sector_t capacity = get_capacity(disk);
|
|
|
|
/*
|
|
* All zones must have the same size, with the exception on an eventual
|
|
* smaller last zone.
|
|
*/
|
|
if (zone->start == 0) {
|
|
if (zone->len == 0 || !is_power_of_2(zone->len)) {
|
|
pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
|
|
disk->disk_name, zone->len);
|
|
return -ENODEV;
|
|
}
|
|
|
|
args->zone_sectors = zone->len;
|
|
args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
|
|
} else if (zone->start + args->zone_sectors < capacity) {
|
|
if (zone->len != args->zone_sectors) {
|
|
pr_warn("%s: Invalid zoned device with non constant zone size\n",
|
|
disk->disk_name);
|
|
return -ENODEV;
|
|
}
|
|
} else {
|
|
if (zone->len > args->zone_sectors) {
|
|
pr_warn("%s: Invalid zoned device with larger last zone size\n",
|
|
disk->disk_name);
|
|
return -ENODEV;
|
|
}
|
|
}
|
|
|
|
/* Check for holes in the zone report */
|
|
if (zone->start != args->sector) {
|
|
pr_warn("%s: Zone gap at sectors %llu..%llu\n",
|
|
disk->disk_name, args->sector, zone->start);
|
|
return -ENODEV;
|
|
}
|
|
|
|
/* Check zone type */
|
|
switch (zone->type) {
|
|
case BLK_ZONE_TYPE_CONVENTIONAL:
|
|
if (!args->conv_zones_bitmap) {
|
|
args->conv_zones_bitmap =
|
|
blk_alloc_zone_bitmap(q->node, args->nr_zones);
|
|
if (!args->conv_zones_bitmap)
|
|
return -ENOMEM;
|
|
}
|
|
set_bit(idx, args->conv_zones_bitmap);
|
|
break;
|
|
case BLK_ZONE_TYPE_SEQWRITE_REQ:
|
|
case BLK_ZONE_TYPE_SEQWRITE_PREF:
|
|
if (!args->seq_zones_wlock) {
|
|
args->seq_zones_wlock =
|
|
blk_alloc_zone_bitmap(q->node, args->nr_zones);
|
|
if (!args->seq_zones_wlock)
|
|
return -ENOMEM;
|
|
}
|
|
break;
|
|
default:
|
|
pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
|
|
disk->disk_name, (int)zone->type, zone->start);
|
|
return -ENODEV;
|
|
}
|
|
|
|
args->sector += zone->len;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
|
|
* @disk: Target disk
|
|
*
|
|
* Helper function for low-level device drivers to (re) allocate and initialize
|
|
* a disk request queue zone bitmaps. This functions should normally be called
|
|
* within the disk ->revalidate method for blk-mq based drivers. For BIO based
|
|
* drivers only q->nr_zones needs to be updated so that the sysfs exposed value
|
|
* is correct.
|
|
*/
|
|
int blk_revalidate_disk_zones(struct gendisk *disk)
|
|
{
|
|
struct request_queue *q = disk->queue;
|
|
struct blk_revalidate_zone_args args = {
|
|
.disk = disk,
|
|
};
|
|
unsigned int noio_flag;
|
|
int ret;
|
|
|
|
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
|
|
return -EIO;
|
|
if (WARN_ON_ONCE(!queue_is_mq(q)))
|
|
return -EIO;
|
|
|
|
/*
|
|
* Ensure that all memory allocations in this context are done as if
|
|
* GFP_NOIO was specified.
|
|
*/
|
|
noio_flag = memalloc_noio_save();
|
|
ret = disk->fops->report_zones(disk, 0, UINT_MAX,
|
|
blk_revalidate_zone_cb, &args);
|
|
memalloc_noio_restore(noio_flag);
|
|
|
|
/*
|
|
* Install the new bitmaps and update nr_zones only once the queue is
|
|
* stopped and all I/Os are completed (i.e. a scheduler is not
|
|
* referencing the bitmaps).
|
|
*/
|
|
blk_mq_freeze_queue(q);
|
|
if (ret >= 0) {
|
|
blk_queue_chunk_sectors(q, args.zone_sectors);
|
|
q->nr_zones = args.nr_zones;
|
|
swap(q->seq_zones_wlock, args.seq_zones_wlock);
|
|
swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
|
|
ret = 0;
|
|
} else {
|
|
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
|
|
blk_queue_free_zone_bitmaps(q);
|
|
}
|
|
blk_mq_unfreeze_queue(q);
|
|
|
|
kfree(args.seq_zones_wlock);
|
|
kfree(args.conv_zones_bitmap);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
|