android_kernel_xiaomi_sm8350/mm/memory_hotplug.c
Srinivasarao Pathipati 16e939c6e4 Merge android11-5.4.161+ (b9d179c) into msm-5.4
* refs/heads/tmp-b9d179c:
  UPSTREAM: driver core: Fix possible memory leak in device_link_add()
  UPSTREAM: blk-mq: fix kernel panic during iterating over flush request
  UPSTREAM: net: xfrm: fix memory leak in xfrm_user_rcv_msg
  UPSTREAM: binder: fix the missing BR_FROZEN_REPLY in binder_return_strings
  ANDROID: incremental-fs: fix mount_fs issue
  UPSTREAM: vfs: fs_context: fix up param length parsing in legacy_parse_param
  ANDROID: GKI: disable CONFIG_FORTIFY_SOURCE
  Linux 5.4.161
  erofs: fix unsafe pagevec reuse of hooked pclusters
  erofs: remove the occupied parameter from z_erofs_pagevec_enqueue()
  PCI: Add MSI masking quirk for Nvidia ION AHCI
  PCI/MSI: Deal with devices lying about their MSI mask capability
  PCI/MSI: Destroy sysfs before freeing entries
  parisc/entry: fix trace test in syscall exit path
  fortify: Explicitly disable Clang support
  scsi: ufs: Fix tm request when non-fatal error happens
  ext4: fix lazy initialization next schedule time computation in more granular unit
  MIPS: Fix assembly error from MIPSr2 code used within MIPS_ISA_ARCH_LEVEL
  scsi: ufs: Fix interrupt error message for shared interrupts
  soc/tegra: pmc: Fix imbalanced clock disabling in error code path
  Revert "net: sched: update default qdisc visibility after Tx queue cnt changes"
  Revert "serial: core: Fix initializing and restoring termios speed"
  Linux 5.4.160
  selftests/bpf: Fix also no-alu32 strobemeta selftest
  ath10k: fix invalid dma_addr_t token assignment
  SUNRPC: Partial revert of commit 6f9f17287e78
  PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros
  powerpc/powernv/prd: Unregister OPAL_MSG_PRD2 notifier during module unload
  s390/cio: make ccw_device_dma_* more robust
  s390/tape: fix timer initialization in tape_std_assign()
  s390/cio: check the subchannel validity for dev_busid
  video: backlight: Drop maximum brightness override for brightness zero
  mm, oom: do not trigger out_of_memory from the #PF
  mm, oom: pagefault_out_of_memory: don't force global OOM for dying tasks
  powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC
  powerpc/security: Add a helper to query stf_barrier type
  powerpc/bpf: Fix BPF_SUB when imm == 0x80000000
  powerpc/bpf: Validate branch ranges
  powerpc/lib: Add helper to check if offset is within conditional branch range
  ovl: fix deadlock in splice write
  9p/net: fix missing error check in p9_check_errors
  net, neigh: Enable state migration between NUD_PERMANENT and NTF_USE
  f2fs: should use GFP_NOFS for directory inodes
  irqchip/sifive-plic: Fixup EOI failed when masked
  parisc: Fix set_fixmap() on PA1.x CPUs
  parisc: Fix backtrace to always include init funtion names
  ARM: 9156/1: drop cc-option fallbacks for architecture selection
  ARM: 9155/1: fix early early_iounmap()
  selftests/net: udpgso_bench_rx: fix port argument
  cxgb4: fix eeprom len when diagnostics not implemented
  net/smc: fix sk_refcnt underflow on linkdown and fallback
  vsock: prevent unnecessary refcnt inc for nonblocking connect
  net: hns3: allow configure ETS bandwidth of all TCs
  net/sched: sch_taprio: fix undefined behavior in ktime_mono_to_any
  bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding
  arm64: pgtable: make __pte_to_phys/__phys_to_pte_val inline functions
  nfc: pn533: Fix double free when pn533_fill_fragment_skbs() fails
  llc: fix out-of-bound array index in llc_sk_dev_hash()
  perf bpf: Add missing free to bpf_event__print_bpf_prog_info()
  zram: off by one in read_block_state()
  mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and zs_unregister_migration()
  bonding: Fix a use-after-free problem when bond_sysfs_slave_add() failed
  ACPI: PMIC: Fix intel_pmic_regs_handler() read accesses
  net: vlan: fix a UAF in vlan_dev_real_dev()
  net: davinci_emac: Fix interrupt pacing disable
  xen-pciback: Fix return in pm_ctrl_init()
  i2c: xlr: Fix a resource leak in the error handling path of 'xlr_i2c_probe()'
  NFSv4: Fix a regression in nfs_set_open_stateid_locked()
  scsi: qla2xxx: Turn off target reset during issue_lip
  scsi: qla2xxx: Fix gnl list corruption
  ar7: fix kernel builds for compiler test
  watchdog: f71808e_wdt: fix inaccurate report in WDIOC_GETTIMEOUT
  m68k: set a default value for MEMORY_RESERVE
  signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL)
  dmaengine: dmaengine_desc_callback_valid(): Check for `callback_result`
  netfilter: nfnetlink_queue: fix OOB when mac header was cleared
  soc: fsl: dpaa2-console: free buffer before returning from dpaa2_console_read
  auxdisplay: ht16k33: Fix frame buffer device blanking
  auxdisplay: ht16k33: Connect backlight to fbdev
  auxdisplay: img-ascii-lcd: Fix lock-up when displaying empty string
  dmaengine: at_xdmac: fix AT_XDMAC_CC_PERID() macro
  mtd: core: don't remove debugfs directory if device is in use
  mtd: spi-nor: hisi-sfc: Remove excessive clk_disable_unprepare()
  fs: orangefs: fix error return code of orangefs_revalidate_lookup()
  NFS: Fix deadlocks in nfs_scan_commit_list()
  opp: Fix return in _opp_add_static_v2()
  PCI: aardvark: Fix preserving PCI_EXP_RTCTL_CRSSVE flag on emulated bridge
  PCI: aardvark: Don't spam about PIO Response Status
  drm/plane-helper: fix uninitialized variable reference
  pnfs/flexfiles: Fix misplaced barrier in nfs4_ff_layout_prepare_ds
  rpmsg: Fix rpmsg_create_ept return when RPMSG config is not defined
  apparmor: fix error check
  power: supply: bq27xxx: Fix kernel crash on IRQ handler register error
  mips: cm: Convert to bitfield API to fix out-of-bounds access
  powerpc/44x/fsp2: add missing of_node_put
  HID: u2fzero: properly handle timeouts in usb_submit_urb
  HID: u2fzero: clarify error check and length calculations
  serial: xilinx_uartps: Fix race condition causing stuck TX
  phy: qcom-qusb2: Fix a memory leak on probe
  ASoC: cs42l42: Defer probe if request_threaded_irq() returns EPROBE_DEFER
  ASoC: cs42l42: Correct some register default values
  ARM: dts: stm32: fix SAI sub nodes register range
  staging: ks7010: select CRYPTO_HASH/CRYPTO_MICHAEL_MIC
  RDMA/mlx4: Return missed an error if device doesn't support steering
  scsi: csiostor: Uninitialized data in csio_ln_vnp_read_cbfn()
  power: supply: rt5033_battery: Change voltage values to µV
  usb: gadget: hid: fix error code in do_config()
  serial: 8250_dw: Drop wrong use of ACPI_PTR()
  video: fbdev: chipsfb: use memset_io() instead of memset()
  clk: at91: check pmc node status before registering syscore ops
  memory: fsl_ifc: fix leak of irq and nand_irq in fsl_ifc_ctrl_probe
  soc/tegra: Fix an error handling path in tegra_powergate_power_up()
  arm: dts: omap3-gta04a4: accelerometer irq fix
  ALSA: hda: Reduce udelay() at SKL+ position reporting
  JFS: fix memleak in jfs_mount
  MIPS: loongson64: make CPU_LOONGSON64 depends on MIPS_FP_SUPPORT
  scsi: dc395: Fix error case unwinding
  ARM: dts: at91: tse850: the emac<->phy interface is rmii
  arm64: dts: meson-g12a: Fix the pwm regulator supply properties
  RDMA/bnxt_re: Fix query SRQ failure
  ARM: dts: qcom: msm8974: Add xo_board reference clock to DSI0 PHY
  arm64: dts: rockchip: Fix GPU register width for RK3328
  ARM: s3c: irq-s3c24xx: Fix return value check for s3c24xx_init_intc()
  clk: mvebu: ap-cpu-clk: Fix a memory leak in error handling paths
  RDMA/rxe: Fix wrong port_cap_flags
  ibmvnic: Process crqs after enabling interrupts
  ibmvnic: don't stop queue in xmit
  udp6: allow SO_MARK ctrl msg to affect routing
  selftests/bpf: Fix fclose/pclose mismatch in test_progs
  crypto: pcrypt - Delay write to padata->info
  net: phylink: avoid mvneta warning when setting pause parameters
  net: amd-xgbe: Toggle PLL settings during rate change
  drm/amdgpu/gmc6: fix DMA mask from 44 to 40 bits
  wcn36xx: add proper DMA memory barriers in rx path
  libertas: Fix possible memory leak in probe and disconnect
  libertas_tf: Fix possible memory leak in probe and disconnect
  KVM: s390: Fix handle_sske page fault handling
  samples/kretprobes: Fix return value if register_kretprobe() failed
  tcp: don't free a FIN sk_buff in tcp_remove_empty_skb()
  irq: mips: avoid nested irq_enter()
  s390/gmap: don't unconditionally call pte_unmap_unlock() in __gmap_zap()
  libbpf: Fix BTF data layout checks and allow empty BTF
  smackfs: use netlbl_cfg_cipsov4_del() for deleting cipso_v4_doi
  drm/msm: Fix potential NULL dereference in DPU SSPP
  clocksource/drivers/timer-ti-dm: Select TIMER_OF
  PM: hibernate: fix sparse warnings
  nvme-rdma: fix error code in nvme_rdma_setup_ctrl
  phy: micrel: ksz8041nl: do not use power down mode
  mwifiex: Send DELBA requests according to spec
  rsi: stop thread firstly in rsi_91x_init() error handling
  mt76: mt76x02: fix endianness warnings in mt76x02_mac.c
  platform/x86: thinkpad_acpi: Fix bitwise vs. logical warning
  block: ataflop: fix breakage introduced at blk-mq refactoring
  mmc: mxs-mmc: disable regulator on error and in the remove function
  net: stream: don't purge sk_error_queue in sk_stream_kill_queues()
  drm/msm: uninitialized variable in msm_gem_import()
  ath10k: fix max antenna gain unit
  hwmon: (pmbus/lm25066) Let compiler determine outer dimension of lm25066_coeff
  hwmon: Fix possible memleak in __hwmon_device_register()
  net, neigh: Fix NTF_EXT_LEARNED in combination with NTF_USE
  memstick: jmb38x_ms: use appropriate free function in jmb38x_ms_alloc_host()
  memstick: avoid out-of-range warning
  mmc: sdhci-omap: Fix NULL pointer exception if regulator is not configured
  b43: fix a lower bounds test
  b43legacy: fix a lower bounds test
  hwrng: mtk - Force runtime pm ops for sleep ops
  crypto: qat - disregard spurious PFVF interrupts
  crypto: qat - detect PFVF collision after ACK
  media: dvb-frontends: mn88443x: Handle errors of clk_prepare_enable()
  netfilter: nft_dynset: relax superfluous check on set updates
  EDAC/amd64: Handle three rank interleaving mode
  ath9k: Fix potential interrupt storm on queue reset
  media: em28xx: Don't use ops->suspend if it is NULL
  cpuidle: Fix kobject memory leaks in error paths
  crypto: ecc - fix CRYPTO_DEFAULT_RNG dependency
  kprobes: Do not use local variable when creating debugfs file
  media: cx23885: Fix snd_card_free call on null card pointer
  media: tm6000: Avoid card name truncation
  media: si470x: Avoid card name truncation
  media: radio-wl1273: Avoid card name truncation
  media: mtk-vpu: Fix a resource leak in the error handling path of 'mtk_vpu_probe()'
  media: TDA1997x: handle short reads of hdmi info frame.
  media: dvb-usb: fix ununit-value in az6027_rc_query
  media: cxd2880-spi: Fix a null pointer dereference on error handling path
  media: em28xx: add missing em28xx_close_extension
  drm/amdgpu: fix warning for overflow check
  ath10k: Fix missing frame timestamp for beacon/probe-resp
  net: dsa: rtl8366rb: Fix off-by-one bug
  rxrpc: Fix _usecs_to_jiffies() by using usecs_to_jiffies()
  crypto: caam - disable pkc for non-E SoCs
  Bluetooth: btmtkuart: fix a memleak in mtk_hci_wmt_sync
  wilc1000: fix possible memory leak in cfg_scan_result()
  cgroup: Make rebind_subsystems() disable v2 controllers all at once
  net: net_namespace: Fix undefined member in key_remove_domain()
  virtio-gpu: fix possible memory allocation failure
  drm/v3d: fix wait for TMU write combiner flush
  rcu: Fix existing exp request check in sync_sched_exp_online_cleanup()
  Bluetooth: fix init and cleanup of sco_conn.timeout_work
  selftests/bpf: Fix strobemeta selftest regression
  netfilter: conntrack: set on IPS_ASSURED if flows enters internal stream state
  parisc/kgdb: add kgdb_roundup() to make kgdb work with idle polling
  parisc/unwind: fix unwinder when CONFIG_64BIT is enabled
  task_stack: Fix end_of_stack() for architectures with upwards-growing stack
  parisc: fix warning in flush_tlb_all
  x86/hyperv: Protect set_hv_tscchange_cb() against getting preempted
  spi: bcm-qspi: Fix missing clk_disable_unprepare() on error in bcm_qspi_probe()
  btrfs: do not take the uuid_mutex in btrfs_rm_device
  net: annotate data-race in neigh_output()
  vrf: run conntrack only in context of lower/physdev for locally generated packets
  ARM: 9136/1: ARMv7-M uses BE-8, not BE-32
  gre/sit: Don't generate link-local addr if addr_gen_mode is IN6_ADDR_GEN_MODE_NONE
  ARM: clang: Do not rely on lr register for stacktrace
  smackfs: use __GFP_NOFAIL for smk_cipso_doi()
  iwlwifi: mvm: disable RX-diversity in powersave
  selftests: kvm: fix mismatched fclose() after popen()
  PM: hibernate: Get block device exclusively in swsusp_check()
  nvme: drop scan_lock and always kick requeue list when removing namespaces
  nvmet-tcp: fix use-after-free when a port is removed
  nvmet: fix use-after-free when a port is removed
  block: remove inaccurate requeue check
  mwl8k: Fix use-after-free in mwl8k_fw_state_machine()
  tracing/cfi: Fix cmp_entries_* functions signature mismatch
  workqueue: make sysfs of unbound kworker cpumask more clever
  lib/xz: Validate the value before assigning it to an enum variable
  lib/xz: Avoid overlapping memcpy() with invalid input with in-place decompression
  memstick: r592: Fix a UAF bug when removing the driver
  leaking_addresses: Always print a trailing newline
  ACPI: battery: Accept charges over the design capacity as full
  iov_iter: Fix iov_iter_get_pages{,_alloc} page fault return value
  ath: dfs_pattern_detector: Fix possible null-pointer dereference in channel_detector_create()
  tracefs: Have tracefs directories not set OTH permission bits by default
  net-sysfs: try not to restart the syscall if it will fail eventually
  media: usb: dvd-usb: fix uninit-value bug in dibusb_read_eeprom_byte()
  media: ipu3-imgu: VIDIOC_QUERYCAP: Fix bus_info
  media: ipu3-imgu: imgu_fmt: Handle properly try
  ACPICA: Avoid evaluating methods too early during system resume
  ipmi: Disable some operations during a panic
  media: rcar-csi2: Add checking to rcsi2_start_receiver()
  brcmfmac: Add DMI nvram filename quirk for Cyberbook T116 tablet
  ia64: don't do IA64_CMPXCHG_DEBUG without CONFIG_PRINTK
  media: mceusb: return without resubmitting URB in case of -EPROTO error.
  media: imx: set a media_device bus_info string
  media: s5p-mfc: Add checking to s5p_mfc_probe().
  media: s5p-mfc: fix possible null-pointer dereference in s5p_mfc_probe()
  media: uvcvideo: Set unique vdev name based in type
  media: uvcvideo: Return -EIO for control errors
  media: uvcvideo: Set capability in s_param
  media: stm32: Potential NULL pointer dereference in dcmi_irq_thread()
  media: netup_unidvb: handle interrupt properly according to the firmware
  media: mt9p031: Fix corrupted frame after restarting stream
  ath10k: high latency fixes for beacon buffer
  mwifiex: Properly initialize private structure on interface type changes
  mwifiex: Run SET_BSS_MODE when changing from P2P to STATION vif-type
  x86: Increase exception stack sizes
  smackfs: Fix use-after-free in netlbl_catmap_walk()
  net: sched: update default qdisc visibility after Tx queue cnt changes
  locking/lockdep: Avoid RCU-induced noinstr fail
  MIPS: lantiq: dma: reset correct number of channel
  MIPS: lantiq: dma: add small delay after reset
  platform/x86: wmi: do not fail if disabling fails
  drm/panel-orientation-quirks: add Valve Steam Deck
  Bluetooth: fix use-after-free error in lock_sock_nested()
  Bluetooth: sco: Fix lock_sock() blockage by memcpy_from_msg()
  drm: panel-orientation-quirks: Add quirk for the Samsung Galaxy Book 10.6
  drm: panel-orientation-quirks: Add quirk for KD Kurio Smart C15200 2-in-1
  drm: panel-orientation-quirks: Update the Lenovo Ideapad D330 quirk (v2)
  dma-buf: WARN on dmabuf release with pending attachments
  USB: chipidea: fix interrupt deadlock
  USB: iowarrior: fix control-message timeouts
  USB: serial: keyspan: fix memleak on probe errors
  iio: dac: ad5446: Fix ad5622_write() return value
  pinctrl: core: fix possible memory leak in pinctrl_enable()
  quota: correct error number in free_dqentry()
  quota: check block number when reading the block in quota file
  PCI: aardvark: Read all 16-bits from PCIE_MSI_PAYLOAD_REG
  PCI: aardvark: Fix return value of MSI domain .alloc() method
  PCI: aardvark: Fix reporting Data Link Layer Link Active
  PCI: aardvark: Do not unmask unused interrupts
  PCI: aardvark: Fix checking for link up via LTSSM state
  PCI: aardvark: Do not clear status bits of masked interrupts
  PCI: pci-bridge-emul: Fix emulation of W1C bits
  xen/balloon: add late_initcall_sync() for initial ballooning done
  ALSA: mixer: fix deadlock in snd_mixer_oss_set_volume
  ALSA: mixer: oss: Fix racy access to slots
  serial: core: Fix initializing and restoring termios speed
  powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
  can: j1939: j1939_can_recv(): ignore messages with invalid source address
  can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM transport
  KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in use
  power: supply: max17042_battery: use VFSOC for capacity when no rsns
  power: supply: max17042_battery: Prevent int underflow in set_soc_threshold
  signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
  signal: Remove the bogus sigkill_pending in ptrace_stop
  RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
  rsi: Fix module dev_oper_mode parameter description
  rsi: fix rate mask set leading to P2P failure
  rsi: fix key enabled check causing unwanted encryption for vap_id > 0
  rsi: fix occasional initialisation failure with BT coex
  wcn36xx: handle connection loss indication
  libata: fix checking of DMA state
  mwifiex: Read a PCI register after writing the TX ring write pointer
  wcn36xx: Fix HT40 capability for 2Ghz band
  evm: mark evm_fixmode as __ro_after_init
  rtl8187: fix control-message timeouts
  PCI: Mark Atheros QCA6174 to avoid bus reset
  ath10k: fix division by zero in send path
  ath10k: fix control-message timeout
  ath6kl: fix control-message timeout
  ath6kl: fix division by zero in send path
  mwifiex: fix division by zero in fw download path
  EDAC/sb_edac: Fix top-of-high-memory value for Broadwell/Haswell
  regulator: dt-bindings: samsung,s5m8767: correct s5m8767,pmic-buck-default-dvs-idx property
  regulator: s5m8767: do not use reset value as DVS voltage if GPIO DVS is disabled
  hwmon: (pmbus/lm25066) Add offset coefficients
  ia64: kprobes: Fix to pass correct trampoline address to the handler
  btrfs: call btrfs_check_rw_degradable only if there is a missing device
  btrfs: fix lost error handling when replaying directory deletes
  btrfs: clear MISSING device status bit in btrfs_close_one_device
  net/smc: Correct spelling mistake to TCPF_SYN_RECV
  nfp: bpf: relax prog rejection for mtu check through max_pkt_offset
  vmxnet3: do not stop tx queues after netif_device_detach()
  r8169: Add device 10ec:8162 to driver r8169
  nvmet-tcp: fix header digest verification
  drm: panel-orientation-quirks: Add quirk for GPD Win3
  watchdog: Fix OMAP watchdog early handling
  net: multicast: calculate csum of looped-back and forwarded packets
  spi: spl022: fix Microwire full duplex mode
  nvmet-tcp: fix a memory leak when releasing a queue
  xen/netfront: stop tx queues during live migration
  bpf: Prevent increasing bpf_jit_limit above max
  bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT
  drm: panel-orientation-quirks: Add quirk for Aya Neo 2021
  mmc: winbond: don't build on M68K
  reset: socfpga: add empty driver allowing consumers to probe
  ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode
  hyperv/vmbus: include linux/bitops.h
  sfc: Don't use netif_info before net_device setup
  cavium: Fix return values of the probe function
  scsi: qla2xxx: Fix unmap of already freed sgl
  scsi: qla2xxx: Return -ENOMEM if kzalloc() fails
  cavium: Return negative value when pci_alloc_irq_vectors() fails
  x86/irq: Ensure PI wakeup handler is unregistered before module unload
  x86/cpu: Fix migration safety with X86_BUG_NULL_SEL
  x86/sme: Use #define USE_EARLY_PGTABLE_L5 in mem_encrypt_identity.c
  fuse: fix page stealing
  ALSA: timer: Unconditionally unlink slave instances, too
  ALSA: timer: Fix use-after-free problem
  ALSA: synth: missing check for possible NULL after the call to kstrdup
  ALSA: usb-audio: Add registration quirk for JBL Quantum 400
  ALSA: line6: fix control and interrupt message timeouts
  ALSA: 6fire: fix control and bulk message timeouts
  ALSA: ua101: fix division by zero at probe
  ALSA: hda/realtek: Add quirk for HP EliteBook 840 G7 mute LED
  ALSA: hda/realtek: Add quirk for ASUS UX550VE
  ALSA: hda/realtek: Add a quirk for Acer Spin SP513-54N
  ALSA: hda/realtek: Add quirk for Clevo PC70HS
  media: v4l2-ioctl: Fix check_ext_ctrls
  media: ir-kbd-i2c: improve responsiveness of hauppauge zilog receivers
  media: ite-cir: IR receiver stop working after receive overflow
  crypto: s5p-sss - Add error handling in s5p_aes_probe()
  firmware/psci: fix application of sizeof to pointer
  tpm: Check for integer overflow in tpm2_map_response_body()
  parisc: Fix ptrace check on syscall return
  mmc: dw_mmc: Dont wait for DRTO on Write RSP error
  scsi: qla2xxx: Fix use after free in eh_abort path
  scsi: qla2xxx: Fix kernel crash when accessing port_speed sysfs file
  ocfs2: fix data corruption on truncate
  libata: fix read log timeout value
  Input: i8042 - Add quirk for Fujitsu Lifebook T725
  Input: elantench - fix misreporting trackpoint coordinates
  Input: iforce - fix control-message timeout
  binder: use cred instead of task for getsecid
  binder: use cred instead of task for selinux checks
  binder: use euid from cred instead of using task
  usb: xhci: Enable runtime-pm by default on AMD Yellow Carp platform
  xhci: Fix USB 3.1 enumeration issues by increasing roothub power-on-good delay
  Linux 5.4.159
  rsi: fix control-message timeout
  media: staging/intel-ipu3: css: Fix wrong size comparison imgu_css_fw_init
  staging: rtl8192u: fix control-message timeouts
  staging: r8712u: fix control-message timeout
  comedi: vmk80xx: fix bulk and interrupt message timeouts
  comedi: vmk80xx: fix bulk-buffer overflow
  comedi: vmk80xx: fix transfer-buffer overflows
  comedi: ni_usb6501: fix NULL-deref in command paths
  comedi: dt9812: fix DMA buffers on stack
  isofs: Fix out of bound access for corrupted isofs image
  printk/console: Allow to disable console output by using console="" or console=null
  binder: don't detect sender/target during buffer cleanup
  usb-storage: Add compatibility quirk flags for iODD 2531/2541
  usb: musb: Balance list entry in musb_gadget_queue
  usb: gadget: Mark USB_FSL_QE broken on 64-bit
  usb: ehci: handshake CMD_RUN instead of STS_HALT
  Revert "x86/kvm: fix vcpu-id indexed array sizes"
  Linux 5.4.158
  ARM: 9120/1: Revert "amba: make use of -1 IRQs warn"
  Revert "drm/ttm: fix memleak in ttm_transfered_destroy"
  sfc: Fix reading non-legacy supported link modes
  Revert "usb: core: hcd: Add support for deferring roothub registration"
  Revert "xhci: Set HCD flag to defer primary roothub registration"
  media: firewire: firedtv-avc: fix a buffer overflow in avc_ca_pmt()
  net: ethernet: microchip: lan743x: Fix skb allocation failure
  vrf: Revert "Reset skb conntrack connection..."
  scsi: core: Put LLD module refcnt after SCSI device is released
  Linux 5.4.157
  perf script: Check session->header.env.arch before using it
  KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
  KVM: s390: clear kicked_mask before sleeping again
  cfg80211: correct bridge/4addr mode check
  net: use netif_is_bridge_port() to check for IFF_BRIDGE_PORT
  sctp: add vtag check in sctp_sf_ootb
  sctp: add vtag check in sctp_sf_do_8_5_1_E_sa
  sctp: add vtag check in sctp_sf_violation
  sctp: fix the processing for COOKIE_ECHO chunk
  sctp: fix the processing for INIT_ACK chunk
  sctp: use init_tag from inithdr for ABORT chunk
  phy: phy_start_aneg: Add an unlocked version
  phy: phy_ethtool_ksettings_get: Lock the phy for consistency
  net/tls: Fix flipped sign in async_wait.err assignment
  net: nxp: lpc_eth.c: avoid hang when bringing interface down
  net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent
  net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails
  nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST
  RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string
  net: Prevent infinite while loop in skb_tx_hash()
  net: batman-adv: fix error handling
  regmap: Fix possible double-free in regcache_rbtree_exit()
  arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node
  RDMA/mlx5: Set user priority for DCT
  nvme-tcp: fix data digest pointer calculation
  nvmet-tcp: fix data digest pointer calculation
  IB/hfi1: Fix abba locking issue with sc_disable()
  IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields
  tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function
  drm/ttm: fix memleak in ttm_transfered_destroy
  net: lan78xx: fix division by zero in send path
  cfg80211: scan: fix RCU in cfg80211_add_nontrans_list()
  mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit
  mmc: sdhci: Map more voltage level to SDHCI_POWER_330
  mmc: dw_mmc: exynos: fix the finding clock sample value
  mmc: cqhci: clear HALT state after CQE enable
  mmc: vub300: fix control-message timeouts
  net/tls: Fix flipped sign in tls_err_abort() calls
  Revert "net: mdiobus: Fix memory leak in __mdiobus_register"
  nfc: port100: fix using -ERRNO as command type mask
  ata: sata_mv: Fix the error handling of mv_chip_id()
  Revert "pinctrl: bcm: ns: support updated DT binding as syscon subnode"
  usbnet: fix error return code in usbnet_probe()
  usbnet: sanity check for maxpacket
  ipv4: use siphash instead of Jenkins in fnhe_hashfun()
  ipv6: use siphash in rt6_exception_hash()
  powerpc/bpf: Fix BPF_MOD when imm == 1
  ARM: 9141/1: only warn about XIP address when not compile testing
  ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype
  ARM: 9134/1: remove duplicate memcpy() definition
  ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned
  Linux 5.4.156
  pinctrl: stm32: use valid pin identifier in stm32_pinctrl_resume()
  ARM: 9122/1: select HAVE_FUTEX_CMPXCHG
  tracing: Have all levels of checks prevent recursion
  net: mdiobus: Fix memory leak in __mdiobus_register
  scsi: core: Fix shost->cmd_per_lun calculation in scsi_add_host_with_dma()
  Input: snvs_pwrkey - add clk handling
  ALSA: hda: avoid write to STATESTS if controller is in reset
  platform/x86: intel_scu_ipc: Update timeout value in comment
  isdn: mISDN: Fix sleeping function called from invalid context
  ARM: dts: spear3xx: Fix gmac node
  net: stmmac: add support for dwmac 3.40a
  btrfs: deal with errors when checking if a dir entry exists during log replay
  gcc-plugins/structleak: add makefile var for disabling structleak
  selftests: netfilter: remove stray bash debug line
  netfilter: Kconfig: use 'default y' instead of 'm' for bool config option
  isdn: cpai: check ctr->cnr to avoid array index out of bound
  nfc: nci: fix the UAF of rf_conn_info object
  mm, slub: fix potential memoryleak in kmem_cache_open()
  mm, slub: fix mismatch between reconstructed freelist depth and cnt
  powerpc/idle: Don't corrupt back chain when going idle
  KVM: PPC: Book3S HV: Make idle_kvm_start_guest() return 0 if it went to guest
  KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()
  powerpc64/idle: Fix SP offsets when saving GPRs
  audit: fix possible null-pointer dereference in audit_filter_rules
  ASoC: DAPM: Fix missing kctl change notifications
  ALSA: hda/realtek: Add quirk for Clevo PC50HS
  ALSA: usb-audio: Provide quirk for Sennheiser GSP670 Headset
  vfs: check fd has read access in kernel_read_file_from_fd()
  elfcore: correct reference to CONFIG_UML
  ocfs2: mount fails with buffer overflow in strlen
  ocfs2: fix data corruption after conversion from inline format
  ceph: fix handling of "meta" errors
  can: j1939: j1939_xtp_rx_rts_session_new(): abort TP less than 9 bytes
  can: j1939: j1939_xtp_rx_dat_one(): cancel session if receive TP.DT with error length
  can: j1939: j1939_netdev_start(): fix UAF for rx_kref of j1939_priv
  can: j1939: j1939_tp_rxtimer(): fix errant alert in j1939_tp_rxtimer
  can: peak_pci: peak_pci_remove(): fix UAF
  can: peak_usb: pcan_usb_fd_decode_status(): fix back to ERROR_ACTIVE state notification
  can: rcar_can: fix suspend/resume
  net: enetc: fix ethtool counter name for PM0_TERR
  net: stmmac: Fix E2E delay mechanism
  net: hns3: disable sriov before unload hclge layer
  net: hns3: add limit ets dwrr bandwidth cannot be 0
  net: hns3: reset DWRR of unused tc to zero
  NIOS2: irqflags: rename a redefined register name
  net: dsa: lantiq_gswip: fix register definition
  lan78xx: select CRC32
  netfilter: ipvs: make global sysctl readonly in non-init netns
  ASoC: wm8960: Fix clock configuration on slave mode
  dma-debug: fix sg checks in debug_dma_map_sg()
  NFSD: Keep existing listeners on portlist error
  xtensa: xtfpga: Try software restart before simulating CPU reset
  xtensa: xtfpga: use CONFIG_USE_OF instead of CONFIG_OF
  ARM: dts: at91: sama5d2_som1_ek: disable ISC node by default
  tee: optee: Fix missing devices unregister during optee_remove
  net: switchdev: do not propagate bridge updates across bridges
  parisc: math-emu: Fix fall-through warnings
  Linux 5.4.155
  ionic: don't remove netdev->dev_addr when syncing uc list
  r8152: select CRC32 and CRYPTO/CRYPTO_HASH/CRYPTO_SHA256
  qed: Fix missing error code in qed_slowpath_start()
  mqprio: Correct stats in mqprio_dump_class_stats().
  acpi/arm64: fix next_platform_timer() section mismatch error
  drm/msm/dsi: fix off by one in dsi_bus_clk_enable error handling
  drm/msm/dsi: Fix an error code in msm_dsi_modeset_init()
  drm/msm: Fix null pointer dereference on pointer edp
  drm/panel: olimex-lcd-olinuxino: select CRC32
  platform/mellanox: mlxreg-io: Fix argument base in kstrtou32() call
  mlxsw: thermal: Fix out-of-bounds memory accesses
  ata: ahci_platform: fix null-ptr-deref in ahci_platform_enable_regulators()
  pata_legacy: fix a couple uninitialized variable bugs
  NFC: digital: fix possible memory leak in digital_in_send_sdd_req()
  NFC: digital: fix possible memory leak in digital_tg_listen_mdaa()
  nfc: fix error handling of nfc_proto_register()
  ethernet: s2io: fix setting mac address during resume
  net: encx24j600: check error in devm_regmap_init_encx24j600
  net: stmmac: fix get_hw_feature() on old hardware
  net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp
  net: korina: select CRC32
  net: arc: select CRC32
  gpio: pca953x: Improve bias setting
  sctp: account stream padding length for reconf chunk
  iio: dac: ti-dac5571: fix an error code in probe()
  iio: ssp_sensors: fix error code in ssp_print_mcu_debug()
  iio: ssp_sensors: add more range checking in ssp_parse_dataframe()
  iio: light: opt3001: Fixed timeout error when 0 lux
  iio: mtk-auxadc: fix case IIO_CHAN_INFO_PROCESSED
  iio: adc128s052: Fix the error handling path of 'adc128_probe()'
  iio: adc: aspeed: set driver data when adc probe.
  powerpc/xive: Discard disabled interrupts in get_irqchip_state()
  x86/Kconfig: Do not enable AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT automatically
  nvmem: Fix shift-out-of-bound (UBSAN) with byte size cells
  EDAC/armada-xp: Fix output of uncorrectable error counter
  virtio: write back F_VERSION_1 before validate
  USB: serial: option: add prod. id for Quectel EG91
  USB: serial: option: add Telit LE910Cx composition 0x1204
  USB: serial: option: add Quectel EC200S-CN module support
  USB: serial: qcserial: add EM9191 QDL support
  Input: xpad - add support for another USB ID of Nacon GC-100
  usb: musb: dsps: Fix the probe error path
  efi: Change down_interruptible() in virt_efi_reset_system() to down_trylock()
  efi/cper: use stack buffer for error record decoding
  cb710: avoid NULL pointer subtraction
  xhci: Enable trust tx length quirk for Fresco FL11 USB controller
  xhci: Fix command ring pointer corruption while aborting a command
  xhci: guard accesses to ep_state in xhci_endpoint_reset()
  mei: me: add Ice Lake-N device id.
  x86/resctrl: Free the ctrlval arrays when domain_setup_mon_state() fails
  watchdog: orion: use 0 for unset heartbeat
  btrfs: check for error when looking up inode during dir entry replay
  btrfs: deal with errors when adding inode reference during log replay
  btrfs: deal with errors when replaying dir entry during log replay
  btrfs: unlock newly allocated extent buffer after error
  csky: Fixup regs.sr broken in ptrace
  csky: don't let sigreturn play with priveleged bits of status register
  s390: fix strrchr() implementation
  nds32/ftrace: Fix Error: invalid operands (*UND* and *UND* sections) for `^'
  ALSA: hda/realtek: Fix the mic type detection issue for ASUS G551JW
  ALSA: hda/realtek - ALC236 headset MIC recording issue
  ALSA: hda/realtek: Add quirk for Clevo X170KM-G
  ALSA: hda/realtek: Complete partial device name to avoid ambiguity
  ALSA: seq: Fix a potential UAF by wrong private_free call order
  ALSA: usb-audio: Add quirk for VF0770
  ovl: simplify file splice
  Linux 5.4.154
  sched: Always inline is_percpu_thread()
  scsi: virtio_scsi: Fix spelling mistake "Unsupport" -> "Unsupported"
  scsi: ses: Fix unsigned comparison with less than zero
  drm/amdgpu: fix gart.bo pin_count leak
  net: sun: SUNVNET_COMMON should depend on INET
  mac80211: check return value of rhashtable_init
  net: prevent user from passing illegal stab size
  m68k: Handle arrivals of multiple signals correctly
  mac80211: Drop frames from invalid MAC address in ad-hoc mode
  netfilter: nf_nat_masquerade: defer conntrack walk to work queue
  netfilter: nf_nat_masquerade: make async masq_inet6_event handling generic
  HID: wacom: Add new Intuos BT (CTL-4100WL/CTL-6100WL) device IDs
  netfilter: ip6_tables: zero-initialize fragment offset
  HID: apple: Fix logical maximum and usage maximum of Magic Keyboard JIS
  ext4: correct the error path of ext4_write_inline_data_end()
  net: phy: bcm7xxx: Fixed indirect MMD operations
  UPSTREAM: ovl: simplify file splice
  Linux 5.4.153
  x86/Kconfig: Correct reference to MWINCHIP3D
  x86/hpet: Use another crystalball to evaluate HPET usability
  x86/platform/olpc: Correct ifdef symbol to intended CONFIG_OLPC_XO15_SCI
  RISC-V: Include clone3() on rv32
  bpf, s390: Fix potential memory leak about jit_data
  i2c: acpi: fix resource leak in reconfiguration device addition
  net: prefer socket bound to interface when not in VRF
  i40e: Fix freeing of uninitialized misc IRQ vector
  i40e: fix endless loop under rtnl
  gve: fix gve_get_stats()
  rtnetlink: fix if_nlmsg_stats_size() under estimation
  gve: Correct available tx qpl check
  drm/nouveau/debugfs: fix file release memory leak
  video: fbdev: gbefb: Only instantiate device when built for IP32
  bus: ti-sysc: Use CLKDM_NOAUTO for dra7 dcan1 for errata i893
  netlink: annotate data races around nlk->bound
  net: sfp: Fix typo in state machine debug string
  net/sched: sch_taprio: properly cancel timer from taprio_destroy()
  net: bridge: use nla_total_size_64bit() in br_get_linkxstats_size()
  ARM: imx6: disable the GIC CPU interface before calling stby-poweroff sequence
  arm64: dts: ls1028a: add missing CAN nodes
  arm64: dts: freescale: Fix SP805 clock-names
  ptp_pch: Load module automatically if ID matches
  powerpc/fsl/dts: Fix phy-connection-type for fm1mac3
  net_sched: fix NULL deref in fifo_set_limit()
  phy: mdio: fix memory leak
  bpf: Fix integer overflow in prealloc_elems_and_freelist()
  bpf, arm: Fix register clobbering in div/mod implementation
  xtensa: call irqchip_init only when CONFIG_USE_OF is selected
  xtensa: use CONFIG_USE_OF instead of CONFIG_OF
  xtensa: move XCHAL_KIO_* definitions to kmem_layout.h
  arm64: dts: qcom: pm8150: use qcom,pm8998-pon binding
  ARM: dts: imx: Fix USB host power regulator polarity on M53Menlo
  ARM: dts: imx: Add missing pinctrl-names for panel on M53Menlo
  soc: qcom: mdt_loader: Drop PT_LOAD check on hash segment
  ARM: dts: qcom: apq8064: Use 27MHz PXO clock as DSI PLL reference
  soc: qcom: socinfo: Fixed argument passed to platform_set_data()
  bpf, mips: Validate conditional branch offsets
  MIPS: BPF: Restore MIPS32 cBPF JIT
  ARM: dts: qcom: apq8064: use compatible which contains chipid
  ARM: dts: omap3430-sdp: Fix NAND device node
  xen/balloon: fix cancelled balloon action
  nfsd4: Handle the NFSv4 READDIR 'dircount' hint being zero
  nfsd: fix error handling of register_pernet_subsys() in init_nfsd()
  ovl: fix missing negative dentry check in ovl_rename()
  mmc: meson-gx: do not use memcpy_to/fromio for dram-access-quirk
  xen/privcmd: fix error handling in mmap-resource processing
  usb: typec: tcpm: handle SRC_STARTUP state if cc changes
  USB: cdc-acm: fix break reporting
  USB: cdc-acm: fix racy tty buffer accesses
  Partially revert "usb: Kconfig: using select for USB_COMMON dependency"
  ANDROID: Different fix for KABI breakage in 5.4.151 in struct sock
  Linux 5.4.152
  libata: Add ATA_HORKAGE_NO_NCQ_ON_ATI for Samsung 860 and 870 SSD.
  silence nfscache allocation warnings with kvzalloc
  perf/x86: Reset destroy callback on event init failure
  kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[]
  KVM: do not shrink halt_poll_ns below grow_start
  tools/vm/page-types: remove dependency on opt_file for idle page tracking
  scsi: ses: Retry failed Send/Receive Diagnostic commands
  selftests:kvm: fix get_warnings_count() ignoring fscanf() return warn
  selftests: be sure to make khdr before other targets
  usb: dwc2: check return value after calling platform_get_resource()
  usb: testusb: Fix for showing the connection speed
  scsi: sd: Free scsi_disk device via put_device()
  ext2: fix sleeping in atomic bugs on error
  sparc64: fix pci_iounmap() when CONFIG_PCI is not set
  xen-netback: correct success/error reporting for the SKB-with-fraglist case
  net: mdio: introduce a shutdown method to mdio device drivers
  ANDROID: Fix up KABI breakage in 5.4.151 in struct sock
  Linux 5.4.151
  HID: usbhid: free raw_report buffers in usbhid_stop
  netfilter: ipset: Fix oversized kvmalloc() calls
  HID: betop: fix slab-out-of-bounds Write in betop_probe
  crypto: ccp - fix resource leaks in ccp_run_aes_gcm_cmd()
  usb: hso: remove the bailout parameter
  usb: hso: fix error handling code of hso_create_net_device
  hso: fix bailout in error case of probe
  libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind
  PCI: Fix pci_host_bridge struct device release/free handling
  net: stmmac: don't attach interface until resume finishes
  net: udp: annotate data race around udp_sk(sk)->corkflag
  HID: u2fzero: ignore incomplete packets without data
  ext4: fix potential infinite loop in ext4_dx_readdir()
  ext4: fix reserved space counter leakage
  ext4: fix loff_t overflow in ext4_max_bitmap_size()
  ipack: ipoctal: fix module reference leak
  ipack: ipoctal: fix missing allocation-failure check
  ipack: ipoctal: fix tty-registration error handling
  ipack: ipoctal: fix tty registration race
  ipack: ipoctal: fix stack information leak
  debugfs: debugfs_create_file_size(): use IS_ERR to check for error
  elf: don't use MAP_FIXED_NOREPLACE for elf interpreter mappings
  perf/x86/intel: Update event constraints for ICX
  af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  net: sched: flower: protect fl_walk() with rcu
  net: hns3: do not allow call hns3_nic_net_open repeatedly
  scsi: csiostor: Add module softdep on cxgb4
  Revert "block, bfq: honor already-setup queue merges"
  selftests, bpf: test_lwt_ip_encap: Really disable rp_filter
  e100: fix buffer overrun in e100_get_regs
  e100: fix length calculation in e100_get_regs_len
  net: ipv4: Fix rtnexthop len when RTA_FLOW is present
  hwmon: (tmp421) fix rounding for negative values
  hwmon: (tmp421) report /PVLD condition as fault
  sctp: break out if skb_header_pointer returns NULL in sctp_rcv_ootb
  mac80211-hwsim: fix late beacon hrtimer handling
  mac80211: mesh: fix potentially unaligned access
  mac80211: limit injected vht mcs/nss in ieee80211_parse_tx_radiotap
  mac80211: Fix ieee80211_amsdu_aggregate frag_tail bug
  hwmon: (mlxreg-fan) Return non-zero value when fan current state is enforced from sysfs
  ipvs: check that ip_vs_conn_tab_bits is between 8 and 20
  drm/amd/display: Pass PCI deviceid into DC
  x86/kvmclock: Move this_cpu_pvti into kvmclock.h
  mac80211: fix use-after-free in CCMP/GCMP RX
  scsi: ufs: Fix illegal offset in UPIU event trace
  hwmon: (w83791d) Fix NULL pointer dereference by removing unnecessary structure field
  hwmon: (w83792d) Fix NULL pointer dereference by removing unnecessary structure field
  hwmon: (w83793) Fix NULL pointer dereference by removing unnecessary structure field
  fs-verity: fix signed integer overflow with i_size near S64_MAX
  usb: cdns3: fix race condition before setting doorbell
  cpufreq: schedutil: Destroy mutex before kobject_put() frees the memory
  cpufreq: schedutil: Use kobject release() method to free sugov_tunables
  tty: Fix out-of-bound vmalloc access in imageblit
  Revert "crypto: public_key: fix overflow during implicit conversion"
  Linux 5.4.150
  qnx4: work around gcc false positive warning bug
  xen/balloon: fix balloon kthread freezing
  arm64: dts: marvell: armada-37xx: Extend PCIe MEM space
  thermal/drivers/int340x: Do not set a wrong tcc offset on resume
  EDAC/synopsys: Fix wrong value type assignment for edac_mode
  spi: Fix tegra20 build with CONFIG_PM=n
  net: 6pack: Fix tx timeout and slot time
  alpha: Declare virt_to_phys and virt_to_bus parameter as pointer to volatile
  arm64: Mark __stack_chk_guard as __ro_after_init
  parisc: Use absolute_pointer() to define PAGE0
  qnx4: avoid stringop-overread errors
  sparc: avoid stringop-overread errors
  net: i825xx: Use absolute_pointer for memcpy from fixed memory location
  compiler.h: Introduce absolute_pointer macro
  blk-cgroup: fix UAF by grabbing blkcg lock before destroying blkg pd
  sparc32: page align size in arch_dma_alloc
  nvme-multipath: fix ANA state updates when a namespace is not present
  xen/balloon: use a kernel thread instead a workqueue
  bpf: Add oversize check before call kvcalloc()
  ipv6: delay fib6_sernum increase in fib6_add
  m68k: Double cast io functions to unsigned long
  net: stmmac: allow CSR clock of 300MHz
  net: macb: fix use after free on rmmod
  blktrace: Fix uaf in blk_trace access after removing by sysfs
  md: fix a lock order reversal in md_alloc
  irqchip/gic-v3-its: Fix potential VPE leak on error
  irqchip/goldfish-pic: Select GENERIC_IRQ_CHIP to fix build
  scsi: lpfc: Use correct scnprintf() limit
  scsi: qla2xxx: Restore initiator in dual mode
  cifs: fix a sign extension bug
  thermal/core: Potential buffer overflow in thermal_build_list_of_policies()
  fpga: machxo2-spi: Fix missing error code in machxo2_write_complete()
  fpga: machxo2-spi: Return an error on failure
  tty: synclink_gt: rename a conflicting function name
  tty: synclink_gt, drop unneeded forward declarations
  scsi: iscsi: Adjust iface sysfs attr detection
  net/mlx4_en: Don't allow aRFS for encapsulated packets
  qed: rdma - don't wait for resources under hw error recovery flow
  gpio: uniphier: Fix void functions to remove return value
  net/smc: add missing error check in smc_clc_prfx_set()
  bnxt_en: Fix TX timeout when TX ring size is set to the smallest
  enetc: Fix illegal access when reading affinity_hint
  platform/x86/intel: punit_ipc: Drop wrong use of ACPI_PTR()
  afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation
  net: hso: fix muxed tty registration
  serial: mvebu-uart: fix driver's tx_empty callback
  xhci: Set HCD flag to defer primary roothub registration
  btrfs: prevent __btrfs_dump_space_info() to underflow its free space
  erofs: fix up erofs_lookup tracepoint
  mcb: fix error handling in mcb_alloc_bus()
  USB: serial: option: add device id for Foxconn T99W265
  USB: serial: option: remove duplicate USB device ID
  USB: serial: option: add Telit LN920 compositions
  USB: serial: mos7840: remove duplicated 0xac24 device ID
  usb: core: hcd: Add support for deferring roothub registration
  Re-enable UAS for LaCie Rugged USB3-FW with fk quirk
  staging: greybus: uart: fix tty use after free
  binder: make sure fd closes complete
  USB: cdc-acm: fix minor-number release
  USB: serial: cp210x: add ID for GW Instek GDM-834x Digital Multimeter
  usb-storage: Add quirk for ScanLogic SL11R-IDE older than 2.6c
  xen/x86: fix PV trap handling on secondary processors
  cifs: fix incorrect check for null pointer in header_assemble
  usb: musb: tusb6010: uninitialized data in tusb_fifo_write_unaligned()
  usb: dwc2: gadget: Fix ISOC transfer complete handling for DDMA
  usb: dwc2: gadget: Fix ISOC flow for BDMA and Slave
  usb: gadget: r8a66597: fix a loop in set_feature()
  ocfs2: drop acl cache for directories too
  Linux 5.4.149
  drm/nouveau/nvkm: Replace -ENOSYS with -ENODEV
  rtc: rx8010: select REGMAP_I2C
  blk-throttle: fix UAF by deleteing timer in blk_throtl_exit()
  pwm: stm32-lp: Don't modify HW state in .remove() callback
  pwm: rockchip: Don't modify HW state in .remove() callback
  pwm: img: Don't modify HW state in .remove() callback
  nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group
  nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group
  nilfs2: fix NULL pointer in nilfs_##name##_attr_release
  nilfs2: fix memory leak in nilfs_sysfs_create_device_group
  btrfs: fix lockdep warning while mounting sprout fs
  ceph: lockdep annotations for try_nonblocking_invalidate
  ceph: request Fw caps before updating the mtime in ceph_write_iter
  dmaengine: xilinx_dma: Set DMA mask for coherent APIs
  dmaengine: ioat: depends on !UML
  dmaengine: sprd: Add missing MODULE_DEVICE_TABLE
  parisc: Move pci_dev_is_behind_card_dino to where it is used
  drivers: base: cacheinfo: Get rid of DEFINE_SMP_CALL_CACHE_FUNCTION()
  thermal/core: Fix thermal_cooling_device_register() prototype
  Kconfig.debug: drop selecting non-existing HARDLOCKUP_DETECTOR_ARCH
  net: stmmac: reset Tx desc base address before restarting Tx
  phy: avoid unnecessary link-up delay in polling mode
  pwm: lpc32xx: Don't modify HW state in .probe() after the PWM chip was registered
  profiling: fix shift-out-of-bounds bugs
  nilfs2: use refcount_dec_and_lock() to fix potential UAF
  prctl: allow to setup brk for et_dyn executables
  9p/trans_virtio: Remove sysfs file on probe failure
  thermal/drivers/exynos: Fix an error code in exynos_tmu_probe()
  dmaengine: acpi: Avoid comparison GSI with Linux vIRQ
  um: virtio_uml: fix memory leak on init failures
  staging: rtl8192u: Fix bitwise vs logical operator in TranslateRxSignalStuff819xUsb()
  sctp: add param size validation for SCTP_PARAM_SET_PRIMARY
  sctp: validate chunk size in __rcv_asconf_lookup
  ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without DYNAMIC_FTRACE
  ARM: 9079/1: ftrace: Add MODULE_PLTS support
  ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link()
  ARM: 9077/1: PLT: Move struct plt_entries definition to header
  apparmor: remove duplicate macro list_entry_is_head()
  ARM: Qualify enabling of swiotlb_init()
  s390/pci_mmio: fully validate the VMA before calling follow_pte()
  console: consume APC, DM, DCS
  KVM: remember position in kvm->vcpus array
  PCI/ACPI: Add Ampere Altra SOC MCFG quirk
  PCI: aardvark: Fix reporting CRS value
  PCI: pci-bridge-emul: Add PCIe Root Capabilities Register
  PCI: aardvark: Indicate error in 'val' when config read fails
  PCI: pci-bridge-emul: Fix big-endian support
  Linux 5.4.148
  s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant
  s390/bpf: Fix optimizing out zero-extensions
  net: renesas: sh_eth: Fix freeing wrong tx descriptor
  ip_gre: validate csum_start only on pull
  qlcnic: Remove redundant unlock in qlcnic_pinit_from_rom
  fq_codel: reject silly quantum parameters
  netfilter: socket: icmp6: fix use-after-scope
  net: dsa: b53: Fix calculating number of switch ports
  perf unwind: Do not overwrite FEATURE_CHECK_LDFLAGS-libunwind-{x86,aarch64}
  ARC: export clear_user_page() for modules
  mtd: rawnand: cafe: Fix a resource leak in the error handling path of 'cafe_nand_probe()'
  PCI: Sync __pci_register_driver() stub for CONFIG_PCI=n
  KVM: arm64: Handle PSCI resets before userspace touches vCPU state
  mfd: tqmx86: Clear GPIO IRQ resource when no IRQ is set
  PCI: Fix pci_dev_str_match_path() alloc while atomic bug
  mfd: axp20x: Update AXP288 volatile ranges
  NTB: perf: Fix an error code in perf_setup_inbuf()
  NTB: Fix an error code in ntb_msit_probe()
  ethtool: Fix an error code in cxgb2.c
  PCI: ibmphp: Fix double unmap of io_mem
  block, bfq: honor already-setup queue merges
  net: usb: cdc_mbim: avoid altsetting toggling for Telit LN920
  Set fc_nlinfo in nh_create_ipv4, nh_create_ipv6
  PCI: Add ACS quirks for Cavium multi-function devices
  tracing/probes: Reject events which have the same name of existing one
  mfd: Don't use irq_create_mapping() to resolve a mapping
  fuse: fix use after free in fuse_read_interrupt()
  PCI: Add ACS quirks for NXP LX2xx0 and LX2xx2 platforms
  mfd: db8500-prcmu: Adjust map to reality
  dt-bindings: mtd: gpmc: Fix the ECC bytes vs. OOB bytes equation
  mm/memory_hotplug: use "unsigned long" for PFN in zone_for_pfn_range()
  net: hns3: fix the timing issue of VF clearing interrupt sources
  net: hns3: disable mac in flr process
  net: hns3: change affinity_mask to numa node range
  net: hns3: pad the short tunnel frame before sending to hardware
  KVM: PPC: Book3S HV: Tolerate treclaim. in fake-suspend mode changing registers
  ibmvnic: check failover_pending in login response
  dt-bindings: arm: Fix Toradex compatible typo
  qed: Handle management FW error
  tcp: fix tp->undo_retrans accounting in tcp_sacktag_one()
  net: dsa: destroy the phylink instance on any error in dsa_slave_phy_setup
  net/af_unix: fix a data-race in unix_dgram_poll
  vhost_net: fix OoB on sendmsg() failure.
  events: Reuse value read using READ_ONCE instead of re-reading it
  net/mlx5: Fix potential sleeping in atomic context
  net/mlx5: FWTrace, cancel work on alloc pd error flow
  perf machine: Initialize srcline string member in add_location struct
  tipc: increase timeout in tipc_sk_enqueue()
  r6040: Restore MDIO clock frequency after MAC reset
  net/l2tp: Fix reference count leak in l2tp_udp_recv_core
  dccp: don't duplicate ccid when cloning dccp sock
  ptp: dp83640: don't define PAGE0
  net-caif: avoid user-triggerable WARN_ON(1)
  tipc: fix an use-after-free issue in tipc_recvmsg
  x86/mm: Fix kern_addr_valid() to cope with existing but not present entries
  s390/sclp: fix Secure-IPL facility detection
  drm/etnaviv: add missing MMU context put when reaping MMU mapping
  drm/etnaviv: reference MMU context when setting up hardware state
  drm/etnaviv: fix MMU context leak on GPU reset
  drm/etnaviv: exec and MMU state is lost when resetting the GPU
  drm/etnaviv: keep MMU context across runtime suspend/resume
  drm/etnaviv: stop abusing mmu_context as FE running marker
  drm/etnaviv: put submit prev MMU context when it exists
  drm/etnaviv: return context from etnaviv_iommu_context_get
  drm/amd/amdgpu: Increase HWIP_MAX_INSTANCE to 10
  PCI: Add AMD GPU multi-function power dependencies
  PM: base: power: don't try to use non-existing RTC for storing data
  arm64/sve: Use correct size when reinitialising SVE state
  bnx2x: Fix enabling network interfaces without VFs
  xen: reset legacy rtc flag for PV domU
  btrfs: fix upper limit for max_inline for page size 64K
  drm/panfrost: Clamp lock region to Bifrost minimum
  drm/panfrost: Use u64 for size in lock_region
  drm/panfrost: Simplify lock_region calculation
  drm/amdgpu: Fix BUG_ON assert
  drm/msi/mdp4: populate priv->kms in mdp4_kms_init
  net: dsa: lantiq_gswip: fix maximum frame length
  lib/test_stackinit: Fix static initializer test
  platform/chrome: cros_ec_proto: Send command again when timeout occurs
  memcg: enable accounting for pids in nested pid namespaces
  mm,vmscan: fix divide by zero in get_scan_count
  mm/hugetlb: initialize hugetlb_usage in mm_init
  s390/pv: fix the forcing of the swiotlb
  cpufreq: powernv: Fix init_chip_info initialization in numa=off
  scsi: qla2xxx: Sync queue idx with queue_pair_map idx
  scsi: qla2xxx: Changes to support kdump kernel
  scsi: BusLogic: Fix missing pr_cont() use
  ovl: fix BUG_ON() in may_delete() when called from ovl_cleanup()
  parisc: fix crash with signals and alloca
  net: w5100: check return value after calling platform_get_resource()
  fix array-index-out-of-bounds in taprio_change
  net: fix NULL pointer reference in cipso_v4_doi_free
  ath9k: fix sleeping in atomic context
  ath9k: fix OOB read ar9300_eeprom_restore_internal
  parport: remove non-zero check on count
  net/mlx5: DR, Enable QP retransmission
  iwlwifi: mvm: fix access to BSS elements
  iwlwifi: mvm: avoid static queue number aliasing
  iwlwifi: mvm: fix a memory leak in iwl_mvm_mac_ctxt_beacon_changed
  drm/amdkfd: Account for SH/SE count when setting up cu masks.
  ASoC: rockchip: i2s: Fixup config for DAIFMT_DSP_A/B
  ASoC: rockchip: i2s: Fix regmap_ops hang
  usbip:vhci_hcd USB port can get stuck in the disabled state
  usbip: give back URBs for unsent unlink requests during cleanup
  usb: musb: musb_dsps: request_irq() after initializing musb
  Revert "USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set"
  cifs: fix wrong release in sess_alloc_buffer() failed path
  mmc: core: Return correct emmc response in case of ioctl error
  selftests/bpf: Enlarge select() timeout for test_maps
  mmc: rtsx_pci: Fix long reads when clock is prescaled
  mmc: sdhci-of-arasan: Check return value of non-void funtions
  of: Don't allow __of_attached_node_sysfs() without CONFIG_SYSFS
  ASoC: Intel: Skylake: Fix passing loadable flag for module
  ASoC: Intel: Skylake: Fix module configuration for KPB and MIXER
  btrfs: tree-log: check btrfs_lookup_data_extent return value
  m68knommu: only set CONFIG_ISA_DMA_API for ColdFire sub-arch
  drm/exynos: Always initialize mapping in exynos_drm_register_dma()
  lockd: lockd server-side shouldn't set fl_ops
  usb: chipidea: host: fix port index underflow and UBSAN complains
  gfs2: Don't call dlm after protocol is unmounted
  staging: rts5208: Fix get_ms_information() heap buffer size
  rpc: fix gss_svc_init cleanup on failure
  tcp: enable data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD
  serial: sh-sci: fix break handling for sysrq
  opp: Don't print an error if required-opps is missing
  Bluetooth: Fix handling of LE Enhanced Connection Complete
  nvme-tcp: don't check blk_mq_tag_to_rq when receiving pdu data
  arm64: dts: ls1046a: fix eeprom entries
  arm64: tegra: Fix compatible string for Tegra132 CPUs
  ARM: tegra: tamonten: Fix UART pad setting
  mac80211: Fix monitor MTU limit so that A-MSDUs get through
  drm/display: fix possible null-pointer dereference in dcn10_set_clock()
  gpu: drm: amd: amdgpu: amdgpu_i2c: fix possible uninitialized-variable access in amdgpu_i2c_router_select_ddc_port()
  net/mlx5: Fix variable type to match 64bit
  Bluetooth: avoid circular locks in sco_sock_connect
  Bluetooth: schedule SCO timeouts with delayed_work
  selftests/bpf: Fix xdp_tx.c prog section name
  drm/msm: mdp4: drop vblank get/put from prepare/complete_commit
  net: ethernet: stmmac: Do not use unreachable() in ipq806x_gmac_probe()
  arm64: dts: qcom: sdm660: use reg value for memory node
  ARM: dts: imx53-ppd: Fix ACHC entry
  media: tegra-cec: Handle errors of clk_prepare_enable()
  media: TDA1997x: fix tda1997x_query_dv_timings() return value
  media: v4l2-dv-timings.c: fix wrong condition in two for-loops
  media: imx258: Limit the max analogue gain to 480
  media: imx258: Rectify mismatch of VTS value
  ASoC: Intel: bytcr_rt5640: Move "Platform Clock" routes to the maps for the matching in-/output
  arm64: tegra: Fix Tegra194 PCIe EP compatible string
  bonding: 3ad: fix the concurrency between __bond_release_one() and bond_3ad_state_machine_handler()
  workqueue: Fix possible memory leaks in wq_numa_init()
  Bluetooth: skip invalid hci_sync_conn_complete_evt
  ata: sata_dwc_460ex: No need to call phy_exit() befre phy_init()
  samples: bpf: Fix tracex7 error raised on the missing argument
  staging: ks7010: Fix the initialization of the 'sleep_status' structure
  serial: 8250_pci: make setup_port() parameters explicitly unsigned
  hvsi: don't panic on tty_register_driver failure
  xtensa: ISS: don't panic in rs_init
  serial: 8250: Define RX trigger levels for OxSemi 950 devices
  s390: make PCI mio support a machine flag
  s390/jump_label: print real address in a case of a jump label bug
  flow_dissector: Fix out-of-bounds warnings
  ipv4: ip_output.c: Fix out-of-bounds warning in ip_copy_addrs()
  video: fbdev: riva: Error out if 'pixclock' equals zero
  video: fbdev: kyro: Error out if 'pixclock' equals zero
  video: fbdev: asiliantfb: Error out if 'pixclock' equals zero
  bpf/tests: Do not PASS tests without actually testing the result
  bpf/tests: Fix copy-and-paste error in double word test
  drm/amd/amdgpu: Update debugfs link_settings output link_rate field in hex
  drm/amd/display: Fix timer_per_pixel unit error
  tty: serial: jsm: hold port lock when reporting modem line changes
  staging: board: Fix uninitialized spinlock when attaching genpd
  usb: gadget: composite: Allow bMaxPower=0 if self-powered
  USB: EHCI: ehci-mv: improve error handling in mv_ehci_enable()
  usb: gadget: u_ether: fix a potential null pointer dereference
  usb: host: fotg210: fix the actual_length of an iso packet
  usb: host: fotg210: fix the endpoint's transactional opportunities calculation
  igc: Check if num of q_vectors is smaller than max before array access
  drm: avoid blocking in drm_clients_info's rcu section
  Smack: Fix wrong semantics in smk_access_entry()
  netlink: Deal with ESRCH error in nlmsg_notify()
  video: fbdev: kyro: fix a DoS bug by restricting user input
  ARM: dts: qcom: apq8064: correct clock names
  iavf: fix locking of critical sections
  iavf: do not override the adapter state in the watchdog task
  iio: dac: ad5624r: Fix incorrect handling of an optional regulator.
  tipc: keep the skb in rcv queue until the whole data is read
  PCI: Use pci_update_current_state() in pci_enable_device_flags()
  crypto: mxs-dcp - Use sg_mapping_iter to copy data
  media: dib8000: rewrite the init prbs logic
  ASoC: atmel: ATMEL drivers don't need HAS_DMA
  drm/amdgpu: Fix amdgpu_ras_eeprom_init()
  userfaultfd: prevent concurrent API initialization
  kbuild: Fix 'no symbols' warning when CONFIG_TRIM_UNUSD_KSYMS=y
  MIPS: Malta: fix alignment of the devicetree buffer
  f2fs: fix to unmap pages from userspace process in punch_hole()
  f2fs: fix unexpected ENOENT comes from f2fs_map_blocks()
  f2fs: fix to account missing .skipped_gc_rwsem
  KVM: PPC: Fix clearing never mapped TCEs in realmode
  clk: at91: clk-generated: Limit the requested rate to our range
  clk: at91: clk-generated: pass the id of changeable parent at registration
  clk: at91: sam9x60: Don't use audio PLL
  fscache: Fix cookie key hashing
  platform/x86: dell-smbios-wmi: Add missing kfree in error-exit from run_smbios_call
  KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live
  HID: i2c-hid: Fix Elan touchpad regression
  scsi: target: avoid per-loop XCOPY buffer allocations
  powerpc/config: Renable MTD_PHYSMAP_OF
  scsi: qedf: Fix error codes in qedf_alloc_global_queues()
  scsi: qedi: Fix error codes in qedi_alloc_global_queues()
  scsi: smartpqi: Fix an error code in pqi_get_raid_map()
  pinctrl: single: Fix error return code in pcs_parse_bits_in_pinctrl_entry()
  scsi: fdomain: Fix error return code in fdomain_probe()
  SUNRPC: Fix potential memory corruption
  dma-debug: fix debugfs initialization order
  openrisc: don't printk() unconditionally
  f2fs: reduce the scope of setting fsck tag when de->name_len is zero
  f2fs: show f2fs instance in printk_ratelimited
  RDMA/efa: Remove double QP type assignment
  powerpc/stacktrace: Include linux/delay.h
  vfio: Use config not menuconfig for VFIO_NOIOMMU
  pinctrl: samsung: Fix pinctrl bank pin count
  docs: Fix infiniband uverbs minor number
  RDMA/iwcm: Release resources if iw_cm module initialization fails
  IB/hfi1: Adjust pkey entry in index 0
  scsi: bsg: Remove support for SCSI_IOCTL_SEND_COMMAND
  f2fs: quota: fix potential deadlock
  HID: input: do not report stylus battery state as "full"
  PCI: aardvark: Fix masking and unmasking legacy INTx interrupts
  PCI: aardvark: Increase polling delay to 1.5s while waiting for PIO response
  PCI: aardvark: Fix checking for PIO status
  PCI: xilinx-nwl: Enable the clock through CCF
  PCI: Return ~0 data on pciconfig_read() CAP_SYS_ADMIN failure
  PCI: Restrict ASMedia ASM1062 SATA Max Payload Size Supported
  PCI/portdrv: Enable Bandwidth Notification only if port supports it
  ARM: 9105/1: atags_to_fdt: don't warn about stack size
  libata: add ATA_HORKAGE_NO_NCQ_TRIM for Samsung 860 and 870 SSDs
  dmaengine: imx-sdma: remove duplicated sdma_load_context
  Revert "dmaengine: imx-sdma: refine to load context only once"
  media: rc-loopback: return number of emitters rather than error
  media: uvc: don't do DMA on stack
  VMCI: fix NULL pointer dereference when unmapping queue pair
  dm crypt: Avoid percpu_counter spinlock contention in crypt_page_alloc()
  power: supply: max17042: handle fails of reading status register
  block: bfq: fix bfq_set_next_ioprio_data()
  crypto: public_key: fix overflow during implicit conversion
  arm64: head: avoid over-mapping in map_memory
  soc: aspeed: p2a-ctrl: Fix boundary check for mmap
  soc: aspeed: lpc-ctrl: Fix boundary check for mmap
  soc: qcom: aoss: Fix the out of bound usage of cooling_devs
  pinctrl: ingenic: Fix incorrect pull up/down info
  pinctrl: stmfx: Fix hazardous u8[] to unsigned long cast
  tools/thermal/tmon: Add cross compiling support
  9p/xen: Fix end of loop tests for list_for_each_entry
  include/linux/list.h: add a macro to test if entry is pointing to the head
  xen: fix setting of max_pfn in shared_info
  powerpc/perf/hv-gpci: Fix counter value parsing
  PCI/MSI: Skip masking MSI-X on Xen PV
  blk-zoned: allow BLKREPORTZONE without CAP_SYS_ADMIN
  blk-zoned: allow zone management send operations without CAP_SYS_ADMIN
  btrfs: reset replace target device to allocation state on close
  btrfs: wake up async_delalloc_pages waiters after submit
  rtc: tps65910: Correct driver module alias

 Conflicts:
	Documentation/devicetree/bindings
	Documentation/devicetree/bindings/arm/tegra.yaml
	Documentation/devicetree/bindings/mtd/gpmc-nand.txt
	Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
	kernel/sched/cpufreq_schedutil.c

Change-Id: Id17c4366cdc6854cd23fba0f41d335b09fc6100e
Signed-off-by: Srinivasarao Pathipati <quic_spathi@quicinc.com>
2022-02-07 22:29:21 +05:30

1867 lines
49 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/memory_hotplug.c
*
* Copyright (C)
*/
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memremap.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
#include <linux/stop_machine.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/compaction.h>
#include <linux/rmap.h>
#include <asm/tlbflush.h>
#include "internal.h"
#include "shuffle.h"
/*
* online_page_callback contains pointer to current page onlining function.
* Initially it is generic_online_page(). If it is required it could be
* changed by calling set_online_page_callback() for callback registration
* and restore_online_page_callback() for generic callback restore.
*/
static void generic_online_page(struct page *page, unsigned int order);
static online_page_callback_t online_page_callback = generic_online_page;
static DEFINE_MUTEX(online_page_callback_lock);
DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
void get_online_mems(void)
{
percpu_down_read(&mem_hotplug_lock);
}
void put_online_mems(void)
{
percpu_up_read(&mem_hotplug_lock);
}
bool movable_node_enabled = IS_ENABLED(CONFIG_MEMORY_HOTPLUG_MOVABLE_NODE);
#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
bool memhp_auto_online;
#else
bool memhp_auto_online = true;
#endif
EXPORT_SYMBOL_GPL(memhp_auto_online);
static int __init setup_memhp_default_state(char *str)
{
if (!strcmp(str, "online"))
memhp_auto_online = true;
else if (!strcmp(str, "offline"))
memhp_auto_online = false;
return 1;
}
__setup("memhp_default_state=", setup_memhp_default_state);
void mem_hotplug_begin(void)
{
cpus_read_lock();
percpu_down_write(&mem_hotplug_lock);
}
void mem_hotplug_done(void)
{
percpu_up_write(&mem_hotplug_lock);
cpus_read_unlock();
}
u64 max_mem_size = U64_MAX;
/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
{
struct resource *res;
unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
char *resource_name = "System RAM";
if (start + size > max_mem_size)
return ERR_PTR(-E2BIG);
/*
* Request ownership of the new memory range. This might be
* a child of an existing resource that was present but
* not marked as busy.
*/
res = __request_region(&iomem_resource, start, size,
resource_name, flags);
if (!res) {
pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
start, start + size);
return ERR_PTR(-EEXIST);
}
return res;
}
static void release_memory_resource(struct resource *res)
{
if (!res)
return;
release_resource(res);
kfree(res);
}
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type)
{
page->freelist = (void *)type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
}
void put_page_bootmem(struct page *page)
{
unsigned long type;
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
page->freelist = NULL;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
free_reserved_page(page);
}
}
#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
#ifndef CONFIG_SPARSEMEM_VMEMMAP
static void register_page_bootmem_info_section(unsigned long start_pfn)
{
unsigned long mapsize, section_nr, i;
struct mem_section *ms;
struct page *page, *memmap;
struct mem_section_usage *usage;
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
/* Get section's memmap address */
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
/*
* Get page for the memmap's phys address
* XXX: need more consideration for sparse_vmemmap...
*/
page = virt_to_page(memmap);
mapsize = sizeof(struct page) * PAGES_PER_SECTION;
mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
/* remember memmap's page */
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, SECTION_INFO);
usage = ms->usage;
page = virt_to_page(usage);
mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
}
#else /* CONFIG_SPARSEMEM_VMEMMAP */
static void register_page_bootmem_info_section(unsigned long start_pfn)
{
unsigned long mapsize, section_nr, i;
struct mem_section *ms;
struct page *page, *memmap;
struct mem_section_usage *usage;
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
usage = ms->usage;
page = virt_to_page(usage);
mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
{
unsigned long i, pfn, end_pfn, nr_pages;
int node = pgdat->node_id;
struct page *page;
nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
page = virt_to_page(pgdat);
for (i = 0; i < nr_pages; i++, page++)
get_page_bootmem(node, page, NODE_INFO);
pfn = pgdat->node_start_pfn;
end_pfn = pgdat_end_pfn(pgdat);
/* register section info */
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
/*
* Some platforms can assign the same pfn to multiple nodes - on
* node0 as well as nodeN. To avoid registering a pfn against
* multiple nodes we check that this pfn does not already
* reside in some other nodes.
*/
if (pfn_valid(pfn) && (early_pfn_to_nid(pfn) == node))
register_page_bootmem_info_section(pfn);
}
}
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
const char *reason)
{
/*
* Disallow all operations smaller than a sub-section and only
* allow operations smaller than a section for
* SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range()
* enforces a larger memory_block_size_bytes() granularity for
* memory that will be marked online, so this check should only
* fire for direct arch_{add,remove}_memory() users outside of
* add_memory_resource().
*/
unsigned long min_align;
if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
min_align = PAGES_PER_SUBSECTION;
else
min_align = PAGES_PER_SECTION;
if (!IS_ALIGNED(pfn, min_align)
|| !IS_ALIGNED(nr_pages, min_align)) {
WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n",
reason, pfn, pfn + nr_pages - 1);
return -EINVAL;
}
return 0;
}
/*
* Reasonably generic function for adding memory. It is
* expected that archs that support memory hotplug will
* call this function after deciding the zone to which to
* add the new pages.
*/
int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
struct mhp_restrictions *restrictions)
{
int err;
unsigned long nr, start_sec, end_sec;
struct vmem_altmap *altmap = restrictions->altmap;
if (altmap) {
/*
* Validate altmap is within bounds of the total request
*/
if (altmap->base_pfn != pfn
|| vmem_altmap_offset(altmap) > nr_pages) {
pr_warn_once("memory add fail, invalid altmap\n");
return -EINVAL;
}
altmap->alloc = 0;
}
err = check_pfn_span(pfn, nr_pages, "add");
if (err)
return err;
start_sec = pfn_to_section_nr(pfn);
end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
for (nr = start_sec; nr <= end_sec; nr++) {
unsigned long pfns;
pfns = min(nr_pages, PAGES_PER_SECTION
- (pfn & ~PAGE_SECTION_MASK));
err = sparse_add_section(nid, pfn, pfns, altmap);
if (err)
break;
pfn += pfns;
nr_pages -= pfns;
cond_resched();
}
vmemmap_populate_print_last();
return err;
}
/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn)
{
for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) {
if (unlikely(!pfn_to_online_page(start_pfn)))
continue;
if (unlikely(pfn_to_nid(start_pfn) != nid))
continue;
if (zone && zone != page_zone(pfn_to_page(start_pfn)))
continue;
return start_pfn;
}
return 0;
}
/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long pfn;
/* pfn is the end pfn of a memory section. */
pfn = end_pfn - 1;
for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) {
if (unlikely(!pfn_to_online_page(pfn)))
continue;
if (unlikely(pfn_to_nid(pfn) != nid))
continue;
if (zone && zone != page_zone(pfn_to_page(pfn)))
continue;
return pfn;
}
return 0;
}
static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long zone_start_pfn = zone->zone_start_pfn;
unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
unsigned long zone_end_pfn = z;
unsigned long pfn;
int nid = zone_to_nid(zone);
zone_span_writelock(zone);
if (zone_start_pfn == start_pfn) {
/*
* If the section is smallest section in the zone, it need
* shrink zone->zone_start_pfn and zone->zone_spanned_pages.
* In this case, we find second smallest valid mem_section
* for shrinking zone.
*/
pfn = find_smallest_section_pfn(nid, zone, end_pfn,
zone_end_pfn);
if (pfn) {
zone->zone_start_pfn = pfn;
zone->spanned_pages = zone_end_pfn - pfn;
}
} else if (zone_end_pfn == end_pfn) {
/*
* If the section is biggest section in the zone, it need
* shrink zone->spanned_pages.
* In this case, we find second biggest valid mem_section for
* shrinking zone.
*/
pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
start_pfn);
if (pfn)
zone->spanned_pages = pfn - zone_start_pfn + 1;
}
/*
* The section is not biggest or smallest mem_section in the zone, it
* only creates a hole in the zone. So in this case, we need not
* change the zone. But perhaps, the zone has only hole data. Thus
* it check the zone has only hole or not.
*/
pfn = zone_start_pfn;
for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) {
if (unlikely(!pfn_to_online_page(pfn)))
continue;
if (page_zone(pfn_to_page(pfn)) != zone)
continue;
/* Skip range to be removed */
if (pfn >= start_pfn && pfn < end_pfn)
continue;
/* If we find valid section, we have nothing to do */
zone_span_writeunlock(zone);
return;
}
/* The zone has no valid section */
zone->zone_start_pfn = 0;
zone->spanned_pages = 0;
zone_span_writeunlock(zone);
}
static void update_pgdat_span(struct pglist_data *pgdat)
{
unsigned long node_start_pfn = 0, node_end_pfn = 0;
struct zone *zone;
for (zone = pgdat->node_zones;
zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
unsigned long zone_end_pfn = zone->zone_start_pfn +
zone->spanned_pages;
/* No need to lock the zones, they can't change. */
if (!zone->spanned_pages)
continue;
if (!node_end_pfn) {
node_start_pfn = zone->zone_start_pfn;
node_end_pfn = zone_end_pfn;
continue;
}
if (zone_end_pfn > node_end_pfn)
node_end_pfn = zone_end_pfn;
if (zone->zone_start_pfn < node_start_pfn)
node_start_pfn = zone->zone_start_pfn;
}
pgdat->node_start_pfn = node_start_pfn;
pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
void __ref remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
#ifdef CONFIG_ZONE_DEVICE
/*
* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
* we will not try to shrink the zones - which is okay as
* set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
*/
if (zone_idx(zone) == ZONE_DEVICE)
return;
#endif
clear_zone_contiguous(zone);
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
set_zone_contiguous(zone);
}
static void __remove_section(unsigned long pfn, unsigned long nr_pages,
unsigned long map_offset,
struct vmem_altmap *altmap)
{
struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
if (WARN_ON_ONCE(!valid_section(ms)))
return;
sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
}
/**
* __remove_pages() - remove sections of pages
* @pfn: starting pageframe (must be aligned to start of a section)
* @nr_pages: number of pages to remove (must be multiple of section size)
* @altmap: alternative device page map or %NULL if default memmap is used
*
* Generic helper function to remove section mappings and sysfs entries
* for the section of the memory we are removing. Caller needs to make
* sure that pages are marked reserved and zones are adjust properly by
* calling offline_pages().
*/
void __remove_pages(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
unsigned long map_offset = 0;
unsigned long nr, start_sec, end_sec;
map_offset = vmem_altmap_offset(altmap);
if (check_pfn_span(pfn, nr_pages, "remove"))
return;
start_sec = pfn_to_section_nr(pfn);
end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
for (nr = start_sec; nr <= end_sec; nr++) {
unsigned long pfns;
cond_resched();
pfns = min(nr_pages, PAGES_PER_SECTION
- (pfn & ~PAGE_SECTION_MASK));
__remove_section(pfn, pfns, map_offset, altmap);
pfn += pfns;
nr_pages -= pfns;
map_offset = 0;
}
}
int set_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
get_online_mems();
mutex_lock(&online_page_callback_lock);
if (online_page_callback == generic_online_page) {
online_page_callback = callback;
rc = 0;
}
mutex_unlock(&online_page_callback_lock);
put_online_mems();
return rc;
}
EXPORT_SYMBOL_GPL(set_online_page_callback);
int restore_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
get_online_mems();
mutex_lock(&online_page_callback_lock);
if (online_page_callback == callback) {
online_page_callback = generic_online_page;
rc = 0;
}
mutex_unlock(&online_page_callback_lock);
put_online_mems();
return rc;
}
EXPORT_SYMBOL_GPL(restore_online_page_callback);
void __online_page_set_limits(struct page *page)
{
}
EXPORT_SYMBOL_GPL(__online_page_set_limits);
void __online_page_increment_counters(struct page *page)
{
adjust_managed_page_count(page, 1);
}
EXPORT_SYMBOL_GPL(__online_page_increment_counters);
void __online_page_free(struct page *page)
{
__free_reserved_page(page);
}
EXPORT_SYMBOL_GPL(__online_page_free);
static void generic_online_page(struct page *page, unsigned int order)
{
/*
* Freeing the page with debug_pagealloc enabled will try to unmap it,
* so we should map it first. This is better than introducing a special
* case in page freeing fast path.
*/
if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 1);
__free_pages_core(page, order);
totalram_pages_add(1UL << order);
#ifdef CONFIG_HIGHMEM
if (PageHighMem(page))
totalhigh_pages_add(1UL << order);
#endif
}
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn;
int order;
/*
* Online the pages. The callback might decide to keep some pages
* PG_reserved (to add them to the buddy later), but we still account
* them as being online/belonging to this zone ("present").
*/
for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
/* __free_pages_core() wants pfns to be aligned to the order */
if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
order = 0;
(*online_page_callback)(pfn_to_page(pfn), order);
}
/* mark all involved sections as online */
online_mem_sections(start_pfn, end_pfn);
*(unsigned long *)arg += nr_pages;
return 0;
}
/* check which state of node_states will be changed when online memory */
static void node_states_check_changes_online(unsigned long nr_pages,
struct zone *zone, struct memory_notify *arg)
{
int nid = zone_to_nid(zone);
arg->status_change_nid = NUMA_NO_NODE;
arg->status_change_nid_normal = NUMA_NO_NODE;
arg->status_change_nid_high = NUMA_NO_NODE;
if (!node_state(nid, N_MEMORY))
arg->status_change_nid = nid;
if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY))
arg->status_change_nid_normal = nid;
#ifdef CONFIG_HIGHMEM
if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY))
arg->status_change_nid_high = nid;
#endif
}
static void node_states_set_node(int node, struct memory_notify *arg)
{
if (arg->status_change_nid_normal >= 0)
node_set_state(node, N_NORMAL_MEMORY);
if (arg->status_change_nid_high >= 0)
node_set_state(node, N_HIGH_MEMORY);
if (arg->status_change_nid >= 0)
node_set_state(node, N_MEMORY);
}
static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long old_end_pfn = zone_end_pfn(zone);
if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
zone->zone_start_pfn = start_pfn;
zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
}
static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
pgdat->node_start_pfn = start_pfn;
pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
}
/*
* Associate the pfn range with the given zone, initializing the memmaps
* and resizing the pgdat/zone data to span the added pages. After this
* call, all affected pages are PG_reserved.
*/
void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
unsigned long flags;
clear_zone_contiguous(zone);
/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
pgdat_resize_lock(pgdat, &flags);
zone_span_writelock(zone);
if (zone_is_empty(zone))
init_currently_empty_zone(zone, start_pfn, nr_pages);
resize_zone_range(zone, start_pfn, nr_pages);
zone_span_writeunlock(zone);
resize_pgdat_range(pgdat, start_pfn, nr_pages);
pgdat_resize_unlock(pgdat, &flags);
/*
* TODO now we have a visible range of pages which are not associated
* with their zone properly. Not nice but set_pfnblock_flags_mask
* expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe
*/
memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
MEMINIT_HOTPLUG, altmap);
set_zone_contiguous(zone);
}
/*
* Returns a default kernel memory zone for the given pfn range.
* If no kernel zone covers this pfn range it will automatically go
* to the ZONE_NORMAL.
*/
static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
unsigned long nr_pages)
{
struct pglist_data *pgdat = NODE_DATA(nid);
int zid;
for (zid = 0; zid <= ZONE_NORMAL; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
if (zone_intersects(zone, start_pfn, nr_pages))
return zone;
}
return &pgdat->node_zones[ZONE_NORMAL];
}
static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
unsigned long nr_pages)
{
struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
nr_pages);
struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
/*
* We inherit the existing zone in a simple case where zones do not
* overlap in the given range
*/
if (in_kernel ^ in_movable)
return (in_kernel) ? kernel_zone : movable_zone;
/*
* If the range doesn't belong to any zone or two zones overlap in the
* given range then we use movable zone only if movable_node is
* enabled because we always online to a kernel zone by default.
*/
return movable_node_enabled ? movable_zone : kernel_zone;
}
struct zone *zone_for_pfn_range(int online_type, int nid,
unsigned long start_pfn, unsigned long nr_pages)
{
if (online_type == MMOP_ONLINE_KERNEL)
return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
if (online_type == MMOP_ONLINE_MOVABLE)
return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
return default_zone_for_pfn(nid, start_pfn, nr_pages);
}
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
{
unsigned long flags;
unsigned long onlined_pages = 0;
struct zone *zone;
int need_zonelists_rebuild = 0;
int nid;
int ret;
struct memory_notify arg;
struct memory_block *mem;
mem_hotplug_begin();
/*
* We can't use pfn_to_nid() because nid might be stored in struct page
* which is not yet initialized. Instead, we find nid from memory block.
*/
mem = find_memory_block(__pfn_to_section(pfn));
nid = mem->nid;
put_device(&mem->dev);
/* associate pfn range with the zone */
zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
node_states_check_changes_online(nr_pages, zone, &arg);
ret = memory_notify(MEM_GOING_ONLINE, &arg);
ret = notifier_to_errno(ret);
if (ret)
goto failed_addition;
/*
* If this zone is not populated, then it is not in zonelist.
* This means the page allocator ignores this zone.
* So, zonelist must be updated after online.
*/
if (!populated_zone(zone)) {
need_zonelists_rebuild = 1;
setup_zone_pageset(zone);
}
ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
online_pages_range);
if (ret) {
/* not a single memory resource was applicable */
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
goto failed_addition;
}
zone->present_pages += onlined_pages;
pgdat_resize_lock(zone->zone_pgdat, &flags);
zone->zone_pgdat->node_present_pages += onlined_pages;
pgdat_resize_unlock(zone->zone_pgdat, &flags);
shuffle_zone(zone);
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
zone_pcp_update(zone);
init_per_zone_wmark_min();
kswapd_run(nid);
kcompactd_run(nid);
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
memory_notify(MEM_ONLINE, &arg);
mem_hotplug_done();
return 0;
failed_addition:
pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
remove_pfn_range_from_zone(zone, pfn, nr_pages);
mem_hotplug_done();
return ret;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
static void reset_node_present_pages(pg_data_t *pgdat)
{
struct zone *z;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->present_pages = 0;
pgdat->node_present_pages = 0;
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
struct pglist_data *pgdat;
unsigned long start_pfn = PFN_DOWN(start);
pgdat = NODE_DATA(nid);
if (!pgdat) {
pgdat = arch_alloc_nodedata(nid);
if (!pgdat)
return NULL;
pgdat->per_cpu_nodestats =
alloc_percpu(struct per_cpu_nodestat);
arch_refresh_nodedata(nid, pgdat);
} else {
int cpu;
/*
* Reset the nr_zones, order and classzone_idx before reuse.
* Note that kswapd will init kswapd_classzone_idx properly
* when it starts in the near future.
*/
pgdat->nr_zones = 0;
pgdat->kswapd_order = 0;
pgdat->kswapd_classzone_idx = 0;
for_each_online_cpu(cpu) {
struct per_cpu_nodestat *p;
p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
memset(p, 0, sizeof(*p));
}
}
/* we can use NODE_DATA(nid) from here */
pgdat->node_id = nid;
pgdat->node_start_pfn = start_pfn;
/* init node's zones as empty zones, we don't have any present pages.*/
free_area_init_core_hotplug(nid);
/*
* The node we allocated has no zone fallback lists. For avoiding
* to access not-initialized zonelist, build here.
*/
build_all_zonelists(pgdat);
/*
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
*/
reset_node_managed_pages(pgdat);
reset_node_present_pages(pgdat);
return pgdat;
}
static void rollback_node_hotadd(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
arch_refresh_nodedata(nid, NULL);
free_percpu(pgdat->per_cpu_nodestats);
arch_free_nodedata(pgdat);
}
/**
* try_online_node - online a node if offlined
* @nid: the node ID
* @start: start addr of the node
* @set_node_online: Whether we want to online the node
* called by cpu_up() to online a node without onlined memory.
*
* Returns:
* 1 -> a new node has been allocated
* 0 -> the node is already online
* -ENOMEM -> the node could not be allocated
*/
static int __try_online_node(int nid, u64 start, bool set_node_online)
{
pg_data_t *pgdat;
int ret = 1;
if (node_online(nid))
return 0;
pgdat = hotadd_new_pgdat(nid, start);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
ret = -ENOMEM;
goto out;
}
if (set_node_online) {
node_set_online(nid);
ret = register_one_node(nid);
BUG_ON(ret);
}
out:
return ret;
}
/*
* Users of this function always want to online/register the node
*/
int try_online_node(int nid)
{
int ret;
mem_hotplug_begin();
ret = __try_online_node(nid, 0, true);
mem_hotplug_done();
return ret;
}
static int online_memory_one_block(struct memory_block *mem, void *arg)
{
bool *onlined_block = (bool *)arg;
int ret;
if (*onlined_block || !is_memblock_offlined(mem))
return 0;
ret = device_online(&mem->dev);
if (!ret)
*onlined_block = true;
return 0;
}
bool try_online_one_block(int nid)
{
bool onlined_block = false;
if (!trylock_device_hotplug())
return false;
for_each_memory_block(&onlined_block, online_memory_one_block);
unlock_device_hotplug();
return onlined_block;
}
static int check_hotplug_memory_range(u64 start, u64 size)
{
/* memory range must be block size aligned */
if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) ||
!IS_ALIGNED(size, memory_block_size_bytes())) {
pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx",
memory_block_size_bytes(), start, size);
return -EINVAL;
}
return 0;
}
static int online_memory_block(struct memory_block *mem, void *arg)
{
return device_online(&mem->dev);
}
/*
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations (triggered e.g. by sysfs).
*
* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
*/
int __ref add_memory_resource(int nid, struct resource *res)
{
struct mhp_restrictions restrictions = {};
u64 start, size;
bool new_node = false;
int ret;
start = res->start;
size = resource_size(res);
ret = check_hotplug_memory_range(start, size);
if (ret)
return ret;
mem_hotplug_begin();
/*
* Add new range to memblock so that when hotadd_new_pgdat() is called
* to allocate new pgdat, get_pfn_range_for_nid() will be able to find
* this new range and calculate total pages correctly. The range will
* be removed at hot-remove time.
*/
memblock_add_node(start, size, nid);
ret = __try_online_node(nid, start, false);
if (ret < 0)
goto error;
new_node = ret;
/* call arch's memory hotadd */
ret = arch_add_memory(nid, start, size, &restrictions);
if (ret < 0)
goto error;
/* create memory block devices after memory was added */
ret = create_memory_block_devices(start, size);
if (ret) {
arch_remove_memory(nid, start, size, NULL);
goto error;
}
if (new_node) {
/* If sysfs file of new node can't be created, cpu on the node
* can't be hot-added. There is no rollback way now.
* So, check by BUG_ON() to catch it reluctantly..
* We online node here. We can't roll back from here.
*/
node_set_online(nid);
ret = __register_one_node(nid);
BUG_ON(ret);
}
/* link memory sections under this node.*/
ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
MEMINIT_HOTPLUG);
BUG_ON(ret);
/* create new memmap entry */
firmware_map_add_hotplug(start, start + size, "System RAM");
/* device_online() will take the lock when calling online_pages() */
mem_hotplug_done();
/* online pages if requested */
if (memhp_auto_online)
walk_memory_blocks(start, size, NULL, online_memory_block);
return ret;
error:
/* rollback pgdat allocation and others */
if (new_node)
rollback_node_hotadd(nid);
memblock_remove(start, size);
mem_hotplug_done();
return ret;
}
/* requires device_hotplug_lock, see add_memory_resource() */
int __ref __add_memory(int nid, u64 start, u64 size)
{
struct resource *res;
int ret;
res = register_memory_resource(start, size);
if (IS_ERR(res))
return PTR_ERR(res);
ret = add_memory_resource(nid, res);
if (ret < 0)
release_memory_resource(res);
return ret;
}
int add_memory(int nid, u64 start, u64 size)
{
int rc;
lock_device_hotplug();
rc = __add_memory(nid, start, size);
unlock_device_hotplug();
return rc;
}
EXPORT_SYMBOL_GPL(add_memory);
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
* A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
* set and the size of the free page is given by page_order(). Using this,
* the function determines if the pageblock contains only free pages.
* Due to buddy contraints, a free page at least the size of a pageblock will
* be located at the start of the pageblock
*/
static inline int pageblock_free(struct page *page)
{
return PageBuddy(page) && page_order(page) >= pageblock_order;
}
/* Return the pfn of the start of the next active pageblock after a given pfn */
static unsigned long next_active_pageblock(unsigned long pfn)
{
struct page *page = pfn_to_page(pfn);
/* Ensure the starting page is pageblock-aligned */
BUG_ON(pfn & (pageblock_nr_pages - 1));
/* If the entire pageblock is free, move to the end of free page */
if (pageblock_free(page)) {
int order;
/* be careful. we don't have locks, page_order can be changed.*/
order = page_order(page);
if ((order < MAX_ORDER) && (order >= pageblock_order))
return pfn + (1 << order);
}
return pfn + pageblock_nr_pages;
}
static bool is_pageblock_removable_nolock(unsigned long pfn)
{
struct page *page = pfn_to_page(pfn);
struct zone *zone;
/*
* We have to be careful here because we are iterating over memory
* sections which are not zone aware so we might end up outside of
* the zone but still within the section.
* We have to take care about the node as well. If the node is offline
* its NODE_DATA will be NULL - see page_zone.
*/
if (!node_online(page_to_nid(page)))
return false;
zone = page_zone(page);
pfn = page_to_pfn(page);
if (!zone_spans_pfn(zone, pfn))
return false;
return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, SKIP_HWPOISON);
}
/* Checks if this range of memory is likely to be hot-removable. */
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
unsigned long end_pfn, pfn;
end_pfn = min(start_pfn + nr_pages,
zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
/* Check the starting page of each pageblock within the range */
for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
if (!is_pageblock_removable_nolock(pfn))
return false;
cond_resched();
}
/* All pageblocks in the memory block are likely to be hot-removable */
return true;
}
/*
* Confirm all pages in a range [start, end) belong to the same zone.
* When true, return its valid [start, end).
*/
int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
unsigned long *valid_start, unsigned long *valid_end)
{
unsigned long pfn, sec_end_pfn;
unsigned long start, end;
struct zone *zone = NULL;
struct page *page;
int i;
for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
pfn < end_pfn;
pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
/* Make sure the memory section is present first */
if (!present_section_nr(pfn_to_section_nr(pfn)))
continue;
for (; pfn < sec_end_pfn && pfn < end_pfn;
pfn += MAX_ORDER_NR_PAGES) {
i = 0;
/* This is just a CONFIG_HOLES_IN_ZONE check.*/
while ((i < MAX_ORDER_NR_PAGES) &&
!pfn_valid_within(pfn + i))
i++;
if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
continue;
/* Check if we got outside of the zone */
if (zone && !zone_spans_pfn(zone, pfn + i))
return 0;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
return 0;
if (!zone)
start = pfn + i;
zone = page_zone(page);
end = pfn + MAX_ORDER_NR_PAGES;
}
}
if (zone) {
*valid_start = start;
*valid_end = min(end, end_pfn);
return 1;
} else {
return 0;
}
}
/*
* Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
* non-lru movable pages and hugepages). We scan pfn because it's much
* easier than scanning over linked list. This function returns the pfn
* of the first found movable page if it's found, otherwise 0.
*/
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
unsigned long pfn;
for (pfn = start; pfn < end; pfn++) {
struct page *page, *head;
unsigned long skip;
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
if (PageLRU(page))
return pfn;
if (__PageMovable(page))
return pfn;
if (!PageHuge(page))
continue;
head = compound_head(page);
if (page_huge_active(head))
return pfn;
skip = compound_nr(head) - (page - head);
pfn += skip - 1;
}
return 0;
}
static struct page *new_node_page(struct page *page, unsigned long private)
{
int nid = page_to_nid(page);
nodemask_t nmask = node_states[N_MEMORY];
/*
* try to allocate from a different node but reuse this node if there
* are no other online nodes to be used (e.g. we are offlining a part
* of the only existing node)
*/
node_clear(nid, nmask);
if (nodes_empty(nmask))
node_set(nid, nmask);
return new_page_nodemask(page, nid, &nmask);
}
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
struct page *page;
int ret = 0;
LIST_HEAD(source);
static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
if (PageHuge(page)) {
struct page *head = compound_head(page);
pfn = page_to_pfn(head) + compound_nr(head) - 1;
isolate_huge_page(head, &source);
continue;
} else if (PageTransHuge(page))
pfn = page_to_pfn(compound_head(page))
+ hpage_nr_pages(page) - 1;
/*
* HWPoison pages have elevated reference counts so the migration would
* fail on them. It also doesn't make any sense to migrate them in the
* first place. Still try to unmap such a page in case it is still mapped
* (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
* the unmap as the catch all safety net).
*/
if (PageHWPoison(page)) {
if (WARN_ON(PageLRU(page)))
isolate_lru_page(page);
if (page_mapped(page))
try_to_unmap(page,
TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS,
NULL);
continue;
}
if (!get_page_unless_zero(page))
continue;
/*
* We can skip free pages. And we can deal with pages on
* LRU and non-lru movable pages.
*/
if (PageLRU(page))
ret = isolate_lru_page(page);
else
ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
if (!ret) { /* Success */
list_add_tail(&page->lru, &source);
if (!__PageMovable(page))
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
} else {
if (__ratelimit(&migrate_rs)) {
pr_warn("failed to isolate pfn %lx\n", pfn);
dump_page(page, "isolation failed");
}
}
put_page(page);
}
if (!list_empty(&source)) {
/* Allocate a new page from the nearest neighbor node */
ret = migrate_pages(&source, new_node_page, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret) {
list_for_each_entry(page, &source, lru) {
if (__ratelimit(&migrate_rs)) {
pr_warn("migrating pfn %lx failed ret:%d\n",
page_to_pfn(page), ret);
__dump_page(page, "migration failure");
#if defined(CONFIG_DEBUG_VM)
dump_page_owner(page);
#endif
}
}
putback_movable_pages(&source);
}
}
return ret;
}
/*
* remove from free_area[] and mark all as Reserved.
*/
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
void *data)
{
unsigned long *offlined_pages = (unsigned long *)data;
*offlined_pages += __offline_isolated_pages(start, start + nr_pages);
return 0;
}
/*
* Check all pages in range, recoreded as memory resource, are isolated.
*/
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
void *data)
{
return test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
}
static int __init cmdline_parse_movable_node(char *p)
{
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
movable_node_enabled = true;
#else
pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
#endif
return 0;
}
early_param("movable_node", cmdline_parse_movable_node);
/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
struct zone *zone, struct memory_notify *arg)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long present_pages = 0;
enum zone_type zt;
arg->status_change_nid = NUMA_NO_NODE;
arg->status_change_nid_normal = NUMA_NO_NODE;
arg->status_change_nid_high = NUMA_NO_NODE;
/*
* Check whether node_states[N_NORMAL_MEMORY] will be changed.
* If the memory to be offline is within the range
* [0..ZONE_NORMAL], and it is the last present memory there,
* the zones in that range will become empty after the offlining,
* thus we can determine that we need to clear the node from
* node_states[N_NORMAL_MEMORY].
*/
for (zt = 0; zt <= ZONE_NORMAL; zt++)
present_pages += pgdat->node_zones[zt].present_pages;
if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
arg->status_change_nid_normal = zone_to_nid(zone);
#ifdef CONFIG_HIGHMEM
/*
* node_states[N_HIGH_MEMORY] contains nodes which
* have normal memory or high memory.
* Here we add the present_pages belonging to ZONE_HIGHMEM.
* If the zone is within the range of [0..ZONE_HIGHMEM), and
* we determine that the zones in that range become empty,
* we need to clear the node for N_HIGH_MEMORY.
*/
present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages;
if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages)
arg->status_change_nid_high = zone_to_nid(zone);
#endif
/*
* We have accounted the pages from [0..ZONE_NORMAL), and
* in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM
* as well.
* Here we count the possible pages from ZONE_MOVABLE.
* If after having accounted all the pages, we see that the nr_pages
* to be offlined is over or equal to the accounted pages,
* we know that the node will become empty, and so, we can clear
* it for N_MEMORY as well.
*/
present_pages += pgdat->node_zones[ZONE_MOVABLE].present_pages;
if (nr_pages >= present_pages)
arg->status_change_nid = zone_to_nid(zone);
}
static void node_states_clear_node(int node, struct memory_notify *arg)
{
if (arg->status_change_nid_normal >= 0)
node_clear_state(node, N_NORMAL_MEMORY);
if (arg->status_change_nid_high >= 0)
node_clear_state(node, N_HIGH_MEMORY);
if (arg->status_change_nid >= 0)
node_clear_state(node, N_MEMORY);
}
static int __ref __offline_pages(unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long pfn, nr_pages;
unsigned long offlined_pages = 0;
int ret, node, nr_isolate_pageblock;
unsigned long flags;
unsigned long valid_start, valid_end;
struct zone *zone;
struct memory_notify arg;
char *reason;
mem_hotplug_begin();
/* This makes hotplug much easier...and readable.
we assume this for now. .*/
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) {
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
}
zone = page_zone(pfn_to_page(valid_start));
node = zone_to_nid(zone);
nr_pages = end_pfn - start_pfn;
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
if (ret < 0) {
reason = "failure to isolate range";
goto failed_removal;
}
nr_isolate_pageblock = ret;
arg.start_pfn = start_pfn;
arg.nr_pages = nr_pages;
node_states_check_changes_offline(nr_pages, zone, &arg);
ret = memory_notify(MEM_GOING_OFFLINE, &arg);
ret = notifier_to_errno(ret);
if (ret) {
reason = "notifier failure";
goto failed_removal_isolated;
}
do {
for (pfn = start_pfn; pfn;) {
if (signal_pending(current)) {
ret = -EINTR;
reason = "signal backoff";
goto failed_removal_isolated;
}
cond_resched();
lru_add_drain_all();
pfn = scan_movable_pages(pfn, end_pfn);
if (pfn) {
/*
* TODO: fatal migration failures should bail
* out
*/
do_migrate_range(pfn, end_pfn);
}
}
/*
* Dissolve free hugepages in the memory block before doing
* offlining actually in order to make hugetlbfs's object
* counting consistent.
*/
ret = dissolve_free_huge_pages(start_pfn, end_pfn);
if (ret) {
reason = "failure to dissolve huge pages";
goto failed_removal_isolated;
}
/* check again */
ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
NULL, check_pages_isolated_cb);
/*
* per-cpu pages are drained in start_isolate_page_range, but if
* there are still pages that are not free, make sure that we
* drain again, because when we isolated range we might
* have raced with another thread that was adding pages to pcp
* list.
*
* Forward progress should be still guaranteed because
* pages on the pcp list can only belong to MOVABLE_ZONE
* because has_unmovable_pages explicitly checks for
* PageBuddy on freed pages on other zones.
*/
if (ret)
drain_all_pages(zone);
} while (ret);
/* Ok, all of our target is isolated.
We cannot do rollback at this point. */
walk_system_ram_range(start_pfn, end_pfn - start_pfn,
&offlined_pages, offline_isolated_pages_cb);
pr_info("Offlined Pages %ld\n", offlined_pages);
/*
* Onlining will reset pagetype flags and makes migrate type
* MOVABLE, so just need to decrease the number of isolated
* pageblocks zone counter here.
*/
spin_lock_irqsave(&zone->lock, flags);
zone->nr_isolate_pageblock -= nr_isolate_pageblock;
spin_unlock_irqrestore(&zone->lock, flags);
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
zone->present_pages -= offlined_pages;
pgdat_resize_lock(zone->zone_pgdat, &flags);
zone->zone_pgdat->node_present_pages -= offlined_pages;
pgdat_resize_unlock(zone->zone_pgdat, &flags);
init_per_zone_wmark_min();
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
build_all_zonelists(NULL);
} else
zone_pcp_update(zone);
node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) {
kswapd_stop(node);
kcompactd_stop(node);
}
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
mem_hotplug_done();
return 0;
failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
/* pushback to free area */
mem_hotplug_done();
return ret;
}
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
{
return __offline_pages(start_pfn, start_pfn + nr_pages);
}
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{
int ret = !is_memblock_offlined(mem);
if (unlikely(ret)) {
phys_addr_t beginpa, endpa;
beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
endpa = beginpa + memory_block_size_bytes() - 1;
pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
&beginpa, &endpa);
return -EBUSY;
}
return 0;
}
static int check_cpu_on_node(pg_data_t *pgdat)
{
int cpu;
for_each_present_cpu(cpu) {
if (cpu_to_node(cpu) == pgdat->node_id)
/*
* the cpu on this node isn't removed, and we can't
* offline this node.
*/
return -EBUSY;
}
return 0;
}
static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
{
int nid = *(int *)arg;
/*
* If a memory block belongs to multiple nodes, the stored nid is not
* reliable. However, such blocks are always online (e.g., cannot get
* offlined) and, therefore, are still spanned by the node.
*/
return mem->nid == nid ? -EEXIST : 0;
}
/**
* try_offline_node
* @nid: the node ID
*
* Offline a node if all memory sections and cpus of the node are removed.
*
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call.
*/
void try_offline_node(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
int rc;
/*
* If the node still spans pages (especially ZONE_DEVICE), don't
* offline it. A node spans memory after move_pfn_range_to_zone(),
* e.g., after the memory block was onlined.
*/
if (pgdat->node_spanned_pages)
return;
/*
* Especially offline memory blocks might not be spanned by the
* node. They will get spanned by the node once they get onlined.
* However, they link to the node in sysfs and can get onlined later.
*/
rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
if (rc)
return;
if (check_cpu_on_node(pgdat))
return;
/*
* all memory/cpu of this node are removed, we can offline this
* node now.
*/
node_set_offline(nid);
unregister_one_node(nid);
}
EXPORT_SYMBOL(try_offline_node);
static void __release_memory_resource(resource_size_t start,
resource_size_t size)
{
int ret;
/*
* When removing memory in the same granularity as it was added,
* this function never fails. It might only fail if resources
* have to be adjusted or split. We'll ignore the error, as
* removing of memory cannot fail.
*/
ret = release_mem_region_adjustable(&iomem_resource, start, size);
if (ret) {
resource_size_t endres = start + size - 1;
pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
&start, &endres, ret);
}
}
static int __ref try_remove_memory(int nid, u64 start, u64 size)
{
int rc = 0;
BUG_ON(check_hotplug_memory_range(start, size));
/*
* All memory blocks must be offlined before removing memory. Check
* whether all memory blocks in question are offline and return error
* if this is not the case.
*/
rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
if (rc)
return rc;
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
memblock_free(start, size);
memblock_remove(start, size);
/*
* Memory block device removal under the device_hotplug_lock is
* a barrier against racing online attempts.
*/
remove_memory_block_devices(start, size);
mem_hotplug_begin();
arch_remove_memory(nid, start, size, NULL);
__release_memory_resource(start, size);
try_offline_node(nid);
mem_hotplug_done();
return 0;
}
/**
* remove_memory
* @nid: the node ID
* @start: physical address of the region to remove
* @size: size of the region to remove
*
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call, as required by
* try_offline_node().
*/
void __remove_memory(int nid, u64 start, u64 size)
{
/*
* trigger BUG() if some memory is not offlined prior to calling this
* function
*/
if (try_remove_memory(nid, start, size))
BUG();
}
/*
* Remove memory if every memory block is offline, otherwise return -EBUSY is
* some memory is not offline
*/
int remove_memory(int nid, u64 start, u64 size)
{
int rc;
lock_device_hotplug();
rc = try_remove_memory(nid, start, size);
unlock_device_hotplug();
return rc;
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */