android_kernel_xiaomi_sm8350/mm/slab_common.c
Srinivasarao Pathipati 828abb51dd Merge android11-5.4.147+ (dc8c919) into msm-5.4
* refs/heads/tmp-dc8c919:
  Revert dwc3 changes
  ANDROID: GKI: Update FCNT KMI symbol list (no new symbols added that are not already in the .xml file)
  ANDROID: GKI : Update symbols to symbol list
  BACKPORT: crypto: arch - conditionalize crypto api in arch glue for lib code
  BACKPORT: crypto: arch/lib - limit simd usage to 4k chunks
  UPSTREAM: crypto: arm/blake2s - fix for big endian
  ANDROID: gki_defconfig: enable BLAKE2b support
  BACKPORT: crypto: arm/blake2b - add NEON-accelerated BLAKE2b
  BACKPORT: crypto: blake2b - update file comment
  BACKPORT: crypto: blake2b - sync with blake2s implementation
  UPSTREAM: crypto: arm/blake2s - add ARM scalar optimized BLAKE2s
  UPSTREAM: crypto: blake2s - include <linux/bug.h> instead of <asm/bug.h>
  UPSTREAM: crypto: blake2s - adjust include guard naming
  UPSTREAM: crypto: blake2s - add comment for blake2s_state fields
  UPSTREAM: crypto: blake2s - optimize blake2s initialization
  BACKPORT: crypto: blake2s - share the "shash" API boilerplate code
  UPSTREAM: crypto: blake2s - move update and final logic to internal/blake2s.h
  UPSTREAM: crypto: blake2s - remove unneeded includes
  UPSTREAM: crypto: x86/blake2s - define shash_alg structs using macros
  UPSTREAM: crypto: blake2s - define shash_alg structs using macros
  UPSTREAM: crypto: lib/blake2s - Move selftest prototype into header file
  UPSTREAM: crypto: blake2b - Fix clang optimization for ARMv7-M
  UPSTREAM: crypto: blake2b - rename tfm context and _setkey callback
  UPSTREAM: crypto: blake2b - merge _update to api callback
  UPSTREAM: crypto: blake2b - open code set last block helper
  UPSTREAM: crypto: blake2b - delete unused structs or members
  UPSTREAM: crypto: blake2b - simplify key init
  UPSTREAM: crypto: blake2b - merge blake2 init to api callback
  UPSTREAM: crypto: blake2b - merge _final implementation to callback
  BACKPORT: crypto: testmgr - add test vectors for blake2b
  BACKPORT: crypto: blake2b - add blake2b generic implementation
  UPSTREAM: crypto: blake2s - x86_64 SIMD implementation
  UPSTREAM: crypto: blake2s - implement generic shash driver
  UPSTREAM: crypto: testmgr - add test cases for Blake2s
  UPSTREAM: crypto: blake2s - generic C library implementation and selftest
  UPSTREAM: crypto: lib - tidy up lib/crypto Kconfig and Makefile
  ANDROID: ion heap: init ion heaps in subsys_initcall
  UPSTREAM: ovl: simplify file splice
  ANDROID: ABI: update allowed list for galaxy
  ANDROID: ABI: Update allowed list for QCOM
  ANDROID: distribute Module.symvers
  UPSTREAM: usb: max-3421: Prevent corruption of freed memory
  ANDROID: ABI: Update allowed list for QCOM
  UPSTREAM: driver core: Prevent warning when removing a device link from unregistered consumer
  UPSTREAM: udp: properly flush normal packet at GRO time
  UPSTREAM: net/xfrm/compat: Copy xfrm_spdattr_type_t attributes
  UPSTREAM: f2fs: Advertise encrypted casefolding in sysfs
  UPSTREAM: loop: Fix missing discard support when using LOOP_CONFIGURE
  UPSTREAM: thermal/drivers/sprd: Add missing MODULE_DEVICE_TABLE
  UPSTREAM: nvmem: sprd: Fix an error message
  UPSTREAM: usb: musb: Fix an error message
  UPSTREAM: scsi: ufs: core: Cancel rpm_dev_flush_recheck_work during system suspend
  UPSTREAM: scsi: ufs: core: Do not put UFS power into LPM if link is broken
  UPSTREAM: selinux: add proper NULL termination to the secclass_map permissions
  UPSTREAM: of: property: fw_devlink: do not link ".*,nr-gpios"
  UPSTREAM: udp: never accept GSO_FRAGLIST packets
  UPSTREAM: udp: skip L4 aggregation for UDP tunnel packets
  UPSTREAM: xfrm/compat: Cleanup WARN()s that can be user-triggered
  UPSTREAM: pinctrl: sunxi: fix irq bank map for the Allwinner A100 pin controller
  UPSTREAM: loop: Set correct device size when using LOOP_CONFIGURE
  UPSTREAM: loop: unset GENHD_FL_NO_PART_SCAN on LOOP_CONFIGURE
  ANDROID: ion_system_heap: Add __GFP_NOWARN to mid-order allocations
  ANDROID: drivers: gpu: drm: increase the MAX_DRM_OPEN_COUNT
  UPSTREAM: af_unix: fix garbage collect vs MSG_PEEK
  Linux 5.4.147
  Revert "time: Handle negative seconds correctly in timespec64_to_ns()"
  Revert "posix-cpu-timers: Force next expiration recalc after itimer reset"
  Revert "block: nbd: add sanity check for first_minor"
  Revert "Bluetooth: Move shutdown callback before flushing tx and rx queue"
  Linux 5.4.146
  clk: kirkwood: Fix a clocking boot regression
  backlight: pwm_bl: Improve bootloader/kernel device handover
  fbmem: don't allow too huge resolutions
  IMA: remove the dependency on CRYPTO_MD5
  IMA: remove -Wmissing-prototypes warning
  fuse: flush extending writes
  fuse: truncate pagecache on atomic_o_trunc
  KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter
  KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted
  KVM: s390: index kvm->arch.idle_mask by vcpu_idx
  x86/resctrl: Fix a maybe-uninitialized build warning treated as error
  perf/x86/amd/ibs: Extend PERF_PMU_CAP_NO_EXCLUDE to IBS Op
  tty: Fix data race between tiocsti() and flush_to_ldisc()
  time: Handle negative seconds correctly in timespec64_to_ns()
  bpf: Fix pointer arithmetic mask tightening under state pruning
  bpf: verifier: Allocate idmap scratch in verifier env
  bpf: Fix leakage due to insufficient speculative store bypass mitigation
  bpf: Introduce BPF nospec instruction for mitigating Spectre v4
  ipv4: fix endianness issue in inet_rtm_getroute_build_skb()
  octeontx2-af: Fix loop in free and unmap counter
  net: qualcomm: fix QCA7000 checksum handling
  net: sched: Fix qdisc_rate_table refcount leak when get tcf_block failed
  ipv4: make exception cache less predictable
  ipv6: make exception cache less predictable
  brcmfmac: pcie: fix oops on failure to resume and reprobe
  bcma: Fix memory leak for internally-handled cores
  ath6kl: wmi: fix an error code in ath6kl_wmi_sync_point()
  ASoC: wcd9335: Disable irq on slave ports in the remove function
  ASoC: wcd9335: Fix a memory leak in the error handling path of the probe function
  ASoC: wcd9335: Fix a double irq free in the remove function
  tty: serial: fsl_lpuart: fix the wrong mapbase value
  usb: bdc: Fix an error handling path in 'bdc_probe()' when no suitable DMA config is available
  usb: ehci-orion: Handle errors of clk_prepare_enable() in probe
  i2c: mt65xx: fix IRQ check
  CIFS: Fix a potentially linear read overflow
  bpf: Fix possible out of bound write in narrow load handling
  mmc: moxart: Fix issue with uninitialized dma_slave_config
  mmc: dw_mmc: Fix issue with uninitialized dma_slave_config
  ASoC: Intel: Skylake: Fix module resource and format selection
  ASoC: Intel: Skylake: Leave data as is when invoking TLV IPCs
  rsi: fix an error code in rsi_probe()
  rsi: fix error code in rsi_load_9116_firmware()
  i2c: s3c2410: fix IRQ check
  i2c: iop3xx: fix deferred probing
  Bluetooth: add timeout sanity check to hci_inquiry
  mm/swap: consider max pages in iomap_swapfile_add_extent
  usb: gadget: mv_u3d: request_irq() after initializing UDC
  nfsd4: Fix forced-expiry locking
  lockd: Fix invalid lockowner cast after vfs_test_lock
  mac80211: Fix insufficient headroom issue for AMSDU
  usb: phy: tahvo: add IRQ check
  usb: host: ohci-tmio: add IRQ check
  Bluetooth: Move shutdown callback before flushing tx and rx queue
  usb: gadget: udc: renesas_usb3: Fix soc_device_match() abuse
  usb: phy: twl6030: add IRQ checks
  usb: phy: fsl-usb: add IRQ check
  usb: gadget: udc: at91: add IRQ check
  drm/msm/dsi: Fix some reference counted resource leaks
  Bluetooth: fix repeated calls to sco_sock_kill
  counter: 104-quad-8: Return error when invalid mode during ceiling_write
  arm64: dts: exynos: correct GIC CPU interfaces address range on Exynos7
  drm/msm/dpu: make dpu_hw_ctl_clear_all_blendstages clear necessary LMs
  PM: EM: Increase energy calculation precision
  Bluetooth: increase BTNAMSIZ to 21 chars to fix potential buffer overflow
  debugfs: Return error during {full/open}_proxy_open() on rmmod
  soc: qcom: smsm: Fix missed interrupts if state changes while masked
  PCI: PM: Enable PME if it can be signaled from D3cold
  PCI: PM: Avoid forcing PCI_D0 for wakeup reasons inconsistently
  media: venus: venc: Fix potential null pointer dereference on pointer fmt
  media: em28xx-input: fix refcount bug in em28xx_usb_disconnect
  leds: trigger: audio: Add an activate callback to ensure the initial brightness is set
  leds: lt3593: Put fwnode in any case during ->probe()
  i2c: highlander: add IRQ check
  net: cipso: fix warnings in netlbl_cipsov4_add_std
  cgroup/cpuset: Fix a partition bug with hotplug
  net/mlx5e: Prohibit inner indir TIRs in IPoIB
  ARM: dts: meson8b: ec100: Fix the pwm regulator supply properties
  ARM: dts: meson8b: mxq: Fix the pwm regulator supply properties
  ARM: dts: meson8b: odroidc1: Fix the pwm regulator supply properties
  ARM: dts: meson8: Use a higher default GPU clock frequency
  tcp: seq_file: Avoid skipping sk during tcp_seek_last_pos
  drm/amdgpu/acp: Make PM domain really work
  netns: protect netns ID lookups with RCU
  6lowpan: iphc: Fix an off-by-one check of array index
  Bluetooth: sco: prevent information leak in sco_conn_defer_accept()
  media: coda: fix frame_mem_ctrl for YUV420 and YVU420 formats
  media: go7007: remove redundant initialization
  media: dvb-usb: Fix error handling in dvb_usb_i2c_init
  media: dvb-usb: fix uninit-value in vp702x_read_mac_addr
  media: dvb-usb: fix uninit-value in dvb_usb_adapter_dvb_init
  soc: qcom: rpmhpd: Use corner in power_off
  arm64: dts: renesas: r8a77995: draak: Remove bogus adv7511w properties
  ARM: dts: aspeed-g6: Fix HVI3C function-group in pinctrl dtsi
  bpf: Fix potential memleak and UAF in the verifier.
  bpf: Fix a typo of reuseport map in bpf.h.
  media: cxd2880-spi: Fix an error handling path
  soc: rockchip: ROCKCHIP_GRF should not default to y, unconditionally
  media: TDA1997x: enable EDID support
  drm/panfrost: Fix missing clk_disable_unprepare() on error in panfrost_clk_init()
  EDAC/i10nm: Fix NVDIMM detection
  spi: spi-zynq-qspi: use wait_for_completion_timeout to make zynq_qspi_exec_mem_op not interruptible
  spi: sprd: Fix the wrong WDG_LOAD_VAL
  regulator: vctrl: Avoid lockdep warning in enable/disable ops
  regulator: vctrl: Use locked regulator_get_voltage in probe path
  certs: Trigger creation of RSA module signing key if it's not an RSA key
  crypto: qat - use proper type for vf_mask
  block: nbd: add sanity check for first_minor
  clocksource/drivers/sh_cmt: Fix wrong setting if don't request IRQ for clock source channel
  lib/mpi: use kcalloc in mpi_resize
  genirq/timings: Fix error return code in irq_timings_test_irqs()
  spi: spi-pic32: Fix issue with uninitialized dma_slave_config
  spi: spi-fsl-dspi: Fix issue with uninitialized dma_slave_config
  sched: Fix UCLAMP_FLAG_IDLE setting
  m68k: emu: Fix invalid free in nfeth_cleanup()
  s390/debug: fix debug area life cycle
  s390/kasan: fix large PMD pages address alignment check
  udf_get_extendedattr() had no boundary checks.
  fcntl: fix potential deadlock for &fasync_struct.fa_lock
  crypto: qat - do not export adf_iov_putmsg()
  crypto: qat - fix naming for init/shutdown VF to PF notifications
  crypto: qat - fix reuse of completion variable
  crypto: qat - handle both source of interrupt in VF ISR
  crypto: qat - do not ignore errors from enable_vf2pf_comms()
  libata: fix ata_host_start()
  s390/cio: add dev_busid sysfs entry for each subchannel
  power: supply: max17042_battery: fix typo in MAx17042_TOFF
  nvmet: pass back cntlid on successful completion
  nvme-rdma: don't update queue count when failing to set io queues
  nvme-tcp: don't update queue count when failing to set io queues
  bcache: add proper error unwinding in bcache_device_init
  isofs: joliet: Fix iocharset=utf8 mount option
  udf: Fix iocharset=utf8 mount option
  udf: Check LVID earlier
  hrtimer: Ensure timerfd notification for HIGHRES=n
  hrtimer: Avoid double reprogramming in __hrtimer_start_range_ns()
  posix-cpu-timers: Force next expiration recalc after itimer reset
  rcu/tree: Handle VM stoppage in stall detection
  sched/deadline: Fix missing clock update in migrate_task_rq_dl()
  crypto: omap-sham - clear dma flags only after omap_sham_update_dma_stop()
  power: supply: axp288_fuel_gauge: Report register-address on readb / writeb errors
  sched/deadline: Fix reset_on_fork reporting of DL tasks
  crypto: mxs-dcp - Check for DMA mapping errors
  regmap: fix the offset of register error log
  locking/mutex: Fix HANDOFF condition
  ANDROID: GKI: db845c: Update symbols list and ABI for lts v5.4.144
  Linux 5.4.145
  PCI: Call Max Payload Size-related fixup quirks early
  x86/reboot: Limit Dell Optiplex 990 quirk to early BIOS versions
  xhci: fix unsafe memory usage in xhci tracing
  usb: mtu3: fix the wrong HS mult value
  usb: mtu3: use @mult for HS isoc or intr
  usb: host: xhci-rcar: Don't reload firmware after the completion
  ALSA: usb-audio: Add registration quirk for JBL Quantum 800
  Revert "btrfs: compression: don't try to compress if we don't have enough pages"
  x86/events/amd/iommu: Fix invalid Perf result due to IOMMU PMC power-gating
  Revert "r8169: avoid link-up interrupt issue on RTL8106e if user enables ASPM"
  mm/page_alloc: speed up the iteration of max_order
  net: ll_temac: Remove left-over debug message
  powerpc/boot: Delete unneeded .globl _zimage_start
  ipv4/icmp: l3mdev: Perform icmp error route lookup on source device routing table (v2)
  USB: serial: mos7720: improve OOM-handling in read_mos_reg()
  igmp: Add ip_mc_list lock in ip_check_mc_rcu
  media: stkwebcam: fix memory leak in stk_camera_probe
  ARC: wireup clone3 syscall
  ALSA: pcm: fix divide error in snd_pcm_lib_ioctl
  ALSA: hda/realtek: Workaround for conflicting SSID on ASUS ROG Strix G17
  ARM: 8918/2: only build return_address() if needed
  cryptoloop: add a deprecation warning
  perf/x86/amd/power: Assign pmu.module
  perf/x86/amd/ibs: Work around erratum #1197
  perf/x86/intel/pt: Fix mask of num_address_ranges
  qede: Fix memset corruption
  net: macb: Add a NULL check on desc_ptp
  qed: Fix the VF msix vectors flow
  reset: reset-zynqmp: Fixed the argument data type
  gpu: ipu-v3: Fix i.MX IPU-v3 offset calculations for (semi)planar U/V formats
  xtensa: fix kconfig unmet dependency warning for HAVE_FUTEX_CMPXCHG
  kthread: Fix PF_KTHREAD vs to_kthread() race
  ubifs: report correct st_size for encrypted symlinks
  f2fs: report correct st_size for encrypted symlinks
  ext4: report correct st_size for encrypted symlinks
  fscrypt: add fscrypt_symlink_getattr() for computing st_size
  ext4: fix race writing to an inline_data file while its xattrs are changing
  Revert "once: Fix panic when module unload"
  Linux 5.4.144
  audit: move put_tree() to avoid trim_trees refcount underflow and UAF
  net: don't unconditionally copy_from_user a struct ifreq for socket ioctls
  Revert "parisc: Add assembly implementations for memset, strlen, strcpy, strncpy and strcat"
  Revert "floppy: reintroduce O_NDELAY fix"
  btrfs: fix NULL pointer dereference when deleting device by invalid id
  arm64: dts: qcom: msm8994-angler: Fix gpio-reserved-ranges 85-88
  KVM: x86/mmu: Treat NX as used (not reserved) for all !TDP shadow MMUs
  net: dsa: mt7530: fix VLAN traffic leaks again
  bpf: Fix cast to pointer from integer of different size warning
  bpf: Track contents of read-only maps as scalars
  vt_kdsetmode: extend console locking
  btrfs: fix race between marking inode needs to be logged and log syncing
  net/rds: dma_map_sg is entitled to merge entries
  drm/nouveau/disp: power down unused DP links during init
  drm: Copy drm_wait_vblank to user before returning
  qed: Fix null-pointer dereference in qed_rdma_create_qp()
  qed: qed ll2 race condition fixes
  vringh: Use wiov->used to check for read/write desc order
  virtio_pci: Support surprise removal of virtio pci device
  virtio: Improve vq->broken access to avoid any compiler optimization
  opp: remove WARN when no valid OPPs remain
  perf/x86/intel/uncore: Fix integer overflow on 23 bit left shift of a u32
  usb: gadget: u_audio: fix race condition on endpoint stop
  drm/i915: Fix syncmap memory leak
  net: hns3: fix get wrong pfc_en when query PFC configuration
  net: hns3: fix duplicate node in VLAN list
  net: hns3: clear hardware resource when loading driver
  rtnetlink: Return correct error on changing device netns
  net: marvell: fix MVNETA_TX_IN_PRGRS bit number
  xgene-v2: Fix a resource leak in the error handling path of 'xge_probe()'
  ip_gre: add validation for csum_start
  RDMA/efa: Free IRQ vectors on error flow
  e1000e: Fix the max snoop/no-snoop latency for 10M
  IB/hfi1: Fix possible null-pointer dereference in _extend_sdma_tx_descs()
  RDMA/bnxt_re: Add missing spin lock initialization
  scsi: core: Fix hang of freezing queue between blocking and running device
  usb: dwc3: gadget: Stop EP0 transfers during pullup disable
  usb: dwc3: gadget: Fix dwc3_calc_trbs_left()
  USB: serial: option: add new VID/PID to support Fibocom FG150
  Revert "USB: serial: ch341: fix character loss at high transfer rates"
  can: usb: esd_usb2: esd_usb2_rx_event(): fix the interchange of the CAN RX and TX error counters
  mm, oom: make the calculation of oom badness more accurate
  mmc: sdhci-msm: Update the software timeout value for sdhc
  ovl: fix uninitialized pointer read in ovl_lookup_real_one()
  once: Fix panic when module unload
  netfilter: conntrack: collect all entries in one cycle
  ARC: Fix CONFIG_STACKDEPOT
  net: qrtr: fix another OOB Read in qrtr_endpoint_post
  Revert "virtio: Protect vqs list access"
  Revert "net: igmp: fix data-race in igmp_ifc_timer_expire()"
  Revert "net: igmp: increase size of mr_ifc_count"
  Revert "PCI/MSI: Protect msi_desc::masked for multi-MSI"
  Linux 5.4.143
  netfilter: nft_exthdr: fix endianness of tcp option cast
  fs: warn about impending deprecation of mandatory locks
  mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim
  mm, memcg: avoid stale protection values when cgroup is above protection
  ASoC: intel: atom: Fix breakage for PCM buffer address setup
  PCI: Increase D3 delay for AMD Renoir/Cezanne XHCI
  btrfs: prevent rename2 from exchanging a subvol with a directory from different parents
  ipack: tpci200: fix memory leak in the tpci200_register
  ipack: tpci200: fix many double free issues in tpci200_pci_probe
  slimbus: ngd: reset dma setup during runtime pm
  slimbus: messaging: check for valid transaction id
  slimbus: messaging: start transaction ids from 1 instead of zero
  tracing / histogram: Fix NULL pointer dereference on strcmp() on NULL event name
  ALSA: hda - fix the 'Capture Switch' value change notifications
  mmc: dw_mmc: Fix hang on data CRC error
  ovl: add splice file read write helper
  iavf: Fix ping is lost after untrusted VF had tried to change MAC
  i40e: Fix ATR queue selection
  ovs: clear skb->tstamp in forwarding path
  net: mdio-mux: Handle -EPROBE_DEFER correctly
  net: mdio-mux: Don't ignore memory allocation errors
  net: qlcnic: add missed unlock in qlcnic_83xx_flash_read32
  virtio-net: use NETIF_F_GRO_HW instead of NETIF_F_LRO
  virtio-net: support XDP when not more queues
  vrf: Reset skb conntrack connection on VRF rcv
  bnxt_en: Add missing DMA memory barriers
  ptp_pch: Restore dependency on PCI
  net: 6pack: fix slab-out-of-bounds in decode_data
  bnxt: disable napi before canceling DIM
  bnxt: don't lock the tx queue from napi poll
  bpf: Clear zext_dst of dead insns
  vhost: Fix the calculation in vhost_overflow()
  virtio: Protect vqs list access
  dccp: add do-while-0 stubs for dccp_pr_debug macros
  cpufreq: armada-37xx: forbid cpufreq for 1.2 GHz variant
  iommu: Check if group is NULL before remove device
  Bluetooth: hidp: use correct wait queue when removing ctrl_wait
  drm/amd/display: Fix Dynamic bpp issue with 8K30 with Navi 1X
  net: usb: lan78xx: don't modify phy_device state concurrently
  ARM: dts: nomadik: Fix up interrupt controller node names
  scsi: core: Fix capacity set to zero after offlining device
  scsi: core: Avoid printing an error if target_alloc() returns -ENXIO
  scsi: scsi_dh_rdac: Avoid crash during rdac_bus_attach()
  scsi: megaraid_mm: Fix end of loop tests for list_for_each_entry()
  dmaengine: of-dma: router_xlate to return -EPROBE_DEFER if controller is not yet available
  ARM: dts: am43x-epos-evm: Reduce i2c0 bus speed for tps65218
  dmaengine: usb-dmac: Fix PM reference leak in usb_dmac_probe()
  dmaengine: xilinx_dma: Fix read-after-free bug when terminating transfers
  USB: core: Avoid WARNings for 0-length descriptor requests
  media: drivers/media/usb: fix memory leak in zr364xx_probe
  media: zr364xx: fix memory leaks in probe()
  media: zr364xx: propagate errors from zr364xx_start_readpipe()
  mtd: cfi_cmdset_0002: fix crash when erasing/writing AMD cards
  ath9k: Postpone key cache entry deletion for TXQ frames reference it
  ath: Modify ath_key_delete() to not need full key entry
  ath: Export ath_hw_keysetmac()
  ath9k: Clear key cache explicitly on disabling hardware
  ath: Use safer key clearing with key cache entries
  x86/fpu: Make init_fpstate correct with optimized XSAVE
  ext4: fix EXT4_MAX_LOGICAL_BLOCK macro
  Linux 5.4.142
  KVM: nSVM: always intercept VMLOAD/VMSAVE when nested (CVE-2021-3656)
  KVM: nSVM: avoid picking up unsupported bits from L2 in int_ctl (CVE-2021-3653)
  iommu/vt-d: Fix agaw for a supported 48 bit guest address width
  vmlinux.lds.h: Handle clang's module.{c,d}tor sections
  ceph: take snap_empty_lock atomically with snaprealm refcount change
  ceph: clean up locking annotation for ceph_get_snap_realm and __lookup_snap_realm
  ceph: add some lockdep assertions around snaprealm handling
  KVM: VMX: Use current VMCS to query WAITPKG support for MSR emulation
  PCI/MSI: Protect msi_desc::masked for multi-MSI
  PCI/MSI: Use msi_mask_irq() in pci_msi_shutdown()
  PCI/MSI: Correct misleading comments
  PCI/MSI: Do not set invalid bits in MSI mask
  PCI/MSI: Enforce MSI[X] entry updates to be visible
  PCI/MSI: Enforce that MSI-X table entry is masked for update
  PCI/MSI: Mask all unused MSI-X entries
  PCI/MSI: Enable and mask MSI-X early
  genirq/timings: Prevent potential array overflow in __irq_timings_store()
  genirq/msi: Ensure deactivation on teardown
  x86/resctrl: Fix default monitoring groups reporting
  x86/ioapic: Force affinity setup before startup
  x86/msi: Force affinity setup before startup
  genirq: Provide IRQCHIP_AFFINITY_PRE_STARTUP
  x86/tools: Fix objdump version check again
  powerpc/kprobes: Fix kprobe Oops happens in booke
  nbd: Avoid double completion of a request
  vsock/virtio: avoid potential deadlock when vsock device remove
  xen/events: Fix race in set_evtchn_to_irq
  net: igmp: increase size of mr_ifc_count
  tcp_bbr: fix u32 wrap bug in round logic if bbr_init() called after 2B packets
  net: linkwatch: fix failure to restore device state across suspend/resume
  net: bridge: fix memleak in br_add_if()
  net: dsa: sja1105: fix broken backpressure in .port_fdb_dump
  net: dsa: lantiq: fix broken backpressure in .port_fdb_dump
  net: dsa: lan9303: fix broken backpressure in .port_fdb_dump
  net: igmp: fix data-race in igmp_ifc_timer_expire()
  net: Fix memory leak in ieee802154_raw_deliver
  net: dsa: microchip: Fix ksz_read64()
  drm/meson: fix colour distortion from HDR set during vendor u-boot
  net/mlx5: Fix return value from tracer initialization
  psample: Add a fwd declaration for skbuff
  iavf: Set RSS LUT and key in reset handle path
  net: sched: act_mirred: Reset ct info when mirror/redirect skb
  ppp: Fix generating ifname when empty IFLA_IFNAME is specified
  net: phy: micrel: Fix link detection on ksz87xx switch
  platform/x86: pcengines-apuv2: Add missing terminating entries to gpio-lookup tables
  platform/x86: pcengines-apuv2: revert wiring up simswitch GPIO as LED
  net: dsa: mt7530: add the missing RxUnicast MIB counter
  ASoC: cs42l42: Fix LRCLK frame start edge
  netfilter: nf_conntrack_bridge: Fix memory leak when error
  ASoC: cs42l42: Remove duplicate control for WNF filter frequency
  ASoC: cs42l42: Fix inversion of ADC Notch Switch control
  ASoC: cs42l42: Don't allow SND_SOC_DAIFMT_LEFT_J
  ASoC: cs42l42: Correct definition of ADC Volume control
  ieee802154: hwsim: fix GPF in hwsim_new_edge_nl
  ieee802154: hwsim: fix GPF in hwsim_set_edge_lqi
  libnvdimm/region: Fix label activation vs errors
  ACPI: NFIT: Fix support for virtual SPA ranges
  ceph: reduce contention in ceph_check_delayed_caps()
  i2c: dev: zero out array used for i2c reads from userspace
  ASoC: intel: atom: Fix reference to PCM buffer address
  ASoC: xilinx: Fix reference to PCM buffer address
  iio: adc: Fix incorrect exit of for-loop
  iio: humidity: hdc100x: Add margin to the conversion time
  iio: adc: ti-ads7950: Ensure CS is deasserted after reading channels
  Linux 5.4.141
  btrfs: don't flush from btrfs_delayed_inode_reserve_metadata
  btrfs: export and rename qgroup_reserve_meta
  btrfs: qgroup: don't commit transaction when we already hold the handle
  net: xilinx_emaclite: Do not print real IOMEM pointer
  btrfs: fix lockdep splat when enabling and disabling qgroups
  btrfs: qgroup: remove ASYNC_COMMIT mechanism in favor of reserve retry-after-EDQUOT
  btrfs: transaction: Cleanup unused TRANS_STATE_BLOCKED
  btrfs: qgroup: try to flush qgroup space when we get -EDQUOT
  btrfs: qgroup: allow to unreserve range without releasing other ranges
  btrfs: make btrfs_qgroup_reserve_data take btrfs_inode
  btrfs: make qgroup_free_reserved_data take btrfs_inode
  ovl: prevent private clone if bind mount is not allowed
  ppp: Fix generating ppp unit id when ifname is not specified
  ALSA: hda: Add quirk for ASUS Flow x13
  USB:ehci:fix Kunpeng920 ehci hardware problem
  KVM: X86: MMU: Use the correct inherited permissions to get shadow page
  usb: dwc3: gadget: Avoid runtime resume if disabling pullup
  usb: dwc3: gadget: Disable gadget IRQ during pullup disable
  usb: dwc3: gadget: Clear DEP flags after stop transfers in ep disable
  usb: dwc3: gadget: Prevent EP queuing while stopping transfers
  usb: dwc3: gadget: Restart DWC3 gadget when enabling pullup
  usb: dwc3: gadget: Allow runtime suspend if UDC unbinded
  usb: dwc3: Stop active transfers before halting the controller
  tracing: Reject string operand in the histogram expression
  media: v4l2-mem2mem: always consider OUTPUT queue during poll
  tee: Correct inappropriate usage of TEE_SHM_DMA_BUF flag
  KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB
  Linux 5.4.140
  arm64: fix compat syscall return truncation
  net/qla3xxx: fix schedule while atomic in ql_wait_for_drvr_lock and ql_adapter_reset
  alpha: Send stop IPI to send to online CPUs
  virt_wifi: fix error on connect
  reiserfs: check directory items on read from disk
  reiserfs: add check for root_inode in reiserfs_fill_super
  libata: fix ata_pio_sector for CONFIG_HIGHMEM
  bpf, selftests: Adjust few selftest result_unpriv outcomes
  perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest
  soc: ixp4xx/qmgr: fix invalid __iomem access
  spi: meson-spicc: fix memory leak in meson_spicc_remove
  soc: ixp4xx: fix printing resources
  arm64: vdso: Avoid ISB after reading from cntvct_el0
  KVM: x86/mmu: Fix per-cpu counter corruption on 32-bit builds
  KVM: Do not leak memory for duplicate debugfs directories
  KVM: x86: accept userspace interrupt only if no event is injected
  md/raid10: properly indicate failure when ending a failed write request
  pcmcia: i82092: fix a null pointer dereference bug
  timers: Move clearing of base::timer_running under base:: Lock
  serial: 8250_pci: Avoid irq sharing for MSI(-X) interrupts.
  serial: 8250_pci: Enumerate Elkhart Lake UARTs via dedicated driver
  MIPS: Malta: Do not byte-swap accesses to the CBUS UART
  serial: 8250: Mask out floating 16/32-bit bus bits
  serial: 8250_mtk: fix uart corruption issue when rx power off
  serial: tegra: Only print FIFO error message when an error occurs
  ext4: fix potential htree corruption when growing large_dir directories
  pipe: increase minimum default pipe size to 2 pages
  media: rtl28xxu: fix zero-length control request
  staging: rtl8712: get rid of flush_scheduled_work
  staging: rtl8723bs: Fix a resource leak in sd_int_dpc
  tpm_ftpm_tee: Free and unregister TEE shared memory during kexec
  optee: Fix memory leak when failing to register shm pages
  tee: add tee_shm_alloc_kernel_buf()
  optee: Clear stale cache entries during initialization
  tracing / histogram: Give calculation hist_fields a size
  scripts/tracing: fix the bug that can't parse raw_trace_func
  clk: fix leak on devm_clk_bulk_get_all() unwind
  usb: otg-fsm: Fix hrtimer list corruption
  usb: gadget: f_hid: idle uses the highest byte for duration
  usb: gadget: f_hid: fixed NULL pointer dereference
  usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers
  usb: cdns3: Fixed incorrect gadget state
  ALSA: usb-audio: Add registration quirk for JBL Quantum 600
  ALSA: hda/realtek: add mic quirk for Acer SF314-42
  firmware_loader: fix use-after-free in firmware_fallback_sysfs
  firmware_loader: use -ETIMEDOUT instead of -EAGAIN in fw_load_sysfs_fallback
  USB: serial: ftdi_sio: add device ID for Auto-M3 OP-COM v2
  USB: serial: ch341: fix character loss at high transfer rates
  USB: serial: option: add Telit FD980 composition 0x1056
  USB: usbtmc: Fix RCU stall warning
  Bluetooth: defer cleanup of resources in hci_unregister_dev()
  blk-iolatency: error out if blk_get_queue() failed in iolatency_set_limit()
  net: vxge: fix use-after-free in vxge_device_unregister
  net: fec: fix use-after-free in fec_drv_remove
  net: pegasus: fix uninit-value in get_interrupt_interval
  bnx2x: fix an error code in bnx2x_nic_load()
  mips: Fix non-POSIX regexp
  net: ipv6: fix returned variable type in ip6_skb_dst_mtu
  nfp: update ethtool reporting of pauseframe control
  sctp: move the active_key update after sh_keys is added
  gpio: tqmx86: really make IRQ optional
  net: natsemi: Fix missing pci_disable_device() in probe and remove
  net: phy: micrel: Fix detection of ksz87xx switch
  net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones
  net: dsa: sja1105: overwrite dynamic FDB entries with static ones in .port_fdb_add
  net, gro: Set inner transport header offset in tcp/udp GRO hook
  dmaengine: imx-dma: configure the generic DMA type to make it work
  media: videobuf2-core: dequeue if start_streaming fails
  scsi: sr: Return correct event when media event code is 3
  spi: imx: mx51-ecspi: Fix low-speed CONFIGREG delay calculation
  spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay
  omap5-board-common: remove not physically existing vdds_1v8_main fixed-regulator
  ARM: dts: am437x-l4: fix typo in can@0 node
  clk: stm32f4: fix post divisor setup for I2S/SAI PLLs
  ALSA: usb-audio: fix incorrect clock source setting
  arm64: dts: armada-3720-turris-mox: remove mrvl,i2c-fast-mode
  ARM: dts: imx: Swap M53Menlo pinctrl_power_button/pinctrl_power_out pins
  ARM: imx: fix missing 3rd argument in macro imx_mmdc_perf_init
  ARM: dts: colibri-imx6ull: limit SDIO clock to 25MHz
  ARM: dts: imx6qdl-sr-som: Increase the PHY reset duration to 10ms
  ARM: imx: add missing clk_disable_unprepare()
  ARM: imx: add missing iounmap()
  arm64: dts: ls1028a: fix node name for the sysclk
  ALSA: seq: Fix racy deletion of subscriber
  Revert "ACPICA: Fix memory leak caused by _CID repair function"
  ANDROID: GKI: fix up android/abi_gki_aarch64.xml merge
  Linux 5.4.139
  spi: mediatek: Fix fifo transfer
  bpf, selftests: Adjust few selftest outcomes wrt unreachable code
  bpf, selftests: Add a verifier test for assigning 32bit reg states to 64bit ones
  bpf: Test_verifier, add alu32 bounds tracking tests
  bpf: Fix leakage under speculation on mispredicted branches
  bpf: Do not mark insn as seen under speculative path verification
  bpf: Inherit expanded/patched seen count from old aux data
  Revert "watchdog: iTCO_wdt: Account for rebooting on second timeout"
  firmware: arm_scmi: Add delayed response status check
  firmware: arm_scmi: Ensure drivers provide a probe function
  Revert "Bluetooth: Shutdown controller after workqueues are flushed or cancelled"
  ACPI: fix NULL pointer dereference
  nvme: fix nvme_setup_command metadata trace event
  net: Fix zero-copy head len calculation.
  qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()
  r8152: Fix potential PM refcount imbalance
  ASoC: tlv320aic31xx: fix reversed bclk/wclk master bits
  spi: stm32h7: fix full duplex irq handler handling
  regulator: rt5033: Fix n_voltages settings for BUCK and LDO
  btrfs: fix lost inode on log replay after mix of fsync, rename and inode eviction
  btrfs: fix race causing unnecessary inode logging during link and rename
  btrfs: do not commit logs and transactions during link and rename operations
  btrfs: delete duplicated words + other fixes in comments
  Linux 5.4.138
  can: j1939: j1939_session_deactivate(): clarify lifetime of session object
  i40e: Add additional info to PHY type error
  Revert "perf map: Fix dso->nsinfo refcounting"
  powerpc/pseries: Fix regression while building external modules
  PCI: mvebu: Setup BAR0 in order to fix MSI
  can: hi311x: fix a signedness bug in hi3110_cmd()
  sis900: Fix missing pci_disable_device() in probe and remove
  tulip: windbond-840: Fix missing pci_disable_device() in probe and remove
  sctp: fix return value check in __sctp_rcv_asconf_lookup
  net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev()
  net/mlx5: Fix flow table chaining
  net: llc: fix skb_over_panic
  mlx4: Fix missing error code in mlx4_load_one()
  net: Set true network header for ECN decapsulation
  tipc: fix sleeping in tipc accept routine
  i40e: Fix log TC creation failure when max num of queues is exceeded
  i40e: Fix queue-to-TC mapping on Tx
  i40e: Fix firmware LLDP agent related warning
  i40e: Fix logic of disabling queues
  netfilter: nft_nat: allow to specify layer 4 protocol NAT only
  netfilter: conntrack: adjust stop timestamp to real expiry value
  cfg80211: Fix possible memory leak in function cfg80211_bss_update
  nfc: nfcsim: fix use after free during module unload
  NIU: fix incorrect error return, missed in previous revert
  HID: wacom: Re-enable touch by default for Cintiq 24HDT / 27QHDT
  can: esd_usb2: fix memory leak
  can: ems_usb: fix memory leak
  can: usb_8dev: fix memory leak
  can: mcba_usb_start(): add missing urb->transfer_dma initialization
  can: raw: raw_setsockopt(): fix raw_rcv panic for sock UAF
  can: j1939: j1939_xtp_rx_dat_one(): fix rxtimer value between consecutive TP.DT to 750ms
  ocfs2: issue zeroout to EOF blocks
  ocfs2: fix zero out valid data
  KVM: add missing compat KVM_CLEAR_DIRTY_LOG
  x86/kvm: fix vcpu-id indexed array sizes
  Revert "ACPI: resources: Add checks for ACPI IRQ override"
  btrfs: mark compressed range uptodate only if all bio succeed
  btrfs: fix rw device counting in __btrfs_free_extra_devids
  x86/asm: Ensure asm/proto.h can be included stand-alone
  net_sched: check error pointer in tcf_dump_walker()
  Linux 5.4.137
  ipv6: ip6_finish_output2: set sk into newly allocated nskb
  ARM: dts: versatile: Fix up interrupt controller node names
  iomap: remove the length variable in iomap_seek_hole
  iomap: remove the length variable in iomap_seek_data
  cifs: fix the out of range assignment to bit fields in parse_server_interfaces
  firmware: arm_scmi: Fix range check for the maximum number of pending messages
  firmware: arm_scmi: Fix possible scmi_linux_errmap buffer overflow
  hfs: add lock nesting notation to hfs_find_init
  hfs: fix high memory mapping in hfs_bnode_read
  hfs: add missing clean-up in hfs_fill_super
  ipv6: allocate enough headroom in ip6_finish_output2()
  sctp: move 198 addresses from unusable to private scope
  net: annotate data race around sk_ll_usec
  net/802/garp: fix memleak in garp_request_join()
  net/802/mrp: fix memleak in mrp_request_join()
  cgroup1: fix leaked context root causing sporadic NULL deref in LTP
  workqueue: fix UAF in pwq_unbound_release_workfn()
  af_unix: fix garbage collect vs MSG_PEEK
  KVM: x86: determine if an exception has an error code only when injecting it.
  tools: Allow proper CC/CXX/... override with LLVM=1 in Makefile.include
  selftest: fix build error in tools/testing/selftests/vm/userfaultfd.c
  ANDROID: Update android/abi_gki_aarch64.xml
  ANDROID: Update android/abi_gki_aarch64_goldfish
  Linux 5.4.136
  xhci: add xhci_get_virt_ep() helper
  perf inject: Close inject.output on exit
  PCI: Mark AMD Navi14 GPU ATS as broken
  btrfs: compression: don't try to compress if we don't have enough pages
  iio: accel: bma180: Fix BMA25x bandwidth register values
  iio: accel: bma180: Use explicit member assignment
  net: bcmgenet: ensure EXT_ENERGY_DET_MASK is clear
  net: dsa: mv88e6xxx: use correct .stats_set_histogram() on Topaz
  drm: Return -ENOTTY for non-drm ioctls
  nds32: fix up stack guard gap
  rbd: always kick acquire on "acquired" and "released" notifications
  rbd: don't hold lock_rwsem while running_list is being drained
  hugetlbfs: fix mount mode command line processing
  userfaultfd: do not untag user pointers
  selftest: use mmap instead of posix_memalign to allocate memory
  ixgbe: Fix packet corruption due to missing DMA sync
  media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf()
  btrfs: check for missing device in btrfs_trim_fs
  tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop.
  tracing/histogram: Rename "cpu" to "common_cpu"
  firmware/efi: Tell memblock about EFI iomem reservations
  usb: dwc2: gadget: Fix sending zero length packet in DDMA mode.
  USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick
  USB: serial: cp210x: fix comments for GE CS1000
  USB: serial: option: add support for u-blox LARA-R6 family
  usb: renesas_usbhs: Fix superfluous irqs happen after usb_pkt_pop()
  usb: max-3421: Prevent corruption of freed memory
  USB: usb-storage: Add LaCie Rugged USB3-FW to IGNORE_UAS
  usb: hub: Fix link power management max exit latency (MEL) calculations
  usb: hub: Disable USB 3 device initiated lpm if exit latency is too high
  KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state
  KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow
  xhci: Fix lost USB 2 remote wake
  ALSA: hdmi: Expose all pins on MSI MS-7C94 board
  ALSA: sb: Fix potential ABBA deadlock in CSP driver
  ALSA: usb-audio: Add registration quirk for JBL Quantum headsets
  ALSA: usb-audio: Add missing proc text entry for BESPOKEN type
  s390/boot: fix use of expolines in the DMA code
  s390/ftrace: fix ftrace_update_ftrace_func implementation
  Revert "MIPS: add PMD table accounting into MIPS'pmd_alloc_one"
  proc: Avoid mixing integer types in mem_rw()
  drm/panel: raspberrypi-touchscreen: Prevent double-free
  net: sched: cls_api: Fix the wrong parameter
  sctp: update active_key for asoc when old key is being replaced
  nvme: set the PRACT bit when using Write Zeroes with T10 PI
  r8169: Avoid duplicate sysfs entry creation error
  afs: Fix tracepoint string placement with built-in AFS
  Revert "USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem"
  nvme-pci: don't WARN_ON in nvme_reset_work if ctrl.state is not RESETTING
  ipv6: fix another slab-out-of-bounds in fib6_nh_flush_exceptions
  net/sched: act_skbmod: Skip non-Ethernet packets
  net: hns3: fix rx VLAN offload state inconsistent issue
  net/tcp_fastopen: fix data races around tfo_active_disable_stamp
  net: hisilicon: rename CACHE_LINE_MASK to avoid redefinition
  bnxt_en: Check abort error state in bnxt_half_open_nic()
  bnxt_en: Add missing check for BNXT_STATE_ABORT_ERR in bnxt_fw_rset_task()
  bnxt_en: Refresh RoCE capabilities in bnxt_ulp_probe()
  bnxt_en: Improve bnxt_ulp_stop()/bnxt_ulp_start() call sequence.
  spi: cadence: Correct initialisation of runtime PM again
  scsi: target: Fix protect handling in WRITE SAME(32)
  scsi: iscsi: Fix iface sysfs attr detection
  netrom: Decrease sock refcount when sock timers expire
  net: sched: fix memory leak in tcindex_partial_destroy_work
  KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak
  KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash
  net: decnet: Fix sleeping inside in af_decnet
  efi/tpm: Differentiate missing and invalid final event log table.
  net: fix uninit-value in caif_seqpkt_sendmsg
  bpftool: Check malloc return value in mount_bpffs_for_pin
  bpf, sockmap, tcp: sk_prot needs inuse_idx set for proc stats
  s390/bpf: Perform r1 range checking before accessing jit->seen_reg[r1]
  liquidio: Fix unintentional sign extension issue on left shift of u16
  ASoC: rt5631: Fix regcache sync errors on resume
  spi: mediatek: fix fifo rx mode
  regulator: hi6421: Fix getting wrong drvdata
  regulator: hi6421: Use correct variable type for regmap api val argument
  spi: stm32: fixes pm_runtime calls in probe/remove
  spi: stm32: Use dma_request_chan() instead dma_request_slave_channel()
  spi: imx: add a check for speed_hz before calculating the clock
  perf data: Close all files in close_dir()
  perf probe-file: Delete namelist in del_events() on the error path
  perf lzma: Close lzma stream on exit
  perf script: Fix memory 'threads' and 'cpus' leaks on exit
  perf dso: Fix memory leak in dso__new_map()
  perf test event_update: Fix memory leak of evlist
  perf test session_topology: Delete session->evlist
  perf env: Fix sibling_dies memory leak
  perf probe: Fix dso->nsinfo refcounting
  perf map: Fix dso->nsinfo refcounting
  nvme-pci: do not call nvme_dev_remove_admin from nvme_remove
  cxgb4: fix IRQ free race during driver unload
  pwm: sprd: Ensure configuring period and duty_cycle isn't wrongly skipped
  selftests: icmp_redirect: IPv6 PMTU info should be cleared after redirect
  selftests: icmp_redirect: remove from checking for IPv6 route get
  ipv6: fix 'disable_policy' for fwd packets
  gve: Fix an error handling path in 'gve_probe()'
  igb: Fix position of assignment to *ring
  igb: Check if num of q_vectors is smaller than max before array access
  iavf: Fix an error handling path in 'iavf_probe()'
  e1000e: Fix an error handling path in 'e1000_probe()'
  fm10k: Fix an error handling path in 'fm10k_probe()'
  igb: Fix an error handling path in 'igb_probe()'
  igc: Fix an error handling path in 'igc_probe()'
  igc: Prefer to use the pci_release_mem_regions method
  ixgbe: Fix an error handling path in 'ixgbe_probe()'
  igc: change default return of igc_read_phy_reg()
  igb: Fix use-after-free error during reset
  igc: Fix use-after-free error during reset
  Linux 5.4.135
  udp: annotate data races around unix_sk(sk)->gso_size
  perf test bpf: Free obj_buf
  bpftool: Properly close va_list 'ap' by va_end() on error
  ipv6: tcp: drop silly ICMPv6 packet too big messages
  tcp: annotate data races around tp->mtu_info
  dma-buf/sync_file: Don't leak fences on merge failure
  net: fddi: fix UAF in fza_probe
  net: validate lwtstate->data before returning from skb_tunnel_info()
  net: send SYNACK packet with accepted fwmark
  net: ti: fix UAF in tlan_remove_one
  net: qcom/emac: fix UAF in emac_remove
  net: moxa: fix UAF in moxart_mac_probe
  net: ip_tunnel: fix mtu calculation for ETHER tunnel devices
  net: bcmgenet: Ensure all TX/RX queues DMAs are disabled
  net: bridge: sync fdb to new unicast-filtering ports
  net/sched: act_ct: fix err check for nf_conntrack_confirm
  netfilter: ctnetlink: suspicious RCU usage in ctnetlink_dump_helpinfo
  net: ipv6: fix return value of ip6_skb_dst_mtu
  net: dsa: mv88e6xxx: enable .rmu_disable() on Topaz
  net: dsa: mv88e6xxx: enable .port_set_policy() on Topaz
  dm writecache: return the exact table values that were set
  mm: slab: fix kmem_cache_create failed when sysfs node not destroyed
  usb: cdns3: Enable TDL_CHK only for OUT ep
  f2fs: Show casefolding support only when supported
  arm64: dts: marvell: armada-37xx: move firmware node to generic dtsi file
  firmware: turris-mox-rwtm: add marvell,armada-3700-rwtm-firmware compatible string
  arm64: dts: armada-3720-turris-mox: add firmware node
  cifs: prevent NULL deref in cifs_compose_mount_options()
  s390: introduce proper type handling call_on_stack() macro
  sched/fair: Fix CFS bandwidth hrtimer expiry type
  scsi: qedf: Add check to synchronize abort and flush
  scsi: libfc: Fix array index out of bound exception
  scsi: libsas: Add LUN number check in .slave_alloc callback
  scsi: aic7xxx: Fix unintentional sign extension issue on left shift of u8
  rtc: max77686: Do not enforce (incorrect) interrupt trigger type
  kbuild: mkcompile_h: consider timestamp if KBUILD_BUILD_TIMESTAMP is set
  thermal/core: Correct function name thermal_zone_device_unregister()
  arm64: dts: imx8mq: assign PCIe clocks
  arm64: dts: ls208xa: remove bus-num from dspi node
  firmware: tegra: bpmp: Fix Tegra234-only builds
  soc/tegra: fuse: Fix Tegra234-only builds
  ARM: dts: stm32: move stmmac axi config in ethernet node on stm32mp15
  ARM: dts: stm32: fix i2c node name on stm32f746 to prevent warnings
  ARM: dts: rockchip: fix supply properties in io-domains nodes
  arm64: dts: juno: Update SCPI nodes as per the YAML schema
  ARM: dts: stm32: fix timer nodes on STM32 MCU to prevent warnings
  ARM: dts: stm32: fix RCC node name on stm32f429 MCU
  ARM: dts: stm32: fix gpio-keys node on STM32 MCU boards
  ARM: dts: am437x-gp-evm: fix ti,no-reset-on-init flag for gpios
  ARM: dts: am57xx-cl-som-am57x: fix ti,no-reset-on-init flag for gpios
  kbuild: sink stdout from cmd for silent build
  rtc: mxc_v2: add missing MODULE_DEVICE_TABLE
  ARM: imx: pm-imx5: Fix references to imx5_cpu_suspend_info
  ARM: dts: imx6: phyFLEX: Fix UART hardware flow control
  ARM: dts: Hurricane 2: Fix NAND nodes names
  ARM: dts: BCM63xx: Fix NAND nodes names
  ARM: NSP: dts: fix NAND nodes names
  ARM: Cygnus: dts: fix NAND nodes names
  ARM: brcmstb: dts: fix NAND nodes names
  reset: ti-syscon: fix to_ti_syscon_reset_data macro
  arm64: dts: rockchip: Fix power-controller node names for rk3328
  arm64: dts: rockchip: Fix power-controller node names for px30
  ARM: dts: rockchip: Fix power-controller node names for rk3288
  ARM: dts: rockchip: Fix power-controller node names for rk3188
  ARM: dts: rockchip: Fix power-controller node names for rk3066a
  ARM: dts: rockchip: Fix IOMMU nodes properties on rk322x
  ARM: dts: rockchip: Fix the timer clocks order
  arm64: dts: rockchip: fix pinctrl sleep nodename for rk3399.dtsi
  ARM: dts: rockchip: fix pinctrl sleep nodename for rk3036-kylin and rk3288
  ARM: dts: gemini: add device_type on pci
  ARM: dts: gemini: rename mdio to the right name

 Conflicts:
	drivers/mmc/host/sdhci-msm.c
	drivers/scsi/ufs/ufshcd.c
	drivers/staging/android/ion/heaps/ion_cma_heap.c
	drivers/usb/dwc3/gadget.c
	include/linux/oom.h
	kernel/time/hrtimer.c
	mm/oom_kill.c
	net/qrtr/qrtr.c

Change-Id: I1c29c9ef4233acd05550475b29b8f7d30b6c452d
Signed-off-by: Srinivasarao Pathipati <quic_spathi@quicinc.com>
2021-11-05 17:27:33 +05:30

1891 lines
47 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Slab allocator functions that are independent of the allocator strategy
*
* (C) 2012 Christoph Lameter <cl@linux.com>
*/
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
#include "slab.h"
#ifdef CONFIG_QCOM_MINIDUMP_PANIC_DUMP
#include <soc/qcom/minidump.h>
#include <linux/seq_buf.h>
#endif
/* Current allocator state value (enum slab_state); not modified in this part of the file. */
enum slab_state slab_state;
/* List that create_cache() adds every newly created cache to (through kmem_cache.list). */
LIST_HEAD(slab_caches);
/* Mutex taken around cache creation, destruction and memcg array updates in this file. */
DEFINE_MUTEX(slab_mutex);
/* Cache from which create_cache() allocates the struct kmem_cache descriptors themselves. */
struct kmem_cache *kmem_cache;
#ifdef CONFIG_HARDENED_USERCOPY
/*
 * Read-only-after-init flag whose default is whether
 * CONFIG_HARDENED_USERCOPY_FALLBACK is enabled. It is also exposed as a
 * boolean module parameter with mode 0400.
 */
bool usercopy_fallback __ro_after_init =
IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
module_param(usercopy_fallback, bool, 0400);
MODULE_PARM_DESC(usercopy_fallback,
"WARN instead of reject usercopy whitelist violations");
#endif
/*
 * Caches that shutdown_cache() has queued for deferred release, together
 * with the work item whose handler drains that list.
 */
static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
slab_caches_to_rcu_destroy_workfn);
/*
 * Set of flags that will prevent slab merging: a cache carrying any of
 * them is reported as unmergeable by slab_unmergeable(), and
 * find_mergeable() gives up when the requested flags contain one.
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
SLAB_FAILSLAB | SLAB_KASAN)
/*
 * Flags that must have the same value in the request and in a candidate
 * cache for find_mergeable() to accept the candidate.
 */
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * The initial value is true unless CONFIG_SLAB_MERGE_DEFAULT is enabled;
 * setup_slab_nomerge() sets it to true.
 */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
/*
 * Boot-option handler that switches slab merging off.
 *
 * @str is not examined. Always returns 1.
 */
static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = true;

	return 1;
}
#ifdef CONFIG_SLUB
/* With CONFIG_SLUB, "slub_nomerge" is registered against the same handler. */
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif
/* Register setup_slab_nomerge() for the "slab_nomerge" option. */
__setup("slab_nomerge", setup_slab_nomerge);
/*
 * kmem_cache_size - report the object size recorded for a cache
 * @s: cache to query
 *
 * Return: the value of the cache's object_size field.
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	unsigned int object_size = s->object_size;

	return object_size;
}
EXPORT_SYMBOL(kmem_cache_size);
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
return -EINVAL;
}
WARN_ON(strchr(name, ' ')); /* It confuses parsers */
return 0;
}
#else
/*
 * Variant used when CONFIG_DEBUG_VM is off: no validation is performed and
 * every request is accepted (returns 0).
 */
static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	return 0;
}
#endif
/*
 * Free the first @nr pointers stored in @p, one object at a time.
 *
 * When @s is non-NULL each object is handed to kmem_cache_free() for that
 * cache; when @s is NULL each object is released with kfree() instead.
 */
void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t idx;

	for (idx = 0; idx < nr; idx++) {
		void *obj = p[idx];

		if (!s) {
			kfree(obj);
			continue;
		}
		kmem_cache_free(s, obj);
	}
}
/*
 * Fill @p with @nr objects allocated one by one from @s using @flags.
 *
 * If any allocation fails, the objects obtained so far are released with
 * __kmem_cache_free_bulk() and 0 is returned (the failing slot holds NULL).
 * Otherwise the number of objects allocated is returned.
 */
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
			    void **p)
{
	size_t done = 0;

	while (done < nr) {
		void *obj = kmem_cache_alloc(s, flags);

		p[done] = obj;
		if (!obj) {
			__kmem_cache_free_bulk(s, done, p);
			return 0;
		}
		done++;
	}

	return done;
}
#ifdef CONFIG_MEMCG_KMEM
/* List of root caches; memcg_link_cache() adds each root cache here. */
LIST_HEAD(slab_root_caches);
/* Held while checking memcg_params.dying and queueing per-cache work. */
static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
/* Forward declaration: passed to percpu_ref_init() in init_memcg_params(). */
static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref);
/*
 * Put the memcg bookkeeping of @s into its initial state: no root cache,
 * no per-memcg cache array, an empty list of children and the dying flag
 * cleared.
 */
void slab_init_memcg_params(struct kmem_cache *s)
{
	INIT_LIST_HEAD(&s->memcg_params.children);
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);

	s->memcg_params.root_cache = NULL;
	s->memcg_params.dying = false;
}
/*
 * Initialise the memcg-related part of a freshly allocated cache.
 *
 * @s:          cache being set up
 * @root_cache: parent cache when @s is a per-memcg cache, NULL otherwise
 *
 * With a @root_cache, the reference counter of @s is initialised (with
 * kmemcg_cache_shutdown() as its callback), the parent is recorded and the
 * two list nodes are initialised. Without one, the parameters are reset
 * and, if memcg_nr_cache_ids is non-zero, a zeroed array with that many
 * entries is installed as memcg_caches.
 *
 * Return: 0 on success, the percpu_ref_init() error, or -ENOMEM when the
 * array cannot be allocated.
 */
static int init_memcg_params(struct kmem_cache *s,
			     struct kmem_cache *root_cache)
{
	struct memcg_cache_array *caches;
	int err;

	if (!root_cache) {
		slab_init_memcg_params(s);

		if (!memcg_nr_cache_ids)
			return 0;

		caches = kvzalloc(sizeof(struct memcg_cache_array) +
				  memcg_nr_cache_ids * sizeof(void *),
				  GFP_KERNEL);
		if (!caches)
			return -ENOMEM;

		RCU_INIT_POINTER(s->memcg_params.memcg_caches, caches);
		return 0;
	}

	err = percpu_ref_init(&s->memcg_params.refcnt,
			      kmemcg_cache_shutdown,
			      0, GFP_KERNEL);
	if (err)
		return err;

	s->memcg_params.root_cache = root_cache;
	INIT_LIST_HEAD(&s->memcg_params.children_node);
	INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);

	return 0;
}
/*
 * Release what init_memcg_params() set up for @s.
 *
 * A root cache frees its memcg_caches array. Any other cache drops its
 * memcg reference, clears the memcg pointer and tears down its reference
 * counter.
 */
static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
		return;
	}

	mem_cgroup_put(s->memcg_params.memcg);
	WRITE_ONCE(s->memcg_params.memcg, NULL);
	percpu_ref_exit(&s->memcg_params.refcnt);
}
/*
 * RCU callback: free the memcg_cache_array that embeds @rcu.
 */
static void free_memcg_params(struct rcu_head *rcu)
{
	kvfree(container_of(rcu, struct memcg_cache_array, rcu));
}
/*
 * Replace the memcg_caches array of @s with a zeroed one holding
 * @new_array_size entries.
 *
 * The first memcg_nr_cache_ids entries of the previous array, if there was
 * one, are copied over before the new array is published; the previous
 * array is then freed through call_rcu(). The current array is read with
 * rcu_dereference_protected() under slab_mutex.
 *
 * Return: 0 on success, -ENOMEM if the new array cannot be allocated.
 */
static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *prev, *grown;

	grown = kvzalloc(sizeof(struct memcg_cache_array) +
			 new_array_size * sizeof(void *), GFP_KERNEL);
	if (!grown)
		return -ENOMEM;

	prev = rcu_dereference_protected(s->memcg_params.memcg_caches,
					 lockdep_is_held(&slab_mutex));
	if (prev)
		memcpy(grown->entries, prev->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, grown);

	if (prev)
		call_rcu(&prev->rcu, free_memcg_params);

	return 0;
}
/*
 * Resize the memcg_caches array of every root cache to @num_memcgs
 * entries, holding slab_mutex for the whole walk.
 *
 * Return: 0 on success, or the first error from update_memcg_params().
 */
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *root;
	int err = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(root, &slab_root_caches, root_caches_node) {
		err = update_memcg_params(root, num_memcgs);
		/*
		 * On failure nothing is rolled back: the caches handled so
		 * far simply stay in their updated state.
		 */
		if (err)
			break;
	}
	mutex_unlock(&slab_mutex);

	return err;
}
/*
 * Hook a new cache into the memcg lists.
 *
 * A root cache is added to slab_root_caches. Any other cache takes a css
 * reference on @memcg, records it, and is added both to its root cache's
 * list of children and to the memcg's list of kmem caches.
 */
void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg)
{
	if (is_root_cache(s)) {
		list_add(&s->root_caches_node, &slab_root_caches);
		return;
	}

	css_get(&memcg->css);
	s->memcg_params.memcg = memcg;

	list_add(&s->memcg_params.children_node,
		 &s->memcg_params.root_cache->memcg_params.children);
	list_add(&s->memcg_params.kmem_caches_node,
		 &s->memcg_params.memcg->kmem_caches);
}
/*
 * Remove @s from the lists memcg_link_cache() put it on: the root-cache
 * list for a root cache, otherwise the children and kmem_caches lists.
 */
static void memcg_unlink_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_del(&s->root_caches_node);
		return;
	}

	list_del(&s->memcg_params.children_node);
	list_del(&s->memcg_params.kmem_caches_node);
}
#else
/* Without CONFIG_MEMCG_KMEM there is nothing to initialise; returns 0. */
static inline int init_memcg_params(struct kmem_cache *s,
				    struct kmem_cache *root_cache)
{
	return 0;
}
/* Without CONFIG_MEMCG_KMEM there is nothing to tear down. */
static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
/* Without CONFIG_MEMCG_KMEM there are no memcg lists to unlink from. */
static inline void memcg_unlink_cache(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 *
 * @flags: cache flags; only SLAB_HWCACHE_ALIGN is examined here
 * @align: alignment requested by the caller
 * @size:  object size
 *
 * Return: the largest of the requested alignment, the (possibly reduced)
 * cache-line alignment and ARCH_SLAB_MINALIGN, rounded up to a multiple
 * of sizeof(void *).
 */
static unsigned int calculate_alignment(slab_flags_t flags,
		unsigned int align, unsigned int size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater then use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned int ralign;

		ralign = cache_line_size();
		/*
		 * Halve the cache-line alignment while the object fits in
		 * half of it. Stop at 1: with size == 0 the comparison would
		 * otherwise remain true once ralign reaches 0 and the loop
		 * would never terminate. For size >= 1 the extra condition
		 * never changes the result.
		 */
		while (ralign > 1 && size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
/*
* Find a mergeable slab cache
*/
int slab_unmergeable(struct kmem_cache *s)
{
if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
return 1;
if (!is_root_cache(s))
return 1;
if (s->ctor)
return 1;
if (s->usersize)
return 1;
/*
* We may have set a slab to be unmergeable during bootstrap.
*/
if (s->refcount < 0)
return 1;
return 0;
}
/*
 * Look for an existing root cache that a new cache with the given
 * parameters could share.
 *
 * @size:  requested object size
 * @align: requested alignment
 * @flags: requested cache flags
 * @name:  name of the new cache (passed on to kmem_cache_flags())
 * @ctor:  requested constructor
 *
 * Nothing is returned when merging is disabled, when a constructor is
 * requested, or when the effective flags contain a SLAB_NEVER_MERGE flag.
 * Root caches are scanned in reverse list order and the first compatible
 * one is returned.
 *
 * Return: a compatible cache, or NULL if there is none.
 */
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *cand;

	if (slab_nomerge || ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(cand, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(cand) || size > cand->size)
			continue;

		/* The SLAB_MERGE_SAME bits have to match exactly. */
		if ((flags ^ cand->flags) & SLAB_MERGE_SAME)
			continue;

		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if (cand->size & (align - 1))
			continue;

		/* Skip candidates that would waste a pointer or more. */
		if (cand->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
		    (align > cand->align || cand->align % align))
			continue;

		return cand;
	}

	return NULL;
}
/*
 * Allocate and set up a new cache descriptor.
 *
 * @name:        name stored in the descriptor (not copied here)
 * @object_size: object size; also used as the initial cache size
 * @align:       object alignment
 * @flags:       flags handed to __kmem_cache_create()
 * @useroffset:  usercopy region offset
 * @usersize:    usercopy region size
 * @ctor:        object constructor
 * @memcg:       memory cgroup passed to memcg_link_cache()
 * @root_cache:  parent cache passed to init_memcg_params()
 *
 * A usercopy region that does not fit inside the object triggers a warning
 * and is dropped. On success the cache has refcount 1, is on slab_caches
 * and is linked into the memcg lists.
 *
 * Return: the new cache, or an ERR_PTR() on failure (-ENOMEM when the
 * descriptor cannot be allocated, otherwise the error of the failing step).
 */
static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *cache;
	int ret;

	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	cache = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->name = name;
	cache->size = cache->object_size = object_size;
	cache->align = align;
	cache->ctor = ctor;
	cache->useroffset = useroffset;
	cache->usersize = usersize;

	ret = init_memcg_params(cache, root_cache);
	if (!ret)
		ret = __kmem_cache_create(cache, flags);

	if (ret) {
		/* Undo the memcg setup and give the descriptor back. */
		destroy_memcg_params(cache);
		kmem_cache_free(kmem_cache, cache);
		return ERR_PTR(ret);
	}

	cache->refcount = 1;
	list_add(&cache->list, &slab_caches);
	memcg_link_cache(cache, memcg);

	return cache;
}
/**
 * kmem_cache_create_usercopy - Create a cache with a region suitable
 * for copying to userspace
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * On failure the function panics if %SLAB_PANIC is set in the (masked)
 * flags; otherwise it prints a warning and a stack dump.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
unsigned int size, unsigned int align,
slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
const char *cache_name;
int err;
/* Acquired in this order and released in reverse at out_unlock. */
get_online_cpus();
get_online_mems();
memcg_get_cache_ids();
mutex_lock(&slab_mutex);
/* err is always assigned here, before any path reaches out_unlock. */
err = kmem_cache_sanity_check(name, size);
if (err) {
goto out_unlock;
}
/* Refuse requests with allocator specific flags */
if (flags & ~SLAB_FLAGS_PERMITTED) {
err = -EINVAL;
goto out_unlock;
}
/*
 * Some allocators will constrain the set of valid flags to a subset
 * of all flags. We expect them to define CACHE_CREATE_MASK in this
 * case, and we'll just provide them with a sanitized version of the
 * passed flags.
 */
flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize or useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
/* Only caches without a usercopy region are offered for aliasing. */
if (!usersize)
s = __kmem_cache_alias(name, size, align, flags, ctor);
if (s)
goto out_unlock;
/* The cache keeps its own copy of the name. */
cache_name = kstrdup_const(name, GFP_KERNEL);
if (!cache_name) {
err = -ENOMEM;
goto out_unlock;
}
s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
flags, useroffset, usersize, ctor, NULL, NULL);
if (IS_ERR(s)) {
err = PTR_ERR(s);
/* The name copy was not taken over by a cache; free it here. */
kfree_const(cache_name);
}
out_unlock:
mutex_unlock(&slab_mutex);
memcg_put_cache_ids();
put_online_mems();
put_online_cpus();
if (err) {
if (flags & SLAB_PANIC)
panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
name, err);
else {
pr_warn("kmem_cache_create(%s) failed with error %d\n",
name, err);
dump_stack();
}
return NULL;
}
return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Thin wrapper around kmem_cache_create_usercopy() that requests no
 * usercopy region (offset 0, size 0).
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		  slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache *cache;

	cache = kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
					   ctor);
	return cache;
}
EXPORT_SYMBOL(kmem_cache_create);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
LIST_HEAD(to_destroy);
struct kmem_cache *s, *s2;
/*
* On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
* @slab_caches_to_rcu_destroy list. The slab pages are freed
* through RCU and and the associated kmem_cache are dereferenced
* while freeing the pages, so the kmem_caches should be freed only
* after the pending RCU operations are finished. As rcu_barrier()
* is a pretty slow operation, we batch all pending destructions
* asynchronously.
*/
mutex_lock(&slab_mutex);
list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
mutex_unlock(&slab_mutex);
if (list_empty(&to_destroy))
return;
rcu_barrier();
list_for_each_entry_safe(s, s2, &to_destroy, list) {
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_release(s);
#else
slab_kmem_cache_release(s);
#endif
}
}
static int shutdown_cache(struct kmem_cache *s)
{
/* free asan quarantined objects */
kasan_cache_shutdown(s);
if (__kmem_cache_shutdown(s) != 0)
return -EBUSY;
memcg_unlink_cache(s);
list_del(&s->list);
if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_unlink(s);
#endif
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_unlink(s);
sysfs_slab_release(s);
#else
slab_kmem_cache_release(s);
#endif
}
return 0;
}
#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 *
 * Nothing is returned: on any failure, or if the cgroup is no longer
 * online or already has an entry, the function simply leaves the root
 * cache's array untouched.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *root_cache)
{
static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
struct cgroup_subsys_state *css = &memcg->css;
struct memcg_cache_array *arr;
struct kmem_cache *s = NULL;
char *cache_name;
int idx;
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
/*
 * The memory cgroup could have been offlined while the cache
 * creation work was pending.
 */
if (memcg->kmem_state != KMEM_ONLINE)
goto out_unlock;
/* Slot of this cgroup in the root cache's per-memcg array. */
idx = memcg_cache_id(memcg);
arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
lockdep_is_held(&slab_mutex));
/*
 * Since per-memcg caches are created asynchronously on first
 * allocation (see memcg_kmem_get_cache()), several threads can try to
 * create the same cache, but only one of them may succeed.
 */
if (arr->entries[idx])
goto out_unlock;
/* Name format: "<root name>(<css serial number>:<cgroup name>)". */
cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
css->serial_nr, memcg_name_buf);
if (!cache_name)
goto out_unlock;
s = create_cache(cache_name, root_cache->object_size,
root_cache->align,
root_cache->flags & CACHE_CREATE_MASK,
root_cache->useroffset, root_cache->usersize,
root_cache->ctor, memcg, root_cache);
/*
 * If we could not create a memcg cache, do not complain, because
 * that's not critical at all as we can always proceed with the root
 * cache.
 */
if (IS_ERR(s)) {
/* The name was not taken over by a cache; free it here. */
kfree(cache_name);
goto out_unlock;
}
/*
 * Since readers won't lock (see memcg_kmem_get_cache()), we need a
 * barrier here to ensure nobody will see the kmem_cache partially
 * initialized.
 */
smp_wmb();
arr->entries[idx] = s;
out_unlock:
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
}
/*
 * Workqueue handler for deferred per-memcg cache work.
 *
 * Looks up the kmem_cache embedding @work and runs its
 * memcg_params.work_fn callback while holding get_online_cpus(),
 * get_online_mems() and slab_mutex.
 */
static void kmemcg_workfn(struct work_struct *work)
{
	struct kmem_cache *cachep;

	cachep = container_of(work, struct kmem_cache, memcg_params.work);

	get_online_cpus();
	get_online_mems();
	mutex_lock(&slab_mutex);

	cachep->memcg_params.work_fn(cachep);

	mutex_unlock(&slab_mutex);
	put_online_mems();
	put_online_cpus();
}
/*
 * RCU callback queued by kmemcg_cache_deactivate(): forwards the cache's
 * pending memcg_params.work_fn to memcg_kmem_cache_wq via kmemcg_workfn().
 */
static void kmemcg_rcufn(struct rcu_head *head)
{
struct kmem_cache *s = container_of(head, struct kmem_cache,
memcg_params.rcu_head);
/*
* We need to grab blocking locks. Bounce to ->work. The
* work item shares the space with the RCU head and can't be
* initialized earlier.
*/
INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
}
/*
 * Deferred-work callback installed by kmemcg_cache_shutdown(): shuts the
 * memcg cache down and warns if shutdown_cache() reports a failure.
 */
static void kmemcg_cache_shutdown_fn(struct kmem_cache *s)
{
	int err = shutdown_cache(s);

	WARN_ON(err);
}
/*
 * percpu_ref callback for a memcg cache's memcg_params.refcnt.
 *
 * Unless the root cache is already marked dying, schedules
 * kmemcg_cache_shutdown_fn() on memcg_kmem_cache_wq.  The dying check and
 * the queueing both happen under memcg_kmem_wq_lock.
 */
static void kmemcg_cache_shutdown(struct percpu_ref *percpu_ref)
{
	struct kmem_cache *s;
	unsigned long flags;

	s = container_of(percpu_ref, struct kmem_cache, memcg_params.refcnt);

	spin_lock_irqsave(&memcg_kmem_wq_lock, flags);
	if (!s->memcg_params.root_cache->memcg_params.dying) {
		s->memcg_params.work_fn = kmemcg_cache_shutdown_fn;
		INIT_WORK(&s->memcg_params.work, kmemcg_workfn);
		queue_work(memcg_kmem_cache_wq, &s->memcg_params.work);
	}
	spin_unlock_irqrestore(&memcg_kmem_wq_lock, flags);
}
/*
 * Deferred-work callback installed by kmemcg_cache_deactivate(); it runs
 * from kmemcg_workfn() once the RCU callback has bounced to the workqueue.
 * Finishes the allocator-specific deactivation and then kills the cache's
 * percpu reference.
 */
static void kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
{
__kmemcg_cache_deactivate_after_rcu(s);
percpu_ref_kill(&s->memcg_params.refcnt);
}
/*
 * Begin deactivation of a per-memcg cache.
 *
 * Root caches are rejected with a one-time warning.  Otherwise the cache is
 * deactivated, flagged SLAB_DEACTIVATED and, unless its root cache is
 * already dying, the remaining work is scheduled through call_rcu().
 */
static void kmemcg_cache_deactivate(struct kmem_cache *s)
{
	if (WARN_ON_ONCE(is_root_cache(s)))
		return;

	__kmemcg_cache_deactivate(s);
	s->flags |= SLAB_DEACTIVATED;

	/*
	 * memcg_kmem_wq_lock serializes access to the root cache's
	 * memcg_params.dying flag, so that no new deactivation work gets
	 * queued once the flag is set (see flush_memcg_workqueue()).
	 */
	spin_lock_irq(&memcg_kmem_wq_lock);
	if (!s->memcg_params.root_cache->memcg_params.dying) {
		s->memcg_params.work_fn = kmemcg_cache_deactivate_after_rcu;
		call_rcu(&s->memcg_params.rcu_head, kmemcg_rcufn);
	}
	spin_unlock_irq(&memcg_kmem_wq_lock);
}
/*
 * Deactivate every per-memcg cache that belongs to @memcg and hand the
 * caches on @memcg's kmem_caches list over to @parent.
 */
void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg,
struct mem_cgroup *parent)
{
int idx;
struct memcg_cache_array *arr;
struct kmem_cache *s, *c;
unsigned int nr_reparented;
idx = memcg_cache_id(memcg);
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
/*
 * Pass 1: for every root cache, deactivate the child cache stored in
 * this memcg's slot (if any) and clear the slot.
 */
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
lockdep_is_held(&slab_mutex));
c = arr->entries[idx];
if (!c)
continue;
kmemcg_cache_deactivate(c);
arr->entries[idx] = NULL;
}
/*
 * Pass 2: point each cache on the memcg's list at @parent and drop one
 * css reference on @memcg per cache.
 */
nr_reparented = 0;
list_for_each_entry(s, &memcg->kmem_caches,
memcg_params.kmem_caches_node) {
WRITE_ONCE(s->memcg_params.memcg, parent);
css_put(&memcg->css);
nr_reparented++;
}
/*
 * Move the whole list to @parent and take a matching number of css
 * references on it.
 */
if (nr_reparented) {
list_splice_init(&memcg->kmem_caches,
&parent->kmem_caches);
css_get_many(&parent->css, nr_reparented);
}
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
}
/*
 * Shut down all per-memcg child caches of root cache @s.
 *
 * The lockdep annotation below expects slab_mutex to be held.
 *
 * Return: 0 if no child caches remain afterwards, -EBUSY otherwise.
 */
static int shutdown_memcg_caches(struct kmem_cache *s)
{
struct memcg_cache_array *arr;
struct kmem_cache *c, *c2;
LIST_HEAD(busy);
int i;
BUG_ON(!is_root_cache(s));
/*
* First, shutdown active caches, i.e. caches that belong to online
* memory cgroups.
*/
arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
lockdep_is_held(&slab_mutex));
for_each_memcg_cache_index(i) {
c = arr->entries[i];
if (!c)
continue;
if (shutdown_cache(c))
/*
* The cache still has objects. Move it to a temporary
* list so as not to try to destroy it for a second
* time while iterating over inactive caches below.
*/
list_move(&c->memcg_params.children_node, &busy);
else
/*
* The cache is empty and will be destroyed soon. Clear
* the pointer to it in the memcg_caches array so that
* it will never be accessed even if the root cache
* stays alive.
*/
arr->entries[i] = NULL;
}
/*
* Second, shutdown all caches left from memory cgroups that are now
* offline.
*/
list_for_each_entry_safe(c, c2, &s->memcg_params.children,
memcg_params.children_node)
shutdown_cache(c);
/* Put the still-busy caches back on the children list. */
list_splice(&busy, &s->memcg_params.children);
/*
* A cache being destroyed must be empty. In particular, this means
* that all per memcg caches attached to it must be empty too.
*/
if (!list_empty(&s->memcg_params.children))
return -EBUSY;
return 0;
}
/*
 * Mark root cache @s as dying. The flag is written under
 * memcg_kmem_wq_lock, the same lock under which kmemcg_cache_shutdown()
 * and kmemcg_cache_deactivate() test it before queueing work.
 */
static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
{
spin_lock_irq(&memcg_kmem_wq_lock);
s->memcg_params.dying = true;
spin_unlock_irq(&memcg_kmem_wq_lock);
}
/*
 * Wait for pending memcg cache work: first the outstanding RCU callbacks,
 * then the work items on memcg_kmem_cache_wq, and finally one more RCU
 * barrier if @s still has child caches. kmem_cache_destroy() calls this
 * with slab_mutex dropped.
 */
static void flush_memcg_workqueue(struct kmem_cache *s)
{
/*
* SLAB and SLUB deactivate the kmem_caches through call_rcu. Make
* sure all registered rcu callbacks have been invoked.
*/
rcu_barrier();
/*
* SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
* deactivates the memcg kmem_caches through workqueue. Make sure all
* previous workitems on workqueue are processed.
*/
if (likely(memcg_kmem_cache_wq))
flush_workqueue(memcg_kmem_cache_wq);
/*
* If we're racing with children kmem_cache deactivation, it might
* take another rcu grace period to complete their destruction.
* At this moment the corresponding percpu_ref_kill() call should be
* done, but it might take another rcu grace period to complete
* switching to the atomic mode.
* Please, note that we check without grabbing the slab_mutex. It's safe
* because at this moment the children list can't grow.
*/
if (!list_empty(&s->memcg_params.children))
rcu_barrier();
}
#else
/*
 * !CONFIG_MEMCG_KMEM stub: there are no per-memcg child caches to shut
 * down, so always report success.
 */
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
return 0;
}
#endif /* CONFIG_MEMCG_KMEM */
/*
 * Free everything attached to cache @s: allocator-specific state, memcg
 * parameters, the name (via kfree_const()), and finally the kmem_cache
 * structure itself, which is returned to the kmem_cache cache.
 */
void slab_kmem_cache_release(struct kmem_cache *s)
{
__kmem_cache_release(s);
destroy_memcg_params(s);
kfree_const(s->name);
/* Must be last: @s is not valid after this point. */
kmem_cache_free(kmem_cache, s);
}
/*
 * kmem_cache_destroy - drop a reference on a cache, destroying it on the last
 * @s: cache to destroy; NULL is ignored
 *
 * Decrements s->refcount under slab_mutex and returns while references
 * remain. With CONFIG_MEMCG_KMEM, the last reference marks the cache dying,
 * drops all locks so pending memcg work can be flushed, then retakes them
 * and re-checks the refcount. Child memcg caches are shut down before the
 * cache itself; a failure is reported with pr_err() and a stack dump.
 */
void kmem_cache_destroy(struct kmem_cache *s)
{
int err;
if (unlikely(!s))
return;
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
s->refcount--;
if (s->refcount)
goto out_unlock;
#ifdef CONFIG_MEMCG_KMEM
/* Stop new memcg deactivation/shutdown work from being queued. */
memcg_set_kmem_cache_dying(s);
/* Drop every lock before flushing; they are retaken below. */
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
flush_memcg_workqueue(s);
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
/*
* Another thread referenced it again
*/
if (READ_ONCE(s->refcount)) {
/* Undo the dying mark and leave the cache alive. */
spin_lock_irq(&memcg_kmem_wq_lock);
s->memcg_params.dying = false;
spin_unlock_irq(&memcg_kmem_wq_lock);
goto out_unlock;
}
#endif
/* Child caches first; the cache itself only if that succeeded. */
err = shutdown_memcg_caches(s);
if (!err)
err = shutdown_cache(s);
if (err) {
pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
s->name);
dump_stack();
}
out_unlock:
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Asks the allocator to release as many slabs of @cachep as it can. The
 * KASAN shrink hook is invoked first, and the whole operation runs between
 * get_online_cpus()/get_online_mems() and their put counterparts.
 *
 * Return: %0 if all slabs were released, non-zero otherwise
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int rc;

	get_online_cpus();
	get_online_mems();

	kasan_cache_shrink(cachep);
	rc = __kmem_cache_shrink(cachep);

	put_online_mems();
	put_online_cpus();

	return rc;
}
EXPORT_SYMBOL(kmem_cache_shrink);
/**
 * kmem_cache_shrink_all - shrink a cache and all memcg caches for root cache
 * @s: The cache pointer
 *
 * If memcg kmem accounting is compiled out, or @s is not a root cache, this
 * is equivalent to kmem_cache_shrink(@s). Otherwise the root cache is shrunk
 * first, followed by every per-memcg child cache that has not been
 * deactivated.
 */
void kmem_cache_shrink_all(struct kmem_cache *s)
{
	struct kmem_cache *c;

	if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || !is_root_cache(s)) {
		kmem_cache_shrink(s);
		return;
	}

	get_online_cpus();
	get_online_mems();
	kasan_cache_shrink(s);
	__kmem_cache_shrink(s);

	/*
	 * We have to take the slab_mutex to protect from the memcg list
	 * modification.
	 */
	mutex_lock(&slab_mutex);
	for_each_memcg_cache(c, s) {
		/*
		 * Don't need to shrink deactivated memcg caches. The flag
		 * lives on the child cache (kmemcg_cache_deactivate() sets it
		 * and refuses root caches), so it must be tested on @c, not
		 * on the root cache @s.
		 */
		if (c->flags & SLAB_DEACTIVATED)
			continue;
		kasan_cache_shrink(c);
		__kmem_cache_shrink(c);
	}
	mutex_unlock(&slab_mutex);
	put_online_mems();
	put_online_cpus();
}
/* Report whether the slab allocator has reached at least the UP state. */
bool slab_is_available(void)
{
	if (slab_state < UP)
		return false;

	return true;
}
#ifndef CONFIG_SLOB
/*
 * Initialize the caller-provided cache @s during boot, before regular slab
 * services are available. Panics if the allocator cannot set the cache up.
 */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	unsigned int align = ARCH_KMALLOC_MINALIGN;
	int err;

	s->name = name;
	s->object_size = size;
	s->size = size;

	/*
	 * Power-of-two sizes get natural alignment for kmalloc caches,
	 * independent of any SL*B debugging options.
	 */
	if (is_power_of_2(size) && size > align)
		align = size;
	s->align = calculate_alignment(flags, align, size);

	s->useroffset = useroffset;
	s->usersize = usersize;

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);
	if (err)
		panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
		      name, size, err);

	/* Exempt from merging for now */
	s->refcount = -1;
}
/*
 * Allocate and set up a kmalloc cache at boot: the kmem_cache structure is
 * zero-allocated from the kmem_cache cache, initialized through
 * create_boot_cache(), added to slab_caches and linked as a root cache.
 * Panics on allocation failure. The returned cache has a refcount of 1.
 */
struct kmem_cache *__init create_kmalloc_cache(const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	struct kmem_cache *cache;

	cache = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
	if (cache == NULL)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(cache, name, size, flags, useroffset, usersize);

	list_add(&cache->list, &slab_caches);
	memcg_link_cache(cache, NULL);

	/* create_boot_cache() left the refcount at -1; make it a normal cache. */
	cache->refcount = 1;

	return cache;
}
/*
 * Table of kmalloc caches, indexed by [kmalloc type][size index]. Entries
 * are filled in by new_kmalloc_cache()/create_kmalloc_caches() and looked
 * up by kmalloc_slab().
 */
struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
/*
* Conversion table from small allocation sizes to the index in the
* kmalloc array: entry (size - 1) / 8 holds the index for that size (see
* size_index_elem()). This is necessary for sizes up to 192 since we have
* non power of two cache sizes there. The size of larger slabs can be
* determined using fls.
*
* Each entry's trailing comment gives the largest size that maps to it.
*/
static u8 size_index[24] __ro_after_init = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
/*
 * Map a byte count to its slot in size_index[]: sizes are grouped into
 * 8-byte buckets, with 1..8 in slot 0, 9..16 in slot 1, and so on.
 * @bytes must be non-zero; zero wraps around (kmalloc_slab() filters it
 * out before calling).
 */
static inline unsigned int size_index_elem(unsigned int bytes)
{
	unsigned int last_byte = bytes - 1;

	return last_byte >> 3;
}
/*
 * Look up the kmalloc cache serving an allocation of @size bytes for the
 * kmalloc type selected by @flags.
 *
 * Returns ZERO_SIZE_PTR for a zero size, and NULL (with a one-time warning)
 * when @size exceeds KMALLOC_MAX_CACHE_SIZE.
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	unsigned int index;

	if (!size)
		return ZERO_SIZE_PTR;

	if (size > 192) {
		if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
			return NULL;
		/* Larger sizes map to the next power of two. */
		index = fls(size - 1);
	} else {
		/* Small sizes go through the lookup table. */
		index = size_index[size_index_elem(size)];
	}

	return kmalloc_caches[kmalloc_type(flags)][index];
}
/*
* kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
* kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
* kmalloc-67108864.
*
* The table is indexed by kmalloc cache index: entry 0 is an unused
* placeholder, entries 1 and 2 are the non power of two sizes 96 and 192,
* and every entry i >= 3 has size 2^i.
*/
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
{NULL, 0}, {"kmalloc-96", 96},
{"kmalloc-192", 192}, {"kmalloc-8", 8},
{"kmalloc-16", 16}, {"kmalloc-32", 32},
{"kmalloc-64", 64}, {"kmalloc-128", 128},
{"kmalloc-256", 256}, {"kmalloc-512", 512},
{"kmalloc-1k", 1024}, {"kmalloc-2k", 2048},
{"kmalloc-4k", 4096}, {"kmalloc-8k", 8192},
{"kmalloc-16k", 16384}, {"kmalloc-32k", 32768},
{"kmalloc-64k", 65536}, {"kmalloc-128k", 131072},
{"kmalloc-256k", 262144}, {"kmalloc-512k", 524288},
{"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152},
{"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608},
{"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432},
{"kmalloc-64M", 67108864}
};
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
void __init setup_kmalloc_cache_index_table(void)
{
unsigned int i;
/* Build fails unless KMALLOC_MIN_SIZE is a power of two no larger than 256. */
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
/* Every size below KMALLOC_MIN_SIZE is served by the smallest cache. */
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
unsigned int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE >= 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
/* Redirect sizes 72..96 to index 7 (kmalloc-128 in kmalloc_info[]). */
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
}
if (KMALLOC_MIN_SIZE >= 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
}
/*
 * Build a cache name of the form "<prefix>-<number><unit>", where the size
 * is reduced by factors of 1024 while it stays an exact multiple and the
 * unit suffix is none, 'k' or 'M' accordingly (e.g. 8192 -> "<prefix>-8k").
 *
 * The reduction stops at 'M', the last entry of units[], so a size that is
 * a multiple of 1024^3 is printed in megabytes instead of indexing past the
 * end of the table.
 *
 * Return: a string allocated by kasprintf(GFP_NOWAIT, ...), or NULL on
 * allocation failure.
 */
static const char *
kmalloc_cache_name(const char *prefix, unsigned int size)
{
	static const char units[3] = "\0kM";
	int idx = 0;

	while (idx < (int)sizeof(units) - 1 &&
	       size >= 1024 && (size % 1024 == 0)) {
		size /= 1024;
		idx++;
	}

	return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]);
}
/*
 * Create the kmalloc cache for size index @idx and kmalloc type @type and
 * store it in kmalloc_caches[type][idx].
 *
 * Reclaimable caches get SLAB_RECLAIM_ACCOUNT and a generated
 * "kmalloc-rcl-..." name; all other types use the name from kmalloc_info[].
 * The usercopy window covers the whole object.
 */
static void __init
new_kmalloc_cache(int idx, int type, slab_flags_t flags)
{
	const struct kmalloc_info_struct *info = &kmalloc_info[idx];
	const char *name = info->name;

	if (type == KMALLOC_RECLAIM) {
		flags |= SLAB_RECLAIM_ACCOUNT;
		name = kmalloc_cache_name("kmalloc-rcl", info->size);
		BUG_ON(!name);
	}

	kmalloc_caches[type][idx] = create_kmalloc_cache(name, info->size,
							 flags, 0, info->size);
}
/*
* Create the kmalloc array. Some of the regular kmalloc arrays
* may already have been created because they were needed to
* enable allocations for slab creation.
*
* Sets slab_state to UP once the normal and reclaimable caches exist, then
* (with CONFIG_ZONE_DMA) creates a DMA twin for every normal cache.
*/
void __init create_kmalloc_caches(slab_flags_t flags)
{
int i, type;
for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
/* Skip caches that were already created earlier in boot. */
if (!kmalloc_caches[type][i])
new_kmalloc_cache(i, type, flags);
/*
* Caches that are not of the two-to-the-power-of size.
* These have to be created immediately after the
* earlier power of two caches
*/
/* Index 1 is kmalloc-96, created right after index 6 (kmalloc-64). */
if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
!kmalloc_caches[type][1])
new_kmalloc_cache(1, type, flags);
/* Index 2 is kmalloc-192, created right after index 7 (kmalloc-128). */
if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
!kmalloc_caches[type][2])
new_kmalloc_cache(2, type, flags);
}
}
/* Kmalloc array is now usable */
slab_state = UP;
#ifdef CONFIG_ZONE_DMA
for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
if (s) {
unsigned int size = kmalloc_size(i);
const char *n = kmalloc_cache_name("dma-kmalloc", size);
BUG_ON(!n);
/* DMA caches are created with an empty usercopy window (0, 0). */
kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
n, size, SLAB_CACHE_DMA | flags, 0, 0);
}
}
#endif
}
#endif /* !CONFIG_SLOB */
/*
 * Large allocation requests bypass the slab caches and go straight to the
 * page allocator. __GFP_COMP is always added because kfree needs to know
 * the allocation order to free the pages properly.
 *
 * On success the pages are accounted under NR_SLAB_UNRECLAIMABLE. The KASAN
 * and kmemleak hooks run on the result in either case; NULL is returned if
 * the page allocation failed.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	struct page *page;
	void *ret;

	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	if (unlikely(!page)) {
		ret = NULL;
	} else {
		ret = page_address(page);
		mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
				    1 << order);
	}

	ret = kasan_kmalloc_large(ret, size, flags);
	/* KASAN may have tagged the pointer, so kmemleak must see it last. */
	kmemleak_alloc(ret, size, 1, flags);

	return ret;
}
EXPORT_SYMBOL(kmalloc_order);
#ifdef CONFIG_TRACING
/*
 * Same as kmalloc_order(), but also emits the kmalloc tracepoint with the
 * caller's return address, the requested size and the size actually
 * allocated (PAGE_SIZE << order).
 */
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/*
 * Fill @list with the values 0..@count-1 and then shuffle them in place
 * (Fisher-Yates), drawing random numbers from @state.
 */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
			       unsigned int count)
{
	unsigned int pos;
	unsigned int pick;

	for (pos = 0; pos < count; pos++)
		list[pos] = pos;

	/* Walk from the last slot down, swapping each with a slot at or below it. */
	pos = count - 1;
	while (pos > 0) {
		pick = prandom_u32_state(state);
		pick %= (pos + 1);
		swap(list[pos], list[pick]);
		pos--;
	}
}
/*
 * Allocate and fill the random freelist sequence of @cachep.
 *
 * Nothing is done if fewer than two entries are requested or if the cache
 * already has a sequence.
 *
 * Return: 0 on success or when nothing had to be done, -ENOMEM if the
 * sequence could not be allocated.
 */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			    gfp_t gfp)
{
	struct rnd_state state;

	if (count < 2)
		return 0;
	if (cachep->random_seq)
		return 0;

	cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
	if (cachep->random_seq == NULL)
		return -ENOMEM;

	/* Get best entropy at this stage of boot */
	prandom_seed_state(&state, get_random_long());

	freelist_randomize(&state, cachep->random_seq, count);

	return 0;
}
/*
 * Destroy the per-cache random freelist sequence. The pointer is reset to
 * NULL so that cache_random_seq_create() can build a new one later.
 */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
kfree(cachep->random_seq);
cachep->random_seq = NULL;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif
/*
 * Emit the slabinfo header into @m: a version line followed by one line
 * naming the columns. With CONFIG_DEBUG_SLAB the version line is marked
 * "(statistics)" and the globalstat/cpustat column names are appended.
 */
static void print_slabinfo_header(struct seq_file *m)
{
/*
* Output format version, so at least we can change it
* without _too_ many complaints.
*/
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
seq_puts(m, "slabinfo - version: 2.1\n");
#endif
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
}
/*
 * seq_file ->start for slabinfo: takes slab_mutex (released by slab_stop())
 * and positions the iterator at entry *pos of the root cache list.
 */
void *slab_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&slab_mutex);
return seq_list_start(&slab_root_caches, *pos);
}
/* seq_file ->next for slabinfo: advance to the next root cache. */
void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
return seq_list_next(p, &slab_root_caches, pos);
}
/* seq_file ->stop for slabinfo: drop the slab_mutex taken in slab_start(). */
void slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
}
static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
struct kmem_cache *c;
struct slabinfo sinfo;
if (!is_root_cache(s))
return;
for_each_memcg_cache(c, s) {
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(c, &sinfo);
info->active_slabs += sinfo.active_slabs;
info->num_slabs += sinfo.num_slabs;
info->shared_avail += sinfo.shared_avail;
info->active_objs += sinfo.active_objs;
info->num_objs += sinfo.num_objs;
}
}
/*
 * Print one slabinfo line for cache @s into @m: object counts and sizes,
 * tunables, slab counts and any allocator statistics. For a root cache the
 * counts include those of its per-memcg child caches.
 */
static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
struct slabinfo sinfo;
/* Start from zero so fields get_slabinfo() does not set read as 0. */
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(s, &sinfo);
memcg_accumulate_slabinfo(s, &sinfo);
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
sinfo.objects_per_slab, (1 << sinfo.cache_order));
seq_printf(m, " : tunables %4u %4u %4u",
sinfo.limit, sinfo.batchcount, sinfo.shared);
seq_printf(m, " : slabdata %6lu %6lu %6lu",
sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
slabinfo_show_stats(m, s);
seq_putc(m, '\n');
}
/*
 * seq_file ->show for slabinfo: @p is the root_caches_node of the cache to
 * print. Always returns 0.
 */
static int slab_show(struct seq_file *m, void *p)
{
struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
/* The header goes out once, in front of the first list entry. */
if (p == slab_root_caches.next)
print_slabinfo_header(m);
cache_show(s, m);
return 0;
}
#ifdef CONFIG_QCOM_MINIDUMP_PANIC_DUMP
/*
 * Write a slabinfo-style report for every root cache into
 * md_slabinfo_seq_buf. Does nothing when that buffer is not set. The header
 * and per-cache line formats mirror print_slabinfo_header() and
 * cache_show().
 */
void md_dump_slabinfo(void)
{
struct kmem_cache *s;
struct slabinfo sinfo;
if (!md_slabinfo_seq_buf)
return;
/* print_slabinfo_header */
#ifdef CONFIG_DEBUG_SLAB
seq_buf_printf(md_slabinfo_seq_buf,
"slabinfo - version: 2.1 (statistics)\n");
#else
seq_buf_printf(md_slabinfo_seq_buf,
"slabinfo - version: 2.1\n");
#endif
seq_buf_printf(md_slabinfo_seq_buf,
"# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_buf_printf(md_slabinfo_seq_buf,
" : tunables <limit> <batchcount> <sharedfactor>");
seq_buf_printf(md_slabinfo_seq_buf,
" : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
seq_buf_printf(md_slabinfo_seq_buf,
" : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_buf_printf(md_slabinfo_seq_buf,
" : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_buf_printf(md_slabinfo_seq_buf, "\n");
/* Loop through all slabs */
/*
 * NOTE(review): mutex_lock() can block; confirm every caller of this
 * function is allowed to sleep (dump_unreclaimable_slab() uses
 * mutex_trylock() for a similar dump).
 */
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(s, &sinfo);
memcg_accumulate_slabinfo(s, &sinfo);
seq_buf_printf(md_slabinfo_seq_buf,
"%-17s %6lu %6lu %6u %4u %4d",
cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
sinfo.objects_per_slab, (1 << sinfo.cache_order));
seq_buf_printf(md_slabinfo_seq_buf, " : tunables %4u %4u %4u",
sinfo.limit, sinfo.batchcount, sinfo.shared);
seq_buf_printf(md_slabinfo_seq_buf,
" : slabdata %6lu %6lu %6lu",
sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
/*
 * NOTE(review): a NULL seq_file is passed here, whereas cache_show()
 * passes a real one. Confirm slabinfo_show_stats() tolerates NULL in
 * every configuration; any statistics it prints cannot end up in
 * md_slabinfo_seq_buf.
 */
slabinfo_show_stats(NULL, s);
seq_buf_printf(md_slabinfo_seq_buf, "\n");
}
mutex_unlock(&slab_mutex);
}
#endif
/*
 * Log the used and total size, in KB, of every root cache that is not
 * accounted as reclaimable (SLAB_RECLAIM_ACCOUNT clear) and has at least
 * one object. Nothing is dumped if slab_mutex cannot be taken without
 * blocking.
 */
void dump_unreclaimable_slab(void)
{
	struct kmem_cache *s, *s2;
	struct slabinfo sinfo;

	/*
	 * Here acquiring slab_mutex is risky since we don't prefer to get
	 * sleep in oom path. But, without mutex hold, it may introduce a
	 * risk of crash.
	 * Use mutex_trylock to protect the list traverse, dump nothing
	 * without acquiring the mutex.
	 */
	if (!mutex_trylock(&slab_mutex)) {
		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
		return;
	}

	pr_info("Unreclaimable slab info:\n");
	pr_info("Name Used Total\n");

	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
			continue;

		/*
		 * Zero the scratch structure first, as every other
		 * get_slabinfo() caller in this file does, so that no stale
		 * stack contents are handed to the helper.
		 */
		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(s, &sinfo);

		if (sinfo.num_objs > 0)
			pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
				(sinfo.active_objs * s->size) / 1024,
				(sinfo.num_objs * s->size) / 1024);
	}
	mutex_unlock(&slab_mutex);
}
#if defined(CONFIG_MEMCG)
/*
 * Per-memcg counterpart of slab_start(): takes slab_mutex (released by
 * memcg_slab_stop()) and positions the iterator at entry *pos of the
 * kmem_caches list of the memcg associated with @m.
 */
void *memcg_slab_start(struct seq_file *m, loff_t *pos)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
mutex_lock(&slab_mutex);
return seq_list_start(&memcg->kmem_caches, *pos);
}
/* Advance to the next cache on the kmem_caches list of @m's memcg. */
void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
return seq_list_next(p, &memcg->kmem_caches, pos);
}
/* Drop the slab_mutex taken in memcg_slab_start(). */
void memcg_slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
}
/*
 * Print the slabinfo line for one cache of a memcg; @p is the cache's
 * memcg_params.kmem_caches_node. Always returns 0.
 */
int memcg_slab_show(struct seq_file *m, void *p)
{
struct kmem_cache *s = list_entry(p, struct kmem_cache,
memcg_params.kmem_caches_node);
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
/* The header goes out once, in front of the first list entry. */
if (p == memcg->kmem_caches.next)
print_slabinfo_header(m);
cache_show(s, m);
return 0;
}
#endif
/*
* slabinfo_op - iterator that generates /proc/slabinfo
*
* Output layout:
* cache-name
* num-active-objs
* total-objs
* object size
* num-active-slabs
* total-slabs
* num-pages-per-slab
* + further values on SMP and with statistics enabled
*
* NOTE(review): the list above does not match the column order actually
* printed; print_slabinfo_header() and cache_show() are authoritative.
*/
static const struct seq_operations slabinfo_op = {
.start = slab_start,
.next = slab_next,
.stop = slab_stop,
.show = slab_show,
};
/* ->open handler for the slabinfo proc file: attach the slabinfo_op iterator. */
static int slabinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &slabinfo_op);
}
/*
 * File operations for the slabinfo proc file: reads go through the seq_file
 * helpers, writes are handled by slabinfo_write().
 */
static const struct file_operations proc_slabinfo_operations = {
.open = slabinfo_open,
.read = seq_read,
.write = slabinfo_write,
.llseek = seq_lseek,
.release = seq_release,
};
/*
 * Register the "slabinfo" proc entry with mode SLABINFO_RIGHTS. The result
 * of proc_create() is not checked; the initcall always returns 0.
 */
static int __init slab_proc_init(void)
{
proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
&proc_slabinfo_operations);
return 0;
}
module_init(slab_proc_init);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_MEMCG_KMEM)
/*
* Display information about kmem caches that have child memcg caches.
*
* For each such root cache one "root" line is printed, followed by one line
* per child cache showing the css id of its memcg and an optional status
* suffix: ":dead" if the css is not online, otherwise ":deact" if the cache
* has been deactivated. Runs with slab_mutex held and always returns 0.
*/
static int memcg_slabinfo_show(struct seq_file *m, void *unused)
{
struct kmem_cache *s, *c;
struct slabinfo sinfo;
mutex_lock(&slab_mutex);
seq_puts(m, "# <name> <css_id[:dead|deact]> <active_objs> <num_objs>");
seq_puts(m, " <active_slabs> <num_slabs>\n");
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
/*
* Skip kmem caches that don't have any memcg children.
*/
if (list_empty(&s->memcg_params.children))
continue;
/* Root line: counts of the root cache alone, children not included. */
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(s, &sinfo);
seq_printf(m, "%-17s root %6lu %6lu %6lu %6lu\n",
cache_name(s), sinfo.active_objs, sinfo.num_objs,
sinfo.active_slabs, sinfo.num_slabs);
for_each_memcg_cache(c, s) {
struct cgroup_subsys_state *css;
char *status = "";
css = &c->memcg_params.memcg->css;
/* An offline css takes precedence over the deactivated flag. */
if (!(css->flags & CSS_ONLINE))
status = ":dead";
else if (c->flags & SLAB_DEACTIVATED)
status = ":deact";
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(c, &sinfo);
seq_printf(m, "%-17s %4d%-6s %6lu %6lu %6lu %6lu\n",
cache_name(c), css->id, status,
sinfo.active_objs, sinfo.num_objs,
sinfo.active_slabs, sinfo.num_slabs);
}
}
mutex_unlock(&slab_mutex);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(memcg_slabinfo);
/*
 * Create the read-only "memcg_slabinfo" debugfs file, backed by
 * memcg_slabinfo_show(). A NULL parent is passed, which presumably places
 * it at the debugfs root. Always returns 0.
 */
static int __init memcg_slabinfo_init(void)
{
debugfs_create_file("memcg_slabinfo", S_IFREG | S_IRUGO,
NULL, NULL, &memcg_slabinfo_fops);
return 0;
}
late_initcall(memcg_slabinfo_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_MEMCG_KMEM */
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
/*
 * Common helper for __krealloc() and krealloc().
 *
 * If the existing object @p is already large enough, per ksize(), it is
 * reused and returned after the KASAN hook has run. Otherwise a new buffer
 * of @new_size bytes is allocated and the old contents are copied into it.
 * The old buffer is never freed here; that is left to the caller.
 *
 * Returns the old or new buffer, or NULL if the allocation failed.
 */
static __always_inline void *__do_krealloc(const void *p, size_t new_size,
gfp_t flags)
{
void *ret;
size_t ks = 0;
/* A NULL @p is treated as an object of size 0. */
if (p)
ks = ksize(p);
if (ks >= new_size) {
p = kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}
ret = kmalloc_track_caller(new_size, flags);
/* On this path ks < new_size, so the whole old object fits in the new one. */
if (ret && p)
memcpy(ret, p, ks);
return ret;
}
/**
* __krealloc - like krealloc() but don't free @p.
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*
* This function is like krealloc() except it never frees the originally
* allocated buffer. Use this if you don't want to free the buffer immediately
* like, for example, with RCU.
*
* A @new_size of 0 returns %ZERO_SIZE_PTR without touching @p.
*
* Return: pointer to the allocated memory or %NULL in case of error
*/
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
if (unlikely(!new_size))
return ZERO_SIZE_PTR;
return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);
/**
* krealloc - reallocate memory. The contents will remain unchanged.
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*
* The contents of the object pointed to are preserved up to the
* lesser of the new and old sizes. If @p is %NULL, krealloc()
* behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
* %NULL pointer, the object pointed to is freed.
*
* If the allocation fails, @p is left untouched and remains valid.
*
* Return: pointer to the allocated memory or %NULL in case of error
*/
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
void *ret;
if (unlikely(!new_size)) {
kfree(p);
return ZERO_SIZE_PTR;
}
ret = __do_krealloc(p, new_size, flags);
/*
 * Free the old buffer only if a different one was handed back. Both
 * pointers go through kasan_reset_tag() before they are compared.
 */
if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
kfree(p);
return ret;
}
EXPORT_SYMBOL(krealloc);
/**
* kzfree - like kfree but zero memory
* @p: object to free memory of
*
* The memory of the object @p points to is zeroed before freed.
* If @p is %NULL, kzfree() does nothing.
*
* Note: this function zeroes the whole allocated buffer which can be a good
* deal bigger than the requested buffer size passed to kmalloc(). So be
* careful when using this function in performance sensitive code.
*/
void kzfree(const void *p)
{
size_t ks;
void *mem = (void *)p;
/* Nothing to do for NULL and ZERO_SIZE_PTR. */
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
/* Size of the whole allocation, not the size originally requested. */
ks = ksize(mem);
memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);
/**
* ksize - get the actual amount of memory allocated for a given object
* @objp: Pointer to the object
*
* kmalloc may internally round up allocations and return more memory
* than requested. ksize() can be used to determine the actual amount of
* memory allocated. The caller may use this additional memory, even though
* a smaller amount of memory was initially specified with the kmalloc call.
* The caller must guarantee that objp points to a valid object previously
* allocated with either kmalloc() or kmem_cache_alloc(). The object
* must not be freed during the duration of the call.
*
* Return: size of the actual memory used by @objp in bytes; 0 for a %NULL
* pointer (with a one-time warning), for %ZERO_SIZE_PTR, and when the KASAN
* check of @objp fails.
*/
size_t ksize(const void *objp)
{
size_t size;
/* A NULL pointer is a caller bug: warn once and report size 0. */
if (WARN_ON_ONCE(!objp))
return 0;
/*
* We need to check that the pointed to object is valid, and only then
* unpoison the shadow memory below. We use __kasan_check_read(), to
* generate a more useful report at the time ksize() is called (rather
* than later where behaviour is undefined due to potential
* use-after-free or double-free).
*
* If the pointed to memory is invalid we return 0, to avoid users of
* ksize() writing to and potentially corrupting the memory region.
*
* We want to perform the check before __ksize(), to avoid potentially
* crashing in __ksize() due to accessing invalid metadata.
*/
if (unlikely(objp == ZERO_SIZE_PTR) || !__kasan_check_read(objp, 1))
return 0;
size = __ksize(objp);
/*
* We assume that ksize callers could use whole allocated area,
* so we need to unpoison this area.
*/
kasan_unpoison_shadow(objp, size);
return size;
}
EXPORT_SYMBOL(ksize);
/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
/*
 * Fault-injection hook for slab allocations: returns -ENOMEM when
 * __should_failslab() asks for the allocation to fail, 0 otherwise.
 * Registered as an ERRNO error-injection point.
 */
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return __should_failslab(s, gfpflags) ? -ENOMEM : 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);