Michael Bestas 9c70abfc5e
Merge tag 'ASB-2022-11-01_11-5.4' of https://android.googlesource.com/kernel/common into android13-5.4-lahaina
https://source.android.com/docs/security/bulletin/2022-11-01

* tag 'ASB-2022-11-01_11-5.4' of https://android.googlesource.com/kernel/common:
  UPSTREAM: mm/mremap: hold the rmap lock in write mode when moving page table entries.
  FROMLIST: binder: fix UAF of alloc->vma in race with munmap()
  UPSTREAM: mm: Fix TLB flush for not-first PFNMAP mappings in unmap_region()
  UPSTREAM: mm: Force TLB flush for PFNMAP mappings before unlink_file_vma()
  UPSTREAM: af_key: Do not call xfrm_probe_algs in parallel
  UPSTREAM: wifi: cfg80211: fix u8 overflow in cfg80211_update_notlisted_nontrans()
  UPSTREAM: wifi: cfg80211/mac80211: reject bad MBSSID elements
  UPSTREAM: wifi: cfg80211: ensure length byte is present before access
  UPSTREAM: wifi: cfg80211: fix BSS refcounting bugs
  UPSTREAM: wifi: cfg80211: avoid nontransmitted BSS list corruption
  UPSTREAM: wifi: mac80211_hwsim: avoid mac80211 warning on bad rate
  UPSTREAM: wifi: cfg80211: update hidden BSSes to avoid WARN_ON
  UPSTREAM: mac80211: mlme: find auth challenge directly
  UPSTREAM: wifi: mac80211: don't parse mbssid in assoc response
  UPSTREAM: wifi: mac80211: fix MBSSID parsing use-after-free
  ANDROID: Drop explicit 'CONFIG_INIT_STACK_ALL_ZERO=y' from gki_defconfig
  UPSTREAM: hardening: Remove Clang's enable flag for -ftrivial-auto-var-init=zero
  UPSTREAM: hardening: Avoid harmless Clang option under CONFIG_INIT_STACK_ALL_ZERO
  UPSTREAM: hardening: Clarify Kconfig text for auto-var-init
  ANDROID: GKI: Update FCNT KMI symbol list
  ANDROID: Fix kernelci build-break for !CONFIG_PERF_EVENTS
  BACKPORT: HID: steam: Prevent NULL pointer dereference in steam_{recv,send}_report
  ANDROID: ABI: Update allowed list for QCOM
  UPSTREAM: wifi: mac80211_hwsim: use 32-bit skb cookie
  UPSTREAM: wifi: mac80211_hwsim: add back erroneously removed cast
  UPSTREAM: wifi: mac80211_hwsim: fix race condition in pending packet
  ANDROID: incfs: Add check for ATTR_KILL_SUID and ATTR_MODE in incfs_setattr
  Linux 5.4.210
  x86/speculation: Add LFENCE to RSB fill sequence
  x86/speculation: Add RSB VM Exit protections
  macintosh/adb: fix oob read in do_adb_query() function
  media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls
  selftests: KVM: Handle compiler optimizations in ucall
  KVM: Don't null dereference ops->destroy
  selftests/bpf: Fix "dubious pointer arithmetic" test
  selftests/bpf: Fix test_align verifier log patterns
  bpf: Test_verifier, #70 error message updates for 32-bit right shift
  selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads
  bpf: Verifier, adjust_scalar_min_max_vals to always call update_reg_bounds()
  ACPI: APEI: Better fix to avoid spamming the console with old error logs
  ACPI: video: Shortening quirk list by identifying Clevo by board_name only
  ACPI: video: Force backlight native for some TongFang devices
  thermal: Fix NULL pointer dereferences in of_thermal_ functions
  ANDROID: GKI: db845c: Update symbols list and ABI
  Linux 5.4.209
  scsi: core: Fix race between handling STS_RESOURCE and completion
  mt7601u: add USB device ID for some versions of XiaoDu WiFi Dongle.
  ARM: crypto: comment out gcc warning that breaks clang builds
  sctp: leave the err path free in sctp_stream_init to sctp_stream_free
  sfc: disable softirqs for ptp TX
  perf symbol: Correct address for bss symbols
  virtio-net: fix the race between refill work and close
  netfilter: nf_queue: do not allow packet truncation below transport header offset
  sctp: fix sleep in atomic context bug in timer handlers
  i40e: Fix interface init with MSI interrupts (no MSI-X)
  tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
  tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
  Documentation: fix sctp_wmem in ip-sysctl.rst
  tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
  tcp: Fix a data-race around sysctl_tcp_autocorking.
  tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
  tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
  net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
  igmp: Fix data-races around sysctl_igmp_qrv.
  ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
  net: ping6: Fix memleak in ipv6_renew_options().
  tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
  tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
  scsi: ufs: host: Hold reference returned by of_parse_phandle()
  ice: do not setup vlan for loopback VSI
  ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
  tcp: Fix a data-race around sysctl_tcp_nometrics_save.
  tcp: Fix a data-race around sysctl_tcp_frto.
  tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
  tcp: Fix a data-race around sysctl_tcp_app_win.
  tcp: Fix data-races around sysctl_tcp_dsack.
  s390/archrandom: prevent CPACF trng invocations in interrupt context
  ntfs: fix use-after-free in ntfs_ucsncmp()
  Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
  ANDROID: restore some removed refcount functions
  ANDROID: add tty_schedule_flip() back to the kernel
  Linux 5.4.208
  x86: drop bogus "cc" clobber from __try_cmpxchg_user_asm()
  net: usb: ax88179_178a needs FLAG_SEND_ZLP
  tty: use new tty_insert_flip_string_and_push_buffer() in pty_write()
  tty: extract tty_flip_buffer_commit() from tty_flip_buffer_push()
  tty: drop tty_schedule_flip()
  tty: the rest, stop using tty_schedule_flip()
  tty: drivers/tty/, stop using tty_schedule_flip()
  Bluetooth: Fix bt_skb_sendmmsg not allocating partial chunks
  Bluetooth: SCO: Fix sco_send_frame returning skb->len
  Bluetooth: Fix passing NULL to PTR_ERR
  Bluetooth: RFCOMM: Replace use of memcpy_from_msg with bt_skb_sendmmsg
  Bluetooth: SCO: Replace use of memcpy_from_msg with bt_skb_sendmsg
  Bluetooth: Add bt_skb_sendmmsg helper
  Bluetooth: Add bt_skb_sendmsg helper
  ALSA: memalloc: Align buffer allocations in page size
  bitfield.h: Fix "type of reg too small for mask" test
  x86/mce: Deduplicate exception handling
  mmap locking API: initial implementation as rwsem wrappers
  x86/uaccess: Implement macros for CMPXCHG on user addresses
  x86: get rid of small constant size cases in raw_copy_{to,from}_user()
  locking/refcount: Consolidate implementations of refcount_t
  locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions
  locking/refcount: Move saturation warnings out of line
  locking/refcount: Improve performance of generic REFCOUNT_FULL code
  locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the <linux/refcount.h> header
  locking/refcount: Remove unused refcount_*_checked() variants
  locking/refcount: Ensure integer operands are treated as signed
  locking/refcount: Define constants for saturation and max refcount values
  ima: remove the IMA_TEMPLATE Kconfig option
  dlm: fix pending remove if msg allocation fails
  bpf: Make sure mac_header was set before using it
  mm/mempolicy: fix uninit-value in mpol_rebind_policy()
  spi: bcm2835: bcm2835_spi_handle_err(): fix NULL pointer deref for non DMA transfers
  tcp: Fix data-races around sysctl_tcp_max_reordering.
  tcp: Fix a data-race around sysctl_tcp_rfc1337.
  tcp: Fix a data-race around sysctl_tcp_stdurg.
  tcp: Fix a data-race around sysctl_tcp_retrans_collapse.
  tcp: Fix data-races around sysctl_tcp_slow_start_after_idle.
  tcp: Fix a data-race around sysctl_tcp_thin_linear_timeouts.
  tcp: Fix data-races around sysctl_tcp_recovery.
  tcp: Fix a data-race around sysctl_tcp_early_retrans.
  tcp: Fix data-races around sysctl knobs related to SYN option.
  udp: Fix a data-race around sysctl_udp_l3mdev_accept.
  ipv4: Fix a data-race around sysctl_fib_multipath_use_neigh.
  be2net: Fix buffer overflow in be_get_module_eeprom
  gpio: pca953x: only use single read/write for No AI mode
  ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero
  i40e: Fix erroneous adapter reinitialization during recovery process
  iavf: Fix handling of dummy receive descriptors
  tcp: Fix data-races around sysctl_tcp_fastopen.
  tcp: Fix data-races around sysctl_max_syn_backlog.
  tcp: Fix a data-race around sysctl_tcp_tw_reuse.
  tcp: Fix a data-race around sysctl_tcp_notsent_lowat.
  tcp: Fix data-races around some timeout sysctl knobs.
  tcp: Fix data-races around sysctl_tcp_reordering.
  tcp: Fix data-races around sysctl_tcp_syncookies.
  igmp: Fix a data-race around sysctl_igmp_max_memberships.
  igmp: Fix data-races around sysctl_igmp_llm_reports.
  net/tls: Fix race in TLS device down flow
  net: stmmac: fix dma queue left shift overflow issue
  i2c: cadence: Change large transfer count reset logic to be unconditional
  tcp: Fix a data-race around sysctl_tcp_probe_interval.
  tcp: Fix a data-race around sysctl_tcp_probe_threshold.
  tcp: Fix a data-race around sysctl_tcp_mtu_probe_floor.
  tcp: Fix data-races around sysctl_tcp_min_snd_mss.
  tcp: Fix data-races around sysctl_tcp_base_mss.
  tcp: Fix data-races around sysctl_tcp_mtu_probing.
  tcp/dccp: Fix a data-race around sysctl_tcp_fwmark_accept.
  ip: Fix a data-race around sysctl_fwmark_reflect.
  ip: Fix data-races around sysctl_ip_nonlocal_bind.
  ip: Fix data-races around sysctl_ip_fwd_use_pmtu.
  ip: Fix data-races around sysctl_ip_no_pmtu_disc.
  igc: Reinstate IGC_REMOVED logic and implement it properly
  perf/core: Fix data race between perf_event_set_output() and perf_mmap_close()
  pinctrl: ralink: Check for null return of devm_kcalloc
  power/reset: arm-versatile: Fix refcount leak in versatile_reboot_probe
  xfrm: xfrm_policy: fix a possible double xfrm_pols_put() in xfrm_bundle_lookup()
  serial: mvebu-uart: correctly report configured baudrate value
  PCI: hv: Fix interrupt mapping for multi-MSI
  PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()
  PCI: hv: Fix hv_arch_irq_unmask() for multi-MSI
  PCI: hv: Fix multi-MSI to allow more than one MSI vector
  xen/gntdev: Ignore failure to unmap INVALID_GRANT_HANDLE
  lockdown: Fix kexec lockdown bypass with ima policy
  mlxsw: spectrum_router: Fix IPv4 nexthop gateway indication
  riscv: add as-options for modules with assembly components
  pinctrl: stm32: fix optional IRQ support to gpios
  Revert "cgroup: Use separate src/dst nodes when preloading css_sets for migration"
  Linux 5.4.207
  can: m_can: m_can_tx_handler(): fix use after free of skb
  serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle
  serial: stm32: Clear prev values before setting RTS delays
  serial: 8250: fix return error code in serial8250_request_std_resource()
  tty: serial: samsung_tty: set dma burst_size to 1
  usb: dwc3: gadget: Fix event pending check
  usb: typec: add missing uevent when partner support PD
  USB: serial: ftdi_sio: add Belimo device ids
  signal handling: don't use BUG_ON() for debugging
  ARM: dts: stm32: use the correct clock source for CEC on stm32mp151
  soc: ixp4xx/npe: Fix unused match warning
  x86: Clear .brk area at early boot
  irqchip: or1k-pic: Undefine mask_ack for level triggered hardware
  ASoC: madera: Fix event generation for rate controls
  ASoC: madera: Fix event generation for OUT1 demux
  ASoC: cs47l15: Fix event generation for low power mux control
  ASoC: wm5110: Fix DRE control
  ASoC: ops: Fix off by one in range control validation
  net: sfp: fix memory leak in sfp_probe()
  nvme: fix regression when disconnect a recovering ctrl
  NFC: nxp-nci: don't print header length mismatch on i2c error
  net: tipc: fix possible refcount leak in tipc_sk_create()
  platform/x86: hp-wmi: Ignore Sanitization Mode event
  cpufreq: pmac32-cpufreq: Fix refcount leak bug
  netfilter: br_netfilter: do not skip all hooks with 0 priority
  virtio_mmio: Restore guest page size on resume
  virtio_mmio: Add missing PM calls to freeze/restore
  mm: sysctl: fix missing numa_stat when !CONFIG_HUGETLB_PAGE
  sfc: fix kernel panic when creating VF
  seg6: bpf: fix skb checksum in bpf_push_seg6_encap()
  seg6: fix skb checksum in SRv6 End.B6 and End.B6.Encaps behaviors
  seg6: fix skb checksum evaluation in SRH encapsulation/insertion
  sfc: fix use after free when disabling sriov
  net: ftgmac100: Hold reference returned by of_get_child_by_name()
  ipv4: Fix data-races around sysctl_ip_dynaddr.
  raw: Fix a data-race around sysctl_raw_l3mdev_accept.
  icmp: Fix a data-race around sysctl_icmp_ratemask.
  icmp: Fix a data-race around sysctl_icmp_ratelimit.
  drm/i915/gt: Serialize TLB invalidates with GT resets
  ARM: dts: sunxi: Fix SPI NOR compatible on Orange Pi Zero
  ARM: dts: at91: sama5d2: Fix typo in i2s1 node
  ipv4: Fix a data-race around sysctl_fib_sync_mem.
  icmp: Fix data-races around sysctl.
  cipso: Fix data-races around sysctl.
  net: Fix data-races around sysctl_mem.
  inetpeer: Fix data-races around sysctl.
  net: stmmac: dwc-qos: Disable split header for Tegra194
  ASoC: sgtl5000: Fix noise on shutdown/remove
  ima: Fix a potential integer overflow in ima_appraise_measurement
  drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector()
  ARM: 9210/1: Mark the FDT_FIXED sections as shareable
  ARM: 9209/1: Spectre-BHB: avoid pr_info() every time a CPU comes out of idle
  ARM: dts: imx6qdl-ts7970: Fix ngpio typo and count
  ext4: fix race condition between ext4_write and ext4_convert_inline_data
  sched/rt: Disable RT_RUNTIME_SHARE by default
  Revert "evm: Fix memleak in init_desc"
  nilfs2: fix incorrect masking of permission flags for symlinks
  drm/panfrost: Fix shrinker list corruption by madvise IOCTL
  cgroup: Use separate src/dst nodes when preloading css_sets for migration
  wifi: mac80211: fix queue selection for mesh/OCB interfaces
  ARM: 9214/1: alignment: advance IT state after emulating Thumb instruction
  ARM: 9213/1: Print message about disabled Spectre workarounds only once
  ip: fix dflt addr selection for connected nexthop
  net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer
  tracing/histograms: Fix memory leak problem
  xen/netback: avoid entering xenvif_rx_next_skb() with an empty rx queue
  ALSA: hda/realtek - Enable the headset-mic on a Xiaomi laptop
  ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc221
  ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671
  ALSA: hda/conexant: Apply quirk for another HP ProDesk 600 G3 model
  ALSA: hda - Add fixup for Dell Latitude E5430
  Linux 5.4.206
  Revert "mtd: rawnand: gpmi: Fix setting busy timeout setting"
  Linux 5.4.205
  dmaengine: ti: Add missing put_device in ti_dra7_xbar_route_allocate
  dmaengine: ti: Fix refcount leak in ti_dra7_xbar_route_allocate
  dmaengine: at_xdmac: handle errors of at_xdmac_alloc_desc() correctly
  dmaengine: pl330: Fix lockdep warning about non-static key
  ida: don't use BUG_ON() for debugging
  dt-bindings: dma: allwinner,sun50i-a64-dma: Fix min/max typo
  misc: rtsx_usb: set return value in rsp_buf alloc err path
  misc: rtsx_usb: use separate command and response buffers
  misc: rtsx_usb: fix use of dma mapped buffer for usb bulk transfer
  dmaengine: imx-sdma: Allow imx8m for imx7 FW revs
  i2c: cadence: Unregister the clk notifier in error path
  selftests: forwarding: fix error message in learning_test
  selftests: forwarding: fix learning_test when h1 supports IFF_UNICAST_FLT
  selftests: forwarding: fix flood_unicast_test when h2 supports IFF_UNICAST_FLT
  ibmvnic: Properly dispose of all skbs during a failover.
  ARM: at91: pm: use proper compatibles for sam9x60's rtc and rtt
  ARM: at91: pm: use proper compatible for sama5d2's rtc
  pinctrl: sunxi: sunxi_pconf_set: use correct offset
  pinctrl: sunxi: a83t: Fix NAND function name for some pins
  ARM: meson: Fix refcount leak in meson_smp_prepare_cpus
  xfs: remove incorrect ASSERT in xfs_rename
  can: kvaser_usb: kvaser_usb_leaf: fix bittiming limits
  can: kvaser_usb: kvaser_usb_leaf: fix CAN clock frequency regression
  can: kvaser_usb: replace run-time checks with struct kvaser_usb_driver_info
  powerpc/powernv: delay rng platform device creation until later in boot
  video: of_display_timing.h: include errno.h
  fbcon: Prevent that screen size is smaller than font size
  fbcon: Disallow setting font bigger than screen size
  fbmem: Check virtual screen sizes in fb_set_var()
  fbdev: fbmem: Fix logo center image dx issue
  iommu/vt-d: Fix PCI bus rescan device hot add
  net: rose: fix UAF bug caused by rose_t0timer_expiry
  usbnet: fix memory leak in error case
  can: gs_usb: gs_usb_open/close(): fix memory leak
  can: grcan: grcan_probe(): remove extra of_node_get()
  can: bcm: use call_rcu() instead of costly synchronize_rcu()
  mm/slub: add missing TID updates on slab deactivation
  esp: limit skb_page_frag_refill use to a single page
  Linux 5.4.204
  clocksource/drivers/ixp4xx: remove EXPORT_SYMBOL_GPL from ixp4xx_timer_setup()
  net: usb: qmi_wwan: add Telit 0x1070 composition
  net: usb: qmi_wwan: add Telit 0x1060 composition
  xen/arm: Fix race in RB-tree based P2M accounting
  xen/blkfront: force data bouncing when backend is untrusted
  xen/netfront: force data bouncing when backend is untrusted
  xen/netfront: fix leaking data in shared pages
  xen/blkfront: fix leaking data in shared pages
  selftests/rseq: Change type of rseq_offset to ptrdiff_t
  selftests/rseq: x86-32: use %gs segment selector for accessing rseq thread area
  selftests/rseq: x86-64: use %fs segment selector for accessing rseq thread area
  selftests/rseq: Fix: work-around asm goto compiler bugs
  selftests/rseq: Remove arm/mips asm goto compiler work-around
  selftests/rseq: Fix warnings about #if checks of undefined tokens
  selftests/rseq: Fix ppc32 offsets by using long rather than off_t
  selftests/rseq: Fix ppc32 missing instruction selection "u" and "x" for load/store
  selftests/rseq: Fix ppc32: wrong rseq_cs 32-bit field pointer on big endian
  selftests/rseq: Uplift rseq selftests for compatibility with glibc-2.35
  selftests/rseq: Introduce thread pointer getters
  selftests/rseq: Introduce rseq_get_abi() helper
  selftests/rseq: Remove volatile from __rseq_abi
  selftests/rseq: Remove useless assignment to cpu variable
  selftests/rseq: introduce own copy of rseq uapi header
  selftests/rseq: remove ARRAY_SIZE define from individual tests
  rseq/selftests,x86_64: Add rseq_offset_deref_addv()
  ipv6/sit: fix ipip6_tunnel_get_prl return value
  sit: use min
  net: dsa: bcm_sf2: force pause link settings
  hwmon: (ibmaem) don't call platform_device_del() if platform_device_add() fails
  xen/gntdev: Avoid blocking in unmap_grant_pages()
  net: tun: avoid disabling NAPI twice
  NFC: nxp-nci: Don't issue a zero length i2c_master_read()
  nfc: nfcmrvl: Fix irq_of_parse_and_map() return value
  net: bonding: fix use-after-free after 802.3ad slave unbind
  net: bonding: fix possible NULL deref in rlb code
  net/sched: act_api: Notify user space if any actions were flushed before error
  netfilter: nft_dynset: restore set element counter when failing to update
  s390: remove unneeded 'select BUILD_BIN2C'
  PM / devfreq: exynos-ppmu: Fix refcount leak in of_get_devfreq_events
  caif_virtio: fix race between virtio_device_ready() and ndo_open()
  net: ipv6: unexport __init-annotated seg6_hmac_net_init()
  usbnet: fix memory allocation in helpers
  linux/dim: Fix divide by 0 in RDMA DIM
  RDMA/qedr: Fix reporting QP timeout attribute
  net: tun: stop NAPI when detaching queues
  net: tun: unlink NAPI from device on destruction
  selftests/net: pass ipv6_args to udpgso_bench's IPv6 TCP test
  virtio-net: fix race between ndo_open() and virtio_device_ready()
  net: usb: ax88179_178a: Fix packet receiving
  net: rose: fix UAF bugs caused by timer handler
  SUNRPC: Fix READ_PLUS crasher
  s390/archrandom: simplify back to earlier design and initialize earlier
  dm raid: fix KASAN warning in raid5_add_disks
  dm raid: fix accesses beyond end of raid member array
  powerpc/bpf: Fix use of user_pt_regs in uapi
  powerpc/prom_init: Fix kernel config grep
  nvdimm: Fix badblocks clear off-by-one error
  ipv6: take care of disable_policy when restoring routes
  Linux 5.4.203
  crypto: arm/ghash-ce - define fpu before fpu registers are referenced
  crypto: arm - use Kconfig based compiler checks for crypto opcodes
  ARM: 9029/1: Make iwmmxt.S support Clang's integrated assembler
  ARM: OMAP2+: drop unnecessary adrl
  ARM: 8929/1: use APSR_nzcv instead of r15 as mrc operand
  ARM: 8933/1: replace Sun/Solaris style flag on section directive
  crypto: arm/sha512-neon - avoid ADRL pseudo instruction
  crypto: arm/sha256-neon - avoid ADRL pseudo instruction
  ARM: 8971/1: replace the sole use of a symbol with its definition
  ARM: 8990/1: use VFP assembler mnemonics in register load/store macros
  ARM: 8989/1: use .fpu assembler directives instead of assembler arguments
  net: mscc: ocelot: allow unregistered IP multicast flooding
  kexec_file: drop weak attribute from arch_kexec_apply_relocations[_add]
  powerpc/ftrace: Remove ftrace init tramp once kernel init is complete
  drm: remove drm_fb_helper_modinit
  Linux 5.4.202
  powerpc/pseries: wire up rng during setup_arch()
  kbuild: link vmlinux only once for CONFIG_TRIM_UNUSED_KSYMS (2nd attempt)
  random: update comment from copy_to_user() -> copy_to_iter()
  modpost: fix section mismatch check for exported init/exit sections
  ARM: cns3xxx: Fix refcount leak in cns3xxx_init
  ARM: Fix refcount leak in axxia_boot_secondary
  soc: bcm: brcmstb: pm: pm-arm: Fix refcount leak in brcmstb_pm_probe
  ARM: exynos: Fix refcount leak in exynos_map_pmu
  ARM: dts: imx6qdl: correct PU regulator ramp delay
  powerpc/powernv: wire up rng during setup_arch
  powerpc/rtas: Allow ibm,platform-dump RTAS call with null buffer address
  powerpc: Enable execve syscall exit tracepoint
  parisc: Enable ARCH_HAS_STRICT_MODULE_RWX
  xtensa: Fix refcount leak bug in time.c
  xtensa: xtfpga: Fix refcount leak bug in setup
  iio: adc: axp288: Override TS pin bias current for some models
  iio: adc: stm32: fix maximum clock rate for stm32mp15x
  iio: trigger: sysfs: fix use-after-free on remove
  iio: gyro: mpu3050: Fix the error handling in mpu3050_power_up()
  iio: accel: mma8452: ignore the return value of reset operation
  iio:accel:mxc4005: rearrange iio trigger get and register
  iio:accel:bma180: rearrange iio trigger get and register
  iio:chemical:ccs811: rearrange iio trigger get and register
  usb: chipidea: udc: check request status before setting device address
  xhci: turn off port power in shutdown
  iio: adc: vf610: fix conversion mode sysfs node name
  s390/cpumf: Handle events cycles and instructions identically
  gpio: winbond: Fix error code in winbond_gpio_get()
  Revert "net/tls: fix tls_sk_proto_close executed repeatedly"
  virtio_net: fix xdp_rxq_info bug after suspend/resume
  igb: Make DMA faster when CPU is active on the PCIe link
  regmap-irq: Fix a bug in regmap_irq_enable() for type_in_mask chips
  ice: ethtool: advertise 1000M speeds properly
  afs: Fix dynamic root getattr
  MIPS: Remove repetitive increase irq_err_count
  x86/xen: Remove undefined behavior in setup_features()
  udmabuf: add back sanity check
  net/tls: fix tls_sk_proto_close executed repeatedly
  erspan: do not assume transport header is always set
  drm/msm/mdp4: Fix refcount leak in mdp4_modeset_init_intf
  net/sched: sch_netem: Fix arithmetic in netem_dump() for 32-bit platforms
  bonding: ARP monitor spams NETDEV_NOTIFY_PEERS notifiers
  phy: aquantia: Fix AN when higher speeds than 1G are not advertised
  bpf: Fix request_sock leak in sk lookup helpers
  USB: serial: option: add Quectel RM500K module support
  USB: serial: option: add Quectel EM05-G modem
  USB: serial: option: add Telit LE910Cx 0x1250 composition
  random: quiet urandom warning ratelimit suppression message
  dm mirror log: clear log bits up to BITS_PER_LONG boundary
  dm era: commit metadata in postsuspend after worker stops
  ata: libata: add qc->flags in ata_qc_complete_template tracepoint
  mtd: rawnand: gpmi: Fix setting busy timeout setting
  mmc: sdhci-pci-o2micro: Fix card detect by dealing with debouncing
  net: openvswitch: fix parsing of nw_proto for IPv6 fragments
  ALSA: hda/realtek: Add quirk for Clevo PD70PNT
  ALSA: hda/realtek - ALC897 headset MIC no sound
  ALSA: hda/conexant: Fix missing beep setup
  ALSA: hda/via: Fix missing beep setup
  random: schedule mix_interrupt_randomness() less often
  vt: drop old FONT ioctls
  Linux 5.4.201
  Revert "hwmon: Make chip parameter for with_info API mandatory"
  arm64: mm: Don't invalidate FROM_DEVICE buffers at start of DMA transfer
  tcp: drop the hash_32() part from the index calculation
  tcp: increase source port perturb table to 2^16
  tcp: dynamically allocate the perturb table used by source ports
  tcp: add small random increments to the source port
  tcp: use different parts of the port_offset for index and offset
  tcp: add some entropy in __inet_hash_connect()
  usb: gadget: u_ether: fix regression in setting fixed MAC address
  dm: remove special-casing of bio-based immutable singleton target on NVMe
  s390/mm: use non-quiescing sske for KVM switch to keyed guest
  UPSTREAM: ext4: verify dir block before splitting it
  UPSTREAM: ext4: fix use-after-free in ext4_rename_dir_prepare
  BACKPORT: ext4: Only advertise encrypted_casefold when encryption and unicode are enabled
  BACKPORT: ext4: fix no-key deletion for encrypt+casefold
  BACKPORT: ext4: optimize match for casefolded encrypted dirs
  BACKPORT: ext4: handle casefolding with encryption
  Revert "ANDROID: ext4: Handle casefolding with encryption"
  Revert "ANDROID: ext4: Optimize match for casefolded encrypted dirs"
  ANDROID: cpu/hotplug: avoid breaking Android ABI by fusing cpuhp steps
  ANDROID: change function signatures for some random functions.
  Revert "mailbox: forward the hrtimer if not queued and under a lock"
  Revert "drm: fix EDID struct for old ARM OABI format"
  Revert "ALSA: jack: Access input_dev under mutex"
  Linux 5.4.200
  powerpc/mm: Switch obsolete dssall to .long
  riscv: Less inefficient gcc tishift helpers (and export their symbols)
  RISC-V: fix barrier() use in <vdso/processor.h>
  arm64: kprobes: Use BRK instead of single-step when executing instructions out-of-line
  net: openvswitch: fix leak of nested actions
  net: openvswitch: fix misuse of the cached connection on tuple changes
  net/sched: act_police: more accurate MTU policing
  virtio-pci: Remove wrong address verification in vp_del_vqs()
  ALSA: hda/realtek: fix right sounds and mute/micmute LEDs for HP machine
  ALSA: hda/realtek: fix mute/micmute LEDs for HP 440 G8
  ext4: add reserved GDT blocks check
  ext4: make variable "count" signed
  ext4: fix bug_on ext4_mb_use_inode_pa
  dm mirror log: round up region bitmap size to BITS_PER_LONG
  serial: 8250: Store to lsr_save_flags after lsr read
  usb: gadget: lpc32xx_udc: Fix refcount leak in lpc32xx_udc_probe
  usb: dwc2: Fix memory leak in dwc2_hcd_init
  USB: serial: io_ti: add Agilent E5805A support
  USB: serial: option: add support for Cinterion MV31 with new baseline
  comedi: vmk80xx: fix expression for tx buffer size
  i2c: designware: Use standard optional ref clock implementation
  irqchip/gic-v3: Fix refcount leak in gic_populate_ppi_partitions
  irqchip/gic-v3: Fix error handling in gic_populate_ppi_partitions
  irqchip/gic/realview: Fix refcount leak in realview_gic_of_init
  faddr2line: Fix overlapping text section failures, the sequel
  certs/blacklist_hashes.c: fix const confusion in certs blacklist
  arm64: ftrace: fix branch range checks
  net: bgmac: Fix an erroneous kfree() in bgmac_remove()
  mlxsw: spectrum_cnt: Reorder counter pools
  misc: atmel-ssc: Fix IRQ check in ssc_probe
  tty: goldfish: Fix free_irq() on remove
  i40e: Fix call trace in setup_tx_descriptors
  i40e: Fix calculating the number of queue pairs
  i40e: Fix adding ADQ filter to TC0
  clocksource: hyper-v: unexport __init-annotated hv_init_clocksource()
  pNFS: Don't keep retrying if the server replied NFS4ERR_LAYOUTUNAVAILABLE
  random: credit cpu and bootloader seeds by default
  net: ethernet: mtk_eth_soc: fix misuse of mem alloc interface netdev[napi]_alloc_frag
  ipv6: Fix signed integer overflow in l2tp_ip6_sendmsg
  nfc: nfcmrvl: Fix memory leak in nfcmrvl_play_deferred
  virtio-mmio: fix missing put_device() when vm_cmdline_parent registration failed
  ALSA: hda/realtek - Add HW8326 support
  scsi: pmcraid: Fix missing resource cleanup in error case
  scsi: ipr: Fix missing/incorrect resource cleanup in error case
  scsi: lpfc: Allow reduced polling rate for nvme_admin_async_event cmd completion
  scsi: lpfc: Fix port stuck in bypassed state after LIP in PT2PT topology
  scsi: vmw_pvscsi: Expand vcpuHint to 16 bits
  ASoC: wm_adsp: Fix event generation for wm_adsp_fw_put()
  ASoC: es8328: Fix event generation for deemphasis control
  ASoC: wm8962: Fix suspend while playing music
  ata: libata-core: fix NULL pointer deref in ata_host_alloc_pinfo()
  ASoC: cs42l56: Correct typo in minimum level for SX volume controls
  ASoC: cs42l52: Correct TLV for Bypass Volume
  ASoC: cs53l30: Correct number of volume levels on SX controls
  ASoC: cs35l36: Update digital volume TLV
  ASoC: cs42l52: Fix TLV scales for mixer controls
  dma-debug: make things less spammy under memory pressure
  ASoC: nau8822: Add operation for internal PLL off and on
  powerpc/kasan: Silence KASAN warnings in __get_wchan()
  random: account for arch randomness in bits
  random: mark bootloader randomness code as __init
  random: avoid checking crng_ready() twice in random_init()
  crypto: drbg - make reseeding from get_random_bytes() synchronous
  crypto: drbg - always try to free Jitter RNG instance
  crypto: drbg - move dynamic ->reseed_threshold adjustments to __drbg_seed()
  crypto: drbg - track whether DRBG was seeded with !rng_is_initialized()
  crypto: drbg - prepare for more fine-grained tracking of seeding state
  crypto: drbg - always seeded with SP800-90B compliant noise source
  Revert "random: use static branch for crng_ready()"
  random: check for signals after page of pool writes
  random: wire up fops->splice_{read,write}_iter()
  random: convert to using fops->write_iter()
  random: convert to using fops->read_iter()
  random: unify batched entropy implementations
  random: move randomize_page() into mm where it belongs
  random: move initialization functions out of hot pages
  random: make consistent use of buf and len
  random: use proper return types on get_random_{int,long}_wait()
  random: remove extern from functions in header
  random: use static branch for crng_ready()
  random: credit architectural init the exact amount
  random: handle latent entropy and command line from random_init()
  random: use proper jiffies comparison macro
  random: remove ratelimiting for in-kernel unseeded randomness
  random: move initialization out of reseeding hot path
  random: avoid initializing twice in credit race
  random: use symbolic constants for crng_init states
  siphash: use one source of truth for siphash permutations
  random: help compiler out with fast_mix() by using simpler arguments
  random: do not use input pool from hard IRQs
  random: order timer entropy functions below interrupt functions
  random: do not pretend to handle premature next security model
  random: use first 128 bits of input as fast init
  random: do not use batches when !crng_ready()
  random: insist on random_get_entropy() existing in order to simplify
  xtensa: use fallback for random_get_entropy() instead of zero
  sparc: use fallback for random_get_entropy() instead of zero
  um: use fallback for random_get_entropy() instead of zero
  x86/tsc: Use fallback for random_get_entropy() instead of zero
  nios2: use fallback for random_get_entropy() instead of zero
  arm: use fallback for random_get_entropy() instead of zero
  mips: use fallback for random_get_entropy() instead of just c0 random
  m68k: use fallback for random_get_entropy() instead of zero
  timekeeping: Add raw clock fallback for random_get_entropy()
  powerpc: define get_cycles macro for arch-override
  alpha: define get_cycles macro for arch-override
  parisc: define get_cycles macro for arch-override
  s390: define get_cycles macro for arch-override
  ia64: define get_cycles macro for arch-override
  init: call time_init() before rand_initialize()
  random: fix sysctl documentation nits
  random: document crng_fast_key_erasure() destination possibility
  random: make random_get_entropy() return an unsigned long
  random: allow partial reads if later user copies fail
  random: check for signals every PAGE_SIZE chunk of /dev/[u]random
  random: check for signal_pending() outside of need_resched() check
  random: do not allow user to keep crng key around on stack
  random: do not split fast init input in add_hwgenerator_randomness()
  random: mix build-time latent entropy into pool at init
  random: re-add removed comment about get_random_{u32,u64} reseeding
  random: treat bootloader trust toggle the same way as cpu trust toggle
  random: skip fast_init if hwrng provides large chunk of entropy
  random: check for signal and try earlier when generating entropy
  random: reseed more often immediately after booting
  random: make consistent usage of crng_ready()
  random: use SipHash as interrupt entropy accumulator
  random: replace custom notifier chain with standard one
  random: don't let 644 read-only sysctls be written to
  random: give sysctl_random_min_urandom_seed a more sensible value
  random: do crng pre-init loading in worker rather than irq
  random: unify cycles_t and jiffies usage and types
  random: cleanup UUID handling
  random: only wake up writers after zap if threshold was passed
  random: round-robin registers as ulong, not u32
  random: clear fast pool, crng, and batches in cpuhp bring up
  random: pull add_hwgenerator_randomness() declaration into random.h
  random: check for crng_init == 0 in add_device_randomness()
  random: unify early init crng load accounting
  random: do not take pool spinlock at boot
  random: defer fast pool mixing to worker
  random: rewrite header introductory comment
  random: group sysctl functions
  random: group userspace read/write functions
  random: group entropy collection functions
  random: group entropy extraction functions
  random: group crng functions
  random: group initialization wait functions
  random: remove whitespace and reorder includes
  random: remove useless header comment
  random: introduce drain_entropy() helper to declutter crng_reseed()
  random: deobfuscate irq u32/u64 contributions
  random: add proper SPDX header
  random: remove unused tracepoints
  random: remove ifdef'd out interrupt bench
  random: tie batched entropy generation to base_crng generation
  random: fix locking for crng_init in crng_reseed()
  random: zero buffer after reading entropy from userspace
  random: remove outdated INT_MAX >> 6 check in urandom_read()
  random: make more consistent use of integer types
  random: use hash function for crng_slow_load()
  random: use simpler fast key erasure flow on per-cpu keys
  random: absorb fast pool into input pool after fast load
  random: do not xor RDRAND when writing into /dev/random
  random: ensure early RDSEED goes through mixer on init
  random: inline leaves of rand_initialize()
  random: get rid of secondary crngs
  random: use RDSEED instead of RDRAND in entropy extraction
  random: fix locking in crng_fast_load()
  random: remove batched entropy locking
  random: remove use_input_pool parameter from crng_reseed()
  random: make credit_entropy_bits() always safe
  random: always wake up entropy writers after extraction
  random: use linear min-entropy accumulation crediting
  random: simplify entropy debiting
  random: use computational hash for entropy extraction
  random: only call crng_finalize_init() for primary_crng
  random: access primary_pool directly rather than through pointer
  random: continually use hwgenerator randomness
  random: simplify arithmetic function flow in account()
  random: selectively clang-format where it makes sense
  random: access input_pool_data directly rather than through pointer
  random: cleanup fractional entropy shift constants
  random: prepend remaining pool constants with POOL_
  random: de-duplicate INPUT_POOL constants
  random: remove unused OUTPUT_POOL constants
  random: rather than entropy_store abstraction, use global
  random: remove unused extract_entropy() reserved argument
  random: remove incomplete last_data logic
  random: cleanup integer types
  random: cleanup poolinfo abstraction
  random: fix typo in comments
  random: don't reset crng_init_cnt on urandom_read()
  random: avoid superfluous call to RDRAND in CRNG extraction
  random: early initialization of ChaCha constants
  random: initialize ChaCha20 constants with correct endianness
  random: use IS_ENABLED(CONFIG_NUMA) instead of ifdefs
  random: harmonize "crng init done" messages
  random: mix bootloader randomness into pool
  random: do not re-init if crng_reseed completes before primary init
  random: do not sign extend bytes for rotation when mixing
  random: use BLAKE2s instead of SHA1 in extraction
  random: remove unused irq_flags argument from add_interrupt_randomness()
  random: document add_hwgenerator_randomness() with other input functions
  crypto: blake2s - adjust include guard naming
  crypto: blake2s - include <linux/bug.h> instead of <asm/bug.h>
  MAINTAINERS: co-maintain random.c
  random: remove dead code left over from blocking pool
  random: avoid arch_get_random_seed_long() when collecting IRQ randomness
  random: add arch_get_random_*long_early()
  powerpc: Use bool in archrandom.h
  linux/random.h: Mark CONFIG_ARCH_RANDOM functions __must_check
  linux/random.h: Use false with bool
  linux/random.h: Remove arch_has_random, arch_has_random_seed
  s390: Remove arch_has_random, arch_has_random_seed
  powerpc: Remove arch_has_random, arch_has_random_seed
  x86: Remove arch_has_random, arch_has_random_seed
  random: avoid warnings for !CONFIG_NUMA builds
  random: split primary/secondary crng init paths
  random: remove some dead code of poolinfo
  random: fix typo in add_timer_randomness()
  random: Add and use pr_fmt()
  random: convert to ENTROPY_BITS for better code readability
  random: remove unnecessary unlikely()
  random: remove kernel.random.read_wakeup_threshold
  random: delete code to pull data into pools
  random: remove the blocking pool
  random: make /dev/random be almost like /dev/urandom
  random: ignore GRND_RANDOM in getentropy(2)
  random: add GRND_INSECURE to return best-effort non-cryptographic bytes
  random: Add a urandom_read_nowait() for random APIs that don't warn
  random: Don't wake crng_init_wait when crng_init == 1
  random: don't forget compat_ioctl on urandom
  compat_ioctl: remove /dev/random commands
  lib/crypto: sha1: re-roll loops to reduce code size
  lib/crypto: blake2s: move hmac construction into wireguard
  crypto: blake2s - generic C library implementation and selftest
  nfc: st21nfca: fix incorrect sizing calculations in EVT_TRANSACTION
  bpf: Fix incorrect memory charge cost calculation in stack_map_alloc()
  9p: missing chunk of "fs/9p: Don't update file type when updating file attributes"
  Revert "ext4: fix use-after-free in ext4_rename_dir_prepare"
  Revert "ext4: verify dir block before splitting it"
  Linux 5.4.199
  x86/speculation/mmio: Print SMT warning
  KVM: x86/speculation: Disable Fill buffer clear within guests
  x86/speculation/mmio: Reuse SRBDS mitigation for SBDS
  x86/speculation/srbds: Update SRBDS mitigation selection
  x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data
  x86/speculation/mmio: Enable CPU Fill buffer clearing on idle
  x86/bugs: Group MDS, TAA & Processor MMIO Stale Data mitigations
  x86/speculation/mmio: Add mitigation for Processor MMIO Stale Data
  x86/speculation: Add a common function for MD_CLEAR mitigation update
  x86/speculation/mmio: Enumerate Processor MMIO Stale Data bug
  Documentation: Add documentation for Processor MMIO Stale Data
  x86/cpu: Add another Alder Lake CPU to the Intel family
  x86/cpu: Add Lakefield, Alder Lake and Rocket Lake models to the Intel CPU family
  x86/cpu: Add Jasper Lake to Intel family
  cpu/speculation: Add prototype for cpu_show_srbds()
  Linux 5.4.198
  tcp: fix tcp_mtup_probe_success vs wrong snd_cwnd
  mtd: cfi_cmdset_0002: Use chip_ready() for write on S29GL064N
  md/raid0: Ignore RAID0 layout if the second zone has only one device
  powerpc/32: Fix overread/overwrite of thread_struct via ptrace
  Input: bcm5974 - set missing URB_NO_TRANSFER_DMA_MAP urb flag
  ixgbe: fix unexpected VLAN Rx in promisc mode on VF
  ixgbe: fix bcast packets Rx on VF after promisc removal
  nfc: st21nfca: fix memory leaks in EVT_TRANSACTION handling
  nfc: st21nfca: fix incorrect validating logic in EVT_TRANSACTION
  mmc: block: Fix CQE recovery reset success
  ata: libata-transport: fix {dma|pio|xfer}_mode sysfs files
  cifs: return errors during session setup during reconnects
  ALSA: hda/conexant - Fix loopback issue with CX20632
  scripts/gdb: change kernel config dumping method
  vringh: Fix loop descriptors check in the indirect cases
  nodemask: Fix return values to be unsigned
  cifs: version operations for smb20 unneeded when legacy support disabled
  s390/gmap: voluntarily schedule during key setting
  nbd: fix io hung while disconnecting device
  nbd: fix race between nbd_alloc_config() and module removal
  nbd: call genl_unregister_family() first in nbd_cleanup()
  x86/cpu: Elide KCSAN for cpu_has() and friends
  modpost: fix undefined behavior of is_arm_mapping_symbol()
  drm/radeon: fix a possible null pointer dereference
  ceph: allow ceph.dir.rctime xattr to be updatable
  Revert "net: af_key: add check for pfkey_broadcast in function pfkey_process"
  scsi: myrb: Fix up null pointer access on myrb_cleanup()
  md: protect md_unregister_thread from reentrancy
  watchdog: wdat_wdt: Stop watchdog when rebooting the system
  kernfs: Separate kernfs_pr_cont_buf and rename_lock.
  serial: msm_serial: disable interrupts in __msm_console_write()
  staging: rtl8712: fix uninit-value in r871xu_drv_init()
  staging: rtl8712: fix uninit-value in usb_read8() and friends
  clocksource/drivers/sp804: Avoid error on multiple instances
  extcon: Modify extcon device to be created after driver data is set
  misc: rtsx: set NULL intfdata when probe fails
  usb: dwc2: gadget: don't reset gadget's driver->bus
  USB: hcd-pci: Fully suspend across freeze/thaw cycle
  drivers: usb: host: Fix deadlock in oxu_bus_suspend()
  drivers: tty: serial: Fix deadlock in sa1100_set_termios()
  USB: host: isp116x: check return value after calling platform_get_resource()
  drivers: staging: rtl8192e: Fix deadlock in rtllib_beacons_stop()
  drivers: staging: rtl8192u: Fix deadlock in ieee80211_beacons_stop()
  tty: Fix a possible resource leak in icom_probe
  tty: synclink_gt: Fix null-pointer-dereference in slgt_clean()
  lkdtm/usercopy: Expand size of "out of frame" object
  iio: st_sensors: Add a local lock for protecting odr
  iio: dummy: iio_simple_dummy: check the return value of kstrdup()
  drm: imx: fix compiler warning with gcc-12
  net: altera: Fix refcount leak in altera_tse_mdio_create
  ip_gre: test csum_start instead of transport header
  net/mlx5: fs, fail conflicting actions
  net/mlx5: Rearm the FW tracer after each tracer event
  net: ipv6: unexport __init-annotated seg6_hmac_init()
  net: xfrm: unexport __init-annotated xfrm4_protocol_init()
  net: mdio: unexport __init-annotated mdio_bus_init()
  SUNRPC: Fix the calculation of xdr->end in xdr_get_next_encode_buffer()
  net/mlx4_en: Fix wrong return value on ioctl EEPROM query failure
  net: dsa: lantiq_gswip: Fix refcount leak in gswip_gphy_fw_list
  bpf, arm64: Clear prog->jited_len along prog->jited
  af_unix: Fix a data-race in unix_dgram_peer_wake_me().
  xen: unexport __init-annotated xen_xlate_map_ballooned_pages()
  netfilter: nf_tables: memleak flow rule from commit path
  ata: pata_octeon_cf: Fix refcount leak in octeon_cf_probe
  netfilter: nat: really support inet nat without l3 address
  xprtrdma: treat all calls as not a bcall when bc_serv is NULL
  video: fbdev: pxa3xx-gcu: release the resources correctly in pxa3xx_gcu_probe/remove()
  NFSv4: Don't hold the layoutget locks across multiple RPC calls
  dmaengine: zynqmp_dma: In struct zynqmp_dma_chan fix desc_size data type
  m68knommu: fix undefined reference to `_init_sp'
  m68knommu: set ZERO_PAGE() to the allocated zeroed page
  i2c: cadence: Increase timeout per message if necessary
  f2fs: remove WARN_ON in f2fs_is_valid_blkaddr
  tracing: Avoid adding tracer option before update_tracer_options
  tracing: Fix sleeping function called from invalid context on RT kernel
  mips: cpc: Fix refcount leak in mips_cpc_default_phys_base
  perf c2c: Fix sorting in percent_rmt_hitm_cmp()
  tipc: check attribute length for bearer name
  afs: Fix infinite loop found by xfstest generic/676
  tcp: tcp_rtx_synack() can be called from process context
  net: sched: add barrier to fix packet stuck problem for lockless qdisc
  net/mlx5e: Update netdev features after changing XDP state
  net/mlx5: Don't use already freed action pointer
  nfp: only report pause frame configuration for physical device
  ubi: ubi_create_volume: Fix use-after-free when volume creation failed
  jffs2: fix memory leak in jffs2_do_fill_super
  modpost: fix removing numeric suffixes
  net: dsa: mv88e6xxx: Fix refcount leak in mv88e6xxx_mdios_register
  net: ethernet: mtk_eth_soc: out of bounds read in mtk_hwlro_get_fdir_entry()
  net: sched: fixed barrier to prevent skbuff sticking in qdisc backlog
  s390/crypto: fix scatterwalk_unmap() callers in AES-GCM
  clocksource/drivers/oxnas-rps: Fix irq_of_parse_and_map() return value
  ASoC: fsl_sai: Fix FSL_SAI_xDR/xFR definition
  watchdog: ts4800_wdt: Fix refcount leak in ts4800_wdt_probe
  driver core: fix deadlock in __device_attach
  driver: base: fix UAF when driver_attach failed
  bus: ti-sysc: Fix warnings for unbind for serial
  firmware: dmi-sysfs: Fix memory leak in dmi_sysfs_register_handle
  serial: stm32-usart: Correct CSIZE, bits, and parity
  serial: st-asc: Sanitize CSIZE and correct PARENB for CS7
  serial: sifive: Sanitize CSIZE and c_iflag
  serial: sh-sci: Don't allow CS5-6
  serial: txx9: Don't allow CS5-6
  serial: rda-uart: Don't allow CS5-6
  serial: digicolor-usart: Don't allow CS5-6
  serial: 8250_fintek: Check SER_RS485_RTS_* only with RS485
  serial: meson: acquire port->lock in startup()
  rtc: mt6397: check return value after calling platform_get_resource()
  clocksource/drivers/riscv: Events are stopped during CPU suspend
  soc: rockchip: Fix refcount leak in rockchip_grf_init
  coresight: cpu-debug: Replace mutex with mutex_trylock on panic notifier
  serial: sifive: Report actual baud base rather than fixed 115200
  phy: qcom-qmp: fix pipe-clock imbalance on power-on failure
  rpmsg: qcom_smd: Fix returning 0 if irq_of_parse_and_map() fails
  iio: adc: sc27xx: Fine tune the scale calibration values
  iio: adc: sc27xx: fix incorrect big scale voltage read
  iio: adc: stmpe-adc: Fix wait_for_completion_timeout return value check
  firmware: stratix10-svc: fix a missing check on list iterator
  usb: dwc3: pci: Fix pm_runtime_get_sync() error checking
  rpmsg: qcom_smd: Fix irq_of_parse_and_map() return value
  pwm: lp3943: Fix duty calculation in case period was clamped
  staging: fieldbus: Fix the error handling path in anybuss_host_common_probe()
  usb: musb: Fix missing of_node_put() in omap2430_probe
  USB: storage: karma: fix rio_karma_init return
  usb: usbip: add missing device lock on tweak configuration cmd
  usb: usbip: fix a refcount leak in stub_probe()
  tty: serial: fsl_lpuart: fix potential bug when using both of_alias_get_id and ida_simple_get
  tty: serial: owl: Fix missing clk_disable_unprepare() in owl_uart_probe
  tty: goldfish: Use tty_port_destroy() to destroy port
  iio: adc: ad7124: Remove shift from scan_type
  staging: greybus: codecs: fix type confusion of list iterator variable
  pcmcia: db1xxx_ss: restrict to MIPS_DB1XXX boards
  md: bcache: check the return value of kzalloc() in detached_dev_do_request()
  block: fix bio_clone_blkg_association() to associate with proper blkcg_gq
  bfq: Make sure bfqg for which we are queueing requests is online
  bfq: Get rid of __bio_blkcg() usage
  bfq: Remove pointless bfq_init_rq() calls
  bfq: Drop pointless unlock-lock pair
  bfq: Avoid merging queues with different parents
  MIPS: IP27: Remove incorrect `cpu_has_fpu' override
  RDMA/rxe: Generate a completion for unsupported/invalid opcode
  Kconfig: add config option for asm goto w/ outputs
  phy: qcom-qmp: fix reset-controller leak on probe errors
  blk-iolatency: Fix inflight count imbalances and IO hangs on offline
  dt-bindings: gpio: altera: correct interrupt-cells
  docs/conf.py: Cope with removal of language=None in Sphinx 5.0.0
  ARM: pxa: maybe fix gpio lookup tables
  phy: qcom-qmp: fix struct clk leak on probe errors
  arm64: dts: qcom: ipq8074: fix the sleep clock frequency
  gma500: fix an incorrect NULL check on list iterator
  tilcdc: tilcdc_external: fix an incorrect NULL check on list iterator
  serial: pch: don't overwrite xmit->buf[0] by x_char
  carl9170: tx: fix an incorrect use of list iterator
  ASoC: rt5514: Fix event generation for "DSP Voice Wake Up" control
  rtl818x: Prevent using not initialized queues
  hugetlb: fix huge_pmd_unshare address update
  nodemask.h: fix compilation error with GCC12
  iommu/msm: Fix an incorrect NULL check on list iterator
  um: Fix out-of-bounds read in LDT setup
  um: chan_user: Fix winch_tramp() return value
  mac80211: upgrade passive scan to active scan on DFS channels after beacon rx
  irqchip: irq-xtensa-mx: fix initial IRQ affinity
  irqchip/armada-370-xp: Do not touch Performance Counter Overflow on A375, A38x, A39x
  RDMA/hfi1: Fix potential integer multiplication overflow errors
  Kconfig: Add option for asm goto w/ tied outputs to workaround clang-13 bug
  media: coda: Add more H264 levels for CODA960
  media: coda: Fix reported H264 profile
  mtd: cfi_cmdset_0002: Move and rename chip_check/chip_ready/chip_good_for_write
  md: fix an incorrect NULL check in md_reload_sb
  md: fix an incorrect NULL check in does_sb_need_changing
  drm/bridge: analogix_dp: Grab runtime PM reference for DP-AUX
  drm/nouveau/clk: Fix an incorrect NULL check on list iterator
  drm/etnaviv: check for reaped mapping in etnaviv_iommu_unmap_gem
  drm/amdgpu/cs: make commands with 0 chunks illegal behaviour.
  scsi: ufs: qcom: Add a readl() to make sure ref_clk gets enabled
  scsi: dc395x: Fix a missing check on list iterator
  ocfs2: dlmfs: fix error handling of user_dlm_destroy_lock
  dlm: fix missing lkb refcount handling
  dlm: fix plock invalid read
  mm, compaction: fast_find_migrateblock() should return pfn in the target zone
  PCI: qcom: Fix unbalanced PHY init on probe errors
  PCI: qcom: Fix runtime PM imbalance on probe errors
  PCI/PM: Fix bridge_d3_blacklist[] Elo i2 overwrite of Gigabyte X299
  tracing: Fix potential double free in create_var_ref()
  ACPI: property: Release subnode properties with data nodes
  ext4: avoid cycles in directory h-tree
  ext4: verify dir block before splitting it
  ext4: fix bug_on in ext4_writepages
  ext4: fix warning in ext4_handle_inode_extension
  ext4: fix use-after-free in ext4_rename_dir_prepare
  netfilter: nf_tables: disallow non-stateful expression in sets earlier
  bfq: Track whether bfq_group is still online
  bfq: Update cgroup information before merging bio
  bfq: Split shared queues on move between cgroups
  efi: Do not import certificates from UEFI Secure Boot for T2 Macs
  fs-writeback: writeback_sb_inodes: Recalculate 'wrote' according to skipped pages
  iwlwifi: mvm: fix assert 1F04 upon reconfig
  wifi: mac80211: fix use-after-free in chanctx code
  f2fs: fix fallocate to use file_modified to update permissions consistently
  f2fs: don't need inode lock for system hidden quota
  f2fs: fix deadloop in foreground GC
  f2fs: fix to clear dirty inode in f2fs_evict_inode()
  f2fs: fix to do sanity check on block address in f2fs_do_zero_range()
  f2fs: fix to avoid f2fs_bug_on() in dec_valid_node_count()
  perf jevents: Fix event syntax error caused by ExtSel
  perf c2c: Use stdio interface if slang is not supported
  iommu/amd: Increase timeout waiting for GA log enablement
  dmaengine: stm32-mdma: remove GISR1 register
  video: fbdev: clcdfb: Fix refcount leak in clcdfb_of_vram_setup
  NFSv4/pNFS: Do not fail I/O when we fail to allocate the pNFS layout
  NFS: Don't report errors from nfs_pageio_complete() more than once
  NFS: Do not report flush errors in nfs_write_end()
  NFS: Do not report EINTR/ERESTARTSYS as mapping errors
  i2c: at91: Initialize dma_buf in at91_twi_xfer()
  i2c: at91: use dma safe buffers
  iommu/mediatek: Add list_del in mtk_iommu_remove
  f2fs: fix dereference of stale list iterator after loop body
  Input: stmfts - do not leave device disabled in stmfts_input_open
  RDMA/hfi1: Prevent use of lock before it is initialized
  mailbox: forward the hrtimer if not queued and under a lock
  mfd: davinci_voicecodec: Fix possible null-ptr-deref in davinci_vc_probe()
  powerpc/fsl_rio: Fix refcount leak in fsl_rio_setup
  macintosh: via-pmu and via-cuda need RTC_LIB
  powerpc/perf: Fix the threshold compare group constraint for power9
  powerpc/64: Only WARN if __pa()/__va() called with bad addresses
  Input: sparcspkr - fix refcount leak in bbc_beep_probe
  crypto: cryptd - Protect per-CPU resource by disabling BH.
  tty: fix deadlock caused by calling printk() under tty_port->lock
  PCI: imx6: Fix PERST# start-up sequence
  ipc/mqueue: use get_tree_nodev() in mqueue_get_tree()
  proc: fix dentry/inode overinstantiating under /proc/${pid}/net
  powerpc/4xx/cpm: Fix return value of __setup() handler
  powerpc/idle: Fix return value of __setup() handler
  powerpc/8xx: export 'cpm_setbrg' for modules
  dax: fix cache flush on PMD-mapped pages
  drivers/base/node.c: fix compaction sysfs file leak
  pinctrl: mvebu: Fix irq_of_parse_and_map() return value
  nvdimm: Allow overwrite in the presence of disabled dimms
  firmware: arm_scmi: Fix list protocols enumeration in the base protocol
  scsi: fcoe: Fix Wstringop-overflow warnings in fcoe_wwn_from_mac()
  mfd: ipaq-micro: Fix error check return value of platform_get_irq()
  powerpc/fadump: fix PT_LOAD segment for boot memory area
  arm: mediatek: select arch timer for mt7629
  crypto: marvell/cesa - ECB does not use an IV
  misc: ocxl: fix possible double free in ocxl_file_register_afu
  ARM: dts: bcm2835-rpi-b: Fix GPIO line names
  ARM: dts: bcm2837-rpi-3-b-plus: Fix GPIO line name of power LED
  ARM: dts: bcm2837-rpi-cm3-io3: Fix GPIO line names for SMPS I2C
  ARM: dts: bcm2835-rpi-zero-w: Fix GPIO line name for Wifi/BT
  can: xilinx_can: mark bit timing constants as const
  KVM: nVMX: Leave most VM-Exit info fields unmodified on failed VM-Entry
  PCI: rockchip: Fix find_first_zero_bit() limit
  PCI: cadence: Fix find_first_zero_bit() limit
  soc: qcom: smsm: Fix missing of_node_put() in smsm_parse_ipc
  soc: qcom: smp2p: Fix missing of_node_put() in smp2p_parse_ipc
  ARM: dts: suniv: F1C100: fix watchdog compatible
  arm64: dts: rockchip: Move drive-impedance-ohm to emmc phy on rk3399
  net/smc: postpone sk_refcnt increment in connect()
  rxrpc: Fix decision on when to generate an IDLE ACK
  rxrpc: Don't let ack.previousPacket regress
  rxrpc: Fix overlapping ACK accounting
  rxrpc: Don't try to resend the request if we're receiving the reply
  rxrpc: Fix listen() setting the bar too high for the prealloc rings
  NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx
  ASoC: wm2000: fix missing clk_disable_unprepare() on error in wm2000_anc_transition()
  thermal/drivers/broadcom: Fix potential NULL dereference in sr_thermal_probe
  drm: msm: fix possible memory leak in mdp5_crtc_cursor_set()
  drm/msm/a6xx: Fix refcount leak in a6xx_gpu_init
  ext4: reject the 'commit' option on ext2 filesystems
  media: ov7670: remove ov7670_power_off from ov7670_remove
  sctp: read sk->sk_bound_dev_if once in sctp_rcv()
  m68k: math-emu: Fix dependencies of math emulation support
  Bluetooth: fix dangling sco_conn and use-after-free in sco_sock_timeout
  media: vsp1: Fix offset calculation for plane cropping
  media: pvrusb2: fix array-index-out-of-bounds in pvr2_i2c_core_init
  media: exynos4-is: Change clk_disable to clk_disable_unprepare
  media: st-delta: Fix PM disable depth imbalance in delta_probe
  media: aspeed: Fix an error handling path in aspeed_video_probe()
  scripts/faddr2line: Fix overlapping text section failures
  regulator: pfuze100: Fix refcount leak in pfuze_parse_regulators_dt
  ASoC: mxs-saif: Fix refcount leak in mxs_saif_probe
  ASoC: fsl: Fix refcount leak in imx_sgtl5000_probe
  perf/amd/ibs: Use interrupt regs ip for stack unwinding
  Revert "cpufreq: Fix possible race in cpufreq online error path"
  iomap: iomap_write_failed fix
  media: uvcvideo: Fix missing check to determine if element is found in list
  drm/msm: return an error pointer in msm_gem_prime_get_sg_table()
  drm/msm/mdp5: Return error code in mdp5_mixer_release when deadlock is detected
  drm/msm/mdp5: Return error code in mdp5_pipe_release when deadlock is detected
  regulator: core: Fix enable_count imbalance with EXCLUSIVE_GET
  x86/mm: Cleanup the control_va_addr_alignment() __setup handler
  irqchip/aspeed-i2c-ic: Fix irq_of_parse_and_map() return value
  irqchip/exiu: Fix acknowledgment of edge triggered interrupts
  x86: Fix return value of __setup handlers
  virtio_blk: fix the discard_granularity and discard_alignment queue limits
  drm/rockchip: vop: fix possible null-ptr-deref in vop_bind()
  drm/msm/hdmi: fix error check return value of irq_of_parse_and_map()
  drm/msm/hdmi: check return value after calling platform_get_resource_byname()
  drm/msm/dsi: fix error checks and return values for DSI xmit functions
  drm/msm/disp/dpu1: set vbif hw config to NULL to avoid use after memory free during pm runtime resume
  perf tools: Add missing headers needed by util/data.h
  ASoC: rk3328: fix disabling mclk on pclk probe failure
  x86/speculation: Add missing prototype for unpriv_ebpf_notify()
  x86/pm: Fix false positive kmemleak report in msr_build_context()
  scsi: ufs: core: Exclude UECxx from SFR dump list
  of: overlay: do not break notify on NOTIFY_{OK|STOP}
  fsnotify: fix wrong lockdep annotations
  inotify: show inotify mask flags in proc fdinfo
  ath9k_htc: fix potential out of bounds access with invalid rxstatus->rs_keyix
  cpufreq: Fix possible race in cpufreq online error path
  spi: img-spfi: Fix pm_runtime_get_sync() error checking
  sched/fair: Fix cfs_rq_clock_pelt() for throttled cfs_rq
  drm/bridge: Fix error handling in analogix_dp_probe
  HID: elan: Fix potential double free in elan_input_configured
  HID: hid-led: fix maximum brightness for Dream Cheeky
  drbd: fix duplicate array initializer
  efi: Add missing prototype for efi_capsule_setup_info
  NFC: NULL out the dev->rfkill to prevent UAF
  spi: spi-ti-qspi: Fix return value handling of wait_for_completion_timeout
  drm: mali-dp: potential dereference of null pointer
  drm/komeda: Fix an undefined behavior bug in komeda_plane_add()
  nl80211: show SSID for P2P_GO interfaces
  bpf: Fix excessive memory allocation in stack_map_alloc()
  drm/vc4: txp: Force alpha to be 0xff if it's disabled
  drm/vc4: txp: Don't set TXP_VSTART_AT_EOF
  drm/mediatek: Fix mtk_cec_mask()
  x86/delay: Fix the wrong asm constraint in delay_loop()
  ASoC: mediatek: Fix missing of_node_put in mt2701_wm8960_machine_probe
  ASoC: mediatek: Fix error handling in mt8173_max98090_dev_probe
  drm/bridge: adv7511: clean up CEC adapter when probe fails
  drm/edid: fix invalid EDID extension block filtering
  ath9k: fix ar9003_get_eepmisc
  drm: fix EDID struct for old ARM OABI format
  RDMA/hfi1: Prevent panic when SDMA is disabled
  powerpc/iommu: Add missing of_node_put in iommu_init_early_dart
  macintosh/via-pmu: Fix build failure when CONFIG_INPUT is disabled
  powerpc/powernv: fix missing of_node_put in uv_init()
  powerpc/xics: fix refcount leak in icp_opal_init()
  tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
  PCI: Avoid pci_dev_lock() AB/BA deadlock with sriov_numvfs_store()
  ARM: hisi: Add missing of_node_put after of_find_compatible_node
  ARM: dts: exynos: add atmel,24c128 fallback to Samsung EEPROM
  ARM: versatile: Add missing of_node_put in dcscb_init
  fat: add ratelimit to fat*_ent_bread()
  powerpc/fadump: Fix fadump to work with a different endian capture kernel
  ARM: OMAP1: clock: Fix UART rate reporting algorithm
  fs: jfs: fix possible NULL pointer dereference in dbFree()
  PM / devfreq: rk3399_dmc: Disable edev on remove()
  ARM: dts: ox820: align interrupt controller node name with dtschema
  IB/rdmavt: add missing locks in rvt_ruc_loopback
  selftests/bpf: fix btf_dump/btf_dump due to recent clang change
  eth: tg3: silence the GCC 12 array-bounds warning
  rxrpc: Return an error to sendmsg if call failed
  hwmon: Make chip parameter for with_info API mandatory
  ASoC: max98357a: remove dependency on GPIOLIB
  media: exynos4-is: Fix compile warning
  net: phy: micrel: Allow probing without .driver_data
  nbd: Fix hung on disconnect request if socket is closed before
  ASoC: rt5645: Fix errorenous cleanup order
  nvme-pci: fix a NULL pointer dereference in nvme_alloc_admin_tags
  openrisc: start CPU timer early in boot
  media: cec-adap.c: fix is_configuring state
  media: coda: limit frame interval enumeration to supported encoder frame sizes
  rtlwifi: Use pr_warn instead of WARN_ONCE
  ipmi: Fix pr_fmt to avoid compilation issues
  ipmi:ssif: Check for NULL msg when handling events and messages
  ACPI: PM: Block ASUS B1400CEAE from suspend to idle by default
  dma-debug: change allocation mode from GFP_NOWAIT to GFP_ATIOMIC
  spi: stm32-qspi: Fix wait_cmd timeout in APM mode
  s390/preempt: disable __preempt_count_add() optimization for PROFILE_ALL_BRANCHES
  ASoC: tscs454: Add endianness flag in snd_soc_component_driver
  HID: bigben: fix slab-out-of-bounds Write in bigben_probe
  drm/amdgpu/ucode: Remove firmware load type check in amdgpu_ucode_free_bo
  mlxsw: spectrum_dcb: Do not warn about priority changes
  ASoC: dapm: Don't fold register value changes into notifications
  net/mlx5: fs, delete the FTE when there are no rules attached to it
  ipv6: Don't send rs packets to the interface of ARPHRD_TUNNEL
  drm: msm: fix error check return value of irq_of_parse_and_map()
  arm64: compat: Do not treat syscall number as ESR_ELx for a bad syscall
  drm/amd/pm: fix the compile warning
  drm/plane: Move range check for format_count earlier
  scsi: megaraid: Fix error check return value of register_chrdev()
  mmc: jz4740: Apply DMA engine limits to maximum segment size
  md/bitmap: don't set sb values if can't pass sanity check
  media: cx25821: Fix the warning when removing the module
  media: pci: cx23885: Fix the error handling in cx23885_initdev()
  media: venus: hfi: avoid null dereference in deinit
  ath9k: fix QCA9561 PA bias level
  drm/amd/pm: fix double free in si_parse_power_table()
  tools/power turbostat: fix ICX DRAM power numbers
  spi: spi-rspi: Remove setting {src,dst}_{addr,addr_width} based on DMA direction
  ALSA: jack: Access input_dev under mutex
  drm/komeda: return early if drm_universal_plane_init() fails.
  ACPICA: Avoid cache flush inside virtual machines
  fbcon: Consistently protect deferred_takeover with console_lock()
  ipv6: fix locking issues with loops over idev->addr_list
  ipw2x00: Fix potential NULL dereference in libipw_xmit()
  b43: Fix assigning negative value to unsigned variable
  b43legacy: Fix assigning negative value to unsigned variable
  mwifiex: add mutex lock for call in mwifiex_dfs_chan_sw_work_queue
  drm/virtio: fix NULL pointer dereference in virtio_gpu_conn_get_modes
  btrfs: repair super block num_devices automatically
  btrfs: add "0x" prefix for unsupported optional features
  ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
  ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
  ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
  perf/x86/intel: Fix event constraints for ICL
  usb: core: hcd: Add support for deferring roothub registration
  USB: new quirk for Dell Gen 2 devices
  USB: serial: option: add Quectel BG95 modem
  ALSA: hda/realtek - Fix microphone noise on ASUS TUF B550M-PLUS
  binfmt_flat: do not stop relocating GOT entries prematurely on riscv

 Conflicts:
	Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
	Documentation/devicetree/bindings~HEAD
	drivers/char/Kconfig
	drivers/mmc/core/block.c
	kernel/sysctl.c

Change-Id: If11e1865055bfb94b3268960268c88c3dfc032c3
2022-11-09 19:53:28 +02:00

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Scheduler internal types and methods:
*/
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/clock.h>
#include <linux/sched/coredump.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/cputime.h>
#include <linux/sched/deadline.h>
#include <linux/sched/debug.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/idle.h>
#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/jobctl.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
#include <linux/sched/nohz.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/prio.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/sched/smt.h>
#include <linux/sched/stat.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/sched/user.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/xacct.h>
#include <uapi/linux/sched/types.h>
#include <linux/binfmts.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpuidle.h>
#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/delayacct.h>
#include <linux/energy_model.h>
#include <linux/init_task.h>
#include <linux/kprobes.h>
#include <linux/kthread.h>
#include <linux/membarrier.h>
#include <linux/migrate.h>
#include <linux/mmu_context.h>
#include <linux/nmi.h>
#include <linux/proc_fs.h>
#include <linux/prefetch.h>
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
#include <linux/security.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
#include <linux/swait.h>
#include <linux/syscalls.h>
#include <linux/task_work.h>
#include <linux/tsacct_kern.h>
#include <asm/tlb.h>
#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
#endif
#include "cpupri.h"
#include "cpudeadline.h"
#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
#else
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
#endif
struct rq;
struct cpuidle_state;
extern __read_mostly bool sched_predl;
extern unsigned int sched_capacity_margin_up[NR_CPUS];
extern unsigned int sched_capacity_margin_down[NR_CPUS];
struct walt_cpu_load {
unsigned long nl;
unsigned long pl;
bool rtgb_active;
u64 ws;
};
#ifdef CONFIG_SCHED_WALT
#define DECLARE_BITMAP_ARRAY(name, nr, bits) \
unsigned long name[nr][BITS_TO_LONGS(bits)]
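/*
 * Illustrative expansion (sketch): DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
 * NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES) becomes
 *
 *	unsigned long top_tasks_bitmap[2][BITS_TO_LONGS(1000)];
 *
 * i.e. 2 x 16 longs on a 64-bit build - one bitmap per tracked window.
 */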
extern unsigned int sched_ravg_window;
struct walt_sched_stats {
int nr_big_tasks;
u64 cumulative_runnable_avg_scaled;
u64 pred_demands_sum_scaled;
unsigned int nr_rtg_high_prio_tasks;
};
struct walt_task_group {
/* Toggle ability to override sched boost enabled */
bool sched_boost_no_override;
/*
* Controls whether a cgroup is eligible for sched boost or not. This
* can temporarily be disabled by the kernel based on the no_override
* flag above.
*/
bool sched_boost_enabled;
/*
* Controls whether tasks of this cgroup should be colocated with each
* other and tasks of other cgroups that have the same flag turned on.
*/
bool colocate;
/* Controls whether further updates are allowed to the colocate flag */
bool colocate_update_disabled;
};
struct walt_root_domain {
/* First cpu with maximum and minimum original capacity */
int max_cap_orig_cpu, min_cap_orig_cpu;
/* First cpu with mid capacity */
int mid_cap_orig_cpu;
};
#define NUM_TRACKED_WINDOWS 2
#define NUM_LOAD_INDICES 1000
struct group_cpu_time {
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
};
struct load_subtractions {
u64 window_start;
u64 subs;
u64 new_subs;
};
struct walt_rq {
struct task_struct *push_task;
struct walt_sched_cluster *cluster;
struct cpumask freq_domain_cpumask;
struct walt_sched_stats walt_stats;
u64 window_start;
u32 prev_window_size;
unsigned long walt_flags;
u64 avg_irqload;
u64 last_irq_window;
u64 prev_irq_time;
struct task_struct *ed_task;
u64 task_exec_scale;
u64 old_busy_time;
u64 old_estimated_time;
u64 curr_runnable_sum;
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
u64 cum_window_demand_scaled;
struct group_cpu_time grp_time;
struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
u8 *top_tasks[NUM_TRACKED_WINDOWS];
u8 curr_table;
int prev_top;
int curr_top;
bool notif_pending;
bool high_irqload;
u64 last_cc_update;
u64 cycles;
};
struct walt_sched_cluster {
raw_spinlock_t load_lock;
struct list_head list;
struct cpumask cpus;
int id;
/*
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq;
unsigned int max_possible_freq;
u64 aggr_grp_load;
};
extern cpumask_t asym_cap_sibling_cpus;
#endif /* CONFIG_SCHED_WALT */
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
#ifdef CONFIG_SMP
extern void init_sched_groups_capacity(int cpu, struct sched_domain *sd);
#endif
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
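/*
 * Example (illustrative): with HZ == 250 a jiffy is 4ms, so
 * NS_TO_JIFFIES(10 * NSEC_PER_MSEC) == 2; the integer division
 * truncates any partial jiffy.
 */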
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
* The extra resolution improves shares distribution and load balancing of
* low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
* hierarchies, especially on larger systems. This is not a user-visible change
* and does not change the user-interface for setting shares/weights.
*
* We increase resolution only if we have enough bits to allow this increased
* resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
* are pretty high and the returns do not justify the increased costs.
*
* Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
* increase coverage and consistency always enable it on 64-bit platforms.
*/
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w) \
({ \
unsigned long __w = (w); \
if (__w) \
__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
__w; \
})
#else
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
# define scale_load_down(w) (w)
#endif
/*
* Task weight (visible to users) and its load (invisible to users) have
* independent resolution, but they should be well calibrated. We use
* scale_load() and scale_load_down() to convert between them. The
* following must be true:
*
* scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
*
*/
#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
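/*
 * Worked example (illustrative, 64-bit): SCHED_FIXEDPOINT_SHIFT is 10, so
 * NICE_0_LOAD == 1 << 20, scale_load(1024) == 1 << 20 and
 * scale_load_down(1 << 20) == 1024 - consistent with the calibration
 * above, since the nice-0 weight sched_prio_to_weight[20] is 1024. The
 * max(2UL, ...) clamp in scale_load_down() keeps small non-zero weights
 * from collapsing to 0 or 1.
 */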
/*
* Single value that decides SCHED_DEADLINE internal math precision.
* 10 -> just above 1us
* 9 -> just above 0.5us
*/
#define DL_SCALE 10
/*
* Single value that denotes runtime == period, i.e. unlimited time.
*/
#define RUNTIME_INF ((u64)~0ULL)
static inline int idle_policy(int policy)
{
return policy == SCHED_IDLE;
}
static inline int fair_policy(int policy)
{
return policy == SCHED_NORMAL || policy == SCHED_BATCH;
}
static inline int rt_policy(int policy)
{
return policy == SCHED_FIFO || policy == SCHED_RR;
}
static inline int dl_policy(int policy)
{
return policy == SCHED_DEADLINE;
}
static inline bool valid_policy(int policy)
{
return idle_policy(policy) || fair_policy(policy) ||
rt_policy(policy) || dl_policy(policy);
}
static inline int task_has_idle_policy(struct task_struct *p)
{
return idle_policy(p->policy);
}
static inline int task_has_rt_policy(struct task_struct *p)
{
return rt_policy(p->policy);
}
static inline int task_has_dl_policy(struct task_struct *p)
{
return dl_policy(p->policy);
}
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
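/*
 * Example (illustrative): with SCHED_CAPACITY_SCALE == 1024, a CPU at
 * half capacity (s == 512) scales a utilization of 1024 down to
 * cap_scale(1024, 512) == 512.
 */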
/*
* !! For sched_setattr_nocheck() (kernel) only !!
*
* This is actually gross. :(
*
* It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
* tasks, but still be able to sleep. We need this on platforms that cannot
* atomically change clock frequency. Remove once fast switching will be
* available on such platforms.
*
* SUGOV stands for SchedUtil GOVernor.
*/
#define SCHED_FLAG_SUGOV 0x10000000
#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
#else
return false;
#endif
}
/*
* Tells if entity @a should preempt entity @b.
*/
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
return dl_entity_is_special(a) ||
dl_time_before(a->deadline, b->deadline);
}
/*
* This is the priority-queue data structure of the RT scheduling class:
*/
struct rt_prio_array {
DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
struct list_head queue[MAX_RT_PRIO];
};
struct rt_bandwidth {
/* nests inside the rq lock: */
raw_spinlock_t rt_runtime_lock;
ktime_t rt_period;
u64 rt_runtime;
struct hrtimer rt_period_timer;
unsigned int rt_period_active;
};
void __dl_clear_params(struct task_struct *p);
struct dl_bandwidth {
raw_spinlock_t dl_runtime_lock;
u64 dl_runtime;
u64 dl_period;
};
static inline int dl_bandwidth_enabled(void)
{
return sysctl_sched_rt_runtime >= 0;
}
/*
* To keep the bandwidth of -deadline tasks under control
* we need some place where:
* - store the maximum -deadline bandwidth of each cpu;
* - cache the fraction of bandwidth that is currently allocated in
* each root domain;
*
* This is all done in the data structure below. It is similar to the
* one used for RT-throttling (rt_bandwidth), with the main difference
* that, since here we are only interested in admission control, we
* do not decrease any runtime while the group "executes", nor do we
* need a timer to replenish it.
*
* With respect to SMP, bandwidth is given on a per root domain basis,
* meaning that:
* - bw (< 100%) is the deadline bandwidth of each CPU;
* - total_bw is the currently allocated bandwidth in each root domain;
*/
struct dl_bw {
raw_spinlock_t lock;
u64 bw;
u64 total_bw;
};
static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw -= tsk_bw;
__dl_update(dl_b, (s32)tsk_bw / cpus);
}
static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw += tsk_bw;
__dl_update(dl_b, -((s32)tsk_bw / cpus));
}
static inline
bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
{
return dl_b->bw != -1 &&
dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
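/*
 * Worked example (illustrative): dl_b->bw holds the per-CPU limit as a
 * fixed-point fraction (see to_ratio()). With the default 95% limit on a
 * 4-CPU root domain, __dl_overflow() admits a new task only while
 *
 *	total_bw - old_bw + new_bw <= bw * 4
 *
 * i.e. the summed deadline bandwidth may not exceed 380% of one CPU.
 */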
extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern bool dl_cpu_busy(unsigned int cpu);
#ifdef CONFIG_CGROUP_SCHED
#include <linux/cgroup.h>
#include <linux/psi.h>
struct cfs_rq;
struct rt_rq;
extern struct list_head task_groups;
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
raw_spinlock_t lock;
ktime_t period;
u64 quota;
u64 runtime;
s64 hierarchical_quota;
u8 idle;
u8 period_active;
u8 distribute_running;
u8 slack_started;
struct hrtimer period_timer;
struct hrtimer slack_timer;
struct list_head throttled_cfs_rq;
/* Statistics: */
int nr_periods;
int nr_throttled;
u64 throttled_time;
#endif
};
/* Task group related information */
struct task_group {
struct cgroup_subsys_state css;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each CPU */
struct sched_entity **se;
/* runqueue "owned" by this group on each CPU */
struct cfs_rq **cfs_rq;
unsigned long shares;
#ifdef CONFIG_SMP
/*
* load_avg can be heavily contended at clock tick time, so put
* it in its own cacheline separated from the fields above which
* will also be accessed at each tick.
*/
atomic_long_t load_avg ____cacheline_aligned;
#endif
#endif
#ifdef CONFIG_RT_GROUP_SCHED
struct sched_rt_entity **rt_se;
struct rt_rq **rt_rq;
struct rt_bandwidth rt_bandwidth;
#endif
struct rcu_head rcu;
struct list_head list;
struct task_group *parent;
struct list_head siblings;
struct list_head children;
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup *autogroup;
#endif
struct cfs_bandwidth cfs_bandwidth;
#ifdef CONFIG_UCLAMP_TASK_GROUP
/* The two decimal precision [%] value requested from user-space */
unsigned int uclamp_pct[UCLAMP_CNT];
/* Clamp values requested for a task group */
struct uclamp_se uclamp_req[UCLAMP_CNT];
/* Effective clamp values used for a task group */
struct uclamp_se uclamp[UCLAMP_CNT];
/* Latency-sensitive flag used for a task group */
unsigned int latency_sensitive;
#ifdef CONFIG_SCHED_WALT
struct walt_task_group wtg;
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_UCLAMP_TASK_GROUP */
};
#ifdef CONFIG_FAIR_GROUP_SCHED
#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
/*
* A weight of 0 or 1 can cause arithmetic problems.
* The weight of a cfs_rq is the sum of the weights of the entities
* queued on it, so the weight of an entity should not be too large,
* and the same goes for the shares value of a task group.
* (The default weight is 1024 - so there's no practical
* limitation from this.)
*/
#define MIN_SHARES (1UL << 1)
#define MAX_SHARES (1UL << 18)
#endif
typedef int (*tg_visitor)(struct task_group *, void *);
extern int walk_tg_tree_from(struct task_group *from,
tg_visitor down, tg_visitor up, void *data);
/*
* Iterate the full tree, calling @down when first entering a node and @up when
* leaving it for the final time.
*
* Caller must hold rcu_lock or sufficient equivalent.
*/
static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
return walk_tg_tree_from(&root_task_group, down, up, data);
}
extern int tg_nop(struct task_group *tg, void *data);
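/*
 * Illustrative visitor (a sketch, not part of the scheduler): count the
 * groups in the hierarchy. 'tg_count_visitor' is a hypothetical name;
 * pair it with tg_nop for the direction you do not care about:
 *
 *	int n = 0;
 *	rcu_read_lock();
 *	walk_tg_tree(tg_count_visitor, tg_nop, &n);
 *	rcu_read_unlock();
 */
static inline int tg_count_visitor(struct task_group *tg, void *data)
{
	(*(int *)data)++;	/* visited once per group on the way down */
	return 0;		/* non-zero would abort the walk */
}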
extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
struct sched_entity *se, int cpu,
struct sched_entity *parent);
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
extern void free_rt_sched_group(struct task_group *tg);
extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
struct sched_rt_entity *rt_se, int cpu,
struct sched_rt_entity *parent);
extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
extern long sched_group_rt_runtime(struct task_group *tg);
extern long sched_group_rt_period(struct task_group *tg);
extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
extern struct task_group *sched_create_group(struct task_group *parent);
extern void sched_online_group(struct task_group *tg,
struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_offline_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next);
#else /* !CONFIG_SMP */
static inline void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next) { }
#endif /* CONFIG_SMP */
#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */
struct cfs_bandwidth { };
#endif /* CONFIG_CGROUP_SCHED */
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
unsigned long runnable_weight;
unsigned int nr_running;
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
u64 exec_clock;
u64 min_vruntime;
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
#endif
struct rb_root_cached tasks_timeline;
/*
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e. when none are currently running).
*/
struct sched_entity *curr;
struct sched_entity *next;
struct sched_entity *last;
struct sched_entity *skip;
#ifdef CONFIG_SCHED_DEBUG
unsigned int nr_spread_over;
#endif
#ifdef CONFIG_SMP
/*
* CFS load tracking
*/
struct sched_avg avg;
#ifndef CONFIG_64BIT
u64 load_last_update_time_copy;
#endif
struct {
raw_spinlock_t lock ____cacheline_aligned;
int nr;
unsigned long load_avg;
unsigned long util_avg;
unsigned long runnable_sum;
} removed;
#ifdef CONFIG_FAIR_GROUP_SCHED
unsigned long tg_load_avg_contrib;
long propagate;
long prop_runnable_sum;
/*
* h_load = weight * f(tg)
*
* Where f(tg) is the recursive weight fraction assigned to
* this group.
*/
unsigned long h_load;
u64 last_h_load_update;
struct sched_entity *h_load_next;
#endif /* CONFIG_FAIR_GROUP_SCHED */
#endif /* CONFIG_SMP */
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
/*
* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
* a hierarchy). Non-leaf lrqs hold other higher schedulable entities
* (like users, containers etc.)
*
* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
* This list is used during load balance.
*/
int on_list;
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_SCHED_WALT
struct walt_sched_stats walt_stats;
#endif
int runtime_enabled;
s64 runtime_remaining;
u64 throttled_clock;
u64 throttled_clock_pelt;
u64 throttled_clock_pelt_time;
int throttled;
int throttle_count;
struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
};
static inline int rt_bandwidth_enabled(void)
{
return sysctl_sched_rt_runtime >= 0;
}
/* RT IPI pull logic requires IRQ_WORK */
#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
# define HAVE_RT_PUSH_IPI
#endif
/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
struct rt_prio_array active;
unsigned int rt_nr_running;
unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
struct {
int curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
int next; /* next highest */
#endif
} highest_prio;
#endif
#ifdef CONFIG_SMP
unsigned long rt_nr_migratory;
unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
#endif /* CONFIG_SMP */
int rt_queued;
int rt_throttled;
u64 rt_time;
u64 rt_runtime;
/* Nests inside the rq lock: */
raw_spinlock_t rt_runtime_lock;
#ifdef CONFIG_RT_GROUP_SCHED
unsigned long rt_nr_boosted;
struct rq *rq;
struct task_group *tg;
#endif
};
static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
{
return rt_rq->rt_queued && rt_rq->rt_nr_running;
}
/* Deadline class' related fields in a runqueue */
struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
struct rb_root_cached root;
unsigned long dl_nr_running;
#ifdef CONFIG_SMP
/*
* Deadline values of the currently executing and the
* earliest ready task on this rq. Caching these facilitates
* the decision whether or not a ready but not running task
* should migrate somewhere else.
*/
struct {
u64 curr;
u64 next;
} earliest_dl;
unsigned long dl_nr_migratory;
int overloaded;
/*
* Tasks on this rq that can be pushed away. They are kept in
* an rb-tree, ordered by tasks' deadlines, with caching
* of the leftmost (earliest deadline) element.
*/
struct rb_root_cached pushable_dl_tasks_root;
#else
struct dl_bw dl_bw;
#endif
/*
* "Active utilization" for this runqueue: increased when a
* task wakes up (becomes TASK_RUNNING) and decreased when a
* task blocks
*/
u64 running_bw;
/*
* Utilization of the tasks "assigned" to this runqueue (including
* the tasks that are in runqueue and the tasks that executed on this
* CPU and blocked). Increased when a task moves to this runqueue, and
* decreased when the task moves away (migrates, changes scheduling
* policy, or terminates).
* This is needed to compute the "inactive utilization" for the
* runqueue (inactive utilization = this_bw - running_bw).
*/
u64 this_bw;
u64 extra_bw;
/*
* Inverse of the fraction of CPU utilization that can be reclaimed
* by the GRUB algorithm.
*/
u64 bw_ratio;
};
#ifdef CONFIG_FAIR_GROUP_SCHED
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
#else
#define entity_is_task(se) 1
#endif
#ifdef CONFIG_SMP
/*
* XXX we want to get rid of these helpers and use the full load resolution.
*/
static inline long se_weight(struct sched_entity *se)
{
return scale_load_down(se->load.weight);
}
static inline long se_runnable(struct sched_entity *se)
{
return scale_load_down(se->runnable_weight);
}
static inline bool sched_asym_prefer(int a, int b)
{
return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}
struct perf_domain {
struct em_perf_domain *em_pd;
struct perf_domain *next;
struct rcu_head rcu;
};
struct max_cpu_capacity {
raw_spinlock_t lock;
unsigned long val;
int cpu;
};
/* Scheduling group status flags */
#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
#define SG_HAS_MISFIT_TASK 0x4 /* Group has misfit task. */
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
* fully partitioning the member CPUs from any other cpuset. Whenever a new
* exclusive cpuset is created, we also create and attach a new root-domain
* object.
*
*/
struct root_domain {
atomic_t refcount;
atomic_t rto_count;
struct rcu_head rcu;
cpumask_var_t span;
cpumask_var_t online;
/*
* Indicate pullable load on at least one CPU, e.g.:
* - More than one runnable task
* - Running task is misfit
*/
int overload;
/* Indicate one or more cpus over-utilized (tipping point) */
int overutilized;
/*
* The bit corresponding to a CPU gets set here if such CPU has more
* than one runnable -deadline task (as it is below for RT tasks).
*/
cpumask_var_t dlo_mask;
atomic_t dlo_count;
struct dl_bw dl_bw;
struct cpudl cpudl;
#ifdef HAVE_RT_PUSH_IPI
/*
* For IPI pull requests, loop across the rto_mask.
*/
struct irq_work rto_push_work;
raw_spinlock_t rto_lock;
/* These are only updated and read within rto_lock */
int rto_loop;
int rto_cpu;
/* These atomics are updated outside of a lock */
atomic_t rto_loop_next;
atomic_t rto_loop_start;
#endif
/*
* The "RT overload" flag: it gets set if a CPU has more than
* one runnable RT task.
*/
cpumask_var_t rto_mask;
struct cpupri cpupri;
/* Maximum cpu capacity in the system. */
struct max_cpu_capacity max_cpu_capacity;
/*
* NULL-terminated list of performance domains intersecting with the
* CPUs of the rd. Protected by RCU.
*/
struct perf_domain __rcu *pd;
#ifdef CONFIG_SCHED_WALT
struct walt_root_domain wrd;
#endif
};
extern void init_defrootdomain(void);
extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
extern void sched_put_rd(struct root_domain *rd);
#ifdef HAVE_RT_PUSH_IPI
extern void rto_push_irq_work_func(struct irq_work *work);
#endif
#endif /* CONFIG_SMP */
#ifdef CONFIG_UCLAMP_TASK
/*
* struct uclamp_bucket - Utilization clamp bucket
* @value: utilization clamp value for tasks on this clamp bucket
* @tasks: number of RUNNABLE tasks on this clamp bucket
*
* Keep track of how many tasks are RUNNABLE for a given utilization
* clamp value.
*/
struct uclamp_bucket {
unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
};
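/*
 * Example (illustrative, 64-bit): SCHED_CAPACITY_SCALE is 1024 and
 * bits_per(1024) == 11, so 'value' occupies 11 bits and 'tasks' the
 * remaining 53 - both packed into a single unsigned long.
 */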
/*
* struct uclamp_rq - rq's utilization clamp
* @value: currently active clamp values for a rq
* @bucket: utilization clamp buckets affecting a rq
*
* Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
* A clamp value is affecting a rq when there is at least one task RUNNABLE
* (or actually running) with that value.
*
* There are up to UCLAMP_CNT possible different clamp values, currently there
* are only two: minimum utilization and maximum utilization.
*
* All utilization clamping values are MAX aggregated, since:
* - for util_min: we want to run the CPU at least at the max of the minimum
* utilization required by its currently RUNNABLE tasks.
* - for util_max: we want to allow the CPU to run up to the max of the
* maximum utilization allowed by its currently RUNNABLE tasks.
*
* Since on each system we expect only a limited number of different
* utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
* the metrics required to compute all the per-rq utilization clamp values.
*/
struct uclamp_rq {
unsigned int value;
struct uclamp_bucket bucket[UCLAMP_BUCKETS];
};
DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
#endif /* CONFIG_UCLAMP_TASK */
/*
* This is the main, per-CPU runqueue data structure.
*
* Locking rule: those places that want to lock multiple runqueues
* (such as the load balancing or the thread migration code), lock
* acquire operations must be ordered by ascending &runqueue.
*/
struct rq {
/* runqueue lock: */
raw_spinlock_t lock;
/*
* nr_running and cpu_load should be in the same cacheline because
* remote CPUs use both these fields when doing load calculation.
*/
unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
unsigned int numa_migrate_on;
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
unsigned long last_load_update_tick;
unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load;
#endif /* CONFIG_SMP */
unsigned int nohz_tick_stopped;
atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
unsigned long nr_load_updates;
u64 nr_switches;
#ifdef CONFIG_UCLAMP_TASK
/* Utilization clamp values based on CPU's RUNNABLE tasks */
struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
unsigned int uclamp_flags;
#define UCLAMP_FLAG_IDLE 0x01
#endif
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this CPU: */
struct list_head leaf_cfs_rq_list;
struct list_head *tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */
/*
* This is part of a global counter where only the total sum
* over all CPUs matters. A task can increase this counter on
* one CPU and if it got migrated afterwards it may decrease
* it on another CPU. Always updated under the runqueue lock:
*/
unsigned long nr_uninterruptible;
struct task_struct *curr;
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
struct mm_struct *prev_mm;
unsigned int clock_update_flags;
u64 clock;
/* Ensure that all clocks are in the same cache line */
u64 clock_task ____cacheline_aligned;
u64 clock_pelt;
unsigned long lost_idle_time;
atomic_t nr_iowait;
#ifdef CONFIG_MEMBARRIER
int membarrier_state;
#endif
#ifdef CONFIG_SMP
struct root_domain *rd;
struct sched_domain __rcu *sd;
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
unsigned char idle_balance;
unsigned long misfit_task_load;
/* For active balancing */
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
/* CPU of this runqueue: */
int cpu;
int online;
struct list_head cfs_tasks;
struct sched_avg avg_rt;
struct sched_avg avg_dl;
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
struct sched_avg avg_irq;
#endif
u64 idle_stamp;
u64 avg_idle;
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
#endif
#ifdef CONFIG_SCHED_WALT
struct walt_rq wrq;
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
u64 prev_steal_time;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
u64 prev_steal_time_rq;
#endif
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
int hrtick_csd_pending;
call_single_data_t hrtick_csd;
#endif
struct hrtimer hrtick_timer;
#endif
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
unsigned long long rq_cpu_time;
/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
/* sys_sched_yield() stats */
unsigned int yld_count;
/* schedule() stats */
unsigned int sched_count;
unsigned int sched_goidle;
/* try_to_wake_up() stats */
unsigned int ttwu_count;
unsigned int ttwu_local;
#endif
#ifdef CONFIG_SMP
struct llist_head wake_list;
#endif
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
#ifdef CONFIG_SCHED_WALT
int idle_state_idx;
#endif
#endif
};
#ifdef CONFIG_FAIR_GROUP_SCHED
/* CPU runqueue to which this cfs_rq is attached */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
return cfs_rq->rq;
}
#else
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
return container_of(cfs_rq, struct rq, cfs);
}
#endif
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
return rq->cpu;
#else
return 0;
#endif
}
#ifdef CONFIG_SCHED_SMT
extern void __update_idle_core(struct rq *rq);
static inline void update_idle_core(struct rq *rq)
{
if (static_branch_unlikely(&sched_smt_present))
__update_idle_core(rq);
}
#else
static inline void update_idle_core(struct rq *rq) { }
#endif
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() this_cpu_ptr(&runqueues)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
extern void update_rq_clock(struct rq *rq);
static inline u64 __rq_clock_broken(struct rq *rq)
{
return READ_ONCE(rq->clock);
}
/*
* rq::clock_update_flags bits
*
* %RQCF_REQ_SKIP - will request skipping of clock update on the next
* call to __schedule(). This is an optimisation to avoid
* neighbouring rq clock updates.
*
* %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
* in effect and calls to update_rq_clock() are being ignored.
*
* %RQCF_UPDATED - is a debug flag that indicates whether a call has been
* made to update_rq_clock() since the last time rq::lock was pinned.
*
* If inside of __schedule(), clock_update_flags will have been
* shifted left (a left shift is a cheap operation for the fast path
* to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
*
* if (rq->clock_update_flags >= RQCF_UPDATED)
*
* to check if %RQCF_UPDATED is set. It'll never be shifted more than
* one position though, because the next rq_unpin_lock() will shift it
* back.
*/
#define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02
#define RQCF_UPDATED 0x04
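/*
 * Illustration: __schedule() promotes a pending skip request with a single
 * left shift, so RQCF_REQ_SKIP (0x01) becomes RQCF_ACT_SKIP (0x02); a set
 * RQCF_UPDATED bit may likewise appear shifted to 0x08, which the
 * ">= RQCF_UPDATED" comparison above still catches.
 */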
static inline void assert_clock_updated(struct rq *rq)
{
/*
* The only reason for not seeing a clock update since the
* last rq_pin_lock() is if we're currently skipping updates.
*/
SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
}
static inline u64 rq_clock(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
assert_clock_updated(rq);
return rq->clock;
}
static inline u64 rq_clock_task(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
assert_clock_updated(rq);
return rq->clock_task;
}
static inline void rq_clock_skip_update(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
rq->clock_update_flags |= RQCF_REQ_SKIP;
}
/*
* See rt task throttling, which is the only time a skip
* request is cancelled.
*/
static inline void rq_clock_cancel_skipupdate(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
rq->clock_update_flags &= ~RQCF_REQ_SKIP;
}
struct rq_flags {
unsigned long flags;
struct pin_cookie cookie;
#ifdef CONFIG_SCHED_DEBUG
/*
* A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
* current pin context is stashed here in case it needs to be
* restored in rq_repin_lock().
*/
unsigned int clock_update_flags;
#endif
};
static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
{
rf->cookie = lockdep_pin_lock(&rq->lock);
#ifdef CONFIG_SCHED_DEBUG
rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
rf->clock_update_flags = 0;
#endif
}
static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
{
#ifdef CONFIG_SCHED_DEBUG
if (rq->clock_update_flags > RQCF_ACT_SKIP)
rf->clock_update_flags = RQCF_UPDATED;
#endif
lockdep_unpin_lock(&rq->lock, rf->cookie);
}
static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
{
lockdep_repin_lock(&rq->lock, rf->cookie);
#ifdef CONFIG_SCHED_DEBUG
/*
* Restore the value we stashed in @rf for this pin context.
*/
rq->clock_update_flags |= rf->clock_update_flags;
#endif
}
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(rq->lock);
struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
__acquires(rq->lock);
static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_unlock(&rq->lock);
}
static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock)
__releases(p->pi_lock)
{
rq_unpin_lock(rq, rf);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
static inline void
rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock_irqsave(&rq->lock, rf->flags);
rq_pin_lock(rq, rf);
}
static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock_irq(&rq->lock);
rq_pin_lock(rq, rf);
}
static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock(&rq->lock);
rq_pin_lock(rq, rf);
}
static inline void
rq_relock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
raw_spin_lock(&rq->lock);
rq_repin_lock(rq, rf);
}
static inline void
rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
}
static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_unlock_irq(&rq->lock);
}
static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
raw_spin_unlock(&rq->lock);
}
static inline struct rq *
this_rq_lock_irq(struct rq_flags *rf)
__acquires(rq->lock)
{
struct rq *rq;
local_irq_disable();
rq = this_rq();
rq_lock(rq, rf);
return rq;
}
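/*
 * Illustrative sketch (not used by the scheduler): the canonical pattern
 * for reading a task's runqueue clock. 'example_task_rq_clock' is a
 * hypothetical name; task_rq_lock() pins the lock and update_rq_clock()
 * satisfies assert_clock_updated() before rq_clock() is read.
 */
static inline u64 example_task_rq_clock(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;
	u64 now;

	rq = task_rq_lock(p, &rf);
	update_rq_clock(rq);
	now = rq_clock(rq);
	task_rq_unlock(rq, p, &rf);
	return now;
}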
#ifdef CONFIG_NUMA
enum numa_topology_type {
NUMA_DIRECT,
NUMA_GLUELESS_MESH,
NUMA_BACKPLANE,
};
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
extern void sched_init_numa(void);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline void sched_init_numa(void) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
{
return nr_cpu_ids;
}
#endif
#ifdef CONFIG_NUMA_BALANCING
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
NUMA_MEM = 0,
NUMA_CPU,
NUMA_MEMBUF,
NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
#else
static inline void
init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
{
}
#endif /* CONFIG_NUMA_BALANCING */
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
#ifdef CONFIG_SMP
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
void (*func)(struct rq *rq))
{
lockdep_assert_held(&rq->lock);
if (unlikely(head->next))
return;
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
}
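/*
 * Illustrative usage (the shape of the pattern in rt.c): each CPU owns a
 * callback_head and the callback runs once rq->lock is dropped:
 *
 *	static DEFINE_PER_CPU(struct callback_head, example_head);
 *
 *	queue_balance_callback(rq, &per_cpu(example_head, rq->cpu),
 *			       example_balance_fn);
 *
 * 'example_head' and 'example_balance_fn' are hypothetical names; see the
 * RT class' push/pull callbacks for the real thing.
 */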
extern void sched_ttwu_pending(void);
#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See destroy_sched_domains: call_rcu for details.
*
* The domain tree of any CPU may only be accessed from within
* preempt-disabled sections.
*/
#define for_each_domain(cpu, __sd) \
for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
__sd; __sd = __sd->parent)
/**
* highest_flag_domain - Return highest sched_domain containing flag.
* @cpu: The CPU whose highest level of sched domain is to
* be returned.
* @flag: The flag to check for the highest sched_domain
* for the given CPU.
*
* Returns the highest sched_domain of a CPU which contains the given flag.
*/
static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
{
struct sched_domain *sd, *hsd = NULL;
for_each_domain(cpu, sd) {
if (!(sd->flags & flag))
break;
hsd = sd;
}
return hsd;
}
static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
{
struct sched_domain *sd;
for_each_domain(cpu, sd) {
if (sd->flags & flag)
break;
}
return sd;
}
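/*
 * Example (illustrative): the topology code caches the last-level-cache
 * domain with highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES). Both
 * helpers walk rq->sd and so must be called from a preempt-disabled/RCU
 * section, as for_each_domain() requires.
 */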
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
extern struct static_key_false sched_asym_cpucapacity;
struct sched_group_capacity {
atomic_t ref;
/*
* CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU.
*/
unsigned long capacity;
unsigned long min_capacity; /* Min per-CPU capacity in group */
unsigned long max_capacity; /* Max per-CPU capacity in group */
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
#ifdef CONFIG_SCHED_DEBUG
int id;
#endif
unsigned long cpumask[0]; /* Balance mask */
};
struct sched_group {
struct sched_group *next; /* Must be a circular list */
atomic_t ref;
unsigned int group_weight;
struct sched_group_capacity *sgc;
int asym_prefer_cpu; /* CPU of highest priority in group */
/*
* The CPUs this group covers.
*
* NOTE: this field is variable length. (Allocated dynamically
* by attaching extra space to the end of the structure,
* depending on how many CPUs the kernel has booted up with)
*/
unsigned long cpumask[0];
};
static inline struct cpumask *sched_group_span(struct sched_group *sg)
{
return to_cpumask(sg->cpumask);
}
/*
* See build_balance_mask().
*/
static inline struct cpumask *group_balance_mask(struct sched_group *sg)
{
return to_cpumask(sg->sgc->cpumask);
}
/**
* group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
* @group: The group whose first CPU is to be returned.
*/
static inline unsigned int group_first_cpu(struct sched_group *group)
{
return cpumask_first(sched_group_span(group));
}
extern int group_balance_cpu(struct sched_group *sg);
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
void register_sched_domain_sysctl(void);
void dirty_sched_domain_sysctl(int cpu);
void unregister_sched_domain_sysctl(void);
#else
static inline void register_sched_domain_sysctl(void)
{
}
static inline void dirty_sched_domain_sysctl(int cpu)
{
}
static inline void unregister_sched_domain_sysctl(void)
{
}
#endif
extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
#else
static inline void sched_ttwu_pending(void) { }
static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
#endif /* CONFIG_SMP */
#include "stats.h"
#include "autogroup.h"
#ifdef CONFIG_CGROUP_SCHED
/*
* Return the group to which this task belongs.
*
* We cannot use task_css() and friends because the cgroup subsystem
* changes that value before the cgroup_subsys::attach() method is called,
* therefore we cannot pin it and might observe the wrong value.
*
* The same is true for autogroup's p->signal->autogroup->tg, the autogroup
* core changes this before calling sched_move_task().
*
* Instead we use a 'copy' which is updated from sched_move_task() while
* holding both task_struct::pi_lock and rq::lock.
*/
static inline struct task_group *task_group(struct task_struct *p)
{
return p->sched_task_group;
}
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
struct task_group *tg = task_group(p);
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
#endif
#ifdef CONFIG_RT_GROUP_SCHED
p->rt.rt_rq = tg->rt_rq[cpu];
p->rt.parent = tg->rt_se[cpu];
#endif
}
#else /* CONFIG_CGROUP_SCHED */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
static inline struct task_group *task_group(struct task_struct *p)
{
return NULL;
}
#endif /* CONFIG_CGROUP_SCHED */
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
set_task_rq(p, cpu);
#ifdef CONFIG_SMP
/*
* After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
* successfully executed on another CPU. We must ensure that updates of
* per-task data have been completed by this moment.
*/
smp_wmb();
#ifdef CONFIG_THREAD_INFO_IN_TASK
WRITE_ONCE(p->cpu, cpu);
#else
WRITE_ONCE(task_thread_info(p)->cpu, cpu);
#endif
p->wake_cpu = cpu;
#endif
}
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
#ifdef CONFIG_SCHED_DEBUG
# include <linux/static_key.h>
# define const_debug __read_mostly
#else
# define const_debug const
#endif
#define SCHED_FEAT(name, enabled) \
__SCHED_FEAT_##name ,
enum {
#include "features.h"
__SCHED_FEAT_NR,
};
#undef SCHED_FEAT
#ifdef CONFIG_SCHED_DEBUG
/*
* To support run-time toggling of sched features, all the translation units
* (but core.c) reference the sysctl_sched_features defined in core.c.
*/
extern const_debug unsigned int sysctl_sched_features;
#ifdef CONFIG_JUMP_LABEL
#define SCHED_FEAT(name, enabled) \
static __always_inline bool static_branch_##name(struct static_key *key) \
{ \
return static_key_##enabled(key); \
}
#include "features.h"
#undef SCHED_FEAT
extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
#else /* !CONFIG_JUMP_LABEL */
#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
#endif /* CONFIG_JUMP_LABEL */
#else /* !SCHED_DEBUG */
/*
* Each translation unit has its own copy of sysctl_sched_features to allow
* constant propagation at compile time and compiler optimization based on
* the feature defaults.
*/
#define SCHED_FEAT(name, enabled) \
(1UL << __SCHED_FEAT_##name) * enabled |
static const_debug __maybe_unused unsigned int sysctl_sched_features =
#include "features.h"
0;
#undef SCHED_FEAT
#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
#endif /* SCHED_DEBUG */
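/*
 * Illustrative expansion (sketch): given a features.h entry such as
 *
 *	SCHED_FEAT(TTWU_QUEUE, true)
 *
 * the enum above defines __SCHED_FEAT_TTWU_QUEUE, and sched_feat(TTWU_QUEUE)
 * resolves either to a static branch (CONFIG_JUMP_LABEL) or to a bit test
 * on sysctl_sched_features.
 */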
extern struct static_key_false sched_numa_balancing;
extern struct static_key_false sched_schedstats;
static inline u64 global_rt_period(void)
{
return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
}
static inline u64 global_rt_runtime(void)
{
if (sysctl_sched_rt_runtime < 0)
return RUNTIME_INF;
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
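/*
 * Example: with the default sysctls (sched_rt_period_us == 1000000,
 * sched_rt_runtime_us == 950000), global_rt_period() returns 1000000000ns
 * and global_rt_runtime() 950000000ns - RT tasks may consume at most 95%
 * of each period.
 */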
static inline int task_current(struct rq *rq, struct task_struct *p)
{
return rq->curr == p;
}
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
return p->on_cpu;
#else
return task_current(rq, p);
#endif
}
static inline int task_on_rq_queued(struct task_struct *p)
{
return p->on_rq == TASK_ON_RQ_QUEUED;
}
static inline int task_on_rq_migrating(struct task_struct *p)
{
return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
}
/*
* wake flags
*/
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
* of tasks with abnormal "nice" values across CPUs the contribution that
* each task makes to its run queue's load is weighted according to its
* scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
* scaled version of the new time slice allocation that they receive on time
* slice expiry etc.
*/
#define WEIGHT_IDLEPRIO 3
#define WMULT_IDLEPRIO 1431655765
extern const int sched_prio_to_weight[40];
extern const u32 sched_prio_to_wmult[40];
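/*
 * Example (illustrative): sched_prio_to_weight[20] (nice 0) is 1024 and
 * adjacent nice levels are spaced by a factor of ~1.25 in weight.
 * sched_prio_to_wmult[] caches 2^32 / weight so the division in
 * delta_exec * weight / lw.weight becomes a multiply and shift;
 * WMULT_IDLEPRIO == 1431655765 == 2^32 / WEIGHT_IDLEPRIO (rounded).
 */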
/*
* {de,en}queue flags:
*
* DEQUEUE_SLEEP - task is no longer runnable
* ENQUEUE_WAKEUP - task just became runnable
*
* SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
* are in a known state which allows modification. Such pairs
* should preserve as much state as possible.
*
* MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
* in the runqueue.
*
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_MIGRATED - the task was migrated during wakeup
*
*/
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02
#define ENQUEUE_MOVE 0x04
#define ENQUEUE_NOCLOCK 0x08
#define ENQUEUE_HEAD 0x10
#define ENQUEUE_REPLENISH 0x20
#ifdef CONFIG_SMP
#define ENQUEUE_MIGRATED 0x40
#else
#define ENQUEUE_MIGRATED 0x00
#endif
#define RETRY_TASK ((void *)-1UL)
struct sched_class {
const struct sched_class *next;
#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
#endif
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
/*
* Both @prev and @rf are optional and may be NULL, in which case the
* caller must already have invoked put_prev_task(rq, prev, rf).
*
* Otherwise it is the responsibility of the pick_next_task() to call
* put_prev_task() on the @prev task or something equivalent, IFF it
* returns a next task.
*
* In that case (@rf != NULL) it may return RETRY_TASK when it finds a
* higher prio class has runnable tasks.
*/
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
#ifdef CONFIG_SMP
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
#ifdef CONFIG_SCHED_WALT
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags,
int sibling_count_hint);
#else
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
#endif
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask);
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
#endif
void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
void (*task_fork)(struct task_struct *p);
void (*task_dead)(struct task_struct *p);
/*
* The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock.
*/
void (*switched_from)(struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio);
unsigned int (*get_rr_interval)(struct rq *rq,
struct task_struct *task);
void (*update_curr)(struct rq *rq);
#define TASK_SET_GROUP 0
#define TASK_MOVE_GROUP 1
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
prev->sched_class->put_prev_task(rq, prev);
}
static inline void set_next_task(struct rq *rq, struct task_struct *next)
{
WARN_ON_ONCE(rq->curr != next);
next->sched_class->set_next_task(rq, next, false);
}
#ifdef CONFIG_SMP
#define sched_class_highest (&stop_sched_class)
#else
#define sched_class_highest (&dl_sched_class)
#endif
#define for_class_range(class, _from, _to) \
for (class = (_from); class != (_to); class = class->next)
#define for_each_class(class) \
for_class_range(class, sched_class_highest, NULL)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
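/*
* Illustrative sketch (simplified from the core pick_next_task() slow
* path) of how the class list is walked, highest-priority class first:
*
*	const struct sched_class *class;
*	struct task_struct *p;
*
*	put_prev_task(rq, prev);
*	for_each_class(class) {
*		p = class->pick_next_task(rq, NULL, NULL);
*		if (p)
*			return p;
*	}
*/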
static inline bool sched_stop_runnable(struct rq *rq)
{
return rq->stop && task_on_rq_queued(rq->stop);
}
static inline bool sched_dl_runnable(struct rq *rq)
{
return rq->dl.dl_nr_running > 0;
}
static inline bool sched_rt_runnable(struct rq *rq)
{
return rq->rt.rt_queued > 0;
}
static inline bool sched_fair_runnable(struct rq *rq)
{
return rq->cfs.nr_running > 0;
}
#ifdef CONFIG_SMP
extern void update_group_capacity(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq);
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
bool __cpu_overutilized(int cpu, int delta);
bool cpu_overutilized(int cpu);
#endif
#ifdef CONFIG_CPU_IDLE
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
rq->idle_state = idle_state;
}
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
SCHED_WARN_ON(!rcu_read_lock_held());
return rq->idle_state;
}
#else
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
{
}
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
return NULL;
}
#endif
#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SCHED_WALT)
static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
{
rq->idle_state_idx = idle_state_idx;
}
static inline int idle_get_state_idx(struct rq *rq)
{
WARN_ON(!rcu_read_lock_held());
return rq->idle_state_idx;
}
#else
static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
{
}
static inline int idle_get_state_idx(struct rq *rq)
{
return -1;
}
#endif
extern void schedule_idle(void);
extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);
extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
extern void reweight_task(struct task_struct *p, int prio);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
#define RATIO_SHIFT 8
#define MAX_BW_BITS (64 - BW_SHIFT)
#define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
unsigned long to_ratio(u64 period, u64 runtime);
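/*
* Worked example of the BW_SHIFT fixed point: to_ratio() computes
* (runtime << BW_SHIFT) / period, so runtime = 10ms over period = 100ms
* yields roughly 0.1 * BW_UNIT (~104857).
*/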
extern void init_entity_runnable_average(struct sched_entity *se);
extern void post_init_entity_util_avg(struct task_struct *p);
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
extern int __init sched_tick_offload_init(void);
/*
* Tick may be needed by tasks in the runqueue depending on their policy and
* requirements. If the tick is needed, send the target CPU an IPI to kick it
* out of nohz mode.
*/
static inline void sched_update_tick_dependency(struct rq *rq)
{
int cpu;
if (!tick_nohz_full_enabled())
return;
cpu = cpu_of(rq);
if (!tick_nohz_full_cpu(cpu))
return;
if (sched_can_stop_tick(rq))
tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
else
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
static inline int sched_tick_offload_init(void) { return 0; }
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
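/*
* nr_running book-keeping. Once a second task becomes runnable, the root
* domain is flagged as overloaded so that idle CPUs know there may be
* load worth pulling; the flag is read locklessly, hence the
* READ_ONCE()/WRITE_ONCE() pairing.
*/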
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
sched_update_nr_prod(cpu_of(rq), count, true);
rq->nr_running = prev_nr + count;
#ifdef CONFIG_SMP
if (prev_nr < 2 && rq->nr_running >= 2) {
if (!READ_ONCE(rq->rd->overload))
WRITE_ONCE(rq->rd->overload, 1);
}
#endif
sched_update_tick_dependency(rq);
}
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
sched_update_nr_prod(cpu_of(rq), count, false);
rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
}
extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
#ifdef CONFIG_SCHED_HRTICK
/*
* Use hrtick when:
* - enabled by features
* - hrtimer is actually high res
*/
static inline int hrtick_enabled(struct rq *rq)
{
if (!sched_feat(HRTICK))
return 0;
if (!cpu_active(cpu_of(rq)))
return 0;
return hrtimer_is_hres_active(&rq->hrtick_timer);
}
void hrtick_start(struct rq *rq, u64 delay);
#else
static inline int hrtick_enabled(struct rq *rq)
{
return 0;
}
#endif /* CONFIG_SCHED_HRTICK */
#ifdef CONFIG_SCHED_WALT
u64 sched_ktime_clock(void);
unsigned long
cpu_util_freq_walt(int cpu, struct walt_cpu_load *walt_load);
#else
#define sched_ravg_window TICK_NSEC
static inline u64 sched_ktime_clock(void)
{
return sched_clock();
}
#endif
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)
{
return SCHED_CAPACITY_SCALE;
}
#endif
#ifndef arch_scale_max_freq_capacity
struct sched_domain;
static __always_inline
unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
{
return SCHED_CAPACITY_SCALE;
}
#endif
unsigned long capacity_curr_of(int cpu);
#ifdef CONFIG_SCHED_WALT
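/*
* A non-zero boost_period marks the boost as temporary; it is cleared
* lazily here, on the first read after boost_expires has passed.
*/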
static inline int per_task_boost(struct task_struct *p)
{
if (p->wts.boost_period) {
if (sched_clock() > p->wts.boost_expires) {
p->wts.boost_period = 0;
p->wts.boost_expires = 0;
p->wts.boost = 0;
}
}
return p->wts.boost;
}
#else
static inline int per_task_boost(struct task_struct *p)
{
return 0;
}
#endif
#ifdef CONFIG_SMP
static inline unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
}
static inline unsigned long capacity_orig_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
static inline unsigned long task_util(struct task_struct *p)
{
#ifdef CONFIG_SCHED_WALT
return p->wts.demand_scaled;
#endif
return READ_ONCE(p->se.avg.util_avg);
}
/**
* cpu_util - Amount of capacity of a CPU that is (estimated to be) used by
* CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS
* tasks (i.e. cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency
* (arch_scale_freq_capacity()).
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows us to properly represent the expected utilization of a CPU
* which has just picked up a big task after a long sleep period. At the same
* time it preserves the benefits of the "blocked utilization" in describing
* the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but
* not capacity_orig) as it is useful for predicting the capacity required
* after task migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
struct cfs_rq *cfs_rq;
unsigned int util;
#ifdef CONFIG_SCHED_WALT
u64 walt_cpu_util =
cpu_rq(cpu)->wrq.walt_stats.cumulative_runnable_avg_scaled;
return min_t(unsigned long, walt_cpu_util, capacity_orig_of(cpu));
#endif
cfs_rq = &cpu_rq(cpu)->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST))
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
return min_t(unsigned long, util, capacity_orig_of(cpu));
}
static inline unsigned long cpu_util_cum(int cpu, int delta)
{
u64 util = cpu_rq(cpu)->cfs.avg.util_avg;
unsigned long capacity = capacity_orig_of(cpu);
#ifdef CONFIG_SCHED_WALT
util = cpu_rq(cpu)->wrq.cum_window_demand_scaled;
#endif
delta += util;
if (delta < 0)
return 0;
return (delta >= capacity) ? capacity : delta;
}
extern unsigned int capacity_margin_freq;
static inline unsigned long
add_capacity_margin(unsigned long cpu_capacity, int cpu)
{
cpu_capacity = cpu_capacity * capacity_margin_freq *
(100 + per_cpu(sched_load_boost, cpu));
cpu_capacity /= 100;
cpu_capacity /= SCHED_CAPACITY_SCALE;
return cpu_capacity;
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
/*
* fair double_lock_balance: Safely acquires both rq->locks in a fair
* way at the expense of forcing extra atomic operations in all
* invocations. This assures that the double_lock is acquired using the
* same underlying policy as the spinlock_t on this architecture, which
* reduces latency compared to the unfair variant below. However, it
* also adds more overhead and therefore may reduce throughput.
*/
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
raw_spin_unlock(&this_rq->lock);
double_rq_lock(this_rq, busiest);
return 1;
}
#else
/*
* Unfair double_lock_balance: Optimizes throughput at the expense of
* latency by eliminating extra atomic operations when the locks are
* already in proper order on entry. This favors lower CPU-ids and will
* grant the double lock to lower CPUs over higher ids under contention,
* regardless of entry order into the function.
*/
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
int ret = 0;
if (unlikely(!raw_spin_trylock(&busiest->lock))) {
if (busiest < this_rq) {
raw_spin_unlock(&this_rq->lock);
raw_spin_lock(&busiest->lock);
raw_spin_lock_nested(&this_rq->lock,
SINGLE_DEPTH_NESTING);
ret = 1;
} else
raw_spin_lock_nested(&busiest->lock,
SINGLE_DEPTH_NESTING);
}
return ret;
}
#endif /* CONFIG_PREEMPT */
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
*/
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
if (unlikely(!irqs_disabled())) {
/* printk() doesn't work well under rq->lock */
raw_spin_unlock(&this_rq->lock);
BUG_ON(1);
}
return _double_lock_balance(this_rq, busiest);
}
static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
__releases(busiest->lock)
{
raw_spin_unlock(&busiest->lock);
lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
}
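/*
* Illustrative (simplified) usage, as in the RT/DL push-migration paths.
* A non-zero return means this_rq->lock was dropped and re-taken, so the
* task/rq state must be re-validated before acting on it:
*
*	if (double_lock_balance(this_rq, busiest)) {
*		... this_rq->lock was dropped: re-check task placement ...
*	}
*	... do the migration ...
*	double_unlock_balance(this_rq, busiest);
*/
/*
* Take two locks in address order; a globally consistent ordering is what
* prevents ABBA deadlock between concurrent lockers.
*/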
static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
{
if (l1 > l2)
swap(l1, l2);
spin_lock(l1);
spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}
static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
{
if (l1 > l2)
swap(l1, l2);
spin_lock_irq(l1);
spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}
static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
{
if (l1 > l2)
swap(l1, l2);
raw_spin_lock(l1);
raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
}
/*
* double_rq_lock - safely lock two runqueues
*
* Note this does not disable interrupts like task_rq_lock,
* you need to do so manually before calling.
*/
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
__acquires(rq1->lock)
__acquires(rq2->lock)
{
BUG_ON(!irqs_disabled());
if (rq1 == rq2) {
raw_spin_lock(&rq1->lock);
__acquire(rq2->lock); /* Fake it out ;) */
} else {
if (rq1 < rq2) {
raw_spin_lock(&rq1->lock);
raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
} else {
raw_spin_lock(&rq2->lock);
raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
}
}
}
/*
* double_rq_unlock - safely unlock two runqueues
*
* Note this does not restore interrupts like task_rq_unlock,
* you need to do so manually after calling.
*/
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
raw_spin_unlock(&rq1->lock);
if (rq1 != rq2)
raw_spin_unlock(&rq2->lock);
else
__release(rq2->lock);
}
extern void set_rq_online (struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern bool sched_smp_initialized;
/*
* task_may_not_preempt - check whether a task may not be preemptible soon
*/
extern bool task_may_not_preempt(struct task_struct *task, int cpu);
#else /* CONFIG_SMP */
/*
* double_rq_lock - safely lock two runqueues
*
* Note this does not disable interrupts like task_rq_lock,
* you need to do so manually before calling.
*/
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
__acquires(rq1->lock)
__acquires(rq2->lock)
{
BUG_ON(!irqs_disabled());
BUG_ON(rq1 != rq2);
raw_spin_lock(&rq1->lock);
__acquire(rq2->lock); /* Fake it out ;) */
}
/*
* double_rq_unlock - safely unlock two runqueues
*
* Note this does not restore interrupts like task_rq_unlock,
* you need to do so manually after calling.
*/
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
BUG_ON(rq1 != rq2);
raw_spin_unlock(&rq1->lock);
__release(rq2->lock);
}
#endif
extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG
extern bool sched_debug_enabled;
extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
extern void init_dl_rq(struct dl_rq *dl_rq);
extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);
#ifdef CONFIG_NO_HZ_COMMON
#define NOHZ_BALANCE_KICK_BIT 0
#define NOHZ_STATS_KICK_BIT 1
#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
extern void nohz_balance_exit_idle(struct rq *rq);
#else
static inline void nohz_balance_exit_idle(struct rq *rq) { }
#endif
#ifdef CONFIG_SMP
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
int i;
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
for_each_cpu_and(i, rd->span, cpu_active_mask) {
struct rq *rq = cpu_rq(i);
rq->dl.extra_bw += bw;
}
}
#else
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
dl->extra_bw += bw;
}
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
u64 tick_delta;
u64 irq_start_time;
struct u64_stats_sync sync;
};
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
/*
* Returns the irqtime minus the softirq time computed by ksoftirqd.
* Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
* subtracted from it and would never move forward. The seqcount retry
* loop below guarantees a consistent 64-bit read on 32-bit architectures.
*/
static inline u64 irq_time_read(int cpu)
{
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
unsigned int seq;
u64 total;
do {
seq = __u64_stats_fetch_begin(&irqtime->sync);
total = irqtime->total;
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total;
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @rq: Runqueue to carry out the update for.
* @flags: Update reason flags.
*
* This function is called by the scheduler on the CPU whose utilization is
* being updated.
*
* It can only be called from RCU-sched read-side critical sections.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
* being stuck in a completely inadequate performance level for too long.
* That is not guaranteed to happen if the updates are only triggered from CFS
* and DL, though, because they may not be coming in if only RT tasks are
* active all the time (or there are RT tasks only).
*
* As a workaround for that issue, this function is called periodically by the
* RT sched class to trigger extra cpufreq updates to prevent it from stalling,
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT tasks.
*/
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
struct update_util_data *data;
u64 clock;
#ifdef CONFIG_SCHED_WALT
if (!(flags & SCHED_CPUFREQ_WALT))
return;
clock = sched_ktime_clock();
#else
clock = rq_clock(rq);
#endif
data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
cpu_of(rq)));
if (data)
data->func(data, clock, flags);
}
#else
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef CONFIG_UCLAMP_TASK
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
/**
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
* @rq: The rq to clamp against. Must not be NULL.
* @util: The util value to clamp.
* @p: The task to clamp against. Can be NULL if you want to clamp
* against @rq only.
*
* Clamps the passed @util to the max(@rq, @p) effective uclamp values.
*
* If sched_uclamp_used static key is disabled, then just return the util
* without any clamping since uclamp aggregation at the rq level in the fast
* path is disabled, rendering this operation a NOP.
*
* Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
* will return the correct effective uclamp value of the task even if the
* static key is disabled.
*/
static __always_inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
unsigned long min_util;
unsigned long max_util;
if (!static_branch_likely(&sched_uclamp_used))
return util;
min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
if (p) {
min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
}
/*
* Since a CPU's {min,max}_util clamps are MAX aggregated considering
* RUNNABLE tasks with _different_ clamps, we can end up with an
* inversion. Fix it now when the clamps are applied.
*/
if (unlikely(min_util >= max_util))
return min_util;
return clamp(util, min_util, max_util);
}
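/*
* Illustrative only: a governor-style caller would clamp a CPU's
* utilization before converting it to a frequency, e.g.:
*
*	util = uclamp_rq_util_with(cpu_rq(cpu), cpu_util(cpu), NULL);
*
* schedutil performs the equivalent clamping in its FREQUENCY_UTIL
* aggregation path.
*/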
static inline bool uclamp_boosted(struct task_struct *p)
{
return uclamp_eff_value(p, UCLAMP_MIN) > 0;
}
/*
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
* by default in the fast path and only gets turned on once userspace performs
* an operation that requires it.
*
* Returns true if userspace opted-in to use uclamp and aggregation at rq level
* hence is active.
*/
static inline bool uclamp_is_used(void)
{
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
static inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
return util;
}
static inline bool uclamp_boosted(struct task_struct *p)
{
return false;
}
static inline bool uclamp_is_used(void)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK */
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool uclamp_latency_sensitive(struct task_struct *p)
{
struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
struct task_group *tg;
if (!css)
return false;
tg = container_of(css, struct task_group, css);
return tg->latency_sensitive;
}
#else
static inline bool uclamp_latency_sensitive(struct task_struct *p)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK_GROUP */
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true
# endif
#else
# define arch_scale_freq_invariant() false
#endif
/**
* enum schedutil_type - CPU utilization type
* @FREQUENCY_UTIL: Utilization used to select frequency
* @ENERGY_UTIL: Utilization used during energy calculation
*
* The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
* need to be aggregated differently depending on the usage made of them. This
* enum is used within schedutil_freq_util() to differentiate the types of
* utilization expected by the callers, and adjust the aggregation accordingly.
*/
enum schedutil_type {
FREQUENCY_UTIL,
ENERGY_UTIL,
};
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
unsigned long max, enum schedutil_type type,
struct task_struct *p);
static inline unsigned long cpu_bw_dl(struct rq *rq)
{
return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
}
static inline unsigned long cpu_util_dl(struct rq *rq)
{
return READ_ONCE(rq->avg_dl.util_avg);
}
static inline unsigned long cpu_util_cfs(struct rq *rq)
{
unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
if (sched_feat(UTIL_EST)) {
util = max_t(unsigned long, util,
READ_ONCE(rq->cfs.avg.util_est.enqueued));
}
return util;
}
static inline unsigned long cpu_util_rt(struct rq *rq)
{
return READ_ONCE(rq->avg_rt.util_avg);
}
#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
unsigned long max, enum schedutil_type type,
struct task_struct *p)
{
return 0;
}
#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
return rq->avg_irq.util_avg;
}
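/*
* Worked example: with max = 1024 and irq = 256 the remaining capacity
* factor is (1024 - 256) / 1024 = 0.75, so a combined CFS/RT/DL util
* of 600 scales down to 450.
*/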
static inline
unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
{
util *= (max - irq);
util /= max;
return util;
}
#else
static inline unsigned long cpu_util_irq(struct rq *rq)
{
return 0;
}
static inline
unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
{
return util;
}
#endif
#ifdef CONFIG_ENERGY_MODEL
#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
DECLARE_STATIC_KEY_FALSE(sched_energy_present);
static inline bool sched_energy_enabled(void)
{
return static_branch_likely(&sched_energy_present);
}
#else /* !CONFIG_ENERGY_MODEL */
#define perf_domain_span(pd) NULL
static inline bool sched_energy_enabled(void) { return false; }
#endif /* CONFIG_ENERGY_MODEL */
#ifdef CONFIG_MEMBARRIER
/*
* The scheduler provides memory barriers required by membarrier between:
* - prior user-space memory accesses and store to rq->membarrier_state,
* - store to rq->membarrier_state and following user-space memory accesses.
* In the same way it provides those guarantees around store to rq->curr.
*/
static inline void membarrier_switch_mm(struct rq *rq,
struct mm_struct *prev_mm,
struct mm_struct *next_mm)
{
int membarrier_state;
if (prev_mm == next_mm)
return;
membarrier_state = atomic_read(&next_mm->membarrier_state);
if (READ_ONCE(rq->membarrier_state) == membarrier_state)
return;
WRITE_ONCE(rq->membarrier_state, membarrier_state);
}
#else
static inline void membarrier_switch_mm(struct rq *rq,
struct mm_struct *prev_mm,
struct mm_struct *next_mm)
{
}
#endif
enum sched_boost_policy {
SCHED_BOOST_NONE,
SCHED_BOOST_ON_BIG,
SCHED_BOOST_ON_ALL,
};
#ifdef CONFIG_SCHED_WALT
#define WALT_MANY_WAKEUP_DEFAULT 1000
static inline bool walt_want_remote_wakeup(void)
{
return sysctl_sched_many_wakeup_threshold < WALT_MANY_WAKEUP_DEFAULT;
}
static inline int cluster_first_cpu(struct walt_sched_cluster *cluster)
{
return cpumask_first(&cluster->cpus);
}
struct walt_related_thread_group {
int id;
raw_spinlock_t lock;
struct list_head tasks;
struct list_head list;
bool skip_min;
struct rcu_head rcu;
u64 last_update;
u64 downmigrate_ts;
u64 start_ts;
};
extern struct walt_sched_cluster *sched_cluster[NR_CPUS];
extern unsigned int max_possible_capacity;
extern unsigned int min_max_possible_capacity;
extern unsigned int __read_mostly sched_init_task_load_windows;
extern unsigned int __read_mostly sched_load_granule;
extern int update_preferred_cluster(struct walt_related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick);
extern void set_preferred_cluster(struct walt_related_thread_group *grp);
extern void add_new_task_to_grp(struct task_struct *new);
#define NO_BOOST 0
#define FULL_THROTTLE_BOOST 1
#define CONSERVATIVE_BOOST 2
#define RESTRAINED_BOOST 3
#define FULL_THROTTLE_BOOST_DISABLE -1
#define CONSERVATIVE_BOOST_DISABLE -2
#define RESTRAINED_BOOST_DISABLE -3
#define MAX_NUM_BOOST_TYPE (RESTRAINED_BOOST+1)
static inline int asym_cap_siblings(int cpu1, int cpu2)
{
return (cpumask_test_cpu(cpu1, &asym_cap_sibling_cpus) &&
cpumask_test_cpu(cpu2, &asym_cap_sibling_cpus));
}
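/*
* @margin is a percentage: the sibling pair is considered to have spare
* capacity when total_util * margin < total_capacity * 100, i.e. when the
* combined utilization is below (100 / margin) of the combined capacity
* (with margin = 125, below 80% of it).
*/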
static inline bool asym_cap_sibling_group_has_capacity(int dst_cpu, int margin)
{
int sib1, sib2;
int nr_running;
unsigned long total_util, total_capacity;
if (cpumask_empty(&asym_cap_sibling_cpus) ||
cpumask_test_cpu(dst_cpu, &asym_cap_sibling_cpus))
return false;
sib1 = cpumask_first(&asym_cap_sibling_cpus);
sib2 = cpumask_last(&asym_cap_sibling_cpus);
if (!cpu_active(sib1) || cpu_isolated(sib1) ||
!cpu_active(sib2) || cpu_isolated(sib2))
return false;
nr_running = cpu_rq(sib1)->cfs.h_nr_running +
cpu_rq(sib2)->cfs.h_nr_running;
if (nr_running <= 2)
return true;
total_capacity = capacity_of(sib1) + capacity_of(sib2);
total_util = cpu_util(sib1) + cpu_util(sib2);
return ((total_capacity * 100) > (total_util * margin));
}
static inline unsigned int cpu_max_possible_freq(int cpu)
{
return cpu_rq(cpu)->wrq.cluster->max_possible_freq;
}
static inline unsigned int cpu_max_freq(int cpu)
{
return mult_frac(cpu_max_possible_freq(cpu), capacity_orig_of(cpu),
arch_scale_cpu_capacity(cpu));
}
static inline bool hmp_capable(void)
{
return max_possible_capacity != min_max_possible_capacity;
}
static inline bool is_max_capacity_cpu(int cpu)
{
return arch_scale_cpu_capacity(cpu) == max_possible_capacity;
}
static inline bool is_min_capacity_cpu(int cpu)
{
return arch_scale_cpu_capacity(cpu) == min_max_possible_capacity;
}
static inline unsigned int task_load(struct task_struct *p)
{
return p->wts.demand;
}
static inline unsigned int task_pl(struct task_struct *p)
{
return p->wts.pred_demand;
}
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return (rcu_access_pointer(p->wts.grp) != NULL);
}
static inline bool task_rtg_high_prio(struct task_struct *p)
{
return task_in_related_thread_group(p) &&
(p->prio <= sysctl_walt_rtg_cfs_boost_prio);
}
static inline struct walt_related_thread_group
*task_related_thread_group(struct task_struct *p)
{
return rcu_dereference(p->wts.grp);
}
/* Apply the utilization threshold to all types of low-latency tasks. */
static inline bool walt_low_latency_task(struct task_struct *p)
{
return p->wts.low_latency &&
(task_util(p) < sysctl_walt_low_latency_task_threshold);
}
static inline bool walt_binder_low_latency_task(struct task_struct *p)
{
return (p->wts.low_latency & WALT_LOW_LATENCY_BINDER) &&
(task_util(p) < sysctl_walt_low_latency_task_threshold);
}
static inline bool walt_procfs_low_latency_task(struct task_struct *p)
{
return (p->wts.low_latency & WALT_LOW_LATENCY_PROCFS) &&
(task_util(p) < sysctl_walt_low_latency_task_threshold);
}
/* Are the frequencies of two CPUs synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
struct rq *rq = cpu_rq(src_cpu);
if (src_cpu == dst_cpu)
return 1;
if (asym_cap_siblings(src_cpu, dst_cpu))
return 1;
return cpumask_test_cpu(dst_cpu, &rq->wrq.freq_domain_cpumask);
}
#define CPU_RESERVED 1
extern enum sched_boost_policy boost_policy;
static inline enum sched_boost_policy sched_boost_policy(void)
{
return boost_policy;
}
extern unsigned int sched_boost_type;
static inline int sched_boost(void)
{
return sched_boost_type;
}
static inline bool rt_boost_on_big(void)
{
return sched_boost() == FULL_THROTTLE_BOOST ?
(sched_boost_policy() == SCHED_BOOST_ON_BIG) : false;
}
static inline bool is_full_throttle_boost(void)
{
return sched_boost() == FULL_THROTTLE_BOOST;
}
extern int preferred_cluster(struct walt_sched_cluster *cluster,
struct task_struct *p);
extern struct walt_sched_cluster *rq_cluster(struct rq *rq);
#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline bool task_sched_boost(struct task_struct *p)
{
struct cgroup_subsys_state *css;
struct task_group *tg;
bool sched_boost_enabled;
rcu_read_lock();
css = task_css(p, cpu_cgrp_id);
if (!css) {
rcu_read_unlock();
return false;
}
tg = container_of(css, struct task_group, css);
sched_boost_enabled = tg->wtg.sched_boost_enabled;
rcu_read_unlock();
return sched_boost_enabled;
}
extern int sync_cgroup_colocation(struct task_struct *p, bool insert);
#else
static inline bool
same_schedtg(struct task_struct *tsk1, struct task_struct *tsk2)
{
return true;
}
static inline bool task_sched_boost(struct task_struct *p)
{
return true;
}
#endif
extern int alloc_related_thread_groups(void);
extern void check_for_migration(struct rq *rq, struct task_struct *p);
static inline int is_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_bit(CPU_RESERVED, &rq->wrq.walt_flags);
}
static inline int mark_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
return test_and_set_bit(CPU_RESERVED, &rq->wrq.walt_flags);
}
static inline void clear_reserved(int cpu)
{
struct rq *rq = cpu_rq(cpu);
clear_bit(CPU_RESERVED, &rq->wrq.walt_flags);
}
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return cpu_of(rq) == task_cpu(p) && (p->on_rq ||
p->wts.last_sleep_ts >= rq->wrq.window_start);
}
static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta)
{
rq->wrq.cum_window_demand_scaled += scaled_delta;
if (unlikely((s64)rq->wrq.cum_window_demand_scaled < 0))
rq->wrq.cum_window_demand_scaled = 0;
}
extern unsigned long thermal_cap(int cpu);
extern void clear_walt_request(int cpu);
extern enum sched_boost_policy sched_boost_policy(void);
extern void sched_boost_parse_dt(void);
extern void clear_ed_task(struct task_struct *p, struct rq *rq);
extern bool early_detection_notify(struct rq *rq, u64 wallclock);
void note_task_waking(struct task_struct *p, u64 wallclock);
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
if (likely(sched_boost_policy() == SCHED_BOOST_NONE))
return false;
return task_sched_boost(p);
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
enum sched_boost_policy policy;
if (likely(sched_boost_policy() == SCHED_BOOST_NONE))
return SCHED_BOOST_NONE;
policy = task_sched_boost(p) ? sched_boost_policy() : SCHED_BOOST_NONE;
if (policy == SCHED_BOOST_ON_BIG) {
/*
* Filter out tasks less than min task util threshold
* under conservative boost.
*/
if (sched_boost() == CONSERVATIVE_BOOST &&
task_util(p) <= sysctl_sched_min_task_util_for_boost)
policy = SCHED_BOOST_NONE;
}
return policy;
}
static inline bool is_min_capacity_cluster(struct walt_sched_cluster *cluster)
{
return is_min_capacity_cpu(cluster_first_cpu(cluster));
}
#else /* CONFIG_SCHED_WALT */
struct walt_sched_stats;
struct walt_related_thread_group;
struct walt_sched_cluster;
static inline bool task_sched_boost(struct task_struct *p)
{
return false;
}
static inline bool task_placement_boost_enabled(struct task_struct *p)
{
return false;
}
static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
static inline int sched_boost(void)
{
return 0;
}
static inline bool rt_boost_on_big(void)
{
return false;
}
static inline bool is_full_throttle_boost(void)
{
return false;
}
static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
return SCHED_BOOST_NONE;
}
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
return false;
}
static inline bool hmp_capable(void) { return false; }
static inline bool is_max_capacity_cpu(int cpu) { return true; }
static inline bool is_min_capacity_cpu(int cpu) { return true; }
static inline int
preferred_cluster(struct walt_sched_cluster *cluster, struct task_struct *p)
{
return -1;
}
static inline struct walt_sched_cluster *rq_cluster(struct rq *rq)
{
return NULL;
}
static inline int asym_cap_siblings(int cpu1, int cpu2) { return 0; }
static inline bool asym_cap_sibling_group_has_capacity(int dst_cpu, int margin)
{
return false;
}
static inline void
set_preferred_cluster(struct walt_related_thread_group *grp) { }
static inline bool task_in_related_thread_group(struct task_struct *p)
{
return false;
}
static inline struct walt_related_thread_group *task_related_thread_group(
struct task_struct *p)
{
return NULL;
}
static inline u32 task_load(struct task_struct *p) { return 0; }
static inline u32 task_pl(struct task_struct *p) { return 0; }
static inline int
update_preferred_cluster(struct walt_related_thread_group *grp,
struct task_struct *p, u32 old_load, bool from_tick)
{
return 0;
}
static inline void add_new_task_to_grp(struct task_struct *new) {}
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
return 1;
}
static inline int mark_reserved(int cpu)
{
return 0;
}
static inline void clear_reserved(int cpu) { }
static inline int alloc_related_thread_groups(void) { return 0; }
static inline void walt_fixup_cum_window_demand(struct rq *rq,
s64 scaled_delta) { }
#ifdef CONFIG_SMP
static inline unsigned long thermal_cap(int cpu)
{
return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif
static inline void clear_walt_request(int cpu) { }
static inline int is_reserved(int cpu)
{
return 0;
}
static inline enum sched_boost_policy sched_boost_policy(void)
{
return SCHED_BOOST_NONE;
}
static inline void sched_boost_parse_dt(void) { }
static inline void clear_ed_task(struct task_struct *p, struct rq *rq) { }
static inline bool early_detection_notify(struct rq *rq, u64 wallclock)
{
return false;
}
static inline void note_task_waking(struct task_struct *p, u64 wallclock) { }
static inline bool walt_want_remote_wakeup(void)
{
return false;
}
#endif /* CONFIG_SCHED_WALT */
struct sched_avg_stats {
int nr;
int nr_misfit;
int nr_max;
int nr_scaled;
};
extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
#ifdef CONFIG_SMP
#ifdef CONFIG_SCHED_WALT
extern int group_balance_cpu_not_isolated(struct sched_group *sg);
#else
static inline int group_balance_cpu_not_isolated(struct sched_group *sg)
{
return group_balance_cpu(sg);
}
#endif /* CONFIG_SCHED_WALT */
#endif /* CONFIG_SMP */
extern int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
int sync, int sibling_count_hint);
extern int active_load_balance_cpu_stop(void *data);
#ifdef CONFIG_HOTPLUG_CPU
extern void set_rq_online(struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf,
bool migrate_pinned_tasks);
extern void calc_load_migrate(struct rq *rq);
#ifdef CONFIG_SCHED_WALT
extern void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks);
extern void attach_tasks_core(struct list_head *tasks, struct rq *rq);
#else
static inline void
detach_one_task_core(struct task_struct *p, struct rq *rq,
struct list_head *tasks)
{
}
static inline void attach_tasks_core(struct list_head *tasks, struct rq *rq) {}
#endif
#endif
extern struct task_struct *find_process_by_pid(pid_t pid);
extern void enqueue_task_core(struct rq *rq, struct task_struct *p, int flags);
extern void dequeue_task_core(struct rq *rq, struct task_struct *p, int flags);
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_UCLAMP_TASK_GROUP)
extern void walt_init_sched_boost(struct task_group *tg);
#else
static inline void walt_init_sched_boost(struct task_group *tg) {}
#endif
#ifdef CONFIG_SCHED_WALT
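/*
* Queue the irq_work on the local CPU when it is online; if the local
* CPU is on its way down, redirect to any online CPU so the work is
* not lost.
*/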
static inline void walt_irq_work_queue(struct irq_work *work)
{
if (likely(cpu_online(raw_smp_processor_id())))
irq_work_queue(work);
else
irq_work_queue_on(work, cpumask_any(cpu_online_mask));
}
#else
static inline void walt_irq_work_queue(struct irq_work *work)
{
irq_work_queue(work);
}
#endif