29949ccfbb
https://source.android.com/docs/security/bulletin/2023-08-01 CVE-2023-21264 CVE-2020-29374 * tag 'ASB-2023-08-05_11-5.4' of https://android.googlesource.com/kernel/common: UPSTREAM: media: dvb-core: Fix kernel WARNING for blocking operation in wait_event*() ANDROID: ABI: Update allowed list for QCOM UPSTREAM: usb: gadget: udc: renesas_usb3: Fix use after free bug in renesas_usb3_remove due to race condition UPSTREAM: x86/mm: Avoid using set_pgd() outside of real PGD pages UPSTREAM: net/sched: flower: fix possible OOB write in fl_set_geneve_opt() Linux 5.4.249 xfs: verify buffer contents when we skip log replay mm: make wait_on_page_writeback() wait for multiple pending writebacks mm: fix VM_BUG_ON(PageTail) and BUG_ON(PageWriteback) i2c: imx-lpi2c: fix type char overflow issue when calculating the clock cycle x86/apic: Fix kernel panic when booting with intremap=off and x2apic_phys drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl drm/exynos: fix race condition UAF in exynos_g2d_exec_ioctl drm/exynos: vidi: fix a wrong error return ARM: dts: Fix erroneous ADS touchscreen polarities ASoC: nau8824: Add quirk to active-high jack-detect s390/cio: unregister device when the only path is gone usb: gadget: udc: fix NULL dereference in remove() nfcsim.c: Fix error checking for debugfs_create_dir media: cec: core: don't set last_initiator if tx in progress arm64: Add missing Set/Way CMO encodings HID: wacom: Add error check to wacom_parse_and_register() scsi: target: iscsi: Prevent login threads from racing between each other sch_netem: acquire qdisc lock in netem_change() Revert "net: phy: dp83867: perform soft reset and retain established link" netfilter: nfnetlink_osf: fix module autoload netfilter: nf_tables: disallow element updates of bound anonymous sets be2net: Extend xmit workaround to BE3 chip net: dsa: mt7530: fix trapping frames on non-MT7621 SoC MT7530 switch ipvs: align inner_mac_header for encapsulation mmc: usdhi60rol0: fix deferred probing 
mmc: sh_mmcif: fix deferred probing mmc: sdhci-acpi: fix deferred probing mmc: omap_hsmmc: fix deferred probing mmc: omap: fix deferred probing mmc: mvsdio: fix deferred probing mmc: mvsdio: convert to devm_platform_ioremap_resource mmc: mtk-sd: fix deferred probing net: qca_spi: Avoid high load if QCA7000 is not available xfrm: Linearize the skb after offloading if needed. ieee802154: hwsim: Fix possible memory leaks rcu: Upgrade rcu_swap_protected() to rcu_replace_pointer() x86/mm: Avoid using set_pgd() outside of real PGD pages cifs: Fix potential deadlock when updating vol in cifs_reconnect() cifs: Merge is_path_valid() into get_normalized_path() cifs: Introduce helpers for finding TCP connection cifs: Get rid of kstrdup_const()'d paths cifs: Clean up DFS referral cache nilfs2: prevent general protection fault in nilfs_clear_dirty_page() writeback: fix dereferencing NULL mapping->host on writeback_page_template ip_tunnels: allow VXLAN/GENEVE to inherit TOS/TTL from VLAN mmc: meson-gx: remove redundant mmc_request_done() call from irq context cgroup: Do not corrupt task iteration when rebinding subsystem PCI: hv: Fix a race condition bug in hv_pci_query_relations() Drivers: hv: vmbus: Fix vmbus_wait_for_unload() to scan present CPUs nilfs2: fix buffer corruption due to concurrent device reads media: dvb-core: Fix use-after-free due to race at dvb_register_device() media: dvbdev: fix error logic at dvb_register_device() media: dvbdev: Fix memleak in dvb_register_device tick/common: Align tick period during sched_timer setup x86/purgatory: remove PGO flags tracing: Add tracing_reset_all_online_cpus_unlocked() function epoll: ep_autoremove_wake_function should use list_del_init_careful list: add "list_del_init_careful()" to go with "list_empty_careful()" mm: rewrite wait_on_page_bit_common() logic nilfs2: reject devices with insufficient block count Revert "neighbour: Replace zero-length array with flexible-array member" Revert "neighbour: fix unaligned access to 
pneigh_entry" Revert "tcp: deny tcp_disconnect() when threads are waiting" Linux 5.4.248 mmc: block: ensure error propagation for non-blk drm/nouveau/kms: Fix NULL pointer dereference in nouveau_connector_detect_depth neighbour: delete neigh_lookup_nodev as not used net: Remove unused inline function dst_hold_and_use() neighbour: Remove unused inline function neigh_key_eq16() afs: Fix vlserver probe RTT handling selftests/ptp: Fix timestamp printf format for PTP_SYS_OFFSET net: tipc: resize nlattr array to correct size net: lapbether: only support ethernet devices net/sched: cls_api: Fix lockup on flushing explicitly created chain drm/nouveau: add nv_encoder pointer check for NULL drm/nouveau/kms: Don't change EDID when it hasn't actually changed drm/nouveau/dp: check for NULL nv_connector->native_mode igb: fix nvm.ops.read() error handling sctp: fix an error code in sctp_sf_eat_auth() ipvlan: fix bound dev checking for IPv6 l3s mode IB/isert: Fix incorrect release of isert connection IB/isert: Fix possible list corruption in CMA handler IB/isert: Fix dead lock in ib_isert IB/uverbs: Fix to consider event queue closing also upon non-blocking mode iavf: remove mask from iavf_irq_enable_queues() RDMA/rxe: Fix the use-before-initialization error of resp_pkts RDMA/rxe: Removed unused name from rxe_task struct RDMA/rxe: Remove the unused variable obj net/sched: cls_u32: Fix reference counter leak leading to overflow ping6: Fix send to link-local addresses with VRF. 
netfilter: nfnetlink: skip error delivery on batch in case of ENOMEM spi: fsl-dspi: avoid SCK glitches with continuous transfers spi: spi-fsl-dspi: Remove unused chip->void_write_data usb: dwc3: gadget: Reset num TRBs before giving back the request serial: lantiq: add missing interrupt ack USB: serial: option: add Quectel EM061KGL series Remove DECnet support from kernel ALSA: hda/realtek: Add a quirk for Compaq N14JP6 net: usb: qmi_wwan: add support for Compal RXM-G1 RDMA/uverbs: Restrict usage of privileged QKEYs nouveau: fix client work fence deletion race powerpc/purgatory: remove PGO flags kexec: support purgatories with .text.hot sections nilfs2: fix possible out-of-bounds segment allocation in resize ioctl nilfs2: fix incomplete buffer cleanup in nilfs_btnode_abort_change_key() nios2: dts: Fix tse_mac "max-frame-size" property ocfs2: check new file size on fallocate call ocfs2: fix use-after-free when unmounting read-only filesystem drm:amd:amdgpu: Fix missing buffer object unlock in failure path xen/blkfront: Only check REQ_FUA for writes mips: Move initrd_start check after initrd address sanitisation. 
MIPS: Alchemy: fix dbdma2 parisc: Flush gatt writes and adjust gatt mask in parisc_agp_mask_memory() parisc: Improve cache flushing for PCXL in arch_sync_dma_for_cpu() btrfs: handle memory allocation failure in btrfs_csum_one_bio power: supply: Fix logic checking if system is running from battery irqchip/meson-gpio: Mark OF related data as maybe unused regulator: Fix error checking for debugfs_create_dir platform/x86: asus-wmi: Ignore WMI events with codes 0x7B, 0xC0 power: supply: Ratelimit no data debug output ARM: dts: vexpress: add missing cache properties power: supply: bq27xxx: Use mod_delayed_work() instead of cancel() + schedule() power: supply: sc27xx: Fix external_power_changed race power: supply: ab8500: Fix external_power_changed race s390/dasd: Use correct lock while counting channel queue length dasd: refactor dasd_ioctl_information KEYS: asymmetric: Copy sig and digest in public_key_verify_signature() test_firmware: fix a memory leak with reqs buffer Revert "firmware: arm_sdei: Fix sleep from invalid context BUG" Revert "PM: domains: Fix up terminology with parent/child" Revert "PM: domains: Restore comment indentation for generic_pm_domain.child_links" Revert "scripts/gdb: bail early if there are no generic PD" Revert "uapi/linux/const.h: prefer ISO-friendly __typeof__" Revert "netfilter: nf_tables: don't write table validation state without mutex" Linux 5.4.247 Revert "staging: rtl8192e: Replace macro RTL_PCI_DEVICE with PCI_DEVICE" mtd: spinand: macronix: Add support for MX35LFxGE4AD btrfs: unset reloc control if transaction commit fails in prepare_to_relocate() btrfs: check return value of btrfs_commit_transaction in relocation rbd: get snapshot context after exclusive lock is ensured to be held drm/atomic: Don't pollute crtc_state->mode_blob with error pointers cifs: handle empty list of targets in cifs_reconnect() cifs: get rid of unused parameter in reconn_setup_dfs_targets() ext4: only check dquot_initialize_needed() when debugging eeprom: 
at24: also select REGMAP i2c: sprd: Delete i2c adapter in .remove's error path bonding (gcc13): synchronize bond_{a,t}lb_xmit() types usb: usbfs: Use consistent mmap functions usb: usbfs: Enforce page requirements for mmap pinctrl: meson-axg: add missing GPIOA_18 gpio group rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Bluetooth: Fix use-after-free in hci_remove_ltk/hci_remove_irk ceph: fix use-after-free bug for inodes when flushing capsnaps can: j1939: avoid possible use-after-free when j1939_can_rx_register fails can: j1939: change j1939_netdev_lock type to mutex can: j1939: j1939_sk_send_loop_abort(): improved error queue handling in J1939 Socket drm/amdgpu: fix xclk freq on CHIP_STONEY ALSA: hda/realtek: Add Lenovo P3 Tower platform ALSA: hda/realtek: Add a quirk for HP Slim Desktop S01 Input: psmouse - fix OOB access in Elantech protocol Input: xpad - delete a Razer DeathAdder mouse VID/PID entry batman-adv: Broken sync while rescheduling delayed work bnxt_en: Query default VLAN before VNIC setup on a VF lib: cpu_rmap: Fix potential use-after-free in irq_cpu_rmap_release() net: sched: fix possible refcount leak in tc_chain_tmplt_add() net: sched: move rtm_tca_policy declaration to include file rfs: annotate lockless accesses to RFS sock flow table rfs: annotate lockless accesses to sk->sk_rxhash netfilter: ipset: Add schedule point in call_ad(). 
netfilter: conntrack: fix NULL pointer dereference in nf_confirm_cthelper Bluetooth: L2CAP: Add missing checks for invalid DCID Bluetooth: Fix l2cap_disconnect_req deadlock net: dsa: lan9303: allow vid != 0 in port_fdb_{add|del} methods neighbour: fix unaligned access to pneigh_entry neighbour: Replace zero-length array with flexible-array member spi: qup: Request DMA before enabling clocks i40e: fix build warnings in i40e_alloc.h i40iw: fix build warning in i40iw_manage_apbvt() block/blk-iocost (gcc13): keep large values in a new enum blk-iocost: avoid 64-bit division in ioc_timer_fn Linux 5.4.246 drm/edid: fix objtool warning in drm_cvt_modes() wifi: rtlwifi: 8192de: correct checking of IQK reload drm/edid: Fix uninitialized variable in drm_cvt_modes() RDMA/bnxt_re: Remove the qp from list only if the qp destroy succeeds RDMA/bnxt_re: Remove set but not used variable 'dev_attr' scsi: dpt_i2o: Do not process completions with invalid addresses scsi: dpt_i2o: Remove broken pass-through ioctl (I2OUSERCMD) regmap: Account for register length when chunking test_firmware: fix the memory leak of the allocated firmware buffer fbcon: Fix null-ptr-deref in soft_cursor ext4: add lockdep annotations for i_data_sem for ea_inode's ext4: disallow ea_inodes with extended attributes ext4: set lockdep subclass for the ea_inode in ext4_xattr_inode_cache_find() ext4: add EA_INODE checking to ext4_iget() tracing/probe: trace_probe_primary_from_call(): checked list_first_entry selinux: don't use make's grouped targets feature yet tty: serial: fsl_lpuart: use UARTCTRL_TXINV to send break instead of UARTCTRL_SBK mmc: vub300: fix invalid response handling wifi: rtlwifi: remove always-true condition pointed out by GCC 12 lib/dynamic_debug.c: use address-of operator on section symbols treewide: Remove uninitialized_var() usage kernel/extable.c: use address-of operator on section symbols eth: sun: cassini: remove dead code gcc-12: disable '-Wdangling-pointer' warning for now ACPI: thermal: 
drop an always true check x86/boot: Wrap literal addresses in absolute_pointer() flow_dissector: work around stack frame size warning ata: libata-scsi: Use correct device no in ata_find_dev() scsi: stex: Fix gcc 13 warnings misc: fastrpc: reject new invocations during device removal misc: fastrpc: return -EPIPE to invocations on device removal usb: gadget: f_fs: Add unbind event before functionfs_unbind net: usb: qmi_wwan: Set DTR quirk for BroadMobi BM818 iio: dac: build ad5758 driver when AD5758 is selected iio: dac: mcp4725: Fix i2c_master_send() return value handling iio: light: vcnl4035: fixed chip ID check HID: wacom: avoid integer overflow in wacom_intuos_inout() HID: google: add jewel USB id iio: adc: mxs-lradc: fix the order of two cleanup operations mailbox: mailbox-test: fix a locking issue in mbox_test_message_write() atm: hide unused procfs functions ALSA: oss: avoid missing-prototype warnings netfilter: conntrack: define variables exp_nat_nla_policy and any_addr with CONFIG_NF_NAT wifi: b43: fix incorrect __packed annotation scsi: core: Decrease scsi_device's iorequest_cnt if dispatch failed arm64/mm: mark private VM_FAULT_X defines as vm_fault_t ARM: dts: stm32: add pin map for CAN controller on stm32f7 wifi: rtl8xxxu: fix authentication timeout due to incorrect RCR value media: dvb-core: Fix use-after-free due to race condition at dvb_ca_en50221 media: dvb-core: Fix kernel WARNING for blocking operation in wait_event*() media: dvb-core: Fix use-after-free due on race condition at dvb_net media: mn88443x: fix !CONFIG_OF error by drop of_match_ptr from ID table media: ttusb-dec: fix memory leak in ttusb_dec_exit_dvb() media: dvb_ca_en50221: fix a size write bug media: netup_unidvb: fix irq init by register it at the end of probe media: dvb-usb: dw2102: fix uninit-value in su3000_read_mac_address media: dvb-usb: digitv: fix null-ptr-deref in digitv_i2c_xfer() media: dvb-usb-v2: rtl28xxu: fix null-ptr-deref in rtl28xxu_i2c_xfer media: dvb-usb-v2: 
ce6230: fix null-ptr-deref in ce6230_i2c_master_xfer() media: dvb-usb-v2: ec168: fix null-ptr-deref in ec168_i2c_xfer() media: dvb-usb: az6027: fix three null-ptr-deref in az6027_i2c_xfer() media: dvb_demux: fix a bug for the continuity counter ASoC: ssm2602: Add workaround for playback distortions xfrm: Check if_id in inbound policy/secpath match ASoC: dwc: limit the number of overrun messages nbd: Fix debugfs_create_dir error checking fbdev: stifb: Fix info entry in sti_struct on error path fbdev: modedb: Add 1920x1080 at 60 Hz video mode media: rcar-vin: Select correct interrupt mode for V4L2_FIELD_ALTERNATE ARM: 9295/1: unwind:fix unwind abort for uleb128 case mailbox: mailbox-test: Fix potential double-free in mbox_test_message_write() watchdog: menz069_wdt: fix watchdog initialisation mtd: rawnand: marvell: don't set the NAND frequency select mtd: rawnand: marvell: ensure timing values are written net: dsa: mv88e6xxx: Increase wait after reset deactivation net/sched: flower: fix possible OOB write in fl_set_geneve_opt() udp6: Fix race condition in udp6_sendmsg & connect net/netlink: fix NETLINK_LIST_MEMBERSHIPS length report ocfs2/dlm: move BITS_TO_BYTES() to bitops.h for wider use net: sched: fix NULL pointer dereference in mq_attach net/sched: Prohibit regrafting ingress or clsact Qdiscs net/sched: Reserve TC_H_INGRESS (TC_H_CLSACT) for ingress (clsact) Qdiscs net/sched: sch_clsact: Only create under TC_H_CLSACT net/sched: sch_ingress: Only create under TC_H_INGRESS tcp: Return user_mss for TCP_MAXSEG in CLOSE/LISTEN state if user_mss set tcp: deny tcp_disconnect() when threads are waiting af_packet: do not use READ_ONCE() in packet_bind() mtd: rawnand: ingenic: fix empty stub helper definitions amd-xgbe: fix the false linkup in xgbe_phy_status af_packet: Fix data-races of pkt_sk(sk)->num. 
netrom: fix info-leak in nr_write_internal() net/mlx5: fw_tracer, Fix event handling dmaengine: pl330: rename _start to prevent build error iommu/amd: Don't block updates to GATag if guest mode is on iommu/rockchip: Fix unwind goto issue RDMA/bnxt_re: Fix return value of bnxt_re_process_raw_qp_pkt_rx RDMA/bnxt_re: Refactor queue pair creation code RDMA/bnxt_re: Enable SRIOV VF support on Broadcom's 57500 adapter series RDMA/efa: Fix unsupported page sizes in device Linux 5.4.245 netfilter: ctnetlink: Support offloaded conntrack entry deletion ipv{4,6}/raw: fix output xfrm lookup wrt protocol binder: fix UAF caused by faulty buffer cleanup bluetooth: Add cmd validity checks at the start of hci_sock_ioctl() io_uring: have io_kill_timeout() honor the request references io_uring: don't drop completion lock before timer is fully initialized io_uring: always grab lock in io_cancel_async_work() cdc_ncm: Fix the build warning net/mlx5: Devcom, serialize devcom registration net/mlx5: devcom only supports 2 ports fs: fix undefined behavior in bit shift for SB_NOUSER power: supply: bq24190: Call power_supply_changed() after updating input current power: supply: core: Refactor power_supply_set_input_current_limit_from_supplier() power: supply: bq27xxx: After charger plug in/out wait 0.5s for things to stabilize net: cdc_ncm: Deal with too low values of dwNtbOutMaxSize cdc_ncm: Implement the 32-bit version of NCM Transfer Block Linux 5.4.244 3c589_cs: Fix an error handling path in tc589_probe() net/mlx5: Devcom, fix error flow in mlx5_devcom_register_device net/mlx5: Fix error message when failing to allocate device memory forcedeth: Fix an error handling path in nv_probe() ASoC: Intel: Skylake: Fix declaration of enum skl_ch_cfg x86/show_trace_log_lvl: Ensure stack pointer is aligned, again xen/pvcalls-back: fix double frees with pvcalls_new_active_socket() coresight: Fix signedness bug in tmc_etr_buf_insert_barrier_packet() power: supply: sbs-charger: Fix INHIBITED bit for 
Status reg power: supply: bq27xxx: Fix poll_interval handling and races on remove power: supply: bq27xxx: Fix I2C IRQ race on remove power: supply: bq27xxx: Fix bq27xxx_battery_update() race condition power: supply: leds: Fix blink to LED on transition ipv6: Fix out-of-bounds access in ipv6_find_tlv() bpf: Fix mask generation for 32-bit narrow loads of 64-bit fields selftests: fib_tests: mute cleanup error message net: fix skb leak in __skb_tstamp_tx() media: radio-shark: Add endpoint checks USB: sisusbvga: Add endpoint checks USB: core: Add routines for endpoint checks in old drivers udplite: Fix NULL pointer dereference in __sk_mem_raise_allocated(). net: fix stack overflow when LRO is disabled for virtual interfaces fbdev: udlfb: Fix endpoint check debugobjects: Don't wake up kswapd from fill_pool() x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms parisc: Fix flush_dcache_page() for usage from irq context selftests/memfd: Fix unknown type name build failure x86/mm: Avoid incomplete Global INVLPG flushes btrfs: use nofs when cleaning up aborted transactions gpio: mockup: Fix mode of debugfs files parisc: Allow to reboot machine after system halt parisc: Handle kgdb breakpoints only in kernel context m68k: Move signal frame following exception on 68020/030 ALSA: hda/realtek: Enable headset onLenovo M70/M90 ALSA: hda/ca0132: add quirk for EVGA X299 DARK mt76: mt7615: Fix build with older compilers spi: fsl-cpm: Use 16 bit mode for large transfers with even size spi: fsl-spi: Re-organise transfer bits_per_word adaptation watchdog: sp5100_tco: Immediately trigger upon starting. 
s390/qdio: fix do_sqbs() inline assembly constraint s390/qdio: get rid of register asm vc_screen: reload load of struct vc_data pointer in vcs_write() to avoid UAF vc_screen: rewrite vcs_size to accept vc, not inode usb: gadget: u_ether: Fix host MAC address case usb: gadget: u_ether: Convert prints to device prints lib/string_helpers: Introduce string_upper() and string_lower() helpers HID: wacom: add three styli to wacom_intuos_get_tool_type HID: wacom: Add new Intuos Pro Small (PTH-460) device IDs HID: wacom: Force pen out of prox if no events have been received in a while netfilter: nf_tables: hold mutex on netns pre_exit path netfilter: nf_tables: validate NFTA_SET_ELEM_OBJREF based on NFT_SET_OBJECT flag netfilter: nf_tables: stricter validation of element data netfilter: nf_tables: allow up to 64 bytes in the set element data area netfilter: nf_tables: add nft_setelem_parse_key() netfilter: nf_tables: validate registers coming from userspace. netfilter: nftables: statify nft_parse_register() netfilter: nftables: add nft_parse_register_store() and use it netfilter: nftables: add nft_parse_register_load() and use it nilfs2: fix use-after-free bug of nilfs_root in nilfs_evict_inode() powerpc/64s/radix: Fix soft dirty tracking tpm/tpm_tis: Disable interrupts for more Lenovo devices ceph: force updating the msg pointer in non-split case serial: Add support for Advantech PCI-1611U card statfs: enforce statfs[64] structure initialization KVM: x86: do not report a vCPU as preempted outside instruction boundaries can: kvaser_pciefd: Disable interrupts in probe error path can: kvaser_pciefd: Do not send EFLUSH command on TFD interrupt can: kvaser_pciefd: Clear listen-only bit if not explicitly requested can: kvaser_pciefd: Empty SRB buffer in probe can: kvaser_pciefd: Call request_irq() before enabling interrupts can: kvaser_pciefd: Set CAN_STATE_STOPPED in kvaser_pciefd_stop() can: j1939: recvmsg(): allow MSG_CMSG_COMPAT flag ALSA: hda/realtek: Add quirk for 2nd ASUS 
GU603 ALSA: hda/realtek: Add a quirk for HP EliteDesk 805 ALSA: hda: Add NVIDIA codec IDs a3 through a7 to patch table ALSA: hda: Fix Oops by 9.1 surround channel names usb: typec: altmodes/displayport: fix pin_assignment_show usb: dwc3: debugfs: Resume dwc3 before accessing registers USB: UHCI: adjust zhaoxin UHCI controllers OverCurrent bit value usb-storage: fix deadlock when a scsi command timeouts more than once USB: usbtmc: Fix direction for 0-length ioctl control messages vlan: fix a potential uninit-value in vlan_dev_hard_start_xmit() igb: fix bit_shift to be in [1..8] range cassini: Fix a memory leak in the error handling path of cas_init_one() wifi: iwlwifi: mvm: don't trust firmware n_channels net: bcmgenet: Restore phy_stop() depending upon suspend/close net: bcmgenet: Remove phy_stop() from bcmgenet_netif_stop() net: nsh: Use correct mac_offset to unwind gso skb in nsh_gso_segment() drm/exynos: fix g2d_open/close helper function definitions media: netup_unidvb: fix use-after-free at del_timer() net: hns3: fix reset delay time to avoid configuration timeout net: hns3: fix sending pfc frames after reset issue erspan: get the proto with the md version for collect_md ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode ip6_gre: Make o_seqno start from 0 in native mode ip6_gre: Fix skb_under_panic in __gre6_xmit() serial: arc_uart: fix of_iomap leak in `arc_serial_probe` vsock: avoid to close connected socket after the timeout ALSA: firewire-digi00x: prevent potential use after free net: fec: Better handle pm_runtime_get() failing in .remove() af_key: Reject optional tunnel/BEET mode templates in outbound policies cpupower: Make TSC read per CPU for Mperf monitor ASoC: fsl_micfil: register platform component before registering cpu dai btrfs: fix space cache inconsistency after error loading it from disk btrfs: replace calls to btrfs_find_free_ino with btrfs_find_free_objectid mfd: dln2: Fix memory leak in dln2_probe() phy: st: miphy28lp: use 
_poll_timeout functions for waits Input: xpad - add constants for GIP interface numbers iommu/arm-smmu-v3: Acknowledge pri/event queue overflow if any clk: tegra20: fix gcc-7 constant overflow warning RDMA/core: Fix multiple -Warray-bounds warnings recordmcount: Fix memory leaks in the uwrite function sched: Fix KCSAN noinstr violation mcb-pci: Reallocate memory region to avoid memory overlapping serial: 8250: Reinit port->pm on port specific driver unbind usb: typec: tcpm: fix multiple times discover svids error HID: wacom: generic: Set battery quirk only when we see battery data spi: spi-imx: fix MX51_ECSPI_* macros when cs > 3 HID: logitech-hidpp: Reconcile USB and Unifying serials HID: logitech-hidpp: Don't use the USB serial for USB devices staging: rtl8192e: Replace macro RTL_PCI_DEVICE with PCI_DEVICE Bluetooth: L2CAP: fix "bad unlock balance" in l2cap_disconnect_rsp wifi: iwlwifi: dvm: Fix memcpy: detected field-spanning write backtrace wifi: iwlwifi: pcie: Fix integer overflow in iwl_write_to_user_buf wifi: iwlwifi: pcie: fix possible NULL pointer dereference samples/bpf: Fix fout leak in hbm's run_bpf_prog f2fs: fix to drop all dirty pages during umount() if cp_error is set ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa() ext4: set goal start correctly in ext4_mb_normalize_request gfs2: Fix inode height consistency check scsi: message: mptlan: Fix use after free bug in mptlan_remove() due to race condition lib: cpu_rmap: Avoid use after free on rmap->obj array entries scsi: target: iscsit: Free cmds before session free net: Catch invalid index in XPS mapping net: pasemi: Fix return type of pasemi_mac_start_tx() scsi: lpfc: Prevent lpfc_debugfs_lockstat_write() buffer overflow ext2: Check block size validity during mount wifi: brcmfmac: cfg80211: Pass the PMK in binary instead of hex ACPICA: ACPICA: check null return of ACPI_ALLOCATE_ZEROED in acpi_db_display_objects ACPICA: Avoid undefined behavior: applying zero offset to null 
pointer drm/tegra: Avoid potential 32-bit integer overflow ACPI: EC: Fix oops when removing custom query handlers firmware: arm_sdei: Fix sleep from invalid context BUG memstick: r592: Fix UAF bug in r592_remove due to race condition regmap: cache: Return error in cache sync operations for REGCACHE_NONE drm/amd/display: Use DC_LOG_DC in the trasform pixel function fs: hfsplus: remove WARN_ON() from hfsplus_cat_{read,write}_inode() af_unix: Fix data races around sk->sk_shutdown. af_unix: Fix a data race of sk->sk_receive_queue->qlen. net: datagram: fix data-races in datagram_poll() ipvlan:Fix out-of-bounds caused by unclear skb->cb net: add vlan_get_protocol_and_depth() helper net: tap: check vlan with eth_type_vlan() method net: annotate sk->sk_err write from do_recvmmsg() netlink: annotate accesses to nlk->cb_running netfilter: conntrack: fix possible bug_on with enable_hooks=1 net: Fix load-tearing on sk->sk_stamp in sock_recv_cmsgs(). linux/dim: Do nothing if no time delta between samples ARM: 9296/1: HP Jornada 7XX: fix kernel-doc warnings drm/mipi-dsi: Set the fwnode for mipi_dsi_device driver core: add a helper to setup both the of_node and fwnode of a device Linux 5.4.243 drm/amd/display: Fix hang when skipping modeset mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock drm/exynos: move to use request_irq by IRQF_NO_AUTOEN flag drm/msm/adreno: Fix null ptr access in adreno_gpu_cleanup() firmware: raspberrypi: fix possible memory leak in rpi_firmware_probe() drm/msm: Fix double pm_runtime_disable() call PM: domains: Restore comment indentation for generic_pm_domain.child_links printk: declare printk_deferred_{enter,safe}() in include/linux/printk.h PCI: pciehp: Fix AB-BA deadlock between reset_lock and device_lock PCI: pciehp: Use down_read/write_nested(reset_lock) to fix lockdep errors drbd: correctly submit flush bio on barrier serial: 8250: Fix serial8250_tx_empty() race with DMA Tx tty: Prevent writing chars during tcsetattr 
TCSADRAIN/FLUSH ext4: fix invalid free tracking in ext4_xattr_move_to_block() ext4: remove a BUG_ON in ext4_mb_release_group_pa() ext4: bail out of ext4_xattr_ibody_get() fails for any reason ext4: add bounds checking in get_max_inline_xattr_value_size() ext4: fix deadlock when converting an inline directory in nojournal mode ext4: improve error recovery code paths in __ext4_remount() ext4: fix data races when using cached status extents ext4: avoid a potential slab-out-of-bounds in ext4_group_desc_csum ext4: fix WARNING in mb_find_extent HID: wacom: insert timestamp to packed Bluetooth (BT) events HID: wacom: Set a default resolution for older tablets drm/amdgpu: disable sdma ecc irq only when sdma RAS is enabled in suspend drm/amdgpu/gfx: disable gfx9 cp_ecc_error_irq only when enabling legacy gfx ras drm/amdgpu: fix an amdgpu_irq_put() issue in gmc_v9_0_hw_fini() drm/panel: otm8009a: Set backlight parent to panel device f2fs: fix potential corruption when moving a directory ARM: dts: s5pv210: correct MIPI CSIS clock name ARM: dts: exynos: fix WM8960 clock name in Itop Elite remoteproc: st: Call of_node_put() on iteration error remoteproc: stm32: Call of_node_put() on iteration error sh: nmi_debug: fix return value of __setup handler sh: init: use OF_EARLY_FLATTREE for early init sh: math-emu: fix macro redefined warning inotify: Avoid reporting event with invalid wd platform/x86: touchscreen_dmi: Add info for the Dexp Ursus KX210i cifs: fix pcchunk length type in smb2_copychunk_range btrfs: print-tree: parent bytenr must be aligned to sector size btrfs: don't free qgroup space unless specified btrfs: fix btrfs_prev_leaf() to not return the same key twice perf symbols: Fix return incorrect build_id size in elf_read_build_id() perf map: Delete two variable initialisations before null pointer checks in sort__sym_from_cmp() perf vendor events power9: Remove UTF-8 characters from JSON files virtio_net: suppress cpu stall when free_unused_bufs virtio_net: split 
free_unused_bufs() net: dsa: mt7530: fix corrupt frames using trgmii on 40 MHz XTAL MT7621 ALSA: caiaq: input: Add error handling for unsupported input methods in `snd_usb_caiaq_input_init` drm/amdgpu: add a missing lock for AMDGPU_SCHED af_packet: Don't send zero-byte data in packet_sendmsg_spkt(). ionic: remove noise from ethtool rxnfc error msg rxrpc: Fix hard call timeout units net/sched: act_mirred: Add carrier check writeback: fix call of incorrect macro net: dsa: mv88e6xxx: add mv88e6321 rsvd2cpu sit: update dev->needed_headroom in ipip6_tunnel_bind_dev() net/sched: cls_api: remove block_cb from driver_list before freeing net/ncsi: clear Tx enable mode when handling a Config required AEN relayfs: fix out-of-bounds access in relay_file_read kernel/relay.c: fix read_pos error when multiple readers crypto: safexcel - Cleanup ring IRQ workqueues on load failure crypto: inside-secure - irq balance dm verity: fix error handling for check_at_most_once on FEC dm verity: skip redundant verity_handle_err() on I/O errors mailbox: zynqmp: Fix counts of child nodes mailbox: zynq: Switch to flexible array to simplify code tick/nohz: Fix cpu_is_hotpluggable() by checking with nohz subsystem nohz: Add TICK_DEP_BIT_RCU netfilter: nf_tables: deactivate anonymous set from preparation phase debugobject: Ensure pool refill (again) perf intel-pt: Fix CYC timestamps after standalone CBR perf auxtrace: Fix address filter entire kernel size dm ioctl: fix nested locking in table_clear() to remove deadlock concern dm flakey: fix a crash with invalid table line dm integrity: call kmem_cache_destroy() in dm_integrity_init() error path dm clone: call kmem_cache_destroy() in dm_clone_init() error path s390/dasd: fix hanging blockdevice after request requeue btrfs: scrub: reject unsupported scrub flags scripts/gdb: fix lx-timerlist for Python3 clk: rockchip: rk3399: allow clk_cifout to force clk_cifout_src to reparent wifi: rtl8xxxu: RTL8192EU always needs full init mailbox: zynqmp: Fix 
typo in IPI documentation mailbox: zynqmp: Fix IPI isr handling md/raid10: fix null-ptr-deref in raid10_sync_request nilfs2: fix infinite loop in nilfs_mdt_get_block() nilfs2: do not write dirty data after degenerating to read-only parisc: Fix argument pointer in real64_call_asm() afs: Fix updating of i_size with dv jump from server dmaengine: at_xdmac: do not enable all cyclic channels dmaengine: dw-edma: Fix to enable to issue dma request on DMA processing dmaengine: dw-edma: Fix to change for continuous transfer phy: tegra: xusb: Add missing tegra_xusb_port_unregister for usb2_port and ulpi_port pwm: mtk-disp: Disable shadow registers before setting backlight values pwm: mtk-disp: Adjust the clocks to avoid them mismatch pwm: mtk-disp: Don't check the return code of pwmchip_remove() dmaengine: mv_xor_v2: Fix an error code. leds: TI_LMU_COMMON: select REGMAP instead of depending on it ext4: fix use-after-free read in ext4_find_extent for bigalloc + inline openrisc: Properly store r31 to pt_regs on unhandled exceptions clocksource/drivers/davinci: Fix memory leak in davinci_timer_register when init fails clocksource: davinci: axe a pointless __GFP_NOFAIL clocksource/drivers/davinci: Avoid trailing '\n' hidden in pr_fmt() RDMA/mlx5: Use correct device num_ports when modify DC SUNRPC: remove the maximum number of retries in call_bind_status Input: raspberrypi-ts - fix refcount leak in rpi_ts_probe input: raspberrypi-ts: Release firmware handle when not needed firmware: raspberrypi: Introduce devm_rpi_firmware_get() firmware: raspberrypi: Keep count of all consumers NFSv4.1: Always send a RECLAIM_COMPLETE after establishing lease IB/hfi1: Fix SDMA mmu_rb_node not being evicted in LRU order RDMA/siw: Remove namespace check from siw_netdev_event() clk: add missing of_node_put() in "assigned-clocks" property parsing power: supply: generic-adc-battery: fix unit scaling rtc: meson-vrtc: Use ktime_get_real_ts64() to get the current time RDMA/mlx4: Prevent shift wrapping in 
set_user_sq_size() rtc: omap: include header for omap_rtc_power_off_program prototype RDMA/rdmavt: Delete unnecessary NULL check RDMA/siw: Fix potential page_array out of range access perf/core: Fix hardlockup failure caused by perf throttle powerpc/rtas: use memmove for potentially overlapping buffer copy macintosh: via-pmu-led: requires ATA to be set powerpc/sysdev/tsi108: fix resource printk format warnings powerpc/wii: fix resource printk format warnings powerpc/mpc512x: fix resource printk format warning macintosh/windfarm_smu_sat: Add missing of_node_put() spmi: Add a check for remove callback when removing a SPMI driver staging: rtl8192e: Fix W_DISABLE# does not work after stop/start serial: 8250: Add missing wakeup event reporting tty: serial: fsl_lpuart: adjust buffer length to the intended size firmware: stratix10-svc: Fix an NULL vs IS_ERR() bug in probe usb: mtu3: fix kernel panic at qmu transfer done irq handler usb: chipidea: fix missing goto in `ci_hdrc_probe` sh: sq: Fix incorrect element size for allocating bitmap buffer uapi/linux/const.h: prefer ISO-friendly __typeof__ spi: cadence-quadspi: fix suspend-resume implementations mtd: spi-nor: cadence-quadspi: Handle probe deferral while requesting DMA channel mtd: spi-nor: cadence-quadspi: Don't initialize rx_dma_complete on failure mtd: spi-nor: cadence-quadspi: Provide a way to disable DAC mode mtd: spi-nor: cadence-quadspi: Make driver independent of flash geometry scripts/gdb: bail early if there are no generic PD PM: domains: Fix up terminology with parent/child scripts/gdb: bail early if there are no clocks ia64: salinfo: placate defined-but-not-used warning ia64: mm/contig: fix section mismatch warning/error of: Fix modalias string generation vmci_host: fix a race condition in vmci_host_poll() causing GPF spi: fsl-spi: Fix CPM/QE mode Litte Endian spi: qup: Don't skip cleanup in remove's error path linux/vt_buffer.h: allow either builtin or modular for macros ASoC: es8316: Handle optional IRQ 
assignment ASoC: es8316: Use IRQF_NO_AUTOEN when requesting the IRQ genirq: Add IRQF_NO_AUTOEN for request_irq/nmi() PCI: imx6: Install the fault handler only on compatible match usb: gadget: udc: renesas_usb3: Fix use after free bug in renesas_usb3_remove due to race condition iio: light: max44009: add missing OF device matching fpga: bridge: fix kernel-doc parameter description usb: host: xhci-rcar: remove leftover quirk handling pstore: Revert pmsg_lock back to a normal mutex tcp/udp: Fix memleaks of sk and zerocopy skbs with TX timestamp. net: amd: Fix link leak when verifying config failed netlink: Use copy_to_user() for optval in netlink_getsockopt(). Revert "Bluetooth: btsdio: fix use after free bug in btsdio_remove due to unfinished work" ipv4: Fix potential uninit variable access bug in __ip_make_skb() netfilter: nf_tables: don't write table validation state without mutex bpf: Don't EFAULT for getsockopt with optval=NULL ixgbe: Enable setting RSS table to default values ixgbe: Allow flow hash to be set via ethtool wifi: iwlwifi: mvm: check firmware response size wifi: iwlwifi: make the loop for card preparation effective md/raid10: fix memleak of md thread md: update the optimal I/O size on reshape md/raid10: fix memleak for 'conf->bio_split' md/raid10: fix leak of 'r10bio->remaining' for recovery bpf, sockmap: Revert buggy deadlock fix in the sockhash and sockmap nvme-fcloop: fix "inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage" nvme: fix async event trace event nvme: handle the persistent internal error AER bpf, sockmap: fix deadlocks in the sockhash and sockmap scsi: lpfc: Fix ioremap issues in lpfc_sli4_pci_mem_setup() crypto: drbg - Only fail when jent is unavailable in FIPS mode crypto: drbg - make drbg_prepare_hrng() handle jent instantiation errors bpftool: Fix bug for long instructions in program CFG dumps wifi: rtlwifi: fix incorrect error codes in rtl_debugfs_set_write_reg() wifi: rtlwifi: fix incorrect error codes in 
rtl_debugfs_set_write_rfreg() rtlwifi: Replace RT_TRACE with rtl_dbg rtlwifi: Start changing RT_TRACE into rtl_dbg f2fs: handle dqget error in f2fs_transfer_project_quota() scsi: megaraid: Fix mega_cmd_done() CMDID_INT_CMDS scsi: target: iscsit: Fix TAS handling during conn cleanup net/packet: convert po->auxdata to an atomic flag net/packet: convert po->origdev to an atomic flag net/packet: annotate accesses to po->xmit vlan: partially enable SIOCSHWTSTAMP in container scm: fix MSG_CTRUNC setting condition for SO_PASSSEC wifi: rtw88: mac: Return the original error from rtw_mac_power_switch() wifi: rtw88: mac: Return the original error from rtw_pwr_seq_parser() tools: bpftool: Remove invalid \' json escape wifi: ath6kl: reduce WARN to dev_dbg() in callback wifi: ath5k: fix an off by one check in ath5k_eeprom_read_freq_list() wifi: ath9k: hif_usb: fix memory leak of remain_skbs wifi: ath6kl: minor fix for allocation size tick/common: Align tick period with the HZ tick. tick: Get rid of tick_period tick/sched: Optimize tick_do_update_jiffies64() further tick/sched: Reduce seqcount held scope in tick_do_update_jiffies64() tick/sched: Use tick_next_period for lockless quick check timekeeping: Split jiffies seqlock debugobject: Prevent init race with static objects arm64: kgdb: Set PSTATE.SS to 1 to re-enable single-step x86/ioapic: Don't return 0 from arch_dynirq_lower_bound() regulator: stm32-pwr: fix of_iomap leak media: rc: gpio-ir-recv: Fix support for wake-up media: rcar_fdp1: Fix refcount leak in probe and remove function media: rcar_fdp1: Fix the correct variable assignments media: rcar_fdp1: Make use of the helper function devm_platform_ioremap_resource() media: rcar_fdp1: fix pm_runtime_get_sync() usage count media: rcar_fdp1: simplify error check logic at fdp_open() media: saa7134: fix use after free bug in saa7134_finidev due to race condition media: dm1105: Fix use after free bug in dm1105_remove due to race condition x86/apic: Fix atomic update of offset 
in reserve_eilvt_offset() regulator: core: Avoid lockdep reports when resolving supplies regulator: core: Consistently set mutex_owner when using ww_mutex_lock_slow() drm/lima/lima_drv: Add missing unwind goto in lima_pdev_probe() mmc: sdhci-of-esdhc: fix quirk to ignore command inhibit for data drm/msm/adreno: drop bogus pm_runtime_set_active() drm/msm/adreno: Defer enabling runpm until hw_init() drm/msm: fix unbalanced pm_runtime_enable in adreno_gpu_{init, cleanup} firmware: qcom_scm: Clear download bit during reboot media: av7110: prevent underflow in write_ts_to_decoder() media: uapi: add MEDIA_BUS_FMT_METADATA_FIXED media bus format. media: bdisp: Add missing check for create_workqueue ARM: dts: qcom: ipq8064: Fix the PCI I/O port range ARM: dts: qcom: ipq8064: reduce pci IO size to 64K ARM: dts: qcom: ipq4019: Fix the PCI I/O port range EDAC/skx: Fix overflows on the DRAM row address mapping arrays arm64: dts: renesas: r8a774c0: Remove bogus voltages from OPP table arm64: dts: renesas: r8a77990: Remove bogus voltages from OPP table drm/probe-helper: Cancel previous job before starting new one drm/vgem: add missing mutex_destroy drm/rockchip: Drop unbalanced obj unref erofs: fix potential overflow calculating xattr_isize erofs: stop parsing non-compact HEAD index if clusterofs is invalid tpm, tpm_tis: Do not skip reset of original interrupt vector selinux: ensure av_permissions.h is built when needed selinux: fix Makefile dependencies of flask.h ubifs: Free memory for tmpfile name ubi: Fix return value overwrite issue in try_write_vid_and_data() ubifs: Fix memleak when insert_old_idx() failed Revert "ubifs: dirty_cow_znode: Fix memleak in error handling path" i2c: omap: Fix standard mode false ACK readings KVM: nVMX: Emulate NOPs in L2, and PAUSE if it's not intercepted reiserfs: Add security prefix to xattr name in reiserfs_security_write() ring-buffer: Sync IRQ works before buffer destruction pwm: meson: Fix g12a ao clk81 name pwm: meson: Fix axg ao mux 
parents kheaders: Use array declaration instead of char ipmi: fix SSIF not responding under certain cond. ipmi:ssif: Add send_retries increment MIPS: fw: Allow firmware to pass a empty env xhci: fix debugfs register accesses while suspended debugfs: regset32: Add Runtime PM support staging: iio: resolver: ads1210: fix config mode perf sched: Cast PTHREAD_STACK_MIN to int as it may turn into sysconf(__SC_THREAD_STACK_MIN_VALUE) USB: dwc3: fix runtime pm imbalance on unbind USB: dwc3: fix runtime pm imbalance on probe errors asm-generic/io.h: suppress endianness warnings for readq() and writeq() ASoC: Intel: bytcr_rt5640: Add quirk for the Acer Iconia One 7 B1-750 iio: adc: palmas_gpadc: fix NULL dereference on rmmod USB: serial: option: add UNISOC vendor and TOZED LT70C product bluetooth: Perform careful capability checks in hci_sock_ioctl() drm/fb-helper: set x/yres_virtual in drm_fb_helper_check_var wifi: brcmfmac: slab-out-of-bounds read in brcmf_get_assoc_ies() counter: 104-quad-8: Fix race condition between FLAG and CNTR reads Conflicts: drivers/firmware/qcom_scm.c drivers/md/dm-verity-target.c drivers/usb/dwc3/core.c drivers/usb/dwc3/debugfs.c drivers/usb/gadget/function/f_fs.c Change-Id: Iedad1fcca99a9b739e08ea6d60988800b3a7aefa
1783 lines
44 KiB
C
1783 lines
44 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* linux/kernel/exit.c
|
|
*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched/autogroup.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/sched/stat.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/sched/cputime.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/module.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/completion.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/iocontext.h>
|
|
#include <linux/key.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/acct.h>
|
|
#include <linux/tsacct_kern.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/freezer.h>
|
|
#include <linux/binfmts.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/profile.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/taskstats_kern.h>
|
|
#include <linux/delayacct.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/posix-timers.h>
|
|
#include <linux/cn_proc.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/futex.h>
|
|
#include <linux/pipe_fs_i.h>
|
|
#include <linux/audit.h> /* for audit_free() */
|
|
#include <linux/resource.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/task_io_accounting_ops.h>
|
|
#include <linux/tracehook.h>
|
|
#include <linux/fs_struct.h>
|
|
#include <linux/init_task.h>
|
|
#include <linux/perf_event.h>
|
|
#include <trace/events/sched.h>
|
|
#include <linux/hw_breakpoint.h>
|
|
#include <linux/oom.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/shm.h>
|
|
#include <linux/kcov.h>
|
|
#include <linux/random.h>
|
|
#include <linux/rcuwait.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/sysfs.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
#include <asm/unistd.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/mmu_context.h>
|
|
|
|
/*
 * Default for oops_limit: large enough that a machine whose kernel crashes
 * every now and then is not taken down by the limit itself, yet small enough
 * that neither a 32-bit refcount nor the ldsem writer count can be
 * overflowed.
 */
static unsigned int oops_limit = 10000;

#ifdef CONFIG_SYSCTL
/* Single-entry sysctl table publishing oops_limit as an unsigned int. */
static struct ctl_table kern_exit_table[] = {
	{
		.procname	= "oops_limit",
		.mode		= 0644,
		.data		= &oops_limit,
		.maxlen		= sizeof(oops_limit),
		.proc_handler	= proc_douintvec,
	},
	{ }	/* empty trailing entry */
};

/* Late initcall: register kern_exit_table under "kernel". Always returns 0. */
static __init int kernel_exit_sysctls_init(void)
{
	register_sysctl_init("kernel", kern_exit_table);
	return 0;
}
late_initcall(kernel_exit_sysctls_init);
#endif
|
|
|
|
/* Counter reported by the read-only "oops_count" attribute below. */
static atomic_t oops_count = ATOMIC_INIT(0);

#ifdef CONFIG_SYSFS
/*
 * sysfs show handler for oops_count: emits the current counter value as a
 * decimal followed by a newline and returns whatever sysfs_emit() returns.
 */
static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr,
			       char *page)
{
	int count = atomic_read(&oops_count);

	return sysfs_emit(page, "%d\n", count);
}

static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count);

/*
 * Late initcall: add the oops_count attribute to kernel_kobj. The result of
 * sysfs_add_file_to_group() is not checked; this always returns 0.
 */
static __init int kernel_exit_sysfs_init(void)
{
	sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL);
	return 0;
}
late_initcall(kernel_exit_sysfs_init);
#endif
|
|
|
|
static void __unhash_process(struct task_struct *p, bool group_dead)
|
|
{
|
|
nr_threads--;
|
|
detach_pid(p, PIDTYPE_PID);
|
|
if (group_dead) {
|
|
detach_pid(p, PIDTYPE_TGID);
|
|
detach_pid(p, PIDTYPE_PGID);
|
|
detach_pid(p, PIDTYPE_SID);
|
|
|
|
list_del_rcu(&p->tasks);
|
|
list_del_init(&p->sibling);
|
|
__this_cpu_dec(process_counts);
|
|
}
|
|
list_del_rcu(&p->thread_group);
|
|
list_del_rcu(&p->thread_node);
|
|
}
|
|
|
|
/*
|
|
* This function expects the tasklist_lock write-locked.
|
|
*/
|
|
static void __exit_signal(struct task_struct *tsk)
|
|
{
|
|
struct signal_struct *sig = tsk->signal;
|
|
bool group_dead = thread_group_leader(tsk);
|
|
struct sighand_struct *sighand;
|
|
struct tty_struct *tty;
|
|
u64 utime, stime;
|
|
|
|
sighand = rcu_dereference_check(tsk->sighand,
|
|
lockdep_tasklist_lock_is_held());
|
|
spin_lock(&sighand->siglock);
|
|
|
|
#ifdef CONFIG_POSIX_TIMERS
|
|
posix_cpu_timers_exit(tsk);
|
|
if (group_dead) {
|
|
posix_cpu_timers_exit_group(tsk);
|
|
} else {
|
|
/*
|
|
* This can only happen if the caller is de_thread().
|
|
* FIXME: this is the temporary hack, we should teach
|
|
* posix-cpu-timers to handle this case correctly.
|
|
*/
|
|
if (unlikely(has_group_leader_pid(tsk)))
|
|
posix_cpu_timers_exit_group(tsk);
|
|
}
|
|
#endif
|
|
|
|
if (group_dead) {
|
|
tty = sig->tty;
|
|
sig->tty = NULL;
|
|
} else {
|
|
/*
|
|
* If there is any task waiting for the group exit
|
|
* then notify it:
|
|
*/
|
|
if (sig->notify_count > 0 && !--sig->notify_count)
|
|
wake_up_process(sig->group_exit_task);
|
|
|
|
if (tsk == sig->curr_target)
|
|
sig->curr_target = next_thread(tsk);
|
|
}
|
|
|
|
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
|
|
sizeof(unsigned long long));
|
|
|
|
/*
|
|
* Accumulate here the counters for all threads as they die. We could
|
|
* skip the group leader because it is the last user of signal_struct,
|
|
* but we want to avoid the race with thread_group_cputime() which can
|
|
* see the empty ->thread_head list.
|
|
*/
|
|
task_cputime(tsk, &utime, &stime);
|
|
write_seqlock(&sig->stats_lock);
|
|
sig->utime += utime;
|
|
sig->stime += stime;
|
|
sig->gtime += task_gtime(tsk);
|
|
sig->min_flt += tsk->min_flt;
|
|
sig->maj_flt += tsk->maj_flt;
|
|
sig->nvcsw += tsk->nvcsw;
|
|
sig->nivcsw += tsk->nivcsw;
|
|
sig->inblock += task_io_get_inblock(tsk);
|
|
sig->oublock += task_io_get_oublock(tsk);
|
|
task_io_accounting_add(&sig->ioac, &tsk->ioac);
|
|
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
|
|
sig->nr_threads--;
|
|
__unhash_process(tsk, group_dead);
|
|
write_sequnlock(&sig->stats_lock);
|
|
|
|
/*
|
|
* Do this under ->siglock, we can race with another thread
|
|
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
|
|
*/
|
|
flush_sigqueue(&tsk->pending);
|
|
tsk->sighand = NULL;
|
|
spin_unlock(&sighand->siglock);
|
|
|
|
__cleanup_sighand(sighand);
|
|
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
|
|
if (group_dead) {
|
|
flush_sigqueue(&sig->shared_pending);
|
|
tty_kref_put(tty);
|
|
}
|
|
}
|
|
|
|
static void delayed_put_task_struct(struct rcu_head *rhp)
|
|
{
|
|
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
|
|
|
|
perf_event_delayed_put(tsk);
|
|
trace_sched_process_free(tsk);
|
|
put_task_struct(tsk);
|
|
}
|
|
|
|
void put_task_struct_rcu_user(struct task_struct *task)
|
|
{
|
|
if (refcount_dec_and_test(&task->rcu_users))
|
|
call_rcu(&task->rcu, delayed_put_task_struct);
|
|
}
|
|
|
|
void release_task(struct task_struct *p)
|
|
{
|
|
struct task_struct *leader;
|
|
int zap_leader;
|
|
repeat:
|
|
/* don't need to get the RCU readlock here - the process is dead and
|
|
* can't be modifying its own credentials. But shut RCU-lockdep up */
|
|
rcu_read_lock();
|
|
atomic_dec(&__task_cred(p)->user->processes);
|
|
rcu_read_unlock();
|
|
|
|
proc_flush_task(p);
|
|
cgroup_release(p);
|
|
|
|
write_lock_irq(&tasklist_lock);
|
|
ptrace_release_task(p);
|
|
__exit_signal(p);
|
|
|
|
/*
|
|
* If we are the last non-leader member of the thread
|
|
* group, and the leader is zombie, then notify the
|
|
* group leader's parent process. (if it wants notification.)
|
|
*/
|
|
zap_leader = 0;
|
|
leader = p->group_leader;
|
|
if (leader != p && thread_group_empty(leader)
|
|
&& leader->exit_state == EXIT_ZOMBIE) {
|
|
/*
|
|
* If we were the last child thread and the leader has
|
|
* exited already, and the leader's parent ignores SIGCHLD,
|
|
* then we are the one who should release the leader.
|
|
*/
|
|
zap_leader = do_notify_parent(leader, leader->exit_signal);
|
|
if (zap_leader)
|
|
leader->exit_state = EXIT_DEAD;
|
|
}
|
|
|
|
write_unlock_irq(&tasklist_lock);
|
|
release_thread(p);
|
|
put_task_struct_rcu_user(p);
|
|
|
|
p = leader;
|
|
if (unlikely(zap_leader))
|
|
goto repeat;
|
|
}
|
|
|
|
void rcuwait_wake_up(struct rcuwait *w)
|
|
{
|
|
struct task_struct *task;
|
|
|
|
rcu_read_lock();
|
|
|
|
/*
|
|
* Order condition vs @task, such that everything prior to the load
|
|
* of @task is visible. This is the condition as to why the user called
|
|
* rcuwait_trywake() in the first place. Pairs with set_current_state()
|
|
* barrier (A) in rcuwait_wait_event().
|
|
*
|
|
* WAIT WAKE
|
|
* [S] tsk = current [S] cond = true
|
|
* MB (A) MB (B)
|
|
* [L] cond [L] tsk
|
|
*/
|
|
smp_mb(); /* (B) */
|
|
|
|
task = rcu_dereference(w->task);
|
|
if (task)
|
|
wake_up_process(task);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/*
|
|
* Determine if a process group is "orphaned", according to the POSIX
|
|
* definition in 2.2.2.52. Orphaned process groups are not to be affected
|
|
* by terminal-generated stop signals. Newly orphaned process groups are
|
|
* to receive a SIGHUP and a SIGCONT.
|
|
*
|
|
* "I ask you, have you ever known what it is to be an orphan?"
|
|
*/
|
|
static int will_become_orphaned_pgrp(struct pid *pgrp,
|
|
struct task_struct *ignored_task)
|
|
{
|
|
struct task_struct *p;
|
|
|
|
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
|
|
if ((p == ignored_task) ||
|
|
(p->exit_state && thread_group_empty(p)) ||
|
|
is_global_init(p->real_parent))
|
|
continue;
|
|
|
|
if (task_pgrp(p->real_parent) != pgrp &&
|
|
task_session(p->real_parent) == task_session(p))
|
|
return 0;
|
|
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
|
|
|
|
return 1;
|
|
}
|
|
|
|
int is_current_pgrp_orphaned(void)
|
|
{
|
|
int retval;
|
|
|
|
read_lock(&tasklist_lock);
|
|
retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
|
|
read_unlock(&tasklist_lock);
|
|
|
|
return retval;
|
|
}
|
|
|
|
static bool has_stopped_jobs(struct pid *pgrp)
|
|
{
|
|
struct task_struct *p;
|
|
|
|
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
|
|
if (p->signal->flags & SIGNAL_STOP_STOPPED)
|
|
return true;
|
|
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Check to see if any process groups have become orphaned as
|
|
* a result of our exiting, and if they have any stopped jobs,
|
|
* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
|
|
*/
|
|
static void
|
|
kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
|
|
{
|
|
struct pid *pgrp = task_pgrp(tsk);
|
|
struct task_struct *ignored_task = tsk;
|
|
|
|
if (!parent)
|
|
/* exit: our father is in a different pgrp than
|
|
* we are and we were the only connection outside.
|
|
*/
|
|
parent = tsk->real_parent;
|
|
else
|
|
/* reparent: our child is in a different pgrp than
|
|
* we are, and it was the only connection outside.
|
|
*/
|
|
ignored_task = NULL;
|
|
|
|
if (task_pgrp(parent) != pgrp &&
|
|
task_session(parent) == task_session(tsk) &&
|
|
will_become_orphaned_pgrp(pgrp, ignored_task) &&
|
|
has_stopped_jobs(pgrp)) {
|
|
__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
|
|
__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_MEMCG
/*
 * Called for a task that is exiting: if it is the owner of @mm, hand
 * ownership to another task using @mm, or clear mm->owner when there is
 * none.
 */
void mm_update_next_owner(struct mm_struct *mm)
{
	struct task_struct *cand, *proc, *self = current;

retry:
	/*
	 * Nothing to do unless the exiting or execing task is the owner;
	 * otherwise it is someone else's problem.
	 */
	if (mm->owner != self)
		return;
	/*
	 * The owner is exiting/execing and nobody else can take over. Do not
	 * leave mm->owner pointing at a task structure that may be freed.
	 */
	if (atomic_read(&mm->mm_users) <= 1) {
		WRITE_ONCE(mm->owner, NULL);
		return;
	}

	read_lock(&tasklist_lock);

	/* First choice: one of our children. */
	list_for_each_entry(cand, &self->children, sibling) {
		if (cand->mm == mm)
			goto assign_new_owner;
	}

	/* Second choice: one of our siblings. */
	list_for_each_entry(cand, &self->real_parent->children, sibling) {
		if (cand->mm == mm)
			goto assign_new_owner;
	}

	/* Last resort: everything else. We should not get here often. */
	for_each_process(proc) {
		if (proc->flags & PF_KTHREAD)
			continue;
		for_each_thread(proc, cand) {
			if (cand->mm == mm)
				goto assign_new_owner;
			/* A thread with a different mm ends this group's scan. */
			if (cand->mm)
				break;
		}
	}
	read_unlock(&tasklist_lock);
	/*
	 * No owner was found although mm_users > 1. Most likely we are
	 * racing with swapoff (try_to_unuse()), /proc, ptrace or page
	 * migration (get_task_mm()). Mark the owner as NULL.
	 */
	WRITE_ONCE(mm->owner, NULL);
	return;

assign_new_owner:
	BUG_ON(cand == self);
	get_task_struct(cand);
	/*
	 * task_lock() keeps cand->mm from changing; the invariant we want is
	 * mm->owner->mm == mm.
	 */
	task_lock(cand);
	/*
	 * tasklist_lock is released only after task_lock() has been taken,
	 * so that the candidate cannot slip away underneath us.
	 */
	read_unlock(&tasklist_lock);
	if (cand->mm != mm) {
		task_unlock(cand);
		put_task_struct(cand);
		goto retry;
	}
	WRITE_ONCE(mm->owner, cand);
	task_unlock(cand);
	put_task_struct(cand);
}
#endif /* CONFIG_MEMCG */
|
|
|
|
/*
|
|
* Turn us into a lazy TLB process if we
|
|
* aren't already..
|
|
*/
|
|
static void exit_mm(void)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct core_state *core_state;
|
|
|
|
exit_mm_release(current, mm);
|
|
if (!mm)
|
|
return;
|
|
sync_mm_rss(mm);
|
|
/*
|
|
* Serialize with any possible pending coredump.
|
|
* We must hold mmap_sem around checking core_state
|
|
* and clearing tsk->mm. The core-inducing thread
|
|
* will increment ->nr_threads for each thread in the
|
|
* group with ->mm != NULL.
|
|
*/
|
|
down_read(&mm->mmap_sem);
|
|
core_state = mm->core_state;
|
|
if (core_state) {
|
|
struct core_thread self;
|
|
|
|
up_read(&mm->mmap_sem);
|
|
|
|
self.task = current;
|
|
if (self.task->flags & PF_SIGNALED)
|
|
self.next = xchg(&core_state->dumper.next, &self);
|
|
else
|
|
self.task = NULL;
|
|
/*
|
|
* Implies mb(), the result of xchg() must be visible
|
|
* to core_state->dumper.
|
|
*/
|
|
if (atomic_dec_and_test(&core_state->nr_threads))
|
|
complete(&core_state->startup);
|
|
|
|
for (;;) {
|
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
|
if (!self.task) /* see coredump_finish() */
|
|
break;
|
|
freezable_schedule();
|
|
}
|
|
__set_current_state(TASK_RUNNING);
|
|
down_read(&mm->mmap_sem);
|
|
}
|
|
mmgrab(mm);
|
|
BUG_ON(mm != current->active_mm);
|
|
/* more a memory barrier than a real lock */
|
|
task_lock(current);
|
|
current->mm = NULL;
|
|
up_read(&mm->mmap_sem);
|
|
enter_lazy_tlb(mm, current);
|
|
task_unlock(current);
|
|
mm_update_next_owner(mm);
|
|
mmput(mm);
|
|
if (test_thread_flag(TIF_MEMDIE))
|
|
exit_oom_victim();
|
|
}
|
|
|
|
static struct task_struct *find_alive_thread(struct task_struct *p)
|
|
{
|
|
struct task_struct *t;
|
|
|
|
for_each_thread(p, t) {
|
|
if (!(t->flags & PF_EXITING))
|
|
return t;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static struct task_struct *find_child_reaper(struct task_struct *father,
|
|
struct list_head *dead)
|
|
__releases(&tasklist_lock)
|
|
__acquires(&tasklist_lock)
|
|
{
|
|
struct pid_namespace *pid_ns = task_active_pid_ns(father);
|
|
struct task_struct *reaper = pid_ns->child_reaper;
|
|
struct task_struct *p, *n;
|
|
|
|
if (likely(reaper != father))
|
|
return reaper;
|
|
|
|
reaper = find_alive_thread(father);
|
|
if (reaper) {
|
|
pid_ns->child_reaper = reaper;
|
|
return reaper;
|
|
}
|
|
|
|
write_unlock_irq(&tasklist_lock);
|
|
|
|
list_for_each_entry_safe(p, n, dead, ptrace_entry) {
|
|
list_del_init(&p->ptrace_entry);
|
|
release_task(p);
|
|
}
|
|
|
|
zap_pid_ns_processes(pid_ns);
|
|
write_lock_irq(&tasklist_lock);
|
|
|
|
return father;
|
|
}
|
|
|
|
/*
|
|
* When we die, we re-parent all our children, and try to:
|
|
* 1. give them to another thread in our thread group, if such a member exists
|
|
* 2. give it to the first ancestor process which prctl'd itself as a
|
|
* child_subreaper for its children (like a service manager)
|
|
* 3. give it to the init process (PID 1) in our pid namespace
|
|
*/
|
|
static struct task_struct *find_new_reaper(struct task_struct *father,
|
|
struct task_struct *child_reaper)
|
|
{
|
|
struct task_struct *thread, *reaper;
|
|
|
|
thread = find_alive_thread(father);
|
|
if (thread)
|
|
return thread;
|
|
|
|
if (father->signal->has_child_subreaper) {
|
|
unsigned int ns_level = task_pid(father)->level;
|
|
/*
|
|
* Find the first ->is_child_subreaper ancestor in our pid_ns.
|
|
* We can't check reaper != child_reaper to ensure we do not
|
|
* cross the namespaces, the exiting parent could be injected
|
|
* by setns() + fork().
|
|
* We check pid->level, this is slightly more efficient than
|
|
* task_active_pid_ns(reaper) != task_active_pid_ns(father).
|
|
*/
|
|
for (reaper = father->real_parent;
|
|
task_pid(reaper)->level == ns_level;
|
|
reaper = reaper->real_parent) {
|
|
if (reaper == &init_task)
|
|
break;
|
|
if (!reaper->signal->is_child_subreaper)
|
|
continue;
|
|
thread = find_alive_thread(reaper);
|
|
if (thread)
|
|
return thread;
|
|
}
|
|
}
|
|
|
|
return child_reaper;
|
|
}
|
|
|
|
/*
|
|
* Any that need to be release_task'd are put on the @dead list.
|
|
*/
|
|
static void reparent_leader(struct task_struct *father, struct task_struct *p,
|
|
struct list_head *dead)
|
|
{
|
|
if (unlikely(p->exit_state == EXIT_DEAD))
|
|
return;
|
|
|
|
/* We don't want people slaying init. */
|
|
p->exit_signal = SIGCHLD;
|
|
|
|
/* If it has exited notify the new parent about this child's death. */
|
|
if (!p->ptrace &&
|
|
p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
|
|
if (do_notify_parent(p, p->exit_signal)) {
|
|
p->exit_state = EXIT_DEAD;
|
|
list_add(&p->ptrace_entry, dead);
|
|
}
|
|
}
|
|
|
|
kill_orphaned_pgrp(p, father);
|
|
}
|
|
|
|
/*
|
|
* This does two things:
|
|
*
|
|
* A. Make init inherit all the child processes
|
|
* B. Check to see if any process groups have become orphaned
|
|
* as a result of our exiting, and if they have any stopped
|
|
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
|
|
*/
|
|
static void forget_original_parent(struct task_struct *father,
|
|
struct list_head *dead)
|
|
{
|
|
struct task_struct *p, *t, *reaper;
|
|
|
|
if (unlikely(!list_empty(&father->ptraced)))
|
|
exit_ptrace(father, dead);
|
|
|
|
/* Can drop and reacquire tasklist_lock */
|
|
reaper = find_child_reaper(father, dead);
|
|
if (list_empty(&father->children))
|
|
return;
|
|
|
|
reaper = find_new_reaper(father, reaper);
|
|
list_for_each_entry(p, &father->children, sibling) {
|
|
for_each_thread(p, t) {
|
|
t->real_parent = reaper;
|
|
BUG_ON((!t->ptrace) != (t->parent == father));
|
|
if (likely(!t->ptrace))
|
|
t->parent = t->real_parent;
|
|
if (t->pdeath_signal)
|
|
group_send_sig_info(t->pdeath_signal,
|
|
SEND_SIG_NOINFO, t,
|
|
PIDTYPE_TGID);
|
|
}
|
|
/*
|
|
* If this is a threaded reparent there is no need to
|
|
* notify anyone anything has happened.
|
|
*/
|
|
if (!same_thread_group(reaper, father))
|
|
reparent_leader(father, p, dead);
|
|
}
|
|
list_splice_tail_init(&father->children, &reaper->children);
|
|
}
|
|
|
|
/*
|
|
* Send signals to all our closest relatives so that they know
|
|
* to properly mourn us..
|
|
*/
|
|
static void exit_notify(struct task_struct *tsk, int group_dead)
|
|
{
|
|
bool autoreap;
|
|
struct task_struct *p, *n;
|
|
LIST_HEAD(dead);
|
|
|
|
write_lock_irq(&tasklist_lock);
|
|
forget_original_parent(tsk, &dead);
|
|
|
|
if (group_dead)
|
|
kill_orphaned_pgrp(tsk->group_leader, NULL);
|
|
|
|
tsk->exit_state = EXIT_ZOMBIE;
|
|
if (unlikely(tsk->ptrace)) {
|
|
int sig = thread_group_leader(tsk) &&
|
|
thread_group_empty(tsk) &&
|
|
!ptrace_reparented(tsk) ?
|
|
tsk->exit_signal : SIGCHLD;
|
|
autoreap = do_notify_parent(tsk, sig);
|
|
} else if (thread_group_leader(tsk)) {
|
|
autoreap = thread_group_empty(tsk) &&
|
|
do_notify_parent(tsk, tsk->exit_signal);
|
|
} else {
|
|
autoreap = true;
|
|
}
|
|
|
|
if (autoreap) {
|
|
tsk->exit_state = EXIT_DEAD;
|
|
list_add(&tsk->ptrace_entry, &dead);
|
|
}
|
|
|
|
/* mt-exec, de_thread() is waiting for group leader */
|
|
if (unlikely(tsk->signal->notify_count < 0))
|
|
wake_up_process(tsk->signal->group_exit_task);
|
|
write_unlock_irq(&tasklist_lock);
|
|
|
|
list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
|
|
list_del_init(&p->ptrace_entry);
|
|
release_task(p);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_DEBUG_STACK_USAGE
/*
 * Log a message whenever the current task ends with less unused stack than
 * any task checked before it, and remember the new low-water mark.
 */
static void check_stack_usage(void)
{
	static DEFINE_SPINLOCK(low_water_lock);
	/*
	 * Kept in the same type as @free so that the comparisons, the
	 * assignment and the %lu format below involve no implicit
	 * signed/unsigned conversion.
	 */
	static unsigned long lowest_to_date = THREAD_SIZE;
	unsigned long free;

	free = stack_not_used(current);

	/* Unlocked early exit; the value is checked again under the lock. */
	if (free >= lowest_to_date)
		return;

	spin_lock(&low_water_lock);
	if (free < lowest_to_date) {
		pr_info("%s (%d) used greatest stack depth: %lu bytes left\n",
			current->comm, task_pid_nr(current), free);
		lowest_to_date = free;
	}
	spin_unlock(&low_water_lock);
}
#else
/* No-op when CONFIG_DEBUG_STACK_USAGE is not set. */
static inline void check_stack_usage(void) {}
#endif
|
|
|
|
/*
 * Terminate the current task with exit code @code. Does not return.
 */
void __noreturn do_exit(long code)
{
	struct task_struct *self = current;
	int group_dead;

	/*
	 * A kernel oops can bring us here, sometimes with preemption off.
	 * Check for critical errors first, then repair important state such
	 * as USER_DS and the preempt count, then do everything else.
	 */

	WARN_ON(blk_needs_flush_plug(self));

	if (unlikely(in_interrupt()))
		panic("Aiee, killing interrupt handler!");
	if (unlikely(!self->pid))
		panic("Attempted to kill the idle task!");

	/*
	 * When do_exit() runs because this process oopsed, get_fs() may
	 * still be KERNEL_DS; put it back to USER_DS before going on. Among
	 * other reasons, this keeps mm_release()->clear_child_tid() from
	 * writing to a user-controlled kernel address.
	 */
	set_fs(USER_DS);

	if (unlikely(in_atomic())) {
		pr_info("note: %s[%d] exited with preempt_count %d\n",
			current->comm, task_pid_nr(current),
			preempt_count());
		preempt_count_set(PREEMPT_ENABLED);
	}

	profile_task_exit(self);
	kcov_task_exit(self);

	ptrace_event(PTRACE_EVENT_EXIT, code);

	validate_creds_for_do_exit(self);

	/*
	 * PF_EXITING already set means we are faulting recursively inside
	 * do_exit(). The safest thing is to leave this task alone and wait
	 * for a reboot.
	 */
	if (unlikely(self->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		futex_exit_recursive(self);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}

	exit_signals(self);  /* sets PF_EXITING */

	/* bring the mm's RSS info up to date before statistics are gathered */
	if (self->mm)
		sync_mm_rss(self->mm);
	acct_update_integrals(self);
	group_dead = atomic_dec_and_test(&self->signal->live);
	if (group_dead) {
		/*
		 * The last thread of global init exiting is fatal: panic
		 * right away so that a useable coredump is obtained.
		 */
		if (unlikely(is_global_init(self)))
			panic("Attempted to kill init! exitcode=0x%08x\n",
				self->signal->group_exit_code ?: (int)code);

#ifdef CONFIG_POSIX_TIMERS
		hrtimer_cancel(&self->signal->real_timer);
		exit_itimers(self->signal);
#endif
		if (self->mm)
			setmax_mm_hiwater_rss(&self->signal->maxrss, self->mm);
	}
	acct_collect(code, group_dead);
	if (group_dead)
		tty_audit_exit();
	audit_free(self);

	self->exit_code = code;
	taskstats_exit(self, group_dead);

	exit_mm();

	if (group_dead)
		acct_process();
	trace_sched_process_exit(self);

	exit_sem(self);
	exit_shm(self);
	exit_files(self);
	exit_fs(self);
	if (group_dead)
		disassociate_ctty(1);
	exit_task_namespaces(self);
	exit_task_work(self);
	exit_thread(self);
	exit_umh(self);

	/*
	 * Inherited counters are flushed to the parent here, before the
	 * parent is woken up by child-exit notifications.
	 *
	 * Because of cgroup mode this has to precede cgroup_exit().
	 */
	perf_event_exit_task(self);

	sched_autogroup_exit_task(self);
	cgroup_exit(self);

	/*
	 * FIXME: do this only when needed, using the sched_exit tracepoint
	 */
	flush_ptrace_hw_breakpoint(self);

	exit_tasks_rcu_start();
	exit_notify(self, group_dead);
	proc_exit_connector(self);
	mpol_put_task_policy(self);
#ifdef CONFIG_FUTEX
	if (unlikely(current->pi_state_cache))
		kfree(current->pi_state_cache);
#endif
	/*
	 * We must not be holding any locks at this point:
	 */
	debug_check_no_locks_held();

	if (self->io_context)
		exit_io_context(self);

	if (self->splice_pipe)
		free_pipe_info(self->splice_pipe);

	if (self->task_frag.page)
		put_page(self->task_frag.page);

	validate_creds_for_do_exit(self);

	check_stack_usage();
	preempt_disable();
	if (self->nr_dirtied)
		__this_cpu_add(dirty_throttle_leaks, self->nr_dirtied);
	exit_rcu();
	exit_tasks_rcu_finish();

	lockdep_free_task(self);
	do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
|
|
|
|
/*
 * make_task_dead - kill the current task after a fatal kernel error
 * @signr: exit code handed on to do_exit()
 *
 * Take the task off the cpu after something catastrophic has happened.
 * Each call is counted against oops_limit; this function does not return.
 */
void __noreturn make_task_dead(int signr)
{
	unsigned int limit;

	/*
	 * Every time the system oopses, if the oops happens while a reference
	 * to an object was held, the reference leaks.
	 * If the oops doesn't also leak memory, repeated oopsing can cause
	 * reference counters to wrap around (if they're not using refcount_t).
	 * This means that repeated oopsing can make unexploitable-looking bugs
	 * exploitable.
	 * To make sure this can't happen, place an upper bound on how often the
	 * kernel may oops without panic().  A limit of zero disables the check.
	 */
	limit = READ_ONCE(oops_limit);
	if (atomic_inc_return(&oops_count) >= limit && limit)
		/* limit is unsigned, so it is printed with %u, not %d */
		panic("Oopsed too often (kernel.oops_limit is %u)", limit);

	do_exit(signr);
}
|
|
|
|
void complete_and_exit(struct completion *comp, long code)
|
|
{
|
|
if (comp)
|
|
complete(comp);
|
|
|
|
do_exit(code);
|
|
}
|
|
EXPORT_SYMBOL(complete_and_exit);
|
|
|
|
/*
 * exit(2): terminate the calling task.  Only the low eight bits of
 * @error_code are kept; they are placed in bits 8..15 of the exit code.
 */
SYSCALL_DEFINE1(exit, int, error_code)
{
	int status = (error_code & 0xff) << 8;

	do_exit(status);
}
|
|
|
|
/*
|
|
* Take down every thread in the group. This is called by fatal signals
|
|
* as well as by sys_exit_group (below).
|
|
*/
|
|
void
|
|
do_group_exit(int exit_code)
|
|
{
|
|
struct signal_struct *sig = current->signal;
|
|
|
|
BUG_ON(exit_code & 0x80); /* core dumps don't get here */
|
|
|
|
if (signal_group_exit(sig))
|
|
exit_code = sig->group_exit_code;
|
|
else if (!thread_group_empty(current)) {
|
|
struct sighand_struct *const sighand = current->sighand;
|
|
|
|
spin_lock_irq(&sighand->siglock);
|
|
if (signal_group_exit(sig))
|
|
/* Another thread got here before we took the lock. */
|
|
exit_code = sig->group_exit_code;
|
|
else {
|
|
sig->group_exit_code = exit_code;
|
|
sig->flags = SIGNAL_GROUP_EXIT;
|
|
zap_other_threads(current);
|
|
}
|
|
spin_unlock_irq(&sighand->siglock);
|
|
}
|
|
|
|
do_exit(exit_code);
|
|
/* NOTREACHED */
|
|
}
|
|
|
|
/*
|
|
* this kills every thread in the thread group. Note that any externally
|
|
* wait4()-ing process will get the correct exit code - even if this
|
|
* thread is not the thread group leader.
|
|
*/
|
|
SYSCALL_DEFINE1(exit_group, int, error_code)
|
|
{
|
|
do_group_exit((error_code & 0xff) << 8);
|
|
/* NOTREACHED */
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Result of a wait, filled in by the wait_task_*() helpers and copied
 * into the user's siginfo by the waitid() entry points.
 */
struct waitid_info {
	pid_t pid;	/* pid of the reported child (stored to si_pid) */
	uid_t uid;	/* uid of the reported child (stored to si_uid) */
	int status;	/* exit status or signal number (stored to si_status) */
	int cause;	/* CLD_* code (stored to si_code) */
};
|
|
|
|
struct wait_opts {
|
|
enum pid_type wo_type;
|
|
int wo_flags;
|
|
struct pid *wo_pid;
|
|
|
|
struct waitid_info *wo_info;
|
|
int wo_stat;
|
|
struct rusage *wo_rusage;
|
|
|
|
wait_queue_entry_t child_wait;
|
|
int notask_error;
|
|
};
|
|
|
|
static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
|
|
{
|
|
return wo->wo_type == PIDTYPE_MAX ||
|
|
task_pid_type(p, wo->wo_type) == wo->wo_pid;
|
|
}
|
|
|
|
static int
|
|
eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
|
|
{
|
|
if (!eligible_pid(wo, p))
|
|
return 0;
|
|
|
|
/*
|
|
* Wait for all children (clone and not) if __WALL is set or
|
|
* if it is traced by us.
|
|
*/
|
|
if (ptrace || (wo->wo_flags & __WALL))
|
|
return 1;
|
|
|
|
/*
|
|
* Otherwise, wait for clone children *only* if __WCLONE is set;
|
|
* otherwise, wait for non-clone children *only*.
|
|
*
|
|
* Note: a "clone" child here is one that reports to its parent
|
|
* using a signal other than SIGCHLD, or a non-leader thread which
|
|
* we can only see if it is traced by us.
|
|
*/
|
|
if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
|
|
* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
|
|
* the lock and this task is uninteresting. If we return nonzero, we have
|
|
* released the lock and the system call should return.
|
|
*/
|
|
static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
|
|
{
|
|
int state, status;
|
|
pid_t pid = task_pid_vnr(p);
|
|
uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
|
|
struct waitid_info *infop;
|
|
|
|
if (!likely(wo->wo_flags & WEXITED))
|
|
return 0;
|
|
|
|
if (unlikely(wo->wo_flags & WNOWAIT)) {
|
|
status = p->exit_code;
|
|
get_task_struct(p);
|
|
read_unlock(&tasklist_lock);
|
|
sched_annotate_sleep();
|
|
if (wo->wo_rusage)
|
|
getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
|
|
put_task_struct(p);
|
|
goto out_info;
|
|
}
|
|
/*
|
|
* Move the task's state to DEAD/TRACE, only one thread can do this.
|
|
*/
|
|
state = (ptrace_reparented(p) && thread_group_leader(p)) ?
|
|
EXIT_TRACE : EXIT_DEAD;
|
|
if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
|
|
return 0;
|
|
/*
|
|
* We own this thread, nobody else can reap it.
|
|
*/
|
|
read_unlock(&tasklist_lock);
|
|
sched_annotate_sleep();
|
|
|
|
/*
|
|
* Check thread_group_leader() to exclude the traced sub-threads.
|
|
*/
|
|
if (state == EXIT_DEAD && thread_group_leader(p)) {
|
|
struct signal_struct *sig = p->signal;
|
|
struct signal_struct *psig = current->signal;
|
|
unsigned long maxrss;
|
|
u64 tgutime, tgstime;
|
|
|
|
/*
|
|
* The resource counters for the group leader are in its
|
|
* own task_struct. Those for dead threads in the group
|
|
* are in its signal_struct, as are those for the child
|
|
* processes it has previously reaped. All these
|
|
* accumulate in the parent's signal_struct c* fields.
|
|
*
|
|
* We don't bother to take a lock here to protect these
|
|
* p->signal fields because the whole thread group is dead
|
|
* and nobody can change them.
|
|
*
|
|
* psig->stats_lock also protects us from our sub-theads
|
|
* which can reap other children at the same time. Until
|
|
* we change k_getrusage()-like users to rely on this lock
|
|
* we have to take ->siglock as well.
|
|
*
|
|
* We use thread_group_cputime_adjusted() to get times for
|
|
* the thread group, which consolidates times for all threads
|
|
* in the group including the group leader.
|
|
*/
|
|
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
|
|
spin_lock_irq(¤t->sighand->siglock);
|
|
write_seqlock(&psig->stats_lock);
|
|
psig->cutime += tgutime + sig->cutime;
|
|
psig->cstime += tgstime + sig->cstime;
|
|
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
|
|
psig->cmin_flt +=
|
|
p->min_flt + sig->min_flt + sig->cmin_flt;
|
|
psig->cmaj_flt +=
|
|
p->maj_flt + sig->maj_flt + sig->cmaj_flt;
|
|
psig->cnvcsw +=
|
|
p->nvcsw + sig->nvcsw + sig->cnvcsw;
|
|
psig->cnivcsw +=
|
|
p->nivcsw + sig->nivcsw + sig->cnivcsw;
|
|
psig->cinblock +=
|
|
task_io_get_inblock(p) +
|
|
sig->inblock + sig->cinblock;
|
|
psig->coublock +=
|
|
task_io_get_oublock(p) +
|
|
sig->oublock + sig->coublock;
|
|
maxrss = max(sig->maxrss, sig->cmaxrss);
|
|
if (psig->cmaxrss < maxrss)
|
|
psig->cmaxrss = maxrss;
|
|
task_io_accounting_add(&psig->ioac, &p->ioac);
|
|
task_io_accounting_add(&psig->ioac, &sig->ioac);
|
|
write_sequnlock(&psig->stats_lock);
|
|
spin_unlock_irq(¤t->sighand->siglock);
|
|
}
|
|
|
|
if (wo->wo_rusage)
|
|
getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
|
|
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
|
|
? p->signal->group_exit_code : p->exit_code;
|
|
wo->wo_stat = status;
|
|
|
|
if (state == EXIT_TRACE) {
|
|
write_lock_irq(&tasklist_lock);
|
|
/* We dropped tasklist, ptracer could die and untrace */
|
|
ptrace_unlink(p);
|
|
|
|
/* If parent wants a zombie, don't release it now */
|
|
state = EXIT_ZOMBIE;
|
|
if (do_notify_parent(p, p->exit_signal))
|
|
state = EXIT_DEAD;
|
|
p->exit_state = state;
|
|
write_unlock_irq(&tasklist_lock);
|
|
}
|
|
if (state == EXIT_DEAD)
|
|
release_task(p);
|
|
|
|
out_info:
|
|
infop = wo->wo_info;
|
|
if (infop) {
|
|
if ((status & 0x7f) == 0) {
|
|
infop->cause = CLD_EXITED;
|
|
infop->status = status >> 8;
|
|
} else {
|
|
infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
|
|
infop->status = status & 0x7f;
|
|
}
|
|
infop->pid = pid;
|
|
infop->uid = uid;
|
|
}
|
|
|
|
return pid;
|
|
}
|
|
|
|
/*
 * Return a pointer to the stop code of @p that is relevant for this kind
 * of wait, or NULL if @p is not in the corresponding stopped state.
 *
 * For a ptrace wait this is @p's own exit_code while it is traced and not
 * listening; otherwise it is the group_exit_code of a group-stopped
 * process.
 */
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
	if (!ptrace) {
		if (p->signal->flags & SIGNAL_STOP_STOPPED)
			return &p->signal->group_exit_code;
		return NULL;
	}

	if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
		return &p->exit_code;

	return NULL;
}
|
|
|
|
/**
|
|
* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
|
|
* @wo: wait options
|
|
* @ptrace: is the wait for ptrace
|
|
* @p: task to wait for
|
|
*
|
|
* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
|
|
*
|
|
* CONTEXT:
|
|
* read_lock(&tasklist_lock), which is released if return value is
|
|
* non-zero. Also, grabs and releases @p->sighand->siglock.
|
|
*
|
|
* RETURNS:
|
|
* 0 if wait condition didn't exist and search for other wait conditions
|
|
* should continue. Non-zero return, -errno on failure and @p's pid on
|
|
* success, implies that tasklist_lock is released and wait condition
|
|
* search should terminate.
|
|
*/
|
|
static int wait_task_stopped(struct wait_opts *wo,
|
|
int ptrace, struct task_struct *p)
|
|
{
|
|
struct waitid_info *infop;
|
|
int exit_code, *p_code, why;
|
|
uid_t uid = 0; /* unneeded, required by compiler */
|
|
pid_t pid;
|
|
|
|
/*
|
|
* Traditionally we see ptrace'd stopped tasks regardless of options.
|
|
*/
|
|
if (!ptrace && !(wo->wo_flags & WUNTRACED))
|
|
return 0;
|
|
|
|
if (!task_stopped_code(p, ptrace))
|
|
return 0;
|
|
|
|
exit_code = 0;
|
|
spin_lock_irq(&p->sighand->siglock);
|
|
|
|
p_code = task_stopped_code(p, ptrace);
|
|
if (unlikely(!p_code))
|
|
goto unlock_sig;
|
|
|
|
exit_code = *p_code;
|
|
if (!exit_code)
|
|
goto unlock_sig;
|
|
|
|
if (!unlikely(wo->wo_flags & WNOWAIT))
|
|
*p_code = 0;
|
|
|
|
uid = from_kuid_munged(current_user_ns(), task_uid(p));
|
|
unlock_sig:
|
|
spin_unlock_irq(&p->sighand->siglock);
|
|
if (!exit_code)
|
|
return 0;
|
|
|
|
/*
|
|
* Now we are pretty sure this task is interesting.
|
|
* Make sure it doesn't get reaped out from under us while we
|
|
* give up the lock and then examine it below. We don't want to
|
|
* keep holding onto the tasklist_lock while we call getrusage and
|
|
* possibly take page faults for user memory.
|
|
*/
|
|
get_task_struct(p);
|
|
pid = task_pid_vnr(p);
|
|
why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
|
|
read_unlock(&tasklist_lock);
|
|
sched_annotate_sleep();
|
|
if (wo->wo_rusage)
|
|
getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
|
|
put_task_struct(p);
|
|
|
|
if (likely(!(wo->wo_flags & WNOWAIT)))
|
|
wo->wo_stat = (exit_code << 8) | 0x7f;
|
|
|
|
infop = wo->wo_info;
|
|
if (infop) {
|
|
infop->cause = why;
|
|
infop->status = exit_code;
|
|
infop->pid = pid;
|
|
infop->uid = uid;
|
|
}
|
|
return pid;
|
|
}
|
|
|
|
/*
|
|
* Handle do_wait work for one task in a live, non-stopped state.
|
|
* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
|
|
* the lock and this task is uninteresting. If we return nonzero, we have
|
|
* released the lock and the system call should return.
|
|
*/
|
|
static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
|
|
{
|
|
struct waitid_info *infop;
|
|
pid_t pid;
|
|
uid_t uid;
|
|
|
|
if (!unlikely(wo->wo_flags & WCONTINUED))
|
|
return 0;
|
|
|
|
if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
|
|
return 0;
|
|
|
|
spin_lock_irq(&p->sighand->siglock);
|
|
/* Re-check with the lock held. */
|
|
if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
|
|
spin_unlock_irq(&p->sighand->siglock);
|
|
return 0;
|
|
}
|
|
if (!unlikely(wo->wo_flags & WNOWAIT))
|
|
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
|
|
uid = from_kuid_munged(current_user_ns(), task_uid(p));
|
|
spin_unlock_irq(&p->sighand->siglock);
|
|
|
|
pid = task_pid_vnr(p);
|
|
get_task_struct(p);
|
|
read_unlock(&tasklist_lock);
|
|
sched_annotate_sleep();
|
|
if (wo->wo_rusage)
|
|
getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
|
|
put_task_struct(p);
|
|
|
|
infop = wo->wo_info;
|
|
if (!infop) {
|
|
wo->wo_stat = 0xffff;
|
|
} else {
|
|
infop->cause = CLD_CONTINUED;
|
|
infop->pid = pid;
|
|
infop->uid = uid;
|
|
infop->status = SIGCONT;
|
|
}
|
|
return pid;
|
|
}
|
|
|
|
/*
|
|
* Consider @p for a wait by @parent.
|
|
*
|
|
* -ECHILD should be in ->notask_error before the first call.
|
|
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
|
|
* Returns zero if the search for a child should continue;
|
|
* then ->notask_error is 0 if @p is an eligible child,
|
|
* or still -ECHILD.
|
|
*/
|
|
static int wait_consider_task(struct wait_opts *wo, int ptrace,
|
|
struct task_struct *p)
|
|
{
|
|
/*
|
|
* We can race with wait_task_zombie() from another thread.
|
|
* Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
|
|
* can't confuse the checks below.
|
|
*/
|
|
int exit_state = READ_ONCE(p->exit_state);
|
|
int ret;
|
|
|
|
if (unlikely(exit_state == EXIT_DEAD))
|
|
return 0;
|
|
|
|
ret = eligible_child(wo, ptrace, p);
|
|
if (!ret)
|
|
return ret;
|
|
|
|
if (unlikely(exit_state == EXIT_TRACE)) {
|
|
/*
|
|
* ptrace == 0 means we are the natural parent. In this case
|
|
* we should clear notask_error, debugger will notify us.
|
|
*/
|
|
if (likely(!ptrace))
|
|
wo->notask_error = 0;
|
|
return 0;
|
|
}
|
|
|
|
if (likely(!ptrace) && unlikely(p->ptrace)) {
|
|
/*
|
|
* If it is traced by its real parent's group, just pretend
|
|
* the caller is ptrace_do_wait() and reap this child if it
|
|
* is zombie.
|
|
*
|
|
* This also hides group stop state from real parent; otherwise
|
|
* a single stop can be reported twice as group and ptrace stop.
|
|
* If a ptracer wants to distinguish these two events for its
|
|
* own children it should create a separate process which takes
|
|
* the role of real parent.
|
|
*/
|
|
if (!ptrace_reparented(p))
|
|
ptrace = 1;
|
|
}
|
|
|
|
/* slay zombie? */
|
|
if (exit_state == EXIT_ZOMBIE) {
|
|
/* we don't reap group leaders with subthreads */
|
|
if (!delay_group_leader(p)) {
|
|
/*
|
|
* A zombie ptracee is only visible to its ptracer.
|
|
* Notification and reaping will be cascaded to the
|
|
* real parent when the ptracer detaches.
|
|
*/
|
|
if (unlikely(ptrace) || likely(!p->ptrace))
|
|
return wait_task_zombie(wo, p);
|
|
}
|
|
|
|
/*
|
|
* Allow access to stopped/continued state via zombie by
|
|
* falling through. Clearing of notask_error is complex.
|
|
*
|
|
* When !@ptrace:
|
|
*
|
|
* If WEXITED is set, notask_error should naturally be
|
|
* cleared. If not, subset of WSTOPPED|WCONTINUED is set,
|
|
* so, if there are live subthreads, there are events to
|
|
* wait for. If all subthreads are dead, it's still safe
|
|
* to clear - this function will be called again in finite
|
|
* amount time once all the subthreads are released and
|
|
* will then return without clearing.
|
|
*
|
|
* When @ptrace:
|
|
*
|
|
* Stopped state is per-task and thus can't change once the
|
|
* target task dies. Only continued and exited can happen.
|
|
* Clear notask_error if WCONTINUED | WEXITED.
|
|
*/
|
|
if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
|
|
wo->notask_error = 0;
|
|
} else {
|
|
/*
|
|
* @p is alive and it's gonna stop, continue or exit, so
|
|
* there always is something to wait for.
|
|
*/
|
|
wo->notask_error = 0;
|
|
}
|
|
|
|
/*
|
|
* Wait for stopped. Depending on @ptrace, different stopped state
|
|
* is used and the two don't interact with each other.
|
|
*/
|
|
ret = wait_task_stopped(wo, ptrace, p);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*
|
|
* Wait for continued. There's only one continued state and the
|
|
* ptracer can consume it which can confuse the real parent. Don't
|
|
* use WCONTINUED from ptracer. You don't need or want it.
|
|
*/
|
|
return wait_task_continued(wo, p);
|
|
}
|
|
|
|
/*
|
|
* Do the work of do_wait() for one thread in the group, @tsk.
|
|
*
|
|
* -ECHILD should be in ->notask_error before the first call.
|
|
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
|
|
* Returns zero if the search for a child should continue; then
|
|
* ->notask_error is 0 if there were any eligible children,
|
|
* or still -ECHILD.
|
|
*/
|
|
static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
|
|
{
|
|
struct task_struct *p;
|
|
|
|
list_for_each_entry(p, &tsk->children, sibling) {
|
|
int ret = wait_consider_task(wo, 0, p);
|
|
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
|
|
{
|
|
struct task_struct *p;
|
|
|
|
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
|
|
int ret = wait_consider_task(wo, 1, p);
|
|
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Wake function installed on the waiter's wait_chldexit entry.  @key is
 * the task the wakeup is about; the waiter is only woken if that task
 * can match its wait request.
 */
static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
				int sync, void *key)
{
	struct wait_opts *wo = container_of(wait, struct wait_opts,
						child_wait);
	struct task_struct *p = key;
	bool wrong_parent;

	if (!eligible_pid(wo, p))
		return 0;

	/* With __WNOTHREAD only events of our own children are relevant. */
	wrong_parent = (wo->wo_flags & __WNOTHREAD) &&
		       wait->private != p->parent;
	if (wrong_parent)
		return 0;

	return default_wake_function(wait, mode, sync, key);
}
|
|
|
|
void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
|
|
{
|
|
__wake_up_sync_key(&parent->signal->wait_chldexit,
|
|
TASK_INTERRUPTIBLE, 1, p);
|
|
}
|
|
|
|
static long do_wait(struct wait_opts *wo)
|
|
{
|
|
struct task_struct *tsk;
|
|
int retval;
|
|
|
|
trace_sched_process_wait(wo->wo_pid);
|
|
|
|
init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
|
|
wo->child_wait.private = current;
|
|
add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
|
|
repeat:
|
|
/*
|
|
* If there is nothing that can match our criteria, just get out.
|
|
* We will clear ->notask_error to zero if we see any child that
|
|
* might later match our criteria, even if we are not able to reap
|
|
* it yet.
|
|
*/
|
|
wo->notask_error = -ECHILD;
|
|
if ((wo->wo_type < PIDTYPE_MAX) &&
|
|
(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
|
|
goto notask;
|
|
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
read_lock(&tasklist_lock);
|
|
tsk = current;
|
|
do {
|
|
retval = do_wait_thread(wo, tsk);
|
|
if (retval)
|
|
goto end;
|
|
|
|
retval = ptrace_do_wait(wo, tsk);
|
|
if (retval)
|
|
goto end;
|
|
|
|
if (wo->wo_flags & __WNOTHREAD)
|
|
break;
|
|
} while_each_thread(current, tsk);
|
|
read_unlock(&tasklist_lock);
|
|
|
|
notask:
|
|
retval = wo->notask_error;
|
|
if (!retval && !(wo->wo_flags & WNOHANG)) {
|
|
retval = -ERESTARTSYS;
|
|
if (!signal_pending(current)) {
|
|
schedule();
|
|
goto repeat;
|
|
}
|
|
}
|
|
end:
|
|
__set_current_state(TASK_RUNNING);
|
|
remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
|
|
return retval;
|
|
}
|
|
|
|
/*
 * kernel_waitid - in-kernel core of waitid()
 * @which: P_ALL, P_PID, P_PGID or P_PIDFD
 * @upid: pid, process group id or pidfd, interpreted according to @which
 * @infop: filled in with the result
 * @options: W* option bits; at least one of WEXITED, WSTOPPED, WCONTINUED
 * @ru: optional rusage result
 *
 * Validates the arguments, resolves the target pid and calls do_wait().
 * Returns the result of do_wait() or a negative errno on bad arguments.
 */
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
			  int options, struct rusage *ru)
{
	const unsigned int known = WNOHANG | WNOWAIT | WEXITED | WSTOPPED |
				   WCONTINUED | __WNOTHREAD | __WCLONE |
				   __WALL;
	const unsigned int events = WEXITED | WSTOPPED | WCONTINUED;
	struct wait_opts wo;
	struct pid *target = NULL;
	enum pid_type ptype;
	long rc;

	if (options & ~known)
		return -EINVAL;
	if (!(options & events))
		return -EINVAL;

	switch (which) {
	case P_ALL:
		ptype = PIDTYPE_MAX;
		break;
	case P_PID:
		ptype = PIDTYPE_PID;
		if (upid <= 0)
			return -EINVAL;

		target = find_get_pid(upid);
		break;
	case P_PGID:
		ptype = PIDTYPE_PGID;
		if (upid < 0)
			return -EINVAL;

		/* A zero id selects the caller's own process group. */
		if (upid)
			target = find_get_pid(upid);
		else
			target = get_task_pid(current, PIDTYPE_PGID);
		break;
	case P_PIDFD:
		ptype = PIDTYPE_PID;
		if (upid < 0)
			return -EINVAL;

		target = pidfd_get_pid(upid);
		if (IS_ERR(target))
			return PTR_ERR(target);
		break;
	default:
		return -EINVAL;
	}

	wo.wo_type	= ptype;
	wo.wo_pid	= target;
	wo.wo_flags	= options;
	wo.wo_info	= infop;
	wo.wo_rusage	= ru;
	rc = do_wait(&wo);

	/* Drop the reference taken when the target was looked up. */
	put_pid(target);
	return rc;
}
|
|
|
|
/*
 * waitid(2): run kernel_waitid() and copy the rusage and siginfo results
 * out to user space.  A positive result of kernel_waitid() is reported as
 * success (0) with si_signo set to SIGCHLD.
 */
SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
		infop, int, options, struct rusage __user *, ru)
{
	struct rusage r;
	struct waitid_info info = {.status = 0};
	struct rusage *kru = ru ? &r : NULL;
	int signo = 0;
	long err;

	err = kernel_waitid(which, upid, &info, options, kru);
	if (err > 0) {
		signo = SIGCHLD;
		err = 0;
		if (ru && copy_to_user(ru, &r, sizeof(r)))
			return -EFAULT;
	}

	if (!infop)
		return err;

	if (!user_access_begin(infop, sizeof(*infop)))
		return -EFAULT;

	/* Every failing store below jumps to Efault. */
	unsafe_put_user(signo, &infop->si_signo, Efault);
	unsafe_put_user(0, &infop->si_errno, Efault);
	unsafe_put_user(info.cause, &infop->si_code, Efault);
	unsafe_put_user(info.pid, &infop->si_pid, Efault);
	unsafe_put_user(info.uid, &infop->si_uid, Efault);
	unsafe_put_user(info.status, &infop->si_status, Efault);
	user_access_end();
	return err;

Efault:
	user_access_end();
	return -EFAULT;
}
|
|
|
|
/*
 * kernel_wait4 - in-kernel core of wait4() and waitpid()
 * @upid: -1 for any child, < -1 for process group -@upid, 0 for the
 *	  caller's process group, > 0 for a single pid
 * @stat_addr: optional user pointer receiving the status word
 * @options: W* option bits; WEXITED is always added
 * @ru: optional rusage result
 *
 * Returns the result of do_wait(), -EINVAL for unknown options, -ESRCH
 * for @upid == INT_MIN, or -EFAULT if the status cannot be stored.
 */
long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
		  struct rusage *ru)
{
	const unsigned int known = WNOHANG | WUNTRACED | WCONTINUED |
				   __WNOTHREAD | __WCLONE | __WALL;
	struct wait_opts wo;
	struct pid *target = NULL;
	enum pid_type ptype;
	long rc;

	if (options & ~known)
		return -EINVAL;

	/* -INT_MIN is not defined */
	if (upid == INT_MIN)
		return -ESRCH;

	if (upid > 0) {
		ptype = PIDTYPE_PID;
		target = find_get_pid(upid);
	} else if (upid == 0) {
		ptype = PIDTYPE_PGID;
		target = get_task_pid(current, PIDTYPE_PGID);
	} else if (upid == -1) {
		ptype = PIDTYPE_MAX;
	} else /* upid < -1 */ {
		ptype = PIDTYPE_PGID;
		target = find_get_pid(-upid);
	}

	wo.wo_type	= ptype;
	wo.wo_pid	= target;
	wo.wo_flags	= options | WEXITED;
	wo.wo_info	= NULL;
	wo.wo_stat	= 0;
	wo.wo_rusage	= ru;
	rc = do_wait(&wo);
	put_pid(target);

	/* The status word is only stored when a child was reported. */
	if (rc > 0 && stat_addr && put_user(wo.wo_stat, stat_addr))
		rc = -EFAULT;

	return rc;
}
|
|
|
|
/*
 * wait4(2): run kernel_wait4() and, when a child was reported, copy the
 * collected rusage out to user space.
 */
SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
		int, options, struct rusage __user *, ru)
{
	struct rusage r;
	struct rusage *kru = ru ? &r : NULL;
	long err;

	err = kernel_wait4(upid, stat_addr, options, kru);
	if (err > 0 && ru && copy_to_user(ru, &r, sizeof(r)))
		return -EFAULT;

	return err;
}
|
|
|
|
#ifdef __ARCH_WANT_SYS_WAITPID

/*
 * sys_waitpid() remains for compatibility.  waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 *
 * It is kernel_wait4() without an rusage buffer.
 */
SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
	struct rusage *no_rusage = NULL;

	return kernel_wait4(pid, stat_addr, options, no_rusage);
}

#endif
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
COMPAT_SYSCALL_DEFINE4(wait4,
|
|
compat_pid_t, pid,
|
|
compat_uint_t __user *, stat_addr,
|
|
int, options,
|
|
struct compat_rusage __user *, ru)
|
|
{
|
|
struct rusage r;
|
|
long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL);
|
|
if (err > 0) {
|
|
if (ru && put_compat_rusage(&r, ru))
|
|
return -EFAULT;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
COMPAT_SYSCALL_DEFINE5(waitid,
|
|
int, which, compat_pid_t, pid,
|
|
struct compat_siginfo __user *, infop, int, options,
|
|
struct compat_rusage __user *, uru)
|
|
{
|
|
struct rusage ru;
|
|
struct waitid_info info = {.status = 0};
|
|
long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL);
|
|
int signo = 0;
|
|
if (err > 0) {
|
|
signo = SIGCHLD;
|
|
err = 0;
|
|
if (uru) {
|
|
/* kernel_waitid() overwrites everything in ru */
|
|
if (COMPAT_USE_64BIT_TIME)
|
|
err = copy_to_user(uru, &ru, sizeof(ru));
|
|
else
|
|
err = put_compat_rusage(&ru, uru);
|
|
if (err)
|
|
return -EFAULT;
|
|
}
|
|
}
|
|
|
|
if (!infop)
|
|
return err;
|
|
|
|
if (!user_access_begin(infop, sizeof(*infop)))
|
|
return -EFAULT;
|
|
|
|
unsafe_put_user(signo, &infop->si_signo, Efault);
|
|
unsafe_put_user(0, &infop->si_errno, Efault);
|
|
unsafe_put_user(info.cause, &infop->si_code, Efault);
|
|
unsafe_put_user(info.pid, &infop->si_pid, Efault);
|
|
unsafe_put_user(info.uid, &infop->si_uid, Efault);
|
|
unsafe_put_user(info.status, &infop->si_status, Efault);
|
|
user_access_end();
|
|
return err;
|
|
Efault:
|
|
user_access_end();
|
|
return -EFAULT;
|
|
}
|
|
#endif
|
|
|
|
/*
 * Weak default abort(): trigger a BUG(), and panic if execution somehow
 * continues past it.
 */
__weak void abort(void)
{
	BUG();

	/* if that doesn't kill us, halt */
	panic("Oops failed to kill thread");
}
EXPORT_SYMBOL(abort);
|