From b90ca32531bf913845a5104689345f9f461f97bb Mon Sep 17 00:00:00 2001 From: yu kuai Date: Mon, 1 Jun 2020 20:38:56 +0800 Subject: [PATCH 001/174] block/bio-integrity: don't free 'buf' if bio_integrity_add_page() failed commit a75ca9303175d36af93c0937dd9b1a6422908b8d upstream. commit e7bf90e5afe3 ("block/bio-integrity: fix a memory leak bug") added a kfree() for 'buf' if bio_integrity_add_page() returns '0'. However, the object will be freed in bio_integrity_free() since 'bio->bi_opf' and 'bio->bi_integrity' were set previousy in bio_integrity_alloc(). Fixes: commit e7bf90e5afe3 ("block/bio-integrity: fix a memory leak bug") Signed-off-by: yu kuai Reviewed-by: Ming Lei Reviewed-by: Bob Liu Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- block/bio-integrity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index bf62c25cde8f..ae07dd78e951 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -278,7 +278,6 @@ bool bio_integrity_prep(struct bio *bio) if (ret == 0) { printk(KERN_ERR "could not attach integrity payload\n"); - kfree(buf); status = BLK_STS_RESOURCE; goto err_end_io; } From ce06fcb6a66d22b080385768113ee28bf3593984 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 26 Jun 2020 19:17:29 +0300 Subject: [PATCH 002/174] enetc: Fix tx rings bitmap iteration range, irq handling [ Upstream commit 0574e2000fc3103cbc69ba82ec1175ce171fdf5e ] The rings bitmap of an interrupt vector encodes which of the device's rings were assigned to that interrupt vector. Hence the iteration range of the tx rings bitmap (for_each_set_bit()) should be the total number of Tx rings of that netdevice instead of the number of rings assigned to the interrupt vector. Since there are 2 cores, and one interrupt vector for each core, the number of rings asigned to an interrupt vector is half the number of available rings. The impact of this error is that the upper half of the tx rings could still generate interrupts during napi polling. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index b6ff89307409..4ef4d41b0d8d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -254,7 +254,7 @@ static irqreturn_t enetc_msix(int irq, void *data) /* disable interrupts */ enetc_wr_reg(v->rbier, 0); - for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0); napi_schedule_irqoff(&v->napi); @@ -290,7 +290,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) /* enable interrupts */ enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE); - for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), ENETC_TBIER_TXTIE); From f060107ccc97619074faa6ee4cfe43664abcbd82 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 18 Jun 2020 12:13:22 +0200 Subject: [PATCH 003/174] geneve: allow changing DF behavior after creation [ Upstream commit 56c09de347e40804fc8dad155272fb9609e0a97b ] Currently, trying to change the DF parameter of a geneve device does nothing: # ip -d link show geneve1 14: geneve1: link/ether geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 # ip link set geneve1 type geneve id 1 df unset # ip -d link show geneve1 14: geneve1: link/ether geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 We just need to update the value in geneve_changelink. Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cac75c7d1d01..19d9d78a6df2 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1649,6 +1649,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], geneve->collect_md = metadata; geneve->use_udp6_rx_checksums = use_udp6_rx_checksums; geneve->ttl_inherit = ttl_inherit; + geneve->df = df; geneve_unquiesce(geneve, gs4, gs6); return 0; From 009b3e294a901cb0286ecda179d6e6f0d0ffaa65 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 18 Jun 2020 10:43:46 -0500 Subject: [PATCH 004/174] ibmveth: Fix max MTU limit [ Upstream commit 5948378b26d89f8aa5eac37629dbd0616ce8d7a7 ] The max MTU limit defined for ibmveth is not accounting for virtual ethernet buffer overhead, which is twenty-two additional bytes set aside for the ethernet header and eight additional bytes of an opaque handle reserved for use by the hypervisor. Update the max MTU to reflect this overhead. Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more drivers") Fixes: 110447f8269a ("ethernet: fix min/max MTU typos") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index c5be4ebd8437..aa32a5b04112 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1682,7 +1682,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->min_mtu = IBMVETH_MIN_MTU; - netdev->max_mtu = ETH_MAX_MTU; + netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); From fa0d7e09da1df4aa14555e9d79ff14e331c6a9ea Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 11 Jun 2020 15:57:50 +0800 Subject: [PATCH 005/174] mld: fix memory leak in ipv6_mc_destroy_dev() [ Upstream commit ea2fce88d2fd678ed9d45354ff49b73f1d5615dd ] Commit a84d01647989 ("mld: fix memory leak in mld_del_delrec()") fixed the memory leak of MLD, but missing the ipv6_mc_destroy_dev() path, in which mca_sources are leaked after ma_put(). Using ip6_mc_clear_src() to take care of the missing free. BUG: memory leak unreferenced object 0xffff8881113d3180 (size 64): comm "syz-executor071", pid 389, jiffies 4294887985 (age 17.943s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 ff 02 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 ................ backtrace: [<000000002cbc483c>] kmalloc include/linux/slab.h:555 [inline] [<000000002cbc483c>] kzalloc include/linux/slab.h:669 [inline] [<000000002cbc483c>] ip6_mc_add1_src net/ipv6/mcast.c:2237 [inline] [<000000002cbc483c>] ip6_mc_add_src+0x7f5/0xbb0 net/ipv6/mcast.c:2357 [<0000000058b8b1ff>] ip6_mc_source+0xe0c/0x1530 net/ipv6/mcast.c:449 [<000000000bfc4fb5>] do_ipv6_setsockopt.isra.12+0x1b2c/0x3b30 net/ipv6/ipv6_sockglue.c:754 [<00000000e4e7a722>] ipv6_setsockopt+0xda/0x150 net/ipv6/ipv6_sockglue.c:950 [<0000000029260d9a>] rawv6_setsockopt+0x45/0x100 net/ipv6/raw.c:1081 [<000000005c1b46f9>] __sys_setsockopt+0x131/0x210 net/socket.c:2132 [<000000008491f7db>] __do_sys_setsockopt net/socket.c:2148 [inline] [<000000008491f7db>] __se_sys_setsockopt net/socket.c:2145 [inline] [<000000008491f7db>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2145 [<00000000c7bc11c5>] do_syscall_64+0xa1/0x530 arch/x86/entry/common.c:295 [<000000005fb7a3f3>] entry_SYSCALL_64_after_hwframe+0x49/0xb3 Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index eaa4c2cc2fbb..c875c9b6edbe 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2618,6 +2618,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) idev->mc_list = i->next; write_unlock_bh(&idev->lock); + ip6_mc_clear_src(i); ma_put(i); write_lock_bh(&idev->lock); } From 57a976e676e1caff0614089fd9a61b3c111bd3c6 Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sun, 14 Jun 2020 09:19:17 +0200 Subject: [PATCH 006/174] mvpp2: ethtool rxtx stats fix [ Upstream commit cc970925feb9a38c2f0d34305518e00a3084ce85 ] The ethtool rx and tx queue statistics are reporting wrong values. Fix reading out the correct ones. Signed-off-by: Sven Auhagen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index cf5d447af7db..0f136f1af5d1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1541,7 +1541,7 @@ static void mvpp2_read_stats(struct mvpp2_port *port) for (q = 0; q < port->ntxqs; q++) for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++) *pstats++ += mvpp2_read_index(port->priv, - MVPP22_CTRS_TX_CTR(port->id, i), + MVPP22_CTRS_TX_CTR(port->id, q), mvpp2_ethtool_txq_regs[i].offset); /* Rxqs are numbered from 0 from the user standpoint, but not from the @@ -1550,7 +1550,7 @@ static void mvpp2_read_stats(struct mvpp2_port *port) for (q = 0; q < port->nrxqs; q++) for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++) *pstats++ += mvpp2_read_index(port->priv, - port->first_rxq + i, + port->first_rxq + q, mvpp2_ethtool_rxq_regs[i].offset); } From f32325b100eaf9d13063b193ca326ba4adf8310f Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Thu, 25 Jun 2020 14:26:03 +0200 Subject: [PATCH 007/174] net: bridge: enfore alignment for ethernet address [ Upstream commit db7202dec92e6caa2706c21d6fc359af318bde2e ] The eth_addr member is passed to ether_addr functions that require 2-byte alignment, therefore the member must be properly aligned to avoid unaligned accesses. The problem is in place since the initial merge of multicast to unicast: commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast Fixes: 6db6f0eae605 ("bridge: multicast to unicast") Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: David S. Miller Cc: Jakub Kicinski Cc: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: Thomas Martitz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ce2ab14ee605..cecb4223440e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -208,8 +208,8 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char eth_addr[ETH_ALEN] __aligned(2); unsigned char flags; - unsigned char eth_addr[ETH_ALEN]; }; struct net_bridge_mdb_entry { From 9f217d6dd79663efeb89d307ec3edf77116c6577 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 15:52:05 +0000 Subject: [PATCH 008/174] net: core: reduce recursion limit value [ Upstream commit fb7861d14c8d7edac65b2fcb6e8031cb138457b2 ] In the current code, ->ndo_start_xmit() can be executed recursively only 10 times because of stack memory. But, in the case of the vxlan, 10 recursion limit value results in a stack overflow. In the current code, the nested interface is limited by 8 depth. There is no critical reason that the recursion limitation value should be 10. So, it would be good to be the same value with the limitation value of nesting interface depth. Test commands: ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 ip link set vxlan10 up ip a a 192.168.10.1/24 dev vxlan10 ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent for i in {9..0} do let A=$i+1 ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 ip link set vxlan$i up ip a a 192.168.$i.1/24 dev vxlan$i ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self done hping3 192.168.10.2 -2 -d 60000 Splat looks like: [ 103.814237][ T1127] ============================================================================= [ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 [ 103.873187][ T1127] ----------------------------------------------------------------------------- [ 103.873187][ T1127] [ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 [ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 [ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 103.883006][ T1127] Call Trace: [ 103.883324][ T1127] dump_stack+0x96/0xdb [ 103.883716][ T1127] slab_err+0xad/0xd0 [ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 [ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 [ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 [ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.886316][ T1127] check_slab+0x97/0xb0 [ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 [ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 [ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 [ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.889244][ T1127] ? __slab_alloc+0x3e/0x80 [ 103.889675][ T1127] __slab_alloc+0x3e/0x80 [ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 [ ... ] Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b580a35f50ea..ec3081ab04c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3043,7 +3043,7 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 10 +#define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > From 9e693934cd59b67ba10014ae7a72cec9c0ea659c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Jun 2020 23:26:04 +0300 Subject: [PATCH 009/174] net: Do not clear the sock TX queue in sk_set_socket() [ Upstream commit 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 ] Clearing the sock TX queue in sk_set_socket() might cause unexpected out-of-order transmit when called from sock_orphan(), as outstanding packets can pick a different TX queue and bypass the ones already queued. This is undesired in general. More specifically, it breaks the in-order scheduling property guarantee for device-offloaded TLS sockets. Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it explicitly only where needed. Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 1 - net/core/sock.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6c5a3809483e..8263bbf756a2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1803,7 +1803,6 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk_tx_queue_clear(sk); sk->sk_socket = sock; } diff --git a/net/core/sock.c b/net/core/sock.c index 0adf7a9e5a90..0a2aef870d00 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1679,6 +1679,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); + sk_tx_queue_clear(sk); } return sk; @@ -1895,6 +1896,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) From 742f2358b3248832bf46745e5bd7a06dfa3aae53 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 16 Jun 2020 09:39:21 +0000 Subject: [PATCH 010/174] net: fix memleak in register_netdevice() [ Upstream commit 814152a89ed52c722ab92e9fbabcac3cb8a39245 ] I got a memleak report when doing some fuzz test: unreferenced object 0xffff888112584000 (size 13599): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 32 bytes): 74 61 70 30 00 00 00 00 00 00 00 00 00 00 00 00 tap0............ 00 ee d9 19 81 88 ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [<000000002f60ba65>] __kmalloc_node+0x309/0x3a0 [<0000000075b211ec>] kvmalloc_node+0x7f/0xc0 [<00000000d3a97396>] alloc_netdev_mqs+0x76/0xfc0 [<00000000609c3655>] __tun_chr_ioctl+0x1456/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff888111845cc0 (size 8): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 8 bytes): 74 61 70 30 00 88 ff ff tap0.... backtrace: [<000000004c159777>] kstrdup+0x35/0x70 [<00000000d8b496ad>] kstrdup_const+0x3d/0x50 [<00000000494e884a>] kvasprintf_const+0xf1/0x180 [<0000000097880a2b>] kobject_set_name_vargs+0x56/0x140 [<000000008fbdfc7b>] dev_set_name+0xab/0xe0 [<000000005b99e3b4>] netdev_register_kobject+0xc0/0x390 [<00000000602704fe>] register_netdevice+0xb61/0x1250 [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff88811886d800 (size 512): comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff c0 66 3d a3 ff ff ff ff .........f=..... backtrace: [<0000000050315800>] device_add+0x61e/0x1950 [<0000000021008dfb>] netdev_register_kobject+0x17e/0x390 [<00000000602704fe>] register_netdevice+0xb61/0x1250 [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 [<000000001127ca24>] ksys_ioctl+0xe5/0x130 [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 [<00000000e1023498>] do_syscall_64+0x56/0xa0 [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 If call_netdevice_notifiers() failed, then rollback_registered() calls netdev_unregister_kobject() which holds the kobject. The reference cannot be put because the netdev won't be add to todo list, so it will leads a memleak, we need put the reference to avoid memleak. Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 204d87e7c9b1..5b7d9c2b821d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9114,6 +9114,13 @@ int register_netdevice(struct net_device *dev) rcu_barrier(); dev->reg_state = NETREG_UNREGISTERED; + /* We should put the kobject that hold in + * netdev_unregister_kobject(), otherwise + * the net device cannot be freed when + * driver calls free_netdev(), because the + * kobject is being hold. + */ + kobject_put(&dev->dev.kobj); } /* * Prevent userspace races by waiting until the network From 97a1d2aa6cdd2a1e9bda8b25be09baf8f70de2ed Mon Sep 17 00:00:00 2001 From: guodeqing Date: Wed, 17 Jun 2020 10:07:16 +0800 Subject: [PATCH 011/174] net: Fix the arp error in some cases [ Upstream commit 5eea3a63ff4aba6a26002e657a6d21934b7e2b96 ] ie., $ ifconfig eth0 6.6.6.6 netmask 255.255.255.0 $ ip rule add from 6.6.6.6 table 6666 $ ip route add 9.9.9.9 via 6.6.6.6 $ ping -I 6.6.6.6 9.9.9.9 PING 9.9.9.9 (9.9.9.9) from 6.6.6.6 : 56(84) bytes of data. 3 packets transmitted, 0 received, 100% packet loss, time 2079ms $ arp Address HWtype HWaddress Flags Mask Iface 6.6.6.6 (incomplete) eth0 The arp request address is error, this is because fib_table_lookup in fib_check_nh lookup the destnation 9.9.9.9 nexthop, the scope of the fib result is RT_SCOPE_LINK,the correct scope is RT_SCOPE_HOST. Here I add a check of whether this is RT_TABLE_MAIN to solve this problem. Fixes: 3bfd847203c6 ("net: Use passed in table for nexthop lookups") Signed-off-by: guodeqing Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 01588eef0cee..b1b3220917ca 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1100,7 +1100,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - if (table) + if (table && table != RT_TABLE_MAIN) tbl = fib_get_table(net, table); if (tbl) From 67571b1ab2963dca6a031a08b785a1bc7c214433 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 22:23:25 -0700 Subject: [PATCH 012/174] net: increment xmit_recursion level in dev_direct_xmit() [ Upstream commit 0ad6f6e767ec2f613418cbc7ebe5ec4c35af540c ] Back in commit f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") Hannes added code so that IPv6 stack would not trust skb->sk for typical cases where packet goes through 'standard' xmit path (__dev_queue_xmit()) Alas af_packet had a dev_direct_xmit() path that was not dealing yet with xmit_recursion level. Also change sk_mc_loop() to dump a stack once only. Without this patch, syzbot was able to trigger : [1] [ 153.567378] WARNING: CPU: 7 PID: 11273 at net/core/sock.c:721 sk_mc_loop+0x51/0x70 [ 153.567378] Modules linked in: nfnetlink ip6table_raw ip6table_filter iptable_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv4 nf_defrag_ipv6 iptable_filter macsec macvtap tap macvlan 8021q hsr wireguard libblake2s blake2s_x86_64 libblake2s_generic udp_tunnel ip6_udp_tunnel libchacha20poly1305 poly1305_x86_64 chacha_x86_64 libchacha curve25519_x86_64 libcurve25519_generic netdevsim batman_adv dummy team bridge stp llc w1_therm wire i2c_mux_pca954x i2c_mux cdc_acm ehci_pci ehci_hcd mlx4_en mlx4_ib ib_uverbs ib_core mlx4_core [ 153.567386] CPU: 7 PID: 11273 Comm: b159172088 Not tainted 5.8.0-smp-DEV #273 [ 153.567387] RIP: 0010:sk_mc_loop+0x51/0x70 [ 153.567388] Code: 66 83 f8 0a 75 24 0f b6 4f 12 b8 01 00 00 00 31 d2 d3 e0 a9 bf ef ff ff 74 07 48 8b 97 f0 02 00 00 0f b6 42 3a 83 e0 01 5d c3 <0f> 0b b8 01 00 00 00 5d c3 0f b6 87 18 03 00 00 5d c0 e8 04 83 e0 [ 153.567388] RSP: 0018:ffff95c69bb93990 EFLAGS: 00010212 [ 153.567388] RAX: 0000000000000011 RBX: ffff95c6e0ee3e00 RCX: 0000000000000007 [ 153.567389] RDX: ffff95c69ae50000 RSI: ffff95c6c30c3000 RDI: ffff95c6c30c3000 [ 153.567389] RBP: ffff95c69bb93990 R08: ffff95c69a77f000 R09: 0000000000000008 [ 153.567389] R10: 0000000000000040 R11: 00003e0e00026128 R12: ffff95c6c30c3000 [ 153.567390] R13: ffff95c6cc4fd500 R14: ffff95c6f84500c0 R15: ffff95c69aa13c00 [ 153.567390] FS: 00007fdc3a283700(0000) GS:ffff95c6ff9c0000(0000) knlGS:0000000000000000 [ 153.567390] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 153.567391] CR2: 00007ffee758e890 CR3: 0000001f9ba20003 CR4: 00000000001606e0 [ 153.567391] Call Trace: [ 153.567391] ip6_finish_output2+0x34e/0x550 [ 153.567391] __ip6_finish_output+0xe7/0x110 [ 153.567391] ip6_finish_output+0x2d/0xb0 [ 153.567392] ip6_output+0x77/0x120 [ 153.567392] ? __ip6_finish_output+0x110/0x110 [ 153.567392] ip6_local_out+0x3d/0x50 [ 153.567392] ipvlan_queue_xmit+0x56c/0x5e0 [ 153.567393] ? ksize+0x19/0x30 [ 153.567393] ipvlan_start_xmit+0x18/0x50 [ 153.567393] dev_direct_xmit+0xf3/0x1c0 [ 153.567393] packet_direct_xmit+0x69/0xa0 [ 153.567394] packet_sendmsg+0xbf0/0x19b0 [ 153.567394] ? plist_del+0x62/0xb0 [ 153.567394] sock_sendmsg+0x65/0x70 [ 153.567394] sock_write_iter+0x93/0xf0 [ 153.567394] new_sync_write+0x18e/0x1a0 [ 153.567395] __vfs_write+0x29/0x40 [ 153.567395] vfs_write+0xb9/0x1b0 [ 153.567395] ksys_write+0xb1/0xe0 [ 153.567395] __x64_sys_write+0x1a/0x20 [ 153.567395] do_syscall_64+0x43/0x70 [ 153.567396] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 153.567396] RIP: 0033:0x453549 [ 153.567396] Code: Bad RIP value. [ 153.567396] RSP: 002b:00007fdc3a282cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 153.567397] RAX: ffffffffffffffda RBX: 00000000004d32d0 RCX: 0000000000453549 [ 153.567397] RDX: 0000000000000020 RSI: 0000000020000300 RDI: 0000000000000003 [ 153.567398] RBP: 00000000004d32d8 R08: 0000000000000000 R09: 0000000000000000 [ 153.567398] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d32dc [ 153.567398] R13: 00007ffee742260f R14: 00007fdc3a282dc0 R15: 00007fdc3a283700 [ 153.567399] ---[ end trace c1d5ae2b1059ec62 ]--- f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 ++ net/core/sock.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5b7d9c2b821d..727965565d31 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3832,10 +3832,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_disable(); + dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); + dev_xmit_recursion_dec(); local_bh_enable(); diff --git a/net/core/sock.c b/net/core/sock.c index 0a2aef870d00..078791a6649a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -709,7 +709,7 @@ bool sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - WARN_ON(1); + WARN_ON_ONCE(1); return true; } EXPORT_SYMBOL(sk_mc_loop); From 27b70214fc6995087c76a24cc3ff2742a23aff5d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 15 Jun 2020 10:54:56 +0800 Subject: [PATCH 013/174] net: usb: ax88179_178a: fix packet alignment padding [ Upstream commit e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 ] Using a AX88179 device (0b95:1790), I see two bytes of appended data on every RX packet. For example, this 48-byte ping, using 0xff as a payload byte: 04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64 0x0000: 000a cd35 ea50 000a cd35 ea4f 0800 4500 0x0010: 0054 c116 4000 4001 f63e c0a8 0101 c0a8 0x0020: 0102 0800 b633 098f 0001 87ea cd5e 0000 0x0030: 0000 dcf2 0600 0000 0000 ffff ffff ffff 0x0040: ffff ffff ffff ffff ffff ffff ffff ffff 0x0050: ffff ffff ffff ffff ffff ffff ffff ffff 0x0060: ffff 961f Those last two bytes - 96 1f - aren't part of the original packet. In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte 'alignment pseudo header' from the start of the packet, and sets the length from a per-packet field populated by hardware. It looks like that length field *includes* the 2-byte header; the current driver assumes that it's excluded. This change trims the 2-byte alignment header after we've set the packet length, so the resulting packet length is correct. While we're moving the comment around, this also fixes the spelling of 'pseudo'. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index daa54486ab09..df2f7cc6dc03 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1387,10 +1387,10 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) } if (pkt_cnt == 0) { - /* Skip IP alignment psudo header */ - skb_pull(skb, 2); skb->len = pkt_len; - skb_set_tail_pointer(skb, pkt_len); + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + skb_set_tail_pointer(skb, skb->len); skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(skb, pkt_hdr); return 1; @@ -1399,8 +1399,9 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) ax_skb = skb_clone(skb, GFP_ATOMIC); if (ax_skb) { ax_skb->len = pkt_len; - ax_skb->data = skb->data + 2; - skb_set_tail_pointer(ax_skb, pkt_len); + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + skb_set_tail_pointer(ax_skb, ax_skb->len); ax_skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); From a908f986ddf7d989fda232c5e7cecbf9f6e5165b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 23 Jun 2020 18:33:15 +0200 Subject: [PATCH 014/174] openvswitch: take into account de-fragmentation/gso_size in execute_check_pkt_len [ Upstream commit 17843655708e1941c0653af3cd61be6948e36f43 ] ovs connection tracking module performs de-fragmentation on incoming fragmented traffic. Take info account if traffic has been de-fragmented in execute_check_pkt_len action otherwise we will perform the wrong nested action considering the original packet size. This issue typically occurs if ovs-vswitchd adds a rule in the pipeline that requires connection tracking (e.g. OVN stateful ACLs) before execute_check_pkt_len action. Moreover take into account GSO fragment size for GSO packet in execute_check_pkt_len routine Fixes: 4d5ec89fc8d14 ("net: openvswitch: Add a new action check_pkt_len") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/actions.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 99352f09deaa..3d96dab10449 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1146,9 +1146,10 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, bool last) { + struct ovs_skb_cb *ovs_cb = OVS_CB(skb); const struct nlattr *actions, *cpl_arg; + int len, max_len, rem = nla_len(attr); const struct check_pkt_len_arg *arg; - int rem = nla_len(attr); bool clone_flow_key; /* The first netlink attribute in 'attr' is always @@ -1157,7 +1158,11 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, cpl_arg = nla_data(attr); arg = nla_data(cpl_arg); - if (skb->len <= arg->pkt_len) { + len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len; + max_len = arg->pkt_len; + + if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) || + len <= max_len) { /* Second netlink attribute in 'attr' is always * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. */ From 6956830cf9817f934fefa5df8bd6b1a18b83d5e7 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Fri, 12 Jun 2020 15:27:55 -0500 Subject: [PATCH 015/174] rocker: fix incorrect error handling in dma_rings_init [ Upstream commit 58d0c864e1a759a15c9df78f50ea5a5c32b3989e ] In rocker_dma_rings_init, the goto blocks in case of errors caused by the functions rocker_dma_cmd_ring_waits_alloc() and rocker_dma_ring_create() are incorrect. The patch fixes the order consistent with cleanup in rocker_dma_rings_fini(). Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/rocker/rocker_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 786b158bd305..5abb3f9684ff 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -647,10 +647,10 @@ static int rocker_dma_rings_init(struct rocker *rocker) err_dma_event_ring_bufs_alloc: rocker_dma_ring_destroy(rocker, &rocker->event_ring); err_dma_event_ring_create: + rocker_dma_cmd_ring_waits_free(rocker); +err_dma_cmd_ring_waits_alloc: rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, PCI_DMA_BIDIRECTIONAL); -err_dma_cmd_ring_waits_alloc: - rocker_dma_cmd_ring_waits_free(rocker); err_dma_cmd_ring_bufs_alloc: rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); return err; From fea864489c90a07e7e84f8f6f5c531b313ad6593 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Jun 2020 23:38:16 +0100 Subject: [PATCH 016/174] rxrpc: Fix notification call on completion of discarded calls [ Upstream commit 0041cd5a50442db6e456b145892a0eaf2dff061f ] When preallocated service calls are being discarded, they're passed to ->discard_new_call() to have the caller clean up any attached higher-layer preallocated pieces before being marked completed. However, the act of marking them completed now invokes the call's notification function - which causes a problem because that function might assume that the previously freed pieces of memory are still there. Fix this by setting a dummy notification function on the socket after calling ->discard_new_call(). This results in the following kasan message when the kafs module is removed. ================================================================== BUG: KASAN: use-after-free in afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 Write of size 1 at addr ffff8880946c39e4 by task kworker/u4:1/21 CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.8.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x18f/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 rxrpc_notify_socket+0x1db/0x5d0 net/rxrpc/recvmsg.c:40 __rxrpc_set_call_completion.part.0+0x172/0x410 net/rxrpc/recvmsg.c:76 __rxrpc_call_completed net/rxrpc/recvmsg.c:112 [inline] rxrpc_call_completed+0xca/0xf0 net/rxrpc/recvmsg.c:111 rxrpc_discard_prealloc+0x781/0xab0 net/rxrpc/call_accept.c:233 rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 process_one_work+0x965/0x1690 kernel/workqueue.c:2269 worker_thread+0x96/0xe10 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Allocated by task 6820: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467 kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] afs_alloc_call+0x55/0x630 fs/afs/rxrpc.c:141 afs_charge_preallocation+0xe9/0x2d0 fs/afs/rxrpc.c:757 afs_open_socket+0x292/0x360 fs/afs/rxrpc.c:92 afs_net_init+0xa6c/0xe30 fs/afs/main.c:125 ops_init+0xaf/0x420 net/core/net_namespace.c:151 setup_net+0x2de/0x860 net/core/net_namespace.c:341 copy_net_ns+0x293/0x590 net/core/net_namespace.c:482 create_new_namespaces+0x3fb/0xb30 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231 ksys_unshare+0x43d/0x8e0 kernel/fork.c:2983 __do_sys_unshare kernel/fork.c:3051 [inline] __se_sys_unshare kernel/fork.c:3049 [inline] __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3049 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 21: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x109/0x2b0 mm/slab.c:3757 afs_put_call+0x585/0xa40 fs/afs/rxrpc.c:190 rxrpc_discard_prealloc+0x764/0xab0 net/rxrpc/call_accept.c:230 rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 process_one_work+0x965/0x1690 kernel/workqueue.c:2269 worker_thread+0x96/0xe10 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff8880946c3800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 484 bytes inside of 1024-byte region [ffff8880946c3800, ffff8880946c3c00) The buggy address belongs to the page: page:ffffea000251b0c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002546508 ffffea00024fa248 ffff8880aa000c40 raw: 0000000000000000 ffff8880946c3000 0000000100000002 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880946c3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880946c3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880946c3980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880946c3a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880946c3a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: syzbot+d3eccef36ddbd02713e9@syzkaller.appspotmail.com Fixes: 5ac0d62226a0 ("rxrpc: Fix missing notification") Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_accept.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index b7611cc159e5..032ed76c0166 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -22,6 +22,11 @@ #include #include "ar-internal.h" +static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID) +{ +} + /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. @@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + if (call->notify_rx) + call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); From dc43f7e807a8bd9cc7deecf1980fcb3349477646 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 24 Jun 2020 17:34:18 -0300 Subject: [PATCH 017/174] sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket [ Upstream commit 471e39df96b9a4c4ba88a2da9e25a126624d7a9c ] If a socket is set ipv6only, it will still send IPv4 addresses in the INIT and INIT_ACK packets. This potentially misleads the peer into using them, which then would cause association termination. The fix is to not add IPv4 addresses to ipv6only sockets. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Corey Minyard Signed-off-by: Marcelo Ricardo Leitner Tested-by: Corey Minyard Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/constants.h | 8 +++++--- net/sctp/associola.c | 5 ++++- net/sctp/bind_addr.c | 1 + net/sctp/protocol.c | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 823afc42a3aa..06e1deeef464 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -341,11 +341,13 @@ enum { ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ -#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by +#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by local sock family */ -#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by +#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by + local sock family */ +#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by peer */ -#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by +#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by peer */ /* Reasons to retransmit. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 41839b85c268..fb6f62264e87 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1569,12 +1569,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, enum sctp_scope scope, gfp_t gfp) { + struct sock *sk = asoc->base.sk; int flags; /* Use scoping rules to determine the subset of addresses from * the endpoint. */ - flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + if (!inet_v6_ipv6only(sk)) + flags |= SCTP_ADDR4_ALLOWED; if (asoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (asoc->peer.ipv6_address) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 53bc61537f44..701c5a4e441d 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, * well as the remote peer. */ if ((((AF_INET == addr->sa.sa_family) && + (flags & SCTP_ADDR4_ALLOWED) && (flags & SCTP_ADDR4_PEERSUPP))) || (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 237c88eeb538..981c7cbca46a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, * sock as well as the remote peer. */ if (addr->a.sa.sa_family == AF_INET && - !(copy_flags & SCTP_ADDR4_PEERSUPP)) + (!(copy_flags & SCTP_ADDR4_ALLOWED) || + !(copy_flags & SCTP_ADDR4_PEERSUPP))) continue; if (addr->a.sa.sa_family == AF_INET6 && (!(copy_flags & SCTP_ADDR6_ALLOWED) || From cb22ce3346f2167b532f113df959d466e288aa3c Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 25 Jun 2020 14:51:06 +0300 Subject: [PATCH 018/174] tcp: don't ignore ECN CWR on pure ACK [ Upstream commit 2570284060b48f3f79d8f1a2698792f36c385e9a ] there is a problem with the CWR flag set in an incoming ACK segment and it leads to the situation when the ECE flag is latched forever the following packetdrill script shows what happens: // Stack receives incoming segments with CE set +0.1 <[ect0] . 11001:12001(1000) ack 1001 win 65535 +0.0 <[ce] . 12001:13001(1000) ack 1001 win 65535 +0.0 <[ect0] P. 13001:14001(1000) ack 1001 win 65535 // Stack repsonds with ECN ECHO +0.0 >[noecn] . 1001:1001(0) ack 12001 +0.0 >[noecn] E. 1001:1001(0) ack 13001 +0.0 >[noecn] E. 1001:1001(0) ack 14001 // Write a packet +0.1 write(3, ..., 1000) = 1000 +0.0 >[ect0] PE. 1001:2001(1000) ack 14001 // Pure ACK received +0.01 <[noecn] W. 14001:14001(0) ack 2001 win 65535 // Since CWR was sent, this packet should NOT have ECE set +0.1 write(3, ..., 1000) = 1000 +0.0 >[ect0] P. 2001:3001(1000) ack 14001 // but Linux will still keep ECE latched here, with packetdrill // flagging a missing ECE flag, expecting // >[ect0] PE. 2001:3001(1000) ack 14001 // in the script In the situation above we will continue to send ECN ECHO packets and trigger the peer to reduce the congestion window. To avoid that we can check CWR on pure ACKs received. v3: - Add a sequence check to avoid sending an ACK to an ACK v2: - Adjusted the comment - move CWR check before checking for unacknowledged packets Signed-off-by: Denis Kirjanov Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 677facbeed26..cc8411c98f28 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -260,7 +260,8 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) * cwnd may be very low (even just 1 packet), so we should ACK * immediately. */ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } } @@ -3682,6 +3683,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, ack_ev_flags); } + /* This is a deviation from RFC3168 since it states that: + * "When the TCP data sender is ready to set the CWR bit after reducing + * the congestion window, it SHOULD set the CWR bit only on the first + * new data packet that it transmits." + * We accept CWR on pure ACKs to be more robust + * with widely-deployed TCP implementations that do this. + */ + tcp_ecn_accept_cwr(sk, skb); + /* We passed data and got it acked, remove any soft error * log. Something worked... */ @@ -4771,8 +4781,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); - tcp_ecn_accept_cwr(sk, skb); - tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. From fe3a5d8fc372285a292af315ea1ea75a7cafc8ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2020 20:37:07 -0700 Subject: [PATCH 019/174] tcp: grow window for OOO packets only for SACK flows [ Upstream commit 662051215c758ae8545451628816204ed6cd372d ] Back in 2013, we made a change that broke fast retransmit for non SACK flows. Indeed, for these flows, a sender needs to receive three duplicate ACK before starting fast retransmit. Sending ACK with different receive window do not count. Even if enabling SACK is strongly recommended these days, there still are some cases where it has to be disabled. Not increasing the window seems better than having to rely on RTO. After the fix, following packetdrill test gives : // Initialize connection 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +0 < . 1:1(0) ack 1 win 514 +0 accept(3, ..., ...) = 4 +0 < . 1:1001(1000) ack 1 win 514 // Quick ack +0 > . 1:1(0) ack 1001 win 264 +0 < . 2001:3001(1000) ack 1 win 514 // DUPACK : Normally we should not change the window +0 > . 1:1(0) ack 1001 win 264 +0 < . 3001:4001(1000) ack 1 win 514 // DUPACK : Normally we should not change the window +0 > . 1:1(0) ack 1001 win 264 +0 < . 4001:5001(1000) ack 1 win 514 // DUPACK : Normally we should not change the window +0 > . 1:1(0) ack 1001 win 264 +0 < . 1001:2001(1000) ack 1 win 514 // Hole is repaired. +0 > . 1:1(0) ack 5001 win 272 Fixes: 4e4f1fc22681 ("tcp: properly increase rcv_ssthresh for ofo packets") Signed-off-by: Eric Dumazet Reported-by: Venkat Venkatsubra Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc8411c98f28..3e63dc9c3eba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4597,7 +4597,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { coalesce_done: - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); skb = NULL; goto add_sack; @@ -4681,7 +4685,11 @@ add_sack: tcp_sack_new_ofo_skb(sk, seq, end_seq); end: if (skb) { - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); skb_condense(skb); skb_set_owner_r(skb, sk); } From 35db638692db1e696855b78897e363ef150306d0 Mon Sep 17 00:00:00 2001 From: David Christensen Date: Wed, 17 Jun 2020 11:51:17 -0700 Subject: [PATCH 020/174] tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes [ Upstream commit 3a2656a211caf35e56afc9425e6e518fa52f7fbc ] The driver function tg3_io_error_detected() calls napi_disable twice, without an intervening napi_enable, when the number of EEH errors exceeds eeh_max_freezes, resulting in an indefinite sleep while holding rtnl_lock. Add check for pcierr_recovery which skips code already executed for the "Frozen" state. Signed-off-by: David Christensen Reviewed-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ca3aa1250dd1..e12ba81288e6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -18176,8 +18176,8 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, rtnl_lock(); - /* We probably don't have netdev yet */ - if (!netdev || !netif_running(netdev)) + /* Could be second call or maybe we don't have netdev yet */ + if (!netdev || tp->pcierr_recovery || !netif_running(netdev)) goto done; /* We needn't recover from permanent error */ From 568c5aaf6c2d922f44a7431a4155646f5d7253d2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 16:04:00 +0000 Subject: [PATCH 021/174] ip6_gre: fix use-after-free in ip6gre_tunnel_lookup() [ Upstream commit dafabb6590cb15f300b77c095d50312e2c7c8e0f ] In the datapath, the ip6gre_tunnel_lookup() is used and it internally uses fallback tunnel device pointer, which is fb_tunnel_dev. This pointer variable should be set to NULL when a fb interface is deleted. But there is no routine to set fb_tunnel_dev pointer to NULL. So, this pointer will be still used after interface is deleted and it eventually results in the use-after-free problem. Test commands: ip netns add A ip netns add B ip link add eth0 type veth peer name eth1 ip link set eth0 netns A ip link set eth1 netns B ip netns exec A ip link set lo up ip netns exec A ip link set eth0 up ip netns exec A ip link add ip6gre1 type ip6gre local fc:0::1 \ remote fc:0::2 ip netns exec A ip -6 a a fc:100::1/64 dev ip6gre1 ip netns exec A ip link set ip6gre1 up ip netns exec A ip -6 a a fc:0::1/64 dev eth0 ip netns exec A ip link set ip6gre0 up ip netns exec B ip link set lo up ip netns exec B ip link set eth1 up ip netns exec B ip link add ip6gre1 type ip6gre local fc:0::2 \ remote fc:0::1 ip netns exec B ip -6 a a fc:100::2/64 dev ip6gre1 ip netns exec B ip link set ip6gre1 up ip netns exec B ip -6 a a fc:0::2/64 dev eth1 ip netns exec B ip link set ip6gre0 up ip netns exec A ping fc:100::2 -s 60000 & ip netns del B Splat looks like: [ 73.087285][ C1] BUG: KASAN: use-after-free in ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.088361][ C1] Read of size 4 at addr ffff888040559218 by task ping/1429 [ 73.089317][ C1] [ 73.089638][ C1] CPU: 1 PID: 1429 Comm: ping Not tainted 5.7.0+ #602 [ 73.090531][ C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 73.091725][ C1] Call Trace: [ 73.092160][ C1] [ 73.092556][ C1] dump_stack+0x96/0xdb [ 73.093122][ C1] print_address_description.constprop.6+0x2cc/0x450 [ 73.094016][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.094894][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.095767][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.096619][ C1] kasan_report+0x154/0x190 [ 73.097209][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.097989][ C1] ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] [ 73.098750][ C1] ? gre_del_protocol+0x60/0x60 [gre] [ 73.099500][ C1] gre_rcv+0x1c5/0x1450 [ip6_gre] [ 73.100199][ C1] ? ip6gre_header+0xf00/0xf00 [ip6_gre] [ 73.100985][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 73.101830][ C1] ? ip6_input_finish+0x5/0xf0 [ 73.102483][ C1] ip6_protocol_deliver_rcu+0xcbb/0x1510 [ 73.103296][ C1] ip6_input_finish+0x5b/0xf0 [ 73.103920][ C1] ip6_input+0xcd/0x2c0 [ 73.104473][ C1] ? ip6_input_finish+0xf0/0xf0 [ 73.105115][ C1] ? rcu_read_lock_held+0x90/0xa0 [ 73.105783][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 73.106548][ C1] ipv6_rcv+0x1f1/0x300 [ ... ] Suggested-by: Eric Dumazet Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9ec05a1df5e1..04d76f043e18 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; + struct net_device *ndev; for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || @@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (t && t->dev->flags & IFF_UP) return t; - dev = ign->fb_tunnel_dev; - if (dev && dev->flags & IFF_UP) - return netdev_priv(dev); + ndev = READ_ONCE(ign->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); + if (ign->fb_tunnel_dev == dev) + WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); dev_put(dev); } From 9baf076d797f8b593b0f912677407de48a82dad4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 19 Jun 2020 11:47:47 -0700 Subject: [PATCH 022/174] net: phy: Check harder for errors in get_phy_id() [ Upstream commit b2ffc75e2e990b09903f9d15ccd53bc5f3a4217c ] Commit 02a6efcab675 ("net: phy: allow scanning busses with missing phys") added a special condition to return -ENODEV in case -ENODEV or -EIO was returned from the first read of the MII_PHYSID1 register. In case the MDIO bus data line pull-up is not strong enough, the MDIO bus controller will not flag this as a read error. This can happen when a pluggable daughter card is not connected and weak internal pull-ups are used (since that is the only option, otherwise the pins are floating). The second read of MII_PHYSID2 will be correctly flagged an error though, but now we will return -EIO which will be treated as a hard error, thus preventing MDIO bus scanning loops to continue succesfully. Apply the same logic to both register reads, thus allowing the scanning logic to proceed. Fixes: 02a6efcab675 ("net: phy: allow scanning busses with missing phys") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0907c3d8d94a..dba52a5c378a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -797,8 +797,10 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); - if (phy_reg < 0) - return -EIO; + if (phy_reg < 0) { + /* returning -ENODEV doesn't stop bus scanning */ + return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; + } *phy_id |= phy_reg; From 3c6208267218d4e04692c9af2501570eaec28e2a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 16:51:51 +0000 Subject: [PATCH 023/174] ip_tunnel: fix use-after-free in ip_tunnel_lookup() [ Upstream commit ba61539c6ae57f4146284a5cb4f7b7ed8d42bf45 ] In the datapath, the ip_tunnel_lookup() is used and it internally uses fallback tunnel device pointer, which is fb_tunnel_dev. This pointer variable should be set to NULL when a fb interface is deleted. But there is no routine to set fb_tunnel_dev pointer to NULL. So, this pointer will be still used after interface is deleted and it eventually results in the use-after-free problem. Test commands: ip netns add A ip netns add B ip link add eth0 type veth peer name eth1 ip link set eth0 netns A ip link set eth1 netns B ip netns exec A ip link set lo up ip netns exec A ip link set eth0 up ip netns exec A ip link add gre1 type gre local 10.0.0.1 \ remote 10.0.0.2 ip netns exec A ip link set gre1 up ip netns exec A ip a a 10.0.100.1/24 dev gre1 ip netns exec A ip a a 10.0.0.1/24 dev eth0 ip netns exec B ip link set lo up ip netns exec B ip link set eth1 up ip netns exec B ip link add gre1 type gre local 10.0.0.2 \ remote 10.0.0.1 ip netns exec B ip link set gre1 up ip netns exec B ip a a 10.0.100.2/24 dev gre1 ip netns exec B ip a a 10.0.0.2/24 dev eth1 ip netns exec A hping3 10.0.100.2 -2 --flood -d 60000 & ip netns del B Splat looks like: [ 77.793450][ C3] ================================================================== [ 77.794702][ C3] BUG: KASAN: use-after-free in ip_tunnel_lookup+0xcc4/0xf30 [ 77.795573][ C3] Read of size 4 at addr ffff888060bd9c84 by task hping3/2905 [ 77.796398][ C3] [ 77.796664][ C3] CPU: 3 PID: 2905 Comm: hping3 Not tainted 5.8.0-rc1+ #616 [ 77.797474][ C3] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 77.798453][ C3] Call Trace: [ 77.798815][ C3] [ 77.799142][ C3] dump_stack+0x9d/0xdb [ 77.799605][ C3] print_address_description.constprop.7+0x2cc/0x450 [ 77.800365][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.800908][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.801517][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.802145][ C3] kasan_report+0x154/0x190 [ 77.802821][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 [ 77.803503][ C3] ip_tunnel_lookup+0xcc4/0xf30 [ 77.804165][ C3] __ipgre_rcv+0x1ab/0xaa0 [ip_gre] [ 77.804862][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 77.805621][ C3] gre_rcv+0x304/0x1910 [ip_gre] [ 77.806293][ C3] ? lock_acquire+0x1a9/0x870 [ 77.806925][ C3] ? gre_rcv+0xfe/0x354 [gre] [ 77.807559][ C3] ? erspan_xmit+0x2e60/0x2e60 [ip_gre] [ 77.808305][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 77.809032][ C3] ? rcu_read_lock_held+0x90/0xa0 [ 77.809713][ C3] gre_rcv+0x1b8/0x354 [gre] [ ... ] Suggested-by: Eric Dumazet Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cd4b84310d92..a0b4dc54f8a6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 remote, __be32 local, __be32 key) { - unsigned int hash; struct ip_tunnel *t, *cand = NULL; struct hlist_head *head; + struct net_device *ndev; + unsigned int hash; hash = ip_tunnel_hash(key, remote); head = &itn->tunnels[hash]; @@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (t && t->dev->flags & IFF_UP) return t; - if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(itn->fb_tunnel_dev); + ndev = READ_ONCE(itn->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -1245,9 +1247,9 @@ void ip_tunnel_uninit(struct net_device *dev) struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); - /* fb_tunnel_dev will be unregisted in net-exit call. */ - if (itn->fb_tunnel_dev != dev) - ip_tunnel_del(itn, netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); + if (itn->fb_tunnel_dev == dev) + WRITE_ONCE(itn->fb_tunnel_dev, NULL); dst_cache_reset(&tunnel->dst_cache); } From ea2628dd586d23e1412baced76b519a2f1c66d1c Mon Sep 17 00:00:00 2001 From: Ilya Ponetayev Date: Thu, 25 Jun 2020 22:12:07 +0200 Subject: [PATCH 024/174] sch_cake: don't try to reallocate or unshare skb unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9208d2863ac689a563b92f2161d8d1e7127d0add ] cake_handle_diffserv() tries to linearize mac and network header parts of skb and to make it writable unconditionally. In some cases it leads to full skb reallocation, which reduces throughput and increases CPU load. Some measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable() reallocates skb, if skb was allocated in ethernet driver via so-called 'build skb' method from page cache (it was discovered by strange increase of kmalloc-2048 slab at first). Obtain DSCP value via read-only skb_header_pointer() call, and leave linearization only for DSCP bleaching or ECN CE setting. And, as an additional optimisation, skip diffserv parsing entirely if it is not needed by the current configuration. Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") Signed-off-by: Ilya Ponetayev [ fix a few style issues, reflow commit message ] Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 2277369feae5..8020d0829f1a 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1517,30 +1517,49 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { - int wlen = skb_network_offset(skb); + const int offset = skb_network_offset(skb); + u16 *buf, buf_; u8 dscp; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - if (wash && dscp) + /* ToS is in the second byte of iphdr */ + dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct iphdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - if (wash && dscp) + /* Traffic class is in the first and second bytes of ipv6hdr */ + dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_ARP): From b1aa7e5fa163f491546a536e29643b1d04f8287c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 25 Jun 2020 22:12:08 +0200 Subject: [PATCH 025/174] sch_cake: don't call diffserv parsing code when it is not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c ] As a further optimisation of the diffserv parsing codepath, we can skip it entirely if CAKE is configured to neither use diffserv-based classification, nor to zero out the diffserv bits. Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 8020d0829f1a..e1b93ff79ab5 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1515,7 +1515,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) return idx + (tin << 16); } -static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) +static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) { const int offset = skb_network_offset(skb); u16 *buf, buf_; @@ -1576,14 +1576,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); u32 tin, mark; + bool wash; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding - * using firewall marks or skb->priority. + * using firewall marks or skb->priority. Call DSCP parsing early if + * wash is enabled, otherwise defer to below to skip unneeded parsing. */ - dscp = cake_handle_diffserv(skb, - q->rate_flags & CAKE_FLAG_WASH); mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; + wash = !!(q->rate_flags & CAKE_FLAG_WASH); + if (wash) + dscp = cake_handle_diffserv(skb, wash); if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; @@ -1597,6 +1600,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; else { + if (!wash) + dscp = cake_handle_diffserv(skb, wash); tin = q->tin_index[dscp]; if (unlikely(tin >= q->tin_cnt)) From 942315134313e6bf33e03039e1e4eb64fec91647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 25 Jun 2020 22:12:09 +0200 Subject: [PATCH 026/174] sch_cake: fix a few style nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f608f0c41360b11b04c763f348b712f651c8bac ] I spotted a few nits when comparing the in-tree version of sch_cake with the out-of-tree one: A redundant error variable declaration shadowing an outer declaration, and an indentation alignment issue. Fix both of these. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e1b93ff79ab5..5d605bab9afc 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2703,7 +2703,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, qdisc_watchdog_init(&q->watchdog, sch); if (opt) { - int err = cake_change(sch, opt, extack); + err = cake_change(sch, opt, extack); if (err) return err; @@ -3020,7 +3020,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + flow->cvars.blue_timer))); } if (flow->cvars.dropping) { PUT_STAT_S32(DROP_NEXT_US, From 052a7fdd86fba7820618717571df6bb23018a5c1 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 24 Jun 2020 12:42:02 -0400 Subject: [PATCH 027/174] tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT [ Upstream commit b344579ca8478598937215f7005d6c7b84d28aee ] Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an ACK when the minimum rtt of a connection goes down. From inspection it is clear from the existing code that this could happen in an example like the following: o The first 8 RTT samples in a round trip are 150ms, resulting in a curr_rtt of 150ms and a delay_min of 150ms. o The 9th RTT sample is 100ms. The curr_rtt does not change after the first 8 samples, so curr_rtt remains 150ms. But delay_min can be lowered at any time, so delay_min falls to 100ms. The code executes the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min of 100ms, and the curr_rtt is declared far enough above delay_min to force a (spurious) exit of Slow start. The fix here is simple: allow every RTT sample in a round trip to lower the curr_rtt. Fixes: ae27e98a5152 ("[TCP] CUBIC v2.3") Reported-by: Mirja Kuehlewind Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_cubic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 1b3d032a4df2..ee6c38a73325 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -404,6 +404,8 @@ static void hystart_update(struct sock *sk, u32 delay) if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { if (ca->curr_rtt == 0 || ca->curr_rtt > delay) ca->curr_rtt = delay; From 8ae850cddf27b7b354b6abe207c5b7c05d615a53 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 30 Apr 2020 13:31:58 +0200 Subject: [PATCH 028/174] Revert "i2c: tegra: Fix suspending in active runtime PM state" [ Upstream commit 78ad73421831247e46c31899a7bead02740e4bef ] This reverts commit 9f42de8d4ec2304f10bbc51dc0484f3503d61196. It's not safe to use pm_runtime_force_{suspend,resume}(), especially during the noirq phase of suspend. See also the guidance provided in commit 1e2ef05bb8cf ("PM: Limit race conditions between runtime PM and system sleep (v2)"). Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-tegra.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index dbc43cfec19d..331f7cca9bab 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1719,14 +1719,9 @@ static int tegra_i2c_remove(struct platform_device *pdev) static int __maybe_unused tegra_i2c_suspend(struct device *dev) { struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); - int err; i2c_mark_adapter_suspended(&i2c_dev->adapter); - err = pm_runtime_force_suspend(dev); - if (err < 0) - return err; - return 0; } @@ -1747,10 +1742,6 @@ static int __maybe_unused tegra_i2c_resume(struct device *dev) if (err) return err; - err = pm_runtime_force_resume(dev); - if (err < 0) - return err; - i2c_mark_adapter_resumed(&i2c_dev->adapter); return 0; From 9d3d40ec7dee70838cf5c48773dcbcddced2485a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 25 Jun 2020 17:03:10 -0400 Subject: [PATCH 029/174] btrfs: fix a block group ref counter leak after failure to remove block group [ Upstream commit 9fecd13202f520f3f25d5b1c313adb740fe19773 ] When removing a block group, if we fail to delete the block group's item from the extent tree, we jump to the 'out' label and end up decrementing the block group's reference count once only (by 1), resulting in a counter leak because the block group at that point was already removed from the block group cache rbtree - so we have to decrement the reference count twice, once for the rbtree and once for our lookup at the start of the function. There is a second bug where if removing the free space tree entries (the call to remove_block_group_free_space()) fails we end up jumping to the 'out_put_group' label but end up decrementing the reference count only once, when we should have done it twice, since we have already removed the block group from the block group cache rbtree. This happens because the reference count decrement for the rbtree reference happens after attempting to remove the free space tree entries, which is far away from the place where we remove the block group from the rbtree. To make things less error prone, decrement the reference count for the rbtree immediately after removing the block group from it. This also eleminates the need for two different exit labels on error, renaming 'out_put_label' to just 'out' and removing the old 'out'. Fixes: f6033c5e333238 ("btrfs: fix block group leak when removing fails") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-group.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c2dd94e1b274..42d69e77f89d 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -910,7 +910,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out_put_group; + goto out; } /* @@ -948,7 +948,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out_put_group; + goto out; } clear_nlink(inode); /* One for the block groups ref */ @@ -971,13 +971,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out_put_group; + goto out; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out_put_group; + goto out; btrfs_release_path(path); } @@ -986,6 +986,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->key.objectid) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1094,10 +1097,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, block_group); if (ret) - goto out_put_group; - - /* Once for the block groups rbtree */ - btrfs_put_block_group(block_group); + goto out; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) @@ -1120,10 +1120,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, free_extent_map(em); } -out_put_group: +out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); -out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); From 32e5a15f10847905eb7932f2a837a9e664dba665 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Tue, 9 Jun 2020 22:11:54 +0200 Subject: [PATCH 030/174] net: sched: export __netdev_watchdog_up() [ Upstream commit 1a3db27ad9a72d033235b9673653962c02e3486e ] Since the quiesce/activate rework, __netdev_watchdog_up() is directly called in the ucc_geth driver. Unfortunately, this function is not available for modules and thus ucc_geth cannot be built as a module anymore. Fix it by exporting __netdev_watchdog_up(). Since the commit introducing the regression was backported to stable branches, this one should ideally be as well. Fixes: 79dde73cf9bc ("net/ethernet/freescale: rework quiesce/activate for ucc_geth") Signed-off-by: Valentin Longchamp Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7c3c5fdb82a9..896c9037155a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -469,6 +469,7 @@ void __netdev_watchdog_up(struct net_device *dev) dev_hold(dev); } } +EXPORT_SYMBOL_GPL(__netdev_watchdog_up); static void dev_watchdog_up(struct net_device *dev) { From 335add4ac891c3c5094e5b30cce004322f284ca4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Jun 2020 23:44:24 -0400 Subject: [PATCH 031/174] fix a braino in "sparc32: fix register window handling in genregs32_[gs]et()" [ Upstream commit 9d964e1b82d8182184153b70174f445ea616f053 ] lost npc in PTRACE_SETREGSET, breaking PTRACE_SETREGS as well Fixes: cf51e129b968 "sparc32: fix register window handling in genregs32_[gs]et()" Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- arch/sparc/kernel/ptrace_32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 60f7205ebe40..646dd58169ec 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -168,12 +168,17 @@ static int genregs32_set(struct task_struct *target, if (ret || !count) return ret; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s->y, + ®s->npc, 34 * sizeof(u32), 35 * sizeof(u32)); if (ret || !count) return ret; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->y, + 35 * sizeof(u32), 36 * sizeof(u32)); + if (ret || !count) + return ret; return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - 35 * sizeof(u32), 38 * sizeof(u32)); + 36 * sizeof(u32), 38 * sizeof(u32)); } static int fpregs32_get(struct task_struct *target, From 3621616af99bac4cf55728265ce886f36e6a3fed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jun 2020 14:09:21 +0200 Subject: [PATCH 032/174] ALSA: usb-audio: Fix potential use-after-free of streams [ Upstream commit ff58bbc7b9704a5869204176f804eff57307fef0 ] With the recent full-duplex support of implicit feedback streams, an endpoint can be still running after closing the capture stream as long as the playback stream with the sync-endpoint is running. In such a state, the URBs are still be handled and they may call retire_data_urb callback, which tries to transfer the data from the PCM buffer. Since the PCM stream gets closed, this may lead to use-after-free. This patch adds the proper clearance of the callback at stopping the capture stream for addressing the possible UAF above. Fixes: 10ce77e4817f ("ALSA: usb-audio: Add duplex sound support for USB devices using implicit feedback") Link: https://lore.kernel.org/r/20200616120921.12249-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 6c391e5fad2a..c36e6c97d09a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1778,6 +1778,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream return 0; case SNDRV_PCM_TRIGGER_STOP: stop_endpoints(subs, false); + subs->data_endpoint->retire_data_urb = NULL; subs->running = 0; return 0; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: From 2274a7421e732b9be095029537062cc00a54c19a Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 22 Jun 2020 13:07:15 -0700 Subject: [PATCH 033/174] binder: fix null deref of proc->context commit d35d3660e065b69fdb8bf512f3d899f350afce52 upstream. The binder driver makes the assumption proc->context pointer is invariant after initialization (as documented in the kerneldoc header for struct proc). However, in commit f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") proc->context is set to NULL during binder_deferred_release(). Another proc was in the middle of setting up a transaction to the dying process and crashed on a NULL pointer deref on "context" which is a local set to &proc->context: new_ref->data.desc = (node == context->binder_context_mgr_node) ? 0 : 1; Here's the stack: [ 5237.855435] Call trace: [ 5237.855441] binder_get_ref_for_node_olocked+0x100/0x2ec [ 5237.855446] binder_inc_ref_for_node+0x140/0x280 [ 5237.855451] binder_translate_binder+0x1d0/0x388 [ 5237.855456] binder_transaction+0x2228/0x3730 [ 5237.855461] binder_thread_write+0x640/0x25bc [ 5237.855466] binder_ioctl_write_read+0xb0/0x464 [ 5237.855471] binder_ioctl+0x30c/0x96c [ 5237.855477] do_vfs_ioctl+0x3e0/0x700 [ 5237.855482] __arm64_sys_ioctl+0x78/0xa4 [ 5237.855488] el0_svc_common+0xb4/0x194 [ 5237.855493] el0_svc_handler+0x74/0x98 [ 5237.855497] el0_svc+0x8/0xc The fix is to move the kfree of the binder_device to binder_free_proc() so the binder_device is freed when we know there are no references remaining on the binder_proc. Fixes: f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") Acked-by: Christian Brauner Signed-off-by: Todd Kjos Cc: stable Link: https://lore.kernel.org/r/20200622200715.114382-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 34a6de65aa7e..5e6586af21b7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4688,8 +4688,15 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) static void binder_free_proc(struct binder_proc *proc) { + struct binder_device *device; + BUG_ON(!list_empty(&proc->todo)); BUG_ON(!list_empty(&proc->delivered_death)); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + kfree(proc->context->name); + kfree(device); + } binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); binder_stats_deleted(BINDER_STAT_PROC); @@ -5408,7 +5415,6 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; - struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -5425,12 +5431,6 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); - device = container_of(proc->context, struct binder_device, context); - if (refcount_dec_and_test(&device->ref)) { - kfree(context->name); - kfree(device); - } - proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we From 195c1d1dd8cf957a2bbec5cfbb331539760032ea Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 10 Jun 2020 10:48:44 +0800 Subject: [PATCH 034/174] USB: ohci-sm501: Add missed iounmap() in remove commit 07c112fb09c86c0231f6ff0061a000ffe91c8eb9 upstream. This driver misses calling iounmap() in remove to undo the ioremap() called in probe. Add the missed call to fix it. Fixes: f54aab6ebcec ("usb: ohci-sm501 driver") Cc: stable Signed-off-by: Chuhong Yuan Acked-by: Alan Stern Link: https://lore.kernel.org/r/20200610024844.3628408-1-hslester96@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-sm501.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c index cff965240327..b91d50da6127 100644 --- a/drivers/usb/host/ohci-sm501.c +++ b/drivers/usb/host/ohci-sm501.c @@ -191,6 +191,7 @@ static int ohci_hcd_sm501_drv_remove(struct platform_device *pdev) struct resource *mem; usb_remove_hcd(hcd); + iounmap(hcd->regs); release_mem_region(hcd->rsrc_start, hcd->rsrc_len); usb_put_hcd(hcd); mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); From 6b3eb8af48cb6af7ebaca7cfb95d0698c778cc41 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 9 Jun 2020 12:28:11 +0400 Subject: [PATCH 035/174] usb: dwc2: Postponed gadget registration to the udc class driver commit 207324a321a866401b098cadf19e4a2dd6584622 upstream. During dwc2 driver probe, after gadget registration to the udc class driver, if exist any builtin function driver it immediately bound to dwc2 and after init host side (dwc2_hcd_init()) stucked in host mode. Patch postpone gadget registration after host side initialization done. Fixes: 117777b2c3bb9 ("usb: dwc2: Move gadget probe function into platform code") Reported-by: kbuild test robot Tested-by: Marek Vasut Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/f21cb38fecc72a230b86155d94c7e60c9cb66f58.1591690938.git.hminas@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 6 ------ drivers/usb/dwc2/platform.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 7fd0900a9cb0..f7528f732b2a 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4886,12 +4886,6 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg) epnum, 0); } - ret = usb_add_gadget_udc(dev, &hsotg->gadget); - if (ret) { - dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, - hsotg->ctrl_req); - return ret; - } dwc2_hsotg_dump(hsotg); return 0; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 3c6ce09a6db5..15e55808cf4e 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -507,6 +507,17 @@ static int dwc2_driver_probe(struct platform_device *dev) if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) dwc2_lowlevel_hw_disable(hsotg); +#if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \ + IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) + /* Postponed adding a new gadget to the udc class driver list */ + if (hsotg->gadget_enabled) { + retval = usb_add_gadget_udc(hsotg->dev, &hsotg->gadget); + if (retval) { + dwc2_hsotg_remove(hsotg); + goto error; + } + } +#endif /* CONFIG_USB_DWC2_PERIPHERAL || CONFIG_USB_DWC2_DUAL_ROLE */ return 0; error: From ffeb58a0daf07ef0d5508a7ba6845bc9aa3713fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Meresi=C5=84ski?= Date: Wed, 3 Jun 2020 22:33:46 +0200 Subject: [PATCH 036/174] usb: add USB_QUIRK_DELAY_INIT for Logitech C922 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5d8021923e8a8cc37a421a64e27c7221f0fee33c upstream. The Logitech C922, just like other Logitech webcams, needs the USB_QUIRK_DELAY_INIT or it will randomly not respond after device connection Signed-off-by: Tomasz MeresiÅ„ski Cc: stable Link: https://lore.kernel.org/r/20200603203347.7792-1-tomasz@meresinski.eu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 3e8efe759c3e..e0b77674869c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -218,11 +218,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech HD Webcam C270 */ { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085c), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, From 9200037a6ab6cdd76b0d04240d5f61d4d076a8b4 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Mon, 8 Jun 2020 11:46:59 +0800 Subject: [PATCH 037/174] USB: ehci: reopen solution for Synopsys HC bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1ddcb71a3edf0e1682b6e056158e4c4b00325f66 upstream. A Synopsys USB2.0 core used in Huawei Kunpeng920 SoC has a bug which might cause the host controller not issuing ping. Bug description: After indicating an Interrupt on Async Advance, the software uses the doorbell mechanism to delete the Next Link queue head of the last executed queue head. At this time, the host controller still references the removed queue head(the queue head is NULL). NULL reference causes the host controller to lose the USB device. Solution: After deleting the Next Link queue head, when has_synopsys_hc_bug set to 1,the software can write one of the valid queue head addresses to the ASYNCLISTADDR register to allow the host controller to get the valid queue head. in order to solve that problem, this patch set the flag for Huawei Kunpeng920 There are detailed instructions and solutions in this patch: commit 2f7ac6c19997 ("USB: ehci: add workaround for Synopsys HC bug") Signed-off-by: Longfang Liu Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/1591588019-44284-1-git-send-email-liulongfang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index b0882c13a1d1..66713c253765 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -216,6 +216,13 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci_info(ehci, "applying MosChip frame-index workaround\n"); ehci->frame_index_bug = 1; break; + case PCI_VENDOR_ID_HUAWEI: + /* Synopsys HC bug */ + if (pdev->device == 0xa239) { + ehci_info(ehci, "applying Synopsys HC workaround\n"); + ehci->has_synopsys_hc_bug = 1; + } + break; } /* optional debug port, normally in the first BAR */ From 9d814bd14cffb51e418983faa1d899da2a3a55fc Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Wed, 24 Jun 2020 16:59:47 +0300 Subject: [PATCH 038/174] usb: host: xhci-mtk: avoid runtime suspend when removing hcd commit a24d5072e87457a14023ee1dd3fc8b1e76f899ef upstream. When runtime suspend was enabled, runtime suspend might happen when xhci is removing hcd. This might cause kernel panic when hcd has been freed but runtime pm suspend related handle need to reference it. Signed-off-by: Macpaul Lin Reviewed-by: Chunfeng Yun Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200624135949.22611-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index b18a6baef204..85f1ff0399a9 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -592,6 +592,9 @@ static int xhci_mtk_remove(struct platform_device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct usb_hcd *shared_hcd = xhci->shared_hcd; + pm_runtime_put_noidle(&dev->dev); + pm_runtime_disable(&dev->dev); + usb_remove_hcd(shared_hcd); xhci->shared_hcd = NULL; device_init_wakeup(&dev->dev, false); @@ -602,8 +605,6 @@ static int xhci_mtk_remove(struct platform_device *dev) xhci_mtk_sch_exit(mtk); xhci_mtk_clks_disable(mtk); xhci_mtk_ldos_disable(mtk); - pm_runtime_put_sync(&dev->dev); - pm_runtime_disable(&dev->dev); return 0; } From c1e71a51c2f522ebabcc289ea3366a47e48c4a3e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 24 Jun 2020 16:59:49 +0300 Subject: [PATCH 039/174] xhci: Poll for U0 after disabling USB2 LPM commit b3d71abd135e6919ca0b6cab463738472653ddfb upstream. USB2 devices with LPM enabled may interrupt the system suspend: [ 932.510475] usb 1-7: usb suspend, wakeup 0 [ 932.510549] hub 1-0:1.0: hub_suspend [ 932.510581] usb usb1: bus suspend, wakeup 0 [ 932.510590] xhci_hcd 0000:00:14.0: port 9 not suspended [ 932.510593] xhci_hcd 0000:00:14.0: port 8 not suspended .. [ 932.520323] xhci_hcd 0000:00:14.0: Port change event, 1-7, id 7, portsc: 0x400e03 .. [ 932.591405] PM: pci_pm_suspend(): hcd_pci_suspend+0x0/0x30 returns -16 [ 932.591414] PM: dpm_run_callback(): pci_pm_suspend+0x0/0x160 returns -16 [ 932.591418] PM: Device 0000:00:14.0 failed to suspend async: error -16 During system suspend, USB core will let HC suspends the device if it doesn't have remote wakeup enabled and doesn't have any children. However, from the log above we can see that the usb 1-7 doesn't get bus suspended due to not in U0. After a while the port finished U2 -> U0 transition, interrupts the suspend process. The observation is that after disabling LPM, port doesn't transit to U0 immediately and can linger in U2. xHCI spec 4.23.5.2 states that the maximum exit latency for USB2 LPM should be BESL + 10us. The BESL for the affected device is advertised as 400us, which is still not enough based on my testing result. So let's use the maximum permitted latency, 10000, to poll for U0 status to solve the issue. Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200624135949.22611-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 81b54a3d2910..f913bc9344d2 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4471,6 +4471,9 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, mutex_lock(hcd->bandwidth_mutex); xhci_change_max_exit_latency(xhci, udev, 0); mutex_unlock(hcd->bandwidth_mutex); + readl_poll_timeout(ports[port_num]->addr, pm_val, + (pm_val & PORT_PLS_MASK) == XDEV_U0, + 100, 10000); return 0; } } From fe2daefad98a274eb6962e73d5b4dff7ade994dc Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 2 Jun 2020 19:47:08 +0800 Subject: [PATCH 040/174] usb: host: ehci-exynos: Fix error check in exynos_ehci_probe() commit 44ed240d62736ad29943ec01e41e194b96f7c5e9 upstream. If the function platform_get_irq() failed, the negative value returned will not be detected here. So fix error handling in exynos_ehci_probe(). And when get irq failed, the function platform_get_irq() logs an error message, so remove redundant message here. Fixes: 1bcc5aa87f04 ("USB: Add initial S5P EHCI driver") Cc: stable Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Link: https://lore.kernel.org/r/20200602114708.28620-1-tangbin@cmss.chinamobile.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-exynos.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ehci-exynos.c b/drivers/usb/host/ehci-exynos.c index 01debfd03d4a..84d59a611511 100644 --- a/drivers/usb/host/ehci-exynos.c +++ b/drivers/usb/host/ehci-exynos.c @@ -203,9 +203,8 @@ static int exynos_ehci_probe(struct platform_device *pdev) hcd->rsrc_len = resource_size(res); irq = platform_get_irq(pdev, 0); - if (!irq) { - dev_err(&pdev->dev, "Failed to get IRQ\n"); - err = -ENODEV; + if (irq < 0) { + err = irq; goto fail_io; } From 895ec8c86e13f85b119c71d5f95491b48867955e Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 4 Jun 2020 19:21:18 +0800 Subject: [PATCH 041/174] usb: typec: tcpci_rt1711h: avoid screaming irq causing boot hangs commit 302c570bf36e997d55ad0d60628a2feec76954a4 upstream. John reported screaming irq caused by rt1711h when system boot[1], this is because irq request is done before tcpci_register_port(), so the chip->tcpci has not been setup, irq handler is entered but can't do anything, this patch is to address this by moving the irq request after tcpci_register_port(). [1] https://lore.kernel.org/linux-usb/20200530040157.31038-1-john.stultz@linaro.org Fixes: ce08eaeb6388 ("staging: typec: rt1711h typec chip driver") Cc: stable # v4.18+ Cc: John Stultz Reported-and-tested-by: John Stultz Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Li Jun Link: https://lore.kernel.org/r/20200604112118.38062-1-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_rt1711h.c | 31 +++++++++----------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c index 017389021b96..b56a0880a044 100644 --- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c +++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c @@ -179,26 +179,6 @@ out: return tcpci_irq(chip->tcpci); } -static int rt1711h_init_alert(struct rt1711h_chip *chip, - struct i2c_client *client) -{ - int ret; - - /* Disable chip interrupts before requesting irq */ - ret = rt1711h_write16(chip, TCPC_ALERT_MASK, 0); - if (ret < 0) - return ret; - - ret = devm_request_threaded_irq(chip->dev, client->irq, NULL, - rt1711h_irq, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - dev_name(chip->dev), chip); - if (ret < 0) - return ret; - enable_irq_wake(client->irq); - return 0; -} - static int rt1711h_sw_reset(struct rt1711h_chip *chip) { int ret; @@ -260,7 +240,8 @@ static int rt1711h_probe(struct i2c_client *client, if (ret < 0) return ret; - ret = rt1711h_init_alert(chip, client); + /* Disable chip interrupts before requesting irq */ + ret = rt1711h_write16(chip, TCPC_ALERT_MASK, 0); if (ret < 0) return ret; @@ -271,6 +252,14 @@ static int rt1711h_probe(struct i2c_client *client, if (IS_ERR_OR_NULL(chip->tcpci)) return PTR_ERR(chip->tcpci); + ret = devm_request_threaded_irq(chip->dev, client->irq, NULL, + rt1711h_irq, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + dev_name(chip->dev), chip); + if (ret < 0) + return ret; + enable_irq_wake(client->irq); + return 0; } From 74a7ad9d975c214a74e1ceddb4d349fd72e04eb7 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Fri, 12 Jun 2020 12:18:07 +0100 Subject: [PATCH 042/174] ALSA: usb-audio: Add implicit feedback quirk for SSL2+. commit e7585db1b0b5b4e4eb1967bb1472df308f7ffcbf upstream. This uses the same quirk as the Motu M2 and M4 to ensure the driver uses the audio interface's clock. Tested on an SSL2+. Signed-off-by: Laurence Tratt Cc: Link: https://lore.kernel.org/r/20200612111807.dgnig6rwhmsl2bod@overdrive.tratt.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index c36e6c97d09a..b971d9aaa64a 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -349,6 +349,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ifnum = 0; goto add_sync_ep_from_ifnum; case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ + case USB_ID(0x31e9, 0x0002): /* Solid State Logic SSL2+ */ ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; From 0c4ff206043eb43a57888801cae30750a7e4b7db Mon Sep 17 00:00:00 2001 From: "Yick W. Tse" Date: Sat, 13 Jun 2020 11:40:06 +0000 Subject: [PATCH 043/174] ALSA: usb-audio: add quirk for Denon DCD-1500RE commit c9808bbfed3cfc911ecb60fe8e80c0c27876c657 upstream. fix error "clock source 41 is not valid, cannot use" [] New USB device found, idVendor=154e, idProduct=1002, bcdDevice= 1.00 [] New USB device strings: Mfr=1, Product=2, SerialNumber=0 [] Product: DCD-1500RE [] Manufacturer: D & M Holdings Inc. [] [] clock source 41 is not valid, cannot use [] usbcore: registered new interface driver snd-usb-audio Signed-off-by: Yick W. Tse Cc: Link: https://lore.kernel.org/r/1373857985.210365.1592048406997@mail.yahoo.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 092720ce2c55..eb3da0b71269 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1461,6 +1461,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) static bool is_itf_usb_dsd_dac(unsigned int id) { switch (id) { + case USB_ID(0x154e, 0x1002): /* Denon DCD-1500RE */ case USB_ID(0x154e, 0x1003): /* Denon DA-300USB */ case USB_ID(0x154e, 0x3005): /* Marantz HD-DAC1 */ case USB_ID(0x154e, 0x3006): /* Marantz SA-14S1 */ From 1cc2d29710c2e9c96b6d026f71b5cfc09d96ea8c Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Tue, 23 Jun 2020 19:03:23 +0800 Subject: [PATCH 044/174] ALSA: usb-audio: add quirk for Samsung USBC Headset (AKG) commit a32a1fc99807244d920d274adc46ba04b538cc8a upstream. We've found Samsung USBC Headset (AKG) (VID: 0x04e8, PID: 0xa051) need a tiny delay after each class compliant request. Otherwise the device might not be able to be recognized each times. Signed-off-by: Chihhao Chen Signed-off-by: Macpaul Lin Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1592910203-24035-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index eb3da0b71269..bf5083a20b6d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1603,6 +1603,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) usleep_range(1000, 2000); + + /* + * Samsung USBC Headset (AKG) need a tiny delay after each + * class compliant request. (Model number: AAM625R or AAM627R) + */ + if (chip->usb_id == USB_ID(0x04e8, 0xa051) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + usleep_range(5000, 6000); } /* From 25e1bb1e6c368d09da13007b60f0c92b1c4101b4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Jun 2020 14:23:40 +0200 Subject: [PATCH 045/174] ALSA: usb-audio: Fix OOB access of mixer element list commit 220345e98f1cdc768eeb6e3364a0fa7ab9647fe7 upstream. The USB-audio mixer code holds a linked list of usb_mixer_elem_list, and several operations are performed for each mixer element. A few of them (snd_usb_mixer_notify_id() and snd_usb_mixer_interrupt_v2()) assume each mixer element being a usb_mixer_elem_info object that is a subclass of usb_mixer_elem_list, cast via container_of() and access it members. This may result in an out-of-bound access when a non-standard list element has been added, as spotted by syzkaller recently. This patch adds a new field, is_std_info, in usb_mixer_elem_list to indicate that the element is the usb_mixer_elem_info type or not, and skip the access to such an element if needed. Reported-by: syzbot+fb14314433463ad51625@syzkaller.appspotmail.com Reported-by: syzbot+2405ca3401e943c538b5@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200624122340.9615-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 15 +++++++++++---- sound/usb/mixer.h | 9 +++++++-- sound/usb/mixer_quirks.c | 3 ++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index f55afe3a98e3..9079c380228f 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -576,8 +576,9 @@ static int check_matrix_bitmap(unsigned char *bmap, * if failed, give up and free the control instance. */ -int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, - struct snd_kcontrol *kctl) +int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, + struct snd_kcontrol *kctl, + bool is_std_info) { struct usb_mixer_interface *mixer = list->mixer; int err; @@ -591,6 +592,7 @@ int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, return err; } list->kctl = kctl; + list->is_std_info = is_std_info; list->next_id_elem = mixer->id_elems[list->id]; mixer->id_elems[list->id] = list; return 0; @@ -3213,8 +3215,11 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid) unitid = delegate_notify(mixer, unitid, NULL, NULL); for_each_mixer_elem(list, mixer, unitid) { - struct usb_mixer_elem_info *info = - mixer_elem_list_to_info(list); + struct usb_mixer_elem_info *info; + + if (!list->is_std_info) + continue; + info = mixer_elem_list_to_info(list); /* invalidate cache, so the value is read from the device */ info->cached = 0; snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE, @@ -3294,6 +3299,8 @@ static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer, if (!list->kctl) continue; + if (!list->is_std_info) + continue; info = mixer_elem_list_to_info(list); if (count > 1 && info->control != control) diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 8e0fb7fdf1a0..01b5e5cc2221 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -66,6 +66,7 @@ struct usb_mixer_elem_list { struct usb_mixer_elem_list *next_id_elem; /* list of controls with same id */ struct snd_kcontrol *kctl; unsigned int id; + bool is_std_info; usb_mixer_elem_dump_func_t dump; usb_mixer_elem_resume_func_t resume; }; @@ -103,8 +104,12 @@ void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid); int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int value_set); -int snd_usb_mixer_add_control(struct usb_mixer_elem_list *list, - struct snd_kcontrol *kctl); +int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, + struct snd_kcontrol *kctl, + bool is_std_info); + +#define snd_usb_mixer_add_control(list, kctl) \ + snd_usb_mixer_add_list(list, kctl, true) void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, struct usb_mixer_interface *mixer, diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index dc181066c799..d39bf5b648d1 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -157,7 +157,8 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer, return -ENOMEM; } kctl->private_free = snd_usb_mixer_elem_free; - return snd_usb_mixer_add_control(list, kctl); + /* don't use snd_usb_mixer_add_control() here, this is a special list element */ + return snd_usb_mixer_add_list(list, kctl, false); } /* From 79175ae5f9968863f41fb20063bd5840f478686f Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 23 Jun 2020 11:09:17 +0800 Subject: [PATCH 046/174] usb: cdns3: trace: using correct dir value commit ba3a80fe0fb67d8790f62b7bc60df97406d89871 upstream. It should use the correct direction value from register, not depends on previous software setting. It fixed the EP number wrong issue at trace when the TRBERR interrupt occurs for EP0IN. When the EP0IN IOC has finished, software prepares the setup packet request, the expected direction is OUT, but at that time, the TRBERR for EP0IN may occur since it is DMULT mode, the DMA does not stop until TRBERR has met. Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Reviewed-by: Pawel Laszczak Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/trace.h b/drivers/usb/cdns3/trace.h index e92348c9b4d7..7cc8bebaa07d 100644 --- a/drivers/usb/cdns3/trace.h +++ b/drivers/usb/cdns3/trace.h @@ -150,7 +150,7 @@ DECLARE_EVENT_CLASS(cdns3_log_ep0_irq, __dynamic_array(char, str, CDNS3_MSG_MAX) ), TP_fast_assign( - __entry->ep_dir = priv_dev->ep0_data_dir; + __entry->ep_dir = priv_dev->selected_ep; __entry->ep_sts = ep_sts; ), TP_printk("%s", cdns3_decode_ep0_irq(__get_str(str), From a0668653be26acdf16272c7eb79017dbdbf69298 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 23 Jun 2020 11:09:16 +0800 Subject: [PATCH 047/174] usb: cdns3: ep0: fix the test mode set incorrectly commit b51e1cf64f93acebb6d8afbacd648a6ecefc39b4 upstream. The 'tmode' is ctrl->wIndex, changing it as the real test mode value for register assignment. Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Reviewed-by: Jun Li Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/ep0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c index e71240b386b4..f785ce84aba6 100644 --- a/drivers/usb/cdns3/ep0.c +++ b/drivers/usb/cdns3/ep0.c @@ -327,7 +327,8 @@ static int cdns3_ep0_feature_handle_device(struct cdns3_device *priv_dev, if (!set || (tmode & 0xff) != 0) return -EINVAL; - switch (tmode >> 8) { + tmode >>= 8; + switch (tmode) { case TEST_J: case TEST_K: case TEST_SE0_NAK: From be8df027079b856a89e85778330394cea2b0b290 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 23 Jun 2020 11:09:18 +0800 Subject: [PATCH 048/174] usb: cdns3: ep0: add spinlock for cdns3_check_new_setup commit 2587a029fa2a877d0a8dda955ef1b24c94b4bd0e upstream. The other thread may access other endpoints when the cdns3_check_new_setup is handling, add spinlock to protect it. Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Reviewed-by: Pawel Laszczak Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/ep0.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c index f785ce84aba6..da4c5eb03d7e 100644 --- a/drivers/usb/cdns3/ep0.c +++ b/drivers/usb/cdns3/ep0.c @@ -712,15 +712,17 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, int ret = 0; u8 zlp = 0; + spin_lock_irqsave(&priv_dev->lock, flags); trace_cdns3_ep0_queue(priv_dev, request); /* cancel the request if controller receive new SETUP packet. */ - if (cdns3_check_new_setup(priv_dev)) + if (cdns3_check_new_setup(priv_dev)) { + spin_unlock_irqrestore(&priv_dev->lock, flags); return -ECONNRESET; + } /* send STATUS stage. Should be called only for SET_CONFIGURATION */ if (priv_dev->ep0_stage == CDNS3_STATUS_STAGE) { - spin_lock_irqsave(&priv_dev->lock, flags); cdns3_select_ep(priv_dev, 0x00); erdy_sent = !priv_dev->hw_configured_flag; @@ -745,7 +747,6 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, return 0; } - spin_lock_irqsave(&priv_dev->lock, flags); if (!list_empty(&priv_ep->pending_req_list)) { dev_err(priv_dev->dev, "can't handle multiple requests for ep0\n"); From d3a251b8479767a396f89c360ae8ca92cd614cdf Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Fri, 5 Jun 2020 17:44:37 +0300 Subject: [PATCH 049/174] scsi: qla2xxx: Keep initiator ports after RSCN commit 632f24f09d5b7c8a2f94932c3391ca957ae76cc4 upstream. The driver performs SCR (state change registration) in all modes including pure target mode. For each RSCN, scan_needed flag is set in qla2x00_handle_rscn() for the port mentioned in the RSCN and fabric rescan is scheduled. During the rescan, GNN_FT handler, qla24xx_async_gnnft_done() deletes session of the port that caused the RSCN. In target mode, the session deletion has an impact on ATIO handler, qlt_24xx_atio_pkt(). Target responds with SAM STATUS BUSY to I/O incoming from the deleted session. qlt_handle_cmd_for_atio() and qlt_handle_task_mgmt() return -EFAULT if they are not able to find session of the command/TMF, and that results in invocation of qlt_send_busy(): qlt_24xx_atio_pkt_all_vps: qla_target(0): type 6 ox_id 0014 qla_target(0): Unable to send command to target, sending BUSY status Such response causes command timeout on the initiator. Error handler thread on the initiator will be spawned to abort the commands: scsi 23:0:0:0: tag#0 abort scheduled scsi 23:0:0:0: tag#0 aborting command qla2xxx [0000:af:00.0]-188c:23: Entered qla24xx_abort_command. qla2xxx [0000:af:00.0]-801c:23: Abort command issued nexus=23:0:0 -- 0 2003. Command abort is rejected by target and fails (2003), error handler then tries to perform DEVICE RESET and TARGET RESET but they're also doomed to fail because TMFs are ignored for the deleted sessions. Then initiator makes BUS RESET that resets the link via qla2x00_full_login_lip(). BUS RESET succeeds and brings initiator port up, SAN switch detects that and sends RSCN to the target port and it fails again the same way as described above. It never goes out of the loop. The change breaks the RSCN loop by keeping initiator sessions mentioned in RSCN payload in all modes, including dual and pure target mode. Link: https://lore.kernel.org/r/20200605144435.27023-1-r.bolshakov@yadro.com Fixes: 2037ce49d30a ("scsi: qla2xxx: Fix stale session") Cc: Quinn Tran Cc: Arun Easi Cc: Nilesh Javali Cc: Bart Van Assche Cc: Daniel Wagner Cc: Himanshu Madhani Cc: Martin Wilck Cc: stable@vger.kernel.org # v5.4+ Reviewed-by: Daniel Wagner Reviewed-by: Shyam Sundar Reviewed-by: Himanshu Madhani Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_gs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 84bb4a048016..a44de4c5dcf6 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3638,7 +3638,9 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) qla2x00_clear_loop_id(fcport); fcport->flags |= FCF_FABRIC_DEVICE; } else if (fcport->d_id.b24 != rp->id.b24 || - fcport->scan_needed) { + (fcport->scan_needed && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_NVME_INITIATOR)) { qlt_schedule_sess_for_deletion(fcport); } fcport->d_id.b24 = rp->id.b24; From 1f551a056b30b898df6690078d0632e41fad6174 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 23 Jun 2020 16:02:42 +0200 Subject: [PATCH 050/174] scsi: zfcp: Fix panic on ERP timeout for previously dismissed ERP action commit 936e6b85da0476dd2edac7c51c68072da9fb4ba2 upstream. Suppose that, for unrelated reasons, FSF requests on behalf of recovery are very slow and can run into the ERP timeout. In the case at hand, we did adapter recovery to a large degree. However due to the slowness a LUN open is pending so the corresponding fc_rport remains blocked. After fast_io_fail_tmo we trigger close physical port recovery for the port under which the LUN should have been opened. The new higher order port recovery dismisses the pending LUN open ERP action and dismisses the pending LUN open FSF request. Such dismissal decouples the ERP action from the pending corresponding FSF request by setting zfcp_fsf_req->erp_action to NULL (among other things) [zfcp_erp_strategy_check_fsfreq()]. If now the ERP timeout for the pending open LUN request runs out, we must not use zfcp_fsf_req->erp_action in the ERP timeout handler. This is a problem since v4.15 commit 75492a51568b ("s390/scsi: Convert timers to use timer_setup()"). Before that we intentionally only passed zfcp_erp_action as context argument to zfcp_erp_timeout_handler(). Note: The lifetime of the corresponding zfcp_fsf_req object continues until a (late) response or an (unrelated) adapter recovery. Just like the regular response path ignores dismissed requests [zfcp_fsf_req_complete() => zfcp_fsf_protstatus_eval() => return early] the ERP timeout handler now needs to ignore dismissed requests. So simply return early in the ERP timeout handler if the FSF request is marked as dismissed in its status flags. To protect against the race where zfcp_erp_strategy_check_fsfreq() dismisses and sets zfcp_fsf_req->erp_action to NULL after our previous status flag check, return early if zfcp_fsf_req->erp_action is NULL. After all, the former ERP action does not need to be woken up as that was already done as part of the dismissal above [zfcp_erp_action_dismiss()]. This fixes the following panic due to kernel page fault in IRQ context: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:000009859238c00b R2:00000e3e7ffd000b R3:00000e3e7ffcc007 S:00000e3e7ffd7000 P:000000000000013d Oops: 0004 ilc:2 [#1] SMP Modules linked in: ... CPU: 82 PID: 311273 Comm: stress Kdump: loaded Tainted: G E X ... Hardware name: IBM 8561 T01 701 (LPAR) Krnl PSW : 0404c00180000000 001fffff80549be0 (zfcp_erp_notify+0x40/0xc0 [zfcp]) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000080 00000e3d00000000 00000000000000f0 0000000000030000 000000010028e700 000000000400a39c 000000010028e700 00000e3e7cf87e02 0000000010000000 0700098591cb67f0 0000000000000000 0000000000000000 0000033840e9a000 0000000000000000 001fffe008d6bc18 001fffe008d6bbc8 Krnl Code: 001fffff80549bd4: a7180000 lhi %r1,0 001fffff80549bd8: 4120a0f0 la %r2,240(%r10) #001fffff80549bdc: a53e0003 llilh %r3,3 >001fffff80549be0: ba132000 cs %r1,%r3,0(%r2) 001fffff80549be4: a7740037 brc 7,1fffff80549c52 001fffff80549be8: e320b0180004 lg %r2,24(%r11) 001fffff80549bee: e31020e00004 lg %r1,224(%r2) 001fffff80549bf4: 412020e0 la %r2,224(%r2) Call Trace: [<001fffff80549be0>] zfcp_erp_notify+0x40/0xc0 [zfcp] [<00000985915e26f0>] call_timer_fn+0x38/0x190 [<00000985915e2944>] expire_timers+0xfc/0x190 [<00000985915e2ac4>] run_timer_softirq+0xec/0x218 [<0000098591ca7c4c>] __do_softirq+0x144/0x398 [<00000985915110aa>] do_softirq_own_stack+0x72/0x88 [<0000098591551b58>] irq_exit+0xb0/0xb8 [<0000098591510c6a>] do_IRQ+0x82/0xb0 [<0000098591ca7140>] ext_int_handler+0x128/0x12c [<0000098591722d98>] clear_subpage.constprop.13+0x38/0x60 ([<000009859172ae4c>] clear_huge_page+0xec/0x250) [<000009859177e7a2>] do_huge_pmd_anonymous_page+0x32a/0x768 [<000009859172a712>] __handle_mm_fault+0x88a/0x900 [<000009859172a860>] handle_mm_fault+0xd8/0x1b0 [<0000098591529ef6>] do_dat_exception+0x136/0x3e8 [<0000098591ca6d34>] pgm_check_handler+0x1c8/0x220 Last Breaking-Event-Address: [<001fffff80549c88>] zfcp_erp_timeout_handler+0x10/0x18 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt Link: https://lore.kernel.org/r/20200623140242.98864-1-maier@linux.ibm.com Fixes: 75492a51568b ("s390/scsi: Convert timers to use timer_setup()") Cc: #4.15+ Reviewed-by: Julian Wiedmann Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_erp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index cb84125ab80d..08dc2efb7d8a 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -576,7 +576,10 @@ static void zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *act) ZFCP_STATUS_ERP_TIMEDOUT)) { req->status |= ZFCP_STATUS_FSFREQ_DISMISSED; zfcp_dbf_rec_run("erscf_1", act); - req->erp_action = NULL; + /* lock-free concurrent access with + * zfcp_erp_timeout_handler() + */ + WRITE_ONCE(req->erp_action, NULL); } if (act->status & ZFCP_STATUS_ERP_TIMEDOUT) zfcp_dbf_rec_run("erscf_2", act); @@ -612,8 +615,14 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask) void zfcp_erp_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); - struct zfcp_erp_action *act = fsf_req->erp_action; + struct zfcp_erp_action *act; + if (fsf_req->status & ZFCP_STATUS_FSFREQ_DISMISSED) + return; + /* lock-free concurrent access with zfcp_erp_strategy_check_fsfreq() */ + act = READ_ONCE(fsf_req->erp_action); + if (!act) + return; zfcp_erp_notify(act, ZFCP_STATUS_ERP_TIMEDOUT); } From 572a11131ad3ec7737dbf68828618025160bbd57 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sat, 13 Jun 2020 20:27:09 +0800 Subject: [PATCH 051/174] cifs: Fix cached_fid refcnt leak in open_shroot commit 77577de64167aa0643d47ffbaacf3642632b321b upstream. open_shroot() invokes kref_get(), which increases the refcount of the "tcon->crfid" object. When open_shroot() returns not zero, it means the open operation failed and close_shroot() will not be called to decrement the refcount of the "tcon->crfid". The reference counting issue happens in one normal path of open_shroot(). When the cached root have been opened successfully in a concurrent process, the function increases the refcount and jump to "oshr_free" to return. However the current return value "rc" may not equal to 0, thus the increased refcount will not be balanced outside the function, causing a refcnt leak. Fix this issue by setting the value of "rc" to 0 before jumping to "oshr_free" label. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 58915d882285..f5e893ed48aa 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -736,6 +736,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) /* close extra handle outside of crit sec */ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } + rc = 0; goto oshr_free; } From bd2f2ac0ab96327a62a790f99116e7544a3fe0cf Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Jun 2020 07:31:53 -0400 Subject: [PATCH 052/174] cifs/smb3: Fix data inconsistent when punch hole commit acc91c2d8de4ef46ed751c5f9df99ed9a109b100 upstream. When punch hole success, we also can read old data from file: # strace -e trace=pread64,fallocate xfs_io -f -c "pread 20 40" \ -c "fpunch 20 40" -c"pread 20 40" file pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40 fallocate(3, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 20, 40) = 0 pread64(3, " version 5.8.0-rc1+"..., 40, 20) = 40 CIFS implements the fallocate(FALLOCATE_FL_PUNCH_HOLE) with send SMB ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the remote file to zero, but local page caches not updated, then the local page caches inconsistent with server. Also can be found by xfstests generic/316. So, we need to remove the page caches before send the SMB ioctl(FSCTL_SET_ZERO_DATA) to server. Fixes: 31742c5a33176 ("enable fallocate punch hole ("fallocate -p") for SMB3") Suggested-by: Pavel Shilovsky Reviewed-by: Pavel Shilovsky Signed-off-by: Zhang Xiaoxu Cc: stable@vger.kernel.org # v3.17 Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f5e893ed48aa..e38f21c44133 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3036,6 +3036,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } + /* + * We implement the punch hole through ioctl, so we need remove the page + * caches first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); fsctl_buf.FileOffset = cpu_to_le64(offset); From e615f58fa86c3aa4a6f7f56df77c654bc92323de Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 23 Jun 2020 07:31:54 -0400 Subject: [PATCH 053/174] cifs/smb3: Fix data inconsistent when zero file range commit 6b69040247e14b43419a520f841f2b3052833df9 upstream. CIFS implements the fallocate(FALLOC_FL_ZERO_RANGE) with send SMB ioctl(FSCTL_SET_ZERO_DATA) to server. It just set the range of the remote file to zero, but local page cache not update, then the data inconsistent with server, which leads the xfstest generic/008 failed. So we need to remove the local page caches before send SMB ioctl(FSCTL_SET_ZERO_DATA) to server. After next read, it will re-cache it. Fixes: 30175628bf7f5 ("[SMB3] Enable fallocate -z support for SMB3 mounts") Reported-by: Hulk Robot Signed-off-by: Zhang Xiaoxu Reviewed-by: Pavel Shilovsky Cc: stable@vger.kernel.org # v3.17 Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e38f21c44133..7ccbfc656478 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2970,6 +2970,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + /* + * We zero the range through ioctl, so we need remove the page caches + * first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ if (!CIFS_CACHE_READ(cifsi)) From c09be4f57956deae7b9dbee3a35811a2a9c45d99 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 24 Jun 2020 16:59:45 +0300 Subject: [PATCH 054/174] xhci: Fix incorrect EP_STATE_MASK commit dceea67058fe22075db3aed62d5cb62092be5053 upstream. EP_STATE_MASK should be 0x7 instead of 0xf xhci spec 6.2.3 shows that the EP state field in the endpoint context data structure consist of bits [2:0]. The old value included a bit from the next field which fortunately is a RsvdZ region. So hopefully this hasn't caused too much harm Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200624135949.22611-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 23a1abdc2b43..c656b41b57b5 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -716,7 +716,7 @@ struct xhci_ep_ctx { * 4 - TRB error * 5-7 - reserved */ -#define EP_STATE_MASK (0xf) +#define EP_STATE_MASK (0x7) #define EP_STATE_DISABLED 0 #define EP_STATE_RUNNING 1 #define EP_STATE_HALTED 2 From 14d46386226e102e43ee535ac74306b7489180b0 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 24 Jun 2020 16:59:46 +0300 Subject: [PATCH 055/174] xhci: Fix enumeration issue when setting max packet size for FS devices. commit a73d9d9cfc3cfceabd91fb0b0c13e4062b6dbcd7 upstream. Unable to complete the enumeration of a USB TV Tuner device. Per XHCI spec (4.6.5), the EP state field of the input context shall be cleared for a set address command. In the special case of an FS device that has "MaxPacketSize0 = 8", the Linux XHCI driver does not do this before evaluating the context. With an XHCI controller that checks the EP state field for parameter context error this causes a problem in cases such as the device getting reset again after enumeration. When that field is cleared, the problem does not occur. This was found and fixed by Sasi Kumar. Cc: stable@vger.kernel.org Signed-off-by: Al Cooper Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200624135949.22611-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f913bc9344d2..13d2971c3544 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1430,6 +1430,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, xhci->devs[slot_id]->out_ctx, ep_index); ep_ctx = xhci_get_ep_ctx(xhci, command->in_ctx, ep_index); + ep_ctx->ep_info &= cpu_to_le32(~EP_STATE_MASK);/* must clear */ ep_ctx->ep_info2 &= cpu_to_le32(~MAX_PACKET_MASK); ep_ctx->ep_info2 |= cpu_to_le32(MAX_PACKET(max_packet_size)); From d6522bc320d7968a7977c30b6f2f3c6d79f1448f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 24 Jun 2020 16:59:48 +0300 Subject: [PATCH 056/174] xhci: Return if xHCI doesn't support LPM commit f0c472a6da51f9fac15e80fe2fd9c83b68754cff upstream. Just return if xHCI is quirked to disable LPM. We can save some time from reading registers and doing spinlocks. Add stable tag as we want this patch together with the next one, "Poll for U0 after disabling USB2 LPM" which fixes a suspend issue for some USB2 LPM devices Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200624135949.22611-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 13d2971c3544..11a65854d3f0 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4391,6 +4391,9 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, int hird, exit_latency; int ret; + if (xhci->quirks & XHCI_HW_LPM_DISABLE) + return -EPERM; + if (hcd->speed >= HCD_USB3 || !xhci->hw_lpm_support || !udev->lpm_capable) return -EPERM; @@ -4413,7 +4416,7 @@ static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, xhci_dbg(xhci, "%s port %d USB2 hardware LPM\n", enable ? "enable" : "disable", port_num + 1); - if (enable && !(xhci->quirks & XHCI_HW_LPM_DISABLE)) { + if (enable) { /* Host supports BESL timeout instead of HIRD */ if (udev->usb2_hw_lpm_besl_capable) { /* if device doesn't have a preferred BESL value use a From d9a74e455070cf95a845cd153f44d915fa110da5 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Fri, 5 Jun 2020 12:54:18 +0200 Subject: [PATCH 057/174] cdc-acm: Add DISABLE_ECHO quirk for Microchip/SMSC chip commit 03894573f2913181ee5aae0089f333b2131f2d4b upstream. USB_DEVICE(0x0424, 0x274e) can send data before cdc_acm is ready, causing garbage chars on the TTY causing stray input to the shell and/or login prompt. Signed-off-by: Joakim Tjernlund Cc: stable@vger.kernel.org Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/20200605105418.22263-1-joakim.tjernlund@infinera.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f67088bb8218..d5187b50fc82 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1689,6 +1689,8 @@ static int acm_pre_reset(struct usb_interface *intf) static const struct usb_device_id acm_ids[] = { /* quirky and broken devices */ + { USB_DEVICE(0x0424, 0x274e), /* Microchip Technology, Inc. (formerly SMSC) */ + .driver_info = DISABLE_ECHO, }, /* DISABLE ECHO in termios flag */ { USB_DEVICE(0x076d, 0x0006), /* Denso Cradle CU-321 */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x17ef, 0x7000), /* Lenovo USB modem */ From 588ad2b29ea3d4fe95da5ae161b370b539e16382 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 18 Jun 2020 12:21:37 +0800 Subject: [PATCH 058/174] loop: replace kill_bdev with invalidate_bdev commit f4bd34b139a3fa2808c4205f12714c65e1548c6c upstream. When a filesystem is mounted on a loop device and on a loop ioctl LOOP_SET_STATUS64, because of kill_bdev, buffer_head mappings are getting destroyed. kill_bdev truncate_inode_pages truncate_inode_pages_range do_invalidatepage block_invalidatepage discard_buffer -->clear BH_Mapped flag sb_bread __bread_gfp bh = __getblk_gfp -->discard_buffer clear BH_Mapped flag __bread_slow submit_bh submit_bh_wbc BUG_ON(!buffer_mapped(bh)) --> hit this BUG_ON Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Signed-off-by: Zheng Bin Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 57ed6b70d295..565e35e69f24 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1284,7 +1284,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); } /* I/O need to be drained during transfer transition */ @@ -1558,12 +1558,12 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (lo->lo_queue->limits.logical_block_size != arg) { sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); + invalidate_bdev(lo->lo_device); } blk_mq_freeze_queue(lo->lo_queue); - /* kill_bdev should have truncated all the pages */ + /* invalidate_bdev should have truncated all the pages */ if (lo->lo_queue->limits.logical_block_size != arg && lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; From 2a4c0bf5c70eabd2b1f6f730a32bcad53e860085 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 21 Jun 2020 13:47:35 +0300 Subject: [PATCH 059/174] IB/mad: Fix use after free when destroying MAD agent commit 116a1b9f1cb769b83e5adff323f977a62b1dcb2e upstream. Currently, when RMPP MADs are processed while the MAD agent is destroyed, it could result in use after free of rmpp_recv, as decribed below: cpu-0 cpu-1 ----- ----- ib_mad_recv_done() ib_mad_complete_recv() ib_process_rmpp_recv_wc() unregister_mad_agent() ib_cancel_rmpp_recvs() cancel_delayed_work() process_rmpp_data() start_rmpp() queue_delayed_work(rmpp_recv->cleanup_work) destroy_rmpp_recv() free_rmpp_recv() cleanup_work()[1] spin_lock_irqsave(&rmpp_recv->agent->lock) <-- use after free [1] cleanup_work() == recv_cleanup_handler Fix it by waiting for the MAD agent reference count becoming zero before calling to ib_cancel_rmpp_recvs(). Fixes: 9a41e38a467c ("IB/mad: Use IDR for agent IDs") Link: https://lore.kernel.org/r/20200621104738.54850-2-leon@kernel.org Signed-off-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..a9e00cdf717b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -639,10 +639,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); From 21d511c6c9c25bd45acfafd9768027394a0fddb3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 23 Jun 2020 16:32:30 -0400 Subject: [PATCH 060/174] IB/hfi1: Fix module use count flaw due to leftover module put calls commit 822fbd37410639acdae368ea55477ddd3498651d upstream. When the try_module_get calls were removed from opening and closing of the i2c debugfs file, the corresponding module_put calls were missed. This results in an inaccurate module use count that requires a power cycle to fix. Fixes: 09fbca8e6240 ("IB/hfi1: No need to use try_module_get for debugfs") Link: https://lore.kernel.org/r/20200623203230.106975.76240.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Kaike Wan Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/debugfs.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index d268bf9c42ee..c29da2f4e339 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -985,15 +985,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int i2c1_debugfs_open(struct inode *in, struct file *fp) @@ -1013,7 +1008,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } @@ -1031,18 +1025,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp) static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target) { struct hfi1_pportdata *ppd; - int ret; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; ppd = private2ppd(fp); - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) /* failed - release the module */ - module_put(THIS_MODULE); - - return ret; + return acquire_chip_resource(ppd->dd, i2c_target(target), 0); } static int qsfp1_debugfs_open(struct inode *in, struct file *fp) @@ -1062,7 +1048,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target) ppd = private2ppd(fp); release_chip_resource(ppd->dd, i2c_target(target)); - module_put(THIS_MODULE); return 0; } From 1f5197a5ad973c9a5511c1d4f27c9dea4ec5dcad Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 27 May 2020 06:49:29 -0700 Subject: [PATCH 061/174] bus: ti-sysc: Flush posted write on enable and disable [ Upstream commit 5ce8aee81be6c8bc19051d7c7b0d3cbb7ac5fc3f ] Looks like we're missing flush of posted write after module enable and disable. I've seen occasional errors accessing various modules, and it is suspected that the lack of posted writes can also cause random reboots. The errors we can see are similar to the one below from spi for example: 44000000.ocp:L3 Custom Error: MASTER MPU TARGET L4CFG (Read): Data Access in User mode during Functional access ... mcspi_wait_for_reg_bit omap2_mcspi_transfer_one spi_transfer_one_message ... We also want to also flush posted write for disable. The clkctrl clock disable happens after module disable, and we don't want to have the module potentially stay active while we're trying to disable the clock. Fixes: d59b60564cbf ("bus: ti-sysc: Add generic enable/disable functions") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- drivers/bus/ti-sysc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index f0bc0841cbc4..c088c6f4adcf 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -938,6 +938,9 @@ set_autoidle: sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg); } + /* Flush posted write */ + sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); + if (ddata->module_enable_quirk) ddata->module_enable_quirk(ddata); @@ -1018,6 +1021,9 @@ set_sidle: reg |= 1 << regbits->autoidle_shift; sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg); + /* Flush posted write */ + sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); + return 0; } From 527ddb339d6b8770b87a962cbc61e96932f32a9c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 31 May 2020 12:37:54 -0700 Subject: [PATCH 062/174] bus: ti-sysc: Ignore clockactivity unless specified as a quirk [ Upstream commit 08b91dd6e547467fad61a7c201ff71080d7ad65a ] We must ignore the clockactivity bit for most modules and not set it unless specified for the module with SYSC_QUIRK_USE_CLOCKACT. Otherwise the interface clock can be automatically gated constantly causing unexpected performance issues. Fixes: ae9ae12e9daa ("bus: ti-sysc: Handle clockactivity for enable and disable") Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- drivers/bus/ti-sysc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index c088c6f4adcf..553c0e279621 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -880,10 +880,13 @@ static int sysc_enable_module(struct device *dev) regbits = ddata->cap->regbits; reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]); - /* Set CLOCKACTIVITY, we only use it for ick */ + /* + * Set CLOCKACTIVITY, we only use it for ick. And we only configure it + * based on the SYSC_QUIRK_USE_CLOCKACT flag, not based on the hardware + * capabilities. See the old HWMOD_SET_DEFAULT_CLOCKACT flag. + */ if (regbits->clkact_shift >= 0 && - (ddata->cfg.quirks & SYSC_QUIRK_USE_CLOCKACT || - ddata->cfg.sysc_val & BIT(regbits->clkact_shift))) + (ddata->cfg.quirks & SYSC_QUIRK_USE_CLOCKACT)) reg |= SYSC_CLOCACT_ICK << regbits->clkact_shift; /* Set SIDLE mode */ From 475a7b09b4bbc9a3801c60c3ac8375d142ed3def Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 27 May 2020 16:32:06 -0700 Subject: [PATCH 063/174] ARM: OMAP2+: Fix legacy mode dss_reset [ Upstream commit 77cad9dbc957f23a73169e8a8971186744296614 ] We must check for "dss_core" instead of "dss" to avoid also matching also "dss_dispc". This only matters for the mixed case of data configured in device tree but with legacy booting ti,hwmods property still enabled. Fixes: 8b30919a4e3c ("ARM: OMAP2+: Handle reset quirks for dynamically allocated modules") Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 203664c40d3d..eb74aa182661 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3535,7 +3535,7 @@ static const struct omap_hwmod_reset dra7_reset_quirks[] = { }; static const struct omap_hwmod_reset omap_reset_quirks[] = { - { .match = "dss", .len = 3, .reset = omap_dss_reset, }, + { .match = "dss_core", .len = 8, .reset = omap_dss_reset, }, { .match = "hdq1w", .len = 5, .reset = omap_hdq1w_reset, }, { .match = "i2c", .len = 3, .reset = omap_i2c_reset, }, { .match = "wd_timer", .len = 8, .reset = omap2_wd_timer_reset, }, From 41b2debf35ef5bc5719971d2470b1b7ed876a3c2 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Mon, 1 Jun 2020 16:39:37 -0500 Subject: [PATCH 064/174] xfrm: Fix double ESP trailer insertion in IPsec crypto offload. [ Upstream commit 94579ac3f6d0820adc83b5dc5358ead0158101e9 ] During IPsec performance testing, we see bad ICMP checksum. The error packet has duplicated ESP trailer due to double validate_xmit_xfrm calls. The first call is from ip_output, but the packet cannot be sent because netif_xmit_frozen_or_stopped is true and the packet gets dev_requeue_skb. The second call is from NET_TX softirq. However after the first call, the packet already has the ESP trailer. Fix by marking the skb with XFRM_XMIT bit after the packet is handled by validate_xmit_xfrm to avoid duplicate ESP trailer insertion. Fixes: f6e27114a60a ("net: Add a xfrm validate function to validate_xmit_skb") Signed-off-by: Huy Nguyen Reviewed-by: Boris Pismenny Reviewed-by: Raed Salem Reviewed-by: Saeed Mahameed Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- include/net/xfrm.h | 1 + net/xfrm/xfrm_device.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa08a7a5f6ac..fb391c00c19a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1012,6 +1012,7 @@ struct xfrm_offload { #define XFRM_GRO 32 #define XFRM_ESP_NO_TRAILER 64 #define XFRM_DEV_RESUME 128 +#define XFRM_XMIT 256 __u32 status; #define CRYPTO_SUCCESS 1 diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index c365b918be35..bb2292b5260c 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -82,7 +82,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (!xo) + if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) @@ -103,6 +103,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } + xo->flags |= XFRM_XMIT; + if (skb_is_gso(skb)) { struct net_device *dev = skb->dev; From e50cf858d118eb2c792b206ac26a2f9845321b20 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 11 Jun 2020 13:41:53 +0100 Subject: [PATCH 065/174] ASoC: q6asm: handle EOS correctly [ Upstream commit 6476b60f32866be49d05e2e0163f337374c55b06 ] Successful send of EOS command does not indicate that EOS is actually finished, correct event to wait EOS is finished is EOS_RENDERED event. EOS_RENDERED means that the DSP has finished processing all the buffers for that particular session and stream. This patch fixes EOS handling! Fixes: 68fd8480bb7b ("ASoC: qdsp6: q6asm: Add support to audio stream apis") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200611124159.20742-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/qdsp6/q6asm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c index e8141a33a55e..835ac98a789c 100644 --- a/sound/soc/qcom/qdsp6/q6asm.c +++ b/sound/soc/qcom/qdsp6/q6asm.c @@ -25,6 +25,7 @@ #define ASM_STREAM_CMD_FLUSH 0x00010BCE #define ASM_SESSION_CMD_PAUSE 0x00010BD3 #define ASM_DATA_CMD_EOS 0x00010BDB +#define ASM_DATA_EVENT_RENDERED_EOS 0x00010C1C #define ASM_NULL_POPP_TOPOLOGY 0x00010C68 #define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09 #define ASM_STREAM_CMD_SET_ENCDEC_PARAM 0x00010C10 @@ -546,9 +547,6 @@ static int32_t q6asm_stream_callback(struct apr_device *adev, case ASM_SESSION_CMD_SUSPEND: client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE; break; - case ASM_DATA_CMD_EOS: - client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; - break; case ASM_STREAM_CMD_FLUSH: client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE; break; @@ -651,6 +649,9 @@ static int32_t q6asm_stream_callback(struct apr_device *adev, spin_unlock_irqrestore(&ac->lock, flags); } + break; + case ASM_DATA_EVENT_RENDERED_EOS: + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; break; } From 8c236ac4376aa838f62fb86d21f73ffe1852b551 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Mon, 15 Jun 2020 09:16:36 +0200 Subject: [PATCH 066/174] efi/tpm: Verify event log header before parsing [ Upstream commit 7dfc06a0f25b593a9f51992f540c0f80a57f3629 ] It is possible that the first event in the event log is not actually a log header at all, but rather a normal event. This leads to the cast in __calc_tpm2_event_size being an invalid conversion, which means that the values read are effectively garbage. Depending on the first event's contents, this leads either to apparently normal behaviour, a crash or a freeze. While this behaviour of the firmware is not in accordance with the TCG Client EFI Specification, this happens on a Dell Precision 5510 with the TPM enabled but hidden from the OS ("TPM On" disabled, state otherwise untouched). The EFI firmware claims that the TPM is present and active and that it supports the TCG 2.0 event log format. Fortunately, this can be worked around by simply checking the header of the first event and the event log header signature itself. Commit b4f1874c6216 ("tpm: check event log version before reading final events") addressed a similar issue also found on Dell models. Fixes: 6b0326190205 ("efi: Attempt to get the TCG2 event log in the boot stub") Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/1927248.evlx2EsYKh@linux-e202.suse.de Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1165773 Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- include/linux/tpm_eventlog.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 131ea1bad458..eccfd3a4e4c8 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -81,6 +81,8 @@ struct tcg_efi_specid_event_algs { u16 digest_size; } __packed; +#define TCG_SPECID_SIG "Spec ID Event03" + struct tcg_efi_specid_event_head { u8 signature[16]; u32 platform_class; @@ -171,6 +173,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, int i; int j; u32 count, event_type; + const u8 zero_digest[sizeof(event_header->digest)] = {0}; marker = event; marker_start = marker; @@ -198,10 +201,19 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, count = READ_ONCE(event->count); event_type = READ_ONCE(event->event_type); + /* Verify that it's the log header */ + if (event_header->pcr_idx != 0 || + event_header->event_type != NO_ACTION || + memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) { + size = 0; + goto out; + } + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; /* Check if event is malformed. */ - if (count > efispecid->num_algs) { + if (memcmp(efispecid->signature, TCG_SPECID_SIG, + sizeof(TCG_SPECID_SIG)) || count > efispecid->num_algs) { size = 0; goto out; } From 92444a57e3656ac0c11972f2ace4b356ecb15795 Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Thu, 28 May 2020 13:38:04 -0500 Subject: [PATCH 067/174] efi/esrt: Fix reference count leak in esre_create_sysfs_entry. [ Upstream commit 4ddf4739be6e375116c375f0a68bf3893ffcee21 ] kobject_init_and_add() takes reference even when it fails. If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Previous commit "b8eb718348b8" fixed a similar problem. Fixes: 0bb549052d33 ("efi: Add esrt support") Signed-off-by: Qiushi Wu Link: https://lore.kernel.org/r/20200528183804.4497-1-wu000273@umn.edu Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/esrt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index d6dd5f503fa2..e8f71a50ba89 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -181,7 +181,7 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) rc = kobject_init_and_add(&entry->kobj, &esre1_ktype, NULL, "entry%d", entry_num); if (rc) { - kfree(entry); + kobject_put(&entry->kobj); return rc; } } From 47c7ae0ca9d7f96a53abbffdc4f00103253f82cb Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 12 Jun 2020 13:37:10 +0100 Subject: [PATCH 068/174] ASoc: q6afe: add support to get port direction [ Upstream commit 4a95737440d426e93441d49d11abf4c6526d4666 ] This patch adds support to q6afe_is_rx_port() to get direction of DSP BE dai port, this is useful for setting dailink directions correctly. Fixes: c25e295cd77b (ASoC: qcom: Add support to parse common audio device nodes) Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20200612123711.29130-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/qdsp6/q6afe.c | 8 ++++++++ sound/soc/qcom/qdsp6/q6afe.h | 1 + 2 files changed, 9 insertions(+) diff --git a/sound/soc/qcom/qdsp6/q6afe.c b/sound/soc/qcom/qdsp6/q6afe.c index e0945f7a58c8..0ce4eb60f984 100644 --- a/sound/soc/qcom/qdsp6/q6afe.c +++ b/sound/soc/qcom/qdsp6/q6afe.c @@ -800,6 +800,14 @@ int q6afe_get_port_id(int index) } EXPORT_SYMBOL_GPL(q6afe_get_port_id); +int q6afe_is_rx_port(int index) +{ + if (index < 0 || index >= AFE_PORT_MAX) + return -EINVAL; + + return port_maps[index].is_rx; +} +EXPORT_SYMBOL_GPL(q6afe_is_rx_port); static int afe_apr_send_pkt(struct q6afe *afe, struct apr_pkt *pkt, struct q6afe_port *port) { diff --git a/sound/soc/qcom/qdsp6/q6afe.h b/sound/soc/qcom/qdsp6/q6afe.h index c7ed5422baff..1a0f80a14afe 100644 --- a/sound/soc/qcom/qdsp6/q6afe.h +++ b/sound/soc/qcom/qdsp6/q6afe.h @@ -198,6 +198,7 @@ int q6afe_port_start(struct q6afe_port *port); int q6afe_port_stop(struct q6afe_port *port); void q6afe_port_put(struct q6afe_port *port); int q6afe_get_port_id(int index); +int q6afe_is_rx_port(int index); void q6afe_hdmi_port_prepare(struct q6afe_port *port, struct q6afe_hdmi_cfg *cfg); void q6afe_slim_port_prepare(struct q6afe_port *port, From 1fa27418054f74b428035ed228af18865be6df0b Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 12 Jun 2020 13:37:11 +0100 Subject: [PATCH 069/174] ASoC: qcom: common: set correct directions for dailinks [ Upstream commit a2120089251f1fe221305e88df99af16f940e236 ] Currently both FE and BE dai-links are configured bi-directional, However the DSP BE dais are only single directional, so set the directions as supported by the BE dais. Fixes: c25e295cd77b (ASoC: qcom: Add support to parse common audio device nodes) Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Tested-by: John Stultz Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20200612123711.29130-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 6c20bdd850f3..8ada4ecba847 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -4,6 +4,7 @@ #include #include "common.h" +#include "qdsp6/q6afe.h" int qcom_snd_parse_of(struct snd_soc_card *card) { @@ -101,6 +102,15 @@ int qcom_snd_parse_of(struct snd_soc_card *card) } link->no_pcm = 1; link->ignore_pmdown_time = 1; + + if (q6afe_is_rx_port(link->id)) { + link->dpcm_playback = 1; + link->dpcm_capture = 0; + } else { + link->dpcm_playback = 0; + link->dpcm_capture = 1; + } + } else { dlc = devm_kzalloc(dev, sizeof(*dlc), GFP_KERNEL); if (!dlc) @@ -113,12 +123,12 @@ int qcom_snd_parse_of(struct snd_soc_card *card) link->codecs->dai_name = "snd-soc-dummy-dai"; link->codecs->name = "snd-soc-dummy"; link->dynamic = 1; + link->dpcm_playback = 1; + link->dpcm_capture = 1; } link->ignore_suspend = 1; link->nonatomic = 1; - link->dpcm_playback = 1; - link->dpcm_capture = 1; link->stream_name = link->name; link++; From 34f105349369095b1f75e4cc422b674456d171ca Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Mon, 15 Jun 2020 05:54:08 +0800 Subject: [PATCH 070/174] regualtor: pfuze100: correct sw1a/sw2 on pfuze3000 [ Upstream commit 6f1cf5257acc6e6242ddf2f52bc7912aed77b79f ] PFUZE100_SWB_REG is not proper for sw1a/sw2, because enable_mask/enable_reg is not correct. On PFUZE3000, sw1a/sw2 should be the same as sw1a/sw2 on pfuze100 except that voltages are not linear, so add new PFUZE3000_SW_REG and pfuze3000_sw_regulator_ops which like the non-linear PFUZE100_SW_REG and pfuze100_sw_regulator_ops. Fixes: 1dced996ee70 ("regulator: pfuze100: update voltage setting for pfuze3000 sw1a") Reported-by: Christophe Meynard Signed-off-by: Robin Gong Link: https://lore.kernel.org/r/1592171648-8752-1-git-send-email-yibin.gong@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pfuze100-regulator.c | 60 +++++++++++++++++--------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 689537927f6f..4c8e8b472287 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -209,6 +209,19 @@ static const struct regulator_ops pfuze100_swb_regulator_ops = { }; +static const struct regulator_ops pfuze3000_sw_regulator_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_ascend, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_ramp_delay = pfuze100_set_ramp_delay, + +}; + #define PFUZE100_FIXED_REG(_chip, _name, base, voltage) \ [_chip ## _ ## _name] = { \ .desc = { \ @@ -318,23 +331,28 @@ static const struct regulator_ops pfuze100_swb_regulator_ops = { .stby_mask = 0x20, \ } - -#define PFUZE3000_SW2_REG(_chip, _name, base, min, max, step) { \ - .desc = { \ - .name = #_name,\ - .n_voltages = ((max) - (min)) / (step) + 1, \ - .ops = &pfuze100_sw_regulator_ops, \ - .type = REGULATOR_VOLTAGE, \ - .id = _chip ## _ ## _name, \ - .owner = THIS_MODULE, \ - .min_uV = (min), \ - .uV_step = (step), \ - .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ - .vsel_mask = 0x7, \ - }, \ - .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ - .stby_mask = 0x7, \ -} +/* No linar case for the some switches of PFUZE3000 */ +#define PFUZE3000_SW_REG(_chip, _name, base, mask, voltages) \ + [_chip ## _ ## _name] = { \ + .desc = { \ + .name = #_name, \ + .n_voltages = ARRAY_SIZE(voltages), \ + .ops = &pfuze3000_sw_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = _chip ## _ ## _name, \ + .owner = THIS_MODULE, \ + .volt_table = voltages, \ + .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ + .vsel_mask = (mask), \ + .enable_reg = (base) + PFUZE100_MODE_OFFSET, \ + .enable_mask = 0xf, \ + .enable_val = 0x8, \ + .enable_time = 500, \ + }, \ + .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ + .stby_mask = (mask), \ + .sw_reg = true, \ + } #define PFUZE3000_SW3_REG(_chip, _name, base, min, max, step) { \ .desc = { \ @@ -391,9 +409,9 @@ static struct pfuze_regulator pfuze200_regulators[] = { }; static struct pfuze_regulator pfuze3000_regulators[] = { - PFUZE100_SWB_REG(PFUZE3000, SW1A, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), + PFUZE3000_SW_REG(PFUZE3000, SW1A, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), PFUZE100_SW_REG(PFUZE3000, SW1B, PFUZE100_SW1CVOL, 700000, 1475000, 25000), - PFUZE100_SWB_REG(PFUZE3000, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), + PFUZE3000_SW_REG(PFUZE3000, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), PFUZE3000_SW3_REG(PFUZE3000, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000), PFUZE100_SWB_REG(PFUZE3000, SWBST, PFUZE100_SWBSTCON1, 0x3, pfuze100_swbst), PFUZE100_SWB_REG(PFUZE3000, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs), @@ -407,8 +425,8 @@ static struct pfuze_regulator pfuze3000_regulators[] = { }; static struct pfuze_regulator pfuze3001_regulators[] = { - PFUZE100_SWB_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), - PFUZE100_SWB_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), + PFUZE3000_SW_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a), + PFUZE3000_SW_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), PFUZE3000_SW3_REG(PFUZE3001, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000), PFUZE100_SWB_REG(PFUZE3001, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs), PFUZE100_VGEN_REG(PFUZE3001, VLDO1, PFUZE100_VGEN1VOL, 1800000, 3300000, 100000), From 3947dd237ef56dc303f379f4127f321c043124bf Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Wed, 10 Jun 2020 12:47:17 -0500 Subject: [PATCH 071/174] RDMA/siw: Fix pointer-to-int-cast warning in siw_rx_pbl() [ Upstream commit 6769b275a313c76ddcd7d94c632032326db5f759 ] The variable buf_addr is type dma_addr_t, which may not be the same size as a pointer. To ensure it is the correct size, cast to a uintptr_t. Fixes: c536277e0db1 ("RDMA/siw: Fix 64/32bit pointer inconsistency") Link: https://lore.kernel.org/r/20200610174717.15932-1-tseewald@gmail.com Signed-off-by: Tom Seewald Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/siw/siw_qp_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index c0a887240325..0520e70084f9 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,8 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) { + if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + bytes) { copied += bytes; offset += bytes; len -= bytes; From b59ed5668c4ef162c9357317e889c8cfc96f3477 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 16 Jun 2020 10:53:48 +0800 Subject: [PATCH 072/174] ASoC: fsl_ssi: Fix bclk calculation for mono channel [ Upstream commit ed1220df6e666500ebf58c4f2fccc681941646fb ] For mono channel, SSI will switch to Normal mode. In Normal mode and Network mode, the Word Length Control bits control the word length divider in clock generator, which is different with I2S Master mode (the word length is fixed to 32bit), it should be the value of params_width(hw_params). The condition "slots == 2" is not good for I2S Master mode, because for Network mode and Normal mode, the slots can also be 2. Then we need to use (ssi->i2s_net & SSI_SCR_I2S_MODE_MASK) to check if it is I2S Master mode. So we refine the formula for mono channel, otherwise there will be sound issue for S24_LE. Fixes: b0a7043d5c2c ("ASoC: fsl_ssi: Caculate bit clock rate using slot number and width") Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/034eff1435ff6ce300b6c781130cefd9db22ab9a.1592276147.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_ssi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 537dc69256f0..a4ebd6ddaba1 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -678,8 +678,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, struct regmap *regs = ssi->regs; u32 pm = 999, div2, psr, stccr, mask, afreq, factor, i; unsigned long clkrate, baudrate, tmprate; - unsigned int slots = params_channels(hw_params); - unsigned int slot_width = 32; + unsigned int channels = params_channels(hw_params); + unsigned int slot_width = params_width(hw_params); + unsigned int slots = 2; u64 sub, savesub = 100000; unsigned int freq; bool baudclk_is_used; @@ -688,10 +689,14 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, /* Override slots and slot_width if being specifically set... */ if (ssi->slots) slots = ssi->slots; - /* ...but keep 32 bits if slots is 2 -- I2S Master mode */ - if (ssi->slot_width && slots != 2) + if (ssi->slot_width) slot_width = ssi->slot_width; + /* ...but force 32 bits for stereo audio using I2S Master Mode */ + if (channels == 2 && + (ssi->i2s_net & SSI_SCR_I2S_MODE_MASK) == SSI_SCR_I2S_MODE_MASTER) + slot_width = 32; + /* Generate bit clock based on the slot number and slot width */ freq = slots * slot_width * params_rate(hw_params); From 0608288c6caab98e7819dd017b4a8818819ddfed Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 12 May 2020 18:30:40 +0200 Subject: [PATCH 073/174] samples/bpf: xdp_redirect_cpu: Set MAX_CPUS according to NR_CPUS [ Upstream commit 6a09815428547657f3ffd2f5c31ac2a191e7fdf3 ] xdp_redirect_cpu is currently failing in bpf_prog_load_xattr() allocating cpu_map map if CONFIG_NR_CPUS is less than 64 since cpu_map_alloc() requires max_entries to be less than NR_CPUS. Set cpu_map max_entries according to NR_CPUS in xdp_redirect_cpu_kern.c and get currently running cpus in xdp_redirect_cpu_user.c Signed-off-by: Lorenzo Bianconi Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/374472755001c260158c4e4b22f193bdd3c56fb7.1589300442.git.lorenzo@kernel.org Signed-off-by: Sasha Levin --- samples/bpf/xdp_redirect_cpu_kern.c | 2 +- samples/bpf/xdp_redirect_cpu_user.c | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c index cfcc31e51197..d94a999b4b4b 100644 --- a/samples/bpf/xdp_redirect_cpu_kern.c +++ b/samples/bpf/xdp_redirect_cpu_kern.c @@ -15,7 +15,7 @@ #include "bpf_helpers.h" #include "hash_func01.h" -#define MAX_CPUS 64 /* WARNING - sync with _user.c */ +#define MAX_CPUS NR_CPUS /* Special map type that can XDP_REDIRECT frames to another CPU */ struct { diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 8b862a7a6c6a..767869e3b308 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -13,6 +13,7 @@ static const char *__doc__ = #include #include #include +#include #include #include #include @@ -24,8 +25,6 @@ static const char *__doc__ = #include #include -#define MAX_CPUS 64 /* WARNING - sync with _kern.c */ - /* How many xdp_progs are defined in _kern.c */ #define MAX_PROG 6 @@ -40,6 +39,7 @@ static char *ifname; static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int n_cpus; static int cpu_map_fd; static int rx_cnt_map_fd; static int redirect_err_cnt_map_fd; @@ -170,7 +170,7 @@ struct stats_record { struct record redir_err; struct record kthread; struct record exception; - struct record enq[MAX_CPUS]; + struct record enq[]; }; static bool map_collect_percpu(int fd, __u32 key, struct record *rec) @@ -225,10 +225,11 @@ static struct datarec *alloc_record_per_cpu(void) static struct stats_record *alloc_stats_record(void) { struct stats_record *rec; - int i; + int i, size; - rec = malloc(sizeof(*rec)); - memset(rec, 0, sizeof(*rec)); + size = sizeof(*rec) + n_cpus * sizeof(struct record); + rec = malloc(size); + memset(rec, 0, size); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); @@ -237,7 +238,7 @@ static struct stats_record *alloc_stats_record(void) rec->redir_err.cpu = alloc_record_per_cpu(); rec->kthread.cpu = alloc_record_per_cpu(); rec->exception.cpu = alloc_record_per_cpu(); - for (i = 0; i < MAX_CPUS; i++) + for (i = 0; i < n_cpus; i++) rec->enq[i].cpu = alloc_record_per_cpu(); return rec; @@ -247,7 +248,7 @@ static void free_stats_record(struct stats_record *r) { int i; - for (i = 0; i < MAX_CPUS; i++) + for (i = 0; i < n_cpus; i++) free(r->enq[i].cpu); free(r->exception.cpu); free(r->kthread.cpu); @@ -350,7 +351,7 @@ static void stats_print(struct stats_record *stats_rec, } /* cpumap enqueue stats */ - for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { + for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) { char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; char *errstr = ""; @@ -475,7 +476,7 @@ static void stats_collect(struct stats_record *rec) map_collect_percpu(fd, 1, &rec->redir_err); fd = cpumap_enqueue_cnt_map_fd; - for (i = 0; i < MAX_CPUS; i++) + for (i = 0; i < n_cpus; i++) map_collect_percpu(fd, i, &rec->enq[i]); fd = cpumap_kthread_cnt_map_fd; @@ -549,10 +550,10 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, */ static void mark_cpus_unavailable(void) { - __u32 invalid_cpu = MAX_CPUS; + __u32 invalid_cpu = n_cpus; int ret, i; - for (i = 0; i < MAX_CPUS; i++) { + for (i = 0; i < n_cpus; i++) { ret = bpf_map_update_elem(cpus_available_map_fd, &i, &invalid_cpu, 0); if (ret) { @@ -688,6 +689,8 @@ int main(int argc, char **argv) int prog_fd; __u32 qsize; + n_cpus = get_nprocs_conf(); + /* Notice: choosing he queue size is very important with the * ixgbe driver, because it's driver page recycling trick is * dependend on pages being returned quickly. The number of @@ -757,7 +760,7 @@ int main(int argc, char **argv) case 'c': /* Add multiple CPUs */ add_cpu = strtoul(optarg, NULL, 0); - if (add_cpu >= MAX_CPUS) { + if (add_cpu >= n_cpus) { fprintf(stderr, "--cpu nr too large for cpumap err(%d):%s\n", errno, strerror(errno)); From d909f9db0caa2e7a582cc47f8366743e0873efcc Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Fri, 12 Jun 2020 14:53:27 -0400 Subject: [PATCH 074/174] bpf, xdp, samples: Fix null pointer dereference in *_user code [ Upstream commit 6903cdae9f9f08d61e49c16cbef11c293e33a615 ] Memset on the pointer right after malloc can cause a NULL pointer deference if it failed to allocate memory. A simple fix is to replace malloc()/memset() pair with a simple call to calloc(). Fixes: 0fca931a6f21 ("samples/bpf: program demonstrating access to xdp_rxq_info") Signed-off-by: Gaurav Singh Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: Sasha Levin --- samples/bpf/xdp_monitor_user.c | 8 ++------ samples/bpf/xdp_redirect_cpu_user.c | 7 ++----- samples/bpf/xdp_rxq_info_user.c | 13 +++---------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index dd558cbb2309..ef53b93db573 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -509,11 +509,8 @@ static void *alloc_rec_per_cpu(int record_size) { unsigned int nr_cpus = bpf_num_possible_cpus(); void *array; - size_t size; - size = record_size * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, record_size); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -528,8 +525,7 @@ static struct stats_record *alloc_stats_record(void) int i; /* Alloc main stats_record structure */ - rec = malloc(sizeof(*rec)); - memset(rec, 0, sizeof(*rec)); + rec = calloc(1, sizeof(*rec)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 767869e3b308..0a7672556822 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -210,11 +210,8 @@ static struct datarec *alloc_record_per_cpu(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); struct datarec *array; - size_t size; - size = sizeof(struct datarec) * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -229,11 +226,11 @@ static struct stats_record *alloc_stats_record(void) size = sizeof(*rec) + n_cpus * sizeof(struct record); rec = malloc(size); - memset(rec, 0, size); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); } + memset(rec, 0, size); rec->rx_cnt.cpu = alloc_record_per_cpu(); rec->redir_err.cpu = alloc_record_per_cpu(); rec->kthread.cpu = alloc_record_per_cpu(); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index b88df17853b8..21d6e5067a83 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -198,11 +198,8 @@ static struct datarec *alloc_record_per_cpu(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); struct datarec *array; - size_t size; - size = sizeof(struct datarec) * nr_cpus; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); exit(EXIT_FAIL_MEM); @@ -214,11 +211,8 @@ static struct record *alloc_record_per_rxq(void) { unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; struct record *array; - size_t size; - size = sizeof(struct record) * nr_rxqs; - array = malloc(size); - memset(array, 0, size); + array = calloc(nr_rxqs, sizeof(struct record)); if (!array) { fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); exit(EXIT_FAIL_MEM); @@ -232,8 +226,7 @@ static struct stats_record *alloc_stats_record(void) struct stats_record *rec; int i; - rec = malloc(sizeof(*rec)); - memset(rec, 0, sizeof(*rec)); + rec = calloc(1, sizeof(struct stats_record)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); exit(EXIT_FAIL_MEM); From 07f7c547698b1590288af8b33e4c9a19c341bdb8 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Tue, 9 Jun 2020 23:45:21 +0200 Subject: [PATCH 075/174] ARM: dts: am335x-pocketbeagle: Fix mmc0 Write Protect [ Upstream commit d7af722344e6dc52d87649100516515263e15c75 ] AM3358 pin mcasp0_aclkr (ZCZ ball B13) [0] is routed to P1.31 header [1] Mode 4 of this pin is mmc0_sdwp (SD Write Protect). A signal connected to P1.31 may accidentally trigger mmc0 write protection. To avoid this situation, do not put mcasp0_aclkr in mode 4 (mmc0_sdwp) by default. [0] http://www.ti.com/lit/ds/symlink/am3358.pdf [1] https://github.com/beagleboard/pocketbeagle/wiki/System-Reference-Manual#531_Expansion_Headers Fixes: 047905376a16 (ARM: dts: Add am335x-pocketbeagle) Signed-off-by: Robert Nelson Signed-off-by: Drew Fustini Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/am335x-pocketbeagle.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts index ff4f919d22f6..abf2badce53d 100644 --- a/arch/arm/boot/dts/am335x-pocketbeagle.dts +++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts @@ -88,7 +88,6 @@ AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0) - AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */ >; }; From 99ab61cc8b75566899bd61864f0ff9b7f6eef41e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 Jun 2020 10:19:50 -0700 Subject: [PATCH 076/174] ARM: dts: Fix duovero smsc interrupt for suspend [ Upstream commit 9cf28e41f9f768791f54ee18333239fda6927ed8 ] While testing the recent suspend and resume regressions I noticed that duovero can still end up losing edge gpio interrupts on runtime suspend. This causes NFSroot easily stopping working after resume on duovero. Let's fix the issue by using gpio level interrupts for smsc as then the gpio interrupt state is seen by the gpio controller on resume. Fixes: 731b409878a3 ("ARM: dts: Configure duovero for to allow core retention during idle") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap4-duovero-parlor.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 8047e8cdb3af..4548d87534e3 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -139,7 +139,7 @@ ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio2>; - interrupts = <12 IRQ_TYPE_EDGE_FALLING>; /* gpio_44 */ + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; /* gpio_44 */ phy-mode = "mii"; From 5f6b834e110bc44171acdb8f9258d42813ff548f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Jun 2020 22:36:11 +0300 Subject: [PATCH 077/174] x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get() [ Upstream commit cc5277fe66cf3ad68f41f1c539b2ef0d5e432974 ] The callers don't expect *d_cdp to be set to an error pointer, they only check for NULL. This leads to a static checker warning: arch/x86/kernel/cpu/resctrl/rdtgroup.c:2648 __init_one_rdt_domain() warn: 'd_cdp' could be an error pointer This would not trigger a bug in this specific case because __init_one_rdt_domain() calls it with a valid domain that would not have a negative id and thus not trigger the return of the ERR_PTR(). If this was a negative domain id then the call to rdt_find_domain() in domain_add_cpu() would have returned the ERR_PTR() much earlier and the creation of the domain with an invalid id would have been prevented. Even though a bug is not triggered currently the right and safe thing to do is to set the pointer to NULL because that is what can be checked for when the caller is handling the CDP and non-CDP cases. Fixes: 52eb74339a62 ("x86/resctrl: Fix rdt_find_domain() return value and checks") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Acked-by: Fenghua Yu Link: https://lkml.kernel.org/r/20200602193611.GA190851@mwanda Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 20856d80dce3..54b711bc0607 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1027,6 +1027,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; + _d_cdp = NULL; ret = -EINVAL; } From b93df0f6d7cdabe69d91720e5b799001f2667dcc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 17 Jun 2020 16:21:29 +0100 Subject: [PATCH 078/174] regmap: Fix memory leak from regmap_register_patch [ Upstream commit 95b2c3ec4cb1689db2389c251d39f64490ba641c ] When a register patch is registered the reg_sequence is copied but the memory allocated is never freed. Add a kfree in regmap_exit to clean it up. Fixes: 22f0d90a3482 ("regmap: Support register patch sets") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200617152129.19655-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/base/regmap/regmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 59f911e57719..508bbd6ea439 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1356,6 +1356,7 @@ void regmap_exit(struct regmap *map) if (map->hwlock) hwspin_lock_free(map->hwlock); kfree_const(map->name); + kfree(map->patch); kfree(map); } EXPORT_SYMBOL_GPL(regmap_exit); From 0f3aa6c6d6adf4578164e9c5b164dd6e4d6b4800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 16 Jun 2020 16:28:29 +0200 Subject: [PATCH 079/174] devmap: Use bpf_map_area_alloc() for allocating hash buckets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 99c51064fb06146b3d494b745c947e438a10aaa7 ] Syzkaller discovered that creating a hash of type devmap_hash with a large number of entries can hit the memory allocator limit for allocating contiguous memory regions. There's really no reason to use kmalloc_array() directly in the devmap code, so just switch it to the existing bpf_map_area_alloc() function that is used elsewhere. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Reported-by: Xiumei Mu Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200616142829.114173-1-toke@redhat.com Signed-off-by: Sasha Levin --- kernel/bpf/devmap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b4b6b77f309c..6684696fa457 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -88,12 +88,13 @@ struct bpf_dtab { static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); -static struct hlist_head *dev_map_create_hash(unsigned int entries) +static struct hlist_head *dev_map_create_hash(unsigned int entries, + int numa_node) { int i; struct hlist_head *hash; - hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); + hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); @@ -151,7 +152,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu)); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, + dtab->map.numa_node); if (!dtab->dev_index_head) goto free_percpu; @@ -249,7 +251,7 @@ static void dev_map_free(struct bpf_map *map) } } - kfree(dtab->dev_index_head); + bpf_map_area_free(dtab->dev_index_head); } else { for (i = 0; i < dtab->map.max_entries; i++) { struct bpf_dtab_netdev *dev; From f1ee7d3a2c1ab17802321879c9a62ca690e71ab9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 16 Jun 2020 18:04:14 -0700 Subject: [PATCH 080/174] bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE [ Upstream commit d8fe449a9c51a37d844ab607e14e2f5c657d3cf2 ] Attaching to these hooks can break iptables because its optval is usually quite big, or at least bigger than the current PAGE_SIZE limit. David also mentioned some SCTP options can be big (around 256k). For such optvals we expose only the first PAGE_SIZE bytes to the BPF program. BPF program has two options: 1. Set ctx->optlen to 0 to indicate that the BPF's optval should be ignored and the kernel should use original userspace value. 2. Set ctx->optlen to something that's smaller than the PAGE_SIZE. v5: * use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov) * update the docs accordingly v4: * use temporary buffer to avoid optval == optval_end == NULL; this removes the corner case in the verifier that might assume non-zero PTR_TO_PACKET/PTR_TO_PACKET_END. v3: * don't increase the limit, bypass the argument v2: * proper comments formatting (Jakub Kicinski) Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: David Laight Link: https://lore.kernel.org/bpf/20200617010416.93086-1-sdf@google.com Signed-off-by: Sasha Levin --- kernel/bpf/cgroup.c | 53 ++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 869e2e1860e8..b701af27a779 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -966,16 +966,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) { - if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0) + if (unlikely(max_optlen < 0)) return -EINVAL; + if (unlikely(max_optlen > PAGE_SIZE)) { + /* We don't expose optvals that are greater than PAGE_SIZE + * to the BPF program. + */ + max_optlen = PAGE_SIZE; + } + ctx->optval = kzalloc(max_optlen, GFP_USER); if (!ctx->optval) return -ENOMEM; ctx->optval_end = ctx->optval + max_optlen; - return 0; + return max_optlen; } static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) @@ -1009,13 +1016,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, */ max_optlen = max_t(int, 16, *optlen); - ret = sockopt_alloc_buf(&ctx, max_optlen); - if (ret) - return ret; + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); + if (max_optlen < 0) + return max_optlen; ctx.optlen = *optlen; - if (copy_from_user(ctx.optval, optval, *optlen) != 0) { + if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { ret = -EFAULT; goto out; } @@ -1043,8 +1050,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, /* export any potential modifications */ *level = ctx.level; *optname = ctx.optname; - *optlen = ctx.optlen; - *kernel_optval = ctx.optval; + + /* optlen == 0 from BPF indicates that we should + * use original userspace data. + */ + if (ctx.optlen != 0) { + *optlen = ctx.optlen; + *kernel_optval = ctx.optval; + } } out: @@ -1076,12 +1089,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) return retval; - ret = sockopt_alloc_buf(&ctx, max_optlen); - if (ret) - return ret; - ctx.optlen = max_optlen; + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); + if (max_optlen < 0) + return max_optlen; + if (!retval) { /* If kernel getsockopt finished successfully, * copy whatever was returned to the user back @@ -1095,10 +1108,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) - ctx.optlen = max_optlen; - - if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) { + if (copy_from_user(ctx.optval, optval, + min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; goto out; } @@ -1127,10 +1138,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (copy_to_user(optval, ctx.optval, ctx.optlen) || - put_user(ctx.optlen, optlen)) { - ret = -EFAULT; - goto out; + if (ctx.optlen != 0) { + if (copy_to_user(optval, ctx.optval, ctx.optlen) || + put_user(ctx.optlen, optlen)) { + ret = -EFAULT; + goto out; + } } ret = ctx.retval; From 73cff44e66e3552ace8f985a4f0e021f0e1278e3 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Sun, 14 Jun 2020 15:19:00 -0700 Subject: [PATCH 081/174] ARM: dts: NSP: Correct FA2 mailbox node [ Upstream commit ac4e106d8934a5894811fc263f4b03fc8ed0fb7a ] The FA2 mailbox is specified at 0x18025000 but should actually be 0x18025c00, length 0x400 according to socregs_nsp.h and board_bu.c. Also the interrupt was off by one and should be GIC SPI 151 instead of 150. Fixes: 17d517172300 ("ARM: dts: NSP: Add mailbox (PDC) to NSP") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm-nsp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index da6d70f09ef1..418e6b97cb2e 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -257,10 +257,10 @@ status = "disabled"; }; - mailbox: mailbox@25000 { + mailbox: mailbox@25c00 { compatible = "brcm,iproc-fa2-mbox"; - reg = <0x25000 0x445>; - interrupts = ; + reg = <0x25c00 0x400>; + interrupts = ; #mbox-cells = <1>; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; From d6fb7f457456b528804b4c21fd8e781b18519cde Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jun 2020 23:01:23 +0100 Subject: [PATCH 082/174] rxrpc: Fix handling of rwind from an ACK packet [ Upstream commit a2ad7c21ad8cf1ce4ad65e13df1c2a1c29b38ac5 ] The handling of the receive window size (rwind) from a received ACK packet is not correct. The rxrpc_input_ackinfo() function currently checks the current Tx window size against the rwind from the ACK to see if it has changed, but then limits the rwind size before storing it in the tx_winsize member and, if it increased, wake up the transmitting process. This means that if rwind > RXRPC_RXTX_BUFF_SIZE - 1, this path will always be followed. Fix this by limiting rwind before we compare it to tx_winsize. The effect of this can be seen by enabling the rxrpc_rx_rwind_change tracepoint. Fixes: 702f2ac87a9a ("rxrpc: Wake up the transmitter if Rx window size increases on the peer") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3be4177baf70..22dec6049e1b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -723,13 +723,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (call->tx_winsize != rwind) { - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; - trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, - ntohl(ackinfo->rwind), wake); + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake); call->tx_winsize = rwind; } From 66143ecb9e3cfb1428b1b82feec35178e6ece844 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sat, 13 Jun 2020 23:11:48 -0500 Subject: [PATCH 083/174] RDMA/rvt: Fix potential memory leak caused by rvt_alloc_rq [ Upstream commit 90a239ee25fa3a483facec3de7c144361a3d3a51 ] In case of failure of alloc_ud_wq_attr(), the memory allocated by rvt_alloc_rq() is not freed. Fix it by calling rvt_free_rq() using the existing clean-up code. Fixes: d310c4bf8aea ("IB/{rdmavt, hfi1, qib}: Remove AH refcount for UD QPs") Link: https://lore.kernel.org/r/20200614041148.131983-1-pakki001@umn.edu Signed-off-by: Aditya Pakki Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d35465389357..19556c62c7ea 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1196,7 +1196,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_ud_wq_attr(qp, rdi->dparms.node); if (err) { ret = (ERR_PTR(err)); - goto bail_driver_priv; + goto bail_rq_rvt; } err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, @@ -1300,9 +1300,11 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - rvt_free_rq(&qp->r_rq); free_ud_wq_attr(qp); +bail_rq_rvt: + rvt_free_rq(&qp->r_rq); + bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); From f0078dc6750fe46c7955de081b05cbf7d00d9093 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 16 Jun 2020 12:34:08 +0300 Subject: [PATCH 084/174] RDMA/qedr: Fix KASAN: use-after-free in ucma_event_handler+0x532 [ Upstream commit 0dfbd5ecf28cbcb81674c49d34ee97366db1be44 ] Private data passed to iwarp_cm_handler is copied for connection request / response, but ignored otherwise. If junk is passed, it is stored in the event and used later in the event processing. The driver passes an old junk pointer during connection close which leads to a use-after-free on event processing. Set private data to NULL for events that don 't have private data. BUG: KASAN: use-after-free in ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: Read of size 4 at addr ffff8886caa71200 by task kworker/u128:1/5250 kernel: kernel: Workqueue: iw_cm_wq cm_work_handler [iw_cm] kernel: Call Trace: kernel: dump_stack+0x8c/0xc0 kernel: print_address_description.constprop.0+0x1b/0x210 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: __kasan_report.cold+0x1a/0x33 kernel: ? ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: kasan_report+0xe/0x20 kernel: check_memory_region+0x130/0x1a0 kernel: memcpy+0x20/0x50 kernel: ucma_event_handler+0x532/0x560 [rdma_ucm] kernel: ? __rpc_execute+0x608/0x620 [sunrpc] kernel: cma_iw_handler+0x212/0x330 [rdma_cm] kernel: ? iw_conn_req_handler+0x6e0/0x6e0 [rdma_cm] kernel: ? enqueue_timer+0x86/0x140 kernel: ? _raw_write_lock_irq+0xd0/0xd0 kernel: cm_work_handler+0xd3d/0x1070 [iw_cm] Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Link: https://lore.kernel.org/r/20200616093408.17827-1-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 5e9732990be5..a7a926b7b562 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -150,8 +150,17 @@ qedr_iw_issue_event(void *context, if (params->cm_info) { event.ird = params->cm_info->ird; event.ord = params->cm_info->ord; - event.private_data_len = params->cm_info->private_data_len; - event.private_data = (void *)params->cm_info->private_data; + /* Only connect_request and reply have valid private data + * the rest of the events this may be left overs from + * connection establishment. CONNECT_REQUEST is issued via + * qedr_iw_mpa_request + */ + if (event_type == IW_CM_EVENT_CONNECT_REPLY) { + event.private_data_len = + params->cm_info->private_data_len; + event.private_data = + (void *)params->cm_info->private_data; + } } if (ep->cm_id) From 4aeb21584e550f20bc829c812f24764b9a0dbb09 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 16 Jun 2020 13:43:04 +0300 Subject: [PATCH 085/174] RDMA/cma: Protect bind_list and listen_list while finding matching cm id [ Upstream commit 730c8912484186d4623d0c76509066d285c3a755 ] The bind_list and listen_list must be accessed under a lock, add the missing locking around the access in cm_ib_id_from_event() In addition add lockdep asserts to make it clearer what the locking semantic is here. general protection fault: 0000 [#1] SMP NOPTI CPU: 226 PID: 126135 Comm: kworker/226:1 Tainted: G OE 4.12.14-150.47-default #1 SLE15 Hardware name: Cray Inc. Windom/Windom, BIOS 0.8.7 01-10-2020 Workqueue: ib_cm cm_work_handler [ib_cm] task: ffff9c5a60a1d2c0 task.stack: ffffc1d91f554000 RIP: 0010:cma_ib_req_handler+0x3f1/0x11b0 [rdma_cm] RSP: 0018:ffffc1d91f557b40 EFLAGS: 00010286 RAX: deacffffffffff30 RBX: 0000000000000001 RCX: ffff9c2af5bb6000 RDX: 00000000000000a9 RSI: ffff9c5aa4ed2f10 RDI: ffffc1d91f557b08 RBP: ffffc1d91f557d90 R08: ffff9c340cc80000 R09: ffff9c2c0f901900 R10: 0000000000000000 R11: 0000000000000001 R12: deacffffffffff30 R13: ffff9c5a48aeec00 R14: ffffc1d91f557c30 R15: ffff9c5c2eea3688 FS: 0000000000000000(0000) GS:ffff9c5c2fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00002b5cc03fa320 CR3: 0000003f8500a000 CR4: 00000000003406e0 Call Trace: ? rdma_addr_cancel+0xa0/0xa0 [ib_core] ? cm_process_work+0x28/0x140 [ib_cm] cm_process_work+0x28/0x140 [ib_cm] ? cm_get_bth_pkey.isra.44+0x34/0xa0 [ib_cm] cm_work_handler+0xa06/0x1a6f [ib_cm] ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to+0x7c/0x4b0 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 process_one_work+0x1da/0x400 worker_thread+0x2b/0x3f0 ? process_one_work+0x400/0x400 kthread+0x118/0x140 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x22/0x40 Code: 00 66 83 f8 02 0f 84 ca 05 00 00 49 8b 84 24 d0 01 00 00 48 85 c0 0f 84 68 07 00 00 48 2d d0 01 00 00 49 89 c4 0f 84 59 07 00 00 <41> 0f b7 44 24 20 49 8b 77 50 66 83 f8 0a 75 9e 49 8b 7c 24 28 Fixes: 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") Link: https://lore.kernel.org/r/20200616104304.2426081-1-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8f776b7de45e..e3cd9d2b0dd2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1631,6 +1631,8 @@ static struct rdma_id_private *cma_find_listener( { struct rdma_id_private *id_priv, *id_priv_dev; + lockdep_assert_held(&lock); + if (!bind_list) return ERR_PTR(-EINVAL); @@ -1677,6 +1679,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } } + mutex_lock(&lock); /* * Net namespace might be getting deleted while route lookup, * cm_id lookup is in progress. Therefore, perform netdevice @@ -1718,6 +1721,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); err: rcu_read_unlock(); + mutex_unlock(&lock); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; @@ -2473,6 +2477,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; + lockdep_assert_held(&lock); + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; @@ -3245,6 +3251,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, u64 sid, mask; __be16 port; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); port = htons(bind_list->port); @@ -3273,6 +3281,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list; int ret; + lockdep_assert_held(&lock); + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; @@ -3299,6 +3309,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, struct sockaddr *saddr = cma_src_addr(id_priv); __be16 dport = cma_port(daddr); + lockdep_assert_held(&lock); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { struct sockaddr *cur_daddr = cma_dst_addr(cur_id); struct sockaddr *cur_saddr = cma_src_addr(cur_id); @@ -3338,6 +3350,8 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, unsigned int rover; struct net *net = id_priv->id.route.addr.dev_addr.net; + lockdep_assert_held(&lock); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -3385,6 +3399,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) @@ -3415,6 +3431,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps, unsigned short snum; int ret; + lockdep_assert_held(&lock); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; From 9e89c2d5da87017f15f9be3c411023f917cd4e58 Mon Sep 17 00:00:00 2001 From: Qiushi Wu Date: Sat, 13 Jun 2020 15:51:58 -0500 Subject: [PATCH 086/174] ASoC: rockchip: Fix a reference count leak. [ Upstream commit f141a422159a199f4c8dedb7e0df55b3b2cf16cd ] Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count if pm_runtime_put is not called in error handling paths. Call pm_runtime_put if pm_runtime_get_sync fails. Fixes: fc05a5b22253 ("ASoC: rockchip: add support for pdm controller") Signed-off-by: Qiushi Wu Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20200613205158.27296-1-wu000273@umn.edu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_pdm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 7cd42fcfcf38..1707414cfa92 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -590,8 +590,10 @@ static int rockchip_pdm_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } ret = regcache_sync(pdm->regmap); From d3edf648f39833a31c8b584c64a6d7f05cad6a6f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 17 Jun 2020 16:54:52 +0200 Subject: [PATCH 087/174] s390/qeth: fix error handling for isolation mode cmds [ Upstream commit e2dfcfba00ba4a414617ef4c5a8501fe21567eb3 ] Current(?) OSA devices also store their cmd-specific return codes for SET_ACCESS_CONTROL cmds into the top-level cmd->hdr.return_code. So once we added stricter checking for the top-level field a while ago, none of the error logic that rolls back the user's configuration to its old state is applied any longer. For this specific cmd, go back to the old model where we peek into the cmd structure even though the top-level field indicated an error. Fixes: 686c97ee29c8 ("s390/qeth: fix error handling in adapter command callbacks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index fe70e9875bde..5043f0fcf399 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4163,9 +4163,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); - if (cmd->hdr.return_code) - return -EIO; - qeth_setadpparms_inspect_rc(cmd); access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_CARD_TEXT_(card, 2, "rc=%d", @@ -4175,7 +4172,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n", access_ctrl_req->subcmd_code, CARD_DEVID(card), cmd->data.setadapterparms.hdr.return_code); - switch (cmd->data.setadapterparms.hdr.return_code) { + switch (qeth_setadpparms_inspect_rc(cmd)) { case SET_ACCESS_CTRL_RC_SUCCESS: if (card->options.isolation == ISOLATION_MODE_NONE) { dev_info(&card->gdev->dev, From 34f45567462d07f697e9a4ec6a0c8c374f432b83 Mon Sep 17 00:00:00 2001 From: Fan Guo Date: Fri, 12 Jun 2020 14:38:24 +0800 Subject: [PATCH 088/174] RDMA/mad: Fix possible memory leak in ib_mad_post_receive_mads() [ Upstream commit a17f4bed811c60712d8131883cdba11a105d0161 ] If ib_dma_mapping_error() returns non-zero value, ib_mad_post_receive_mads() will jump out of loops and return -ENOMEM without freeing mad_priv. Fix this memory-leak problem by freeing mad_priv in this case. Fixes: 2c34e68f4261 ("IB/mad: Check and handle potential DMA mapping errors") Link: https://lore.kernel.org/r/20200612063824.180611-1-guofan5@huawei.com Signed-off-by: Fan Guo Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/mad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a9e00cdf717b..2284930b5f91 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2960,6 +2960,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { + kfree(mad_priv); ret = -ENOMEM; break; } From 67db9e032b6e5db640e643375c43c82af34a02a6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 18 Jun 2020 12:40:43 -0400 Subject: [PATCH 089/174] selftests/net: report etf errors correctly [ Upstream commit ca8826095e4d4afc0ccaead27bba6e4b623a12ae ] The ETF qdisc can queue skbs that it could not pace on the errqueue. Address a few issues in the selftest - recv buffer size was too small, and incorrectly calculated - compared errno to ee_code instead of ee_errno - missed invalid request error type v2: - fix a few checkpatch --strict indentation warnings Fixes: ea6a547669b3 ("selftests/net: make so_txtime more robust to timer variance") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/so_txtime.c | 33 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 383bac05ac32..ceaad78e9667 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -15,8 +15,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -140,8 +141,8 @@ static void do_recv_errqueue_timeout(int fdt) { char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; - char data[sizeof(struct ipv6hdr) + - sizeof(struct tcphdr) + 1]; + char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + + sizeof(struct udphdr) + 1]; struct sock_extended_err *err; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -159,6 +160,8 @@ static void do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { + const char *reason; + ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) break; @@ -176,14 +179,30 @@ static void do_recv_errqueue_timeout(int fdt) err = (struct sock_extended_err *)CMSG_DATA(cm); if (err->ee_origin != SO_EE_ORIGIN_TXTIME) error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin); - if (err->ee_code != ECANCELED) - error(1, 0, "errqueue: code 0x%x\n", err->ee_code); + + switch (err->ee_errno) { + case ECANCELED: + if (err->ee_code != SO_EE_CODE_TXTIME_MISSED) + error(1, 0, "errqueue: unknown ECANCELED %u\n", + err->ee_code); + reason = "missed txtime"; + break; + case EINVAL: + if (err->ee_code != SO_EE_CODE_TXTIME_INVALID_PARAM) + error(1, 0, "errqueue: unknown EINVAL %u\n", + err->ee_code); + reason = "invalid txtime"; + break; + default: + error(1, 0, "errqueue: errno %u code %u\n", + err->ee_errno, err->ee_code); + }; tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; tstamp -= (int64_t) glob_tstart; tstamp /= 1000 * 1000; - fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n", - data[ret - 1], tstamp); + fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped: %s\n", + data[ret - 1], tstamp, reason); msg.msg_flags = 0; msg.msg_controllen = sizeof(control); From ede796e5ac870aa0badb474678eb7752d3208d7f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Jun 2020 07:13:43 +0800 Subject: [PATCH 090/174] iommu/vt-d: Enable PCI ACS for platform opt in hint [ Upstream commit 50310600ebda74b9988467e2e6128711c7ba56fc ] PCI ACS is disabled if Intel IOMMU is off by default or intel_iommu=off is used in command line. Unfortunately, Intel IOMMU will be forced on if there're devices sitting on an external facing PCI port that is marked as untrusted (for example, thunderbolt peripherals). That means, PCI ACS is disabled while Intel IOMMU is forced on to isolate those devices. As the result, the devices of an MFD will be grouped by a single group even the ACS is supported on device. [ 0.691263] pci 0000:00:07.1: Adding to iommu group 3 [ 0.691277] pci 0000:00:07.2: Adding to iommu group 3 [ 0.691292] pci 0000:00:07.3: Adding to iommu group 3 Fix it by requesting PCI ACS when Intel IOMMU is detected with platform opt in hint. Fixes: 89a6079df791a ("iommu/vt-d: Force IOMMU on for platform opt in hint") Co-developed-by: Lalithambika Krishnakumar Signed-off-by: Lalithambika Krishnakumar Signed-off-by: Lu Baolu Reviewed-by: Mika Westerberg Cc: Mika Westerberg Cc: Ashok Raj Link: https://lore.kernel.org/r/20200622231345.29722-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/dmar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9e393b9c5091..30ac0ba55864 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -898,7 +898,8 @@ int __init detect_intel_iommu(void) if (!ret) ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, &validate_drhd_cb); - if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) { + if (!ret && !no_iommu && !iommu_detected && + (!dmar_disabled || dmar_platform_optin())) { iommu_detected = 1; /* Make sure ACS will be enabled */ pci_request_acs(); From 33104571648b400ec211025f57c2d10b5ccc04a5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Jun 2020 07:13:44 +0800 Subject: [PATCH 091/174] iommu/vt-d: Update scalable mode paging structure coherency [ Upstream commit 04c00956ee3cd138fd38560a91452a804a8c5550 ] The Scalable-mode Page-walk Coherency (SMPWC) field in the VT-d extended capability register indicates the hardware coherency behavior on paging structures accessed through the pasid table entry. This is ignored in current code and using ECAP.C instead which is only valid in legacy mode. Fix this so that paging structure updates could be manually flushed from the cache line if hardware page walking is not snooped. Fixes: 765b6a98c1de3 ("iommu/vt-d: Enumerate the scalable mode capability") Signed-off-by: Lu Baolu Cc: Ashok Raj Cc: Kevin Tian Cc: Jacob Pan Link: https://lore.kernel.org/r/20200622231345.29722-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 773ac2b0d606..6366b5fbb3a4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -611,6 +611,12 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) return g_iommus[iommu_id]; } +static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) +{ + return sm_supported(iommu) ? + ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); +} + static void domain_update_iommu_coherency(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; @@ -622,7 +628,7 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) for_each_domain_iommu(i, domain) { found = true; - if (!ecap_coherent(g_iommus[i]->ecap)) { + if (!iommu_paging_structure_coherency(g_iommus[i])) { domain->iommu_coherency = 0; break; } @@ -633,7 +639,7 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) /* No hardware attached; use lowest common denominator */ rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - if (!ecap_coherent(iommu->ecap)) { + if (!iommu_paging_structure_coherency(iommu)) { domain->iommu_coherency = 0; break; } @@ -2090,7 +2096,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_fault_enable(context); context_set_present(context); - domain_flush_cache(domain, context, sizeof(*context)); + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); /* * It's a non-present to present mapping. If hardware doesn't cache From 5cf7f0c68405ec6d08b9e45eb9cb072876537793 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:29 +0300 Subject: [PATCH 092/174] net: qed: fix left elements count calculation [ Upstream commit 97dd1abd026ae4e6a82fa68645928404ad483409 ] qed_chain_get_element_left{,_u32} returned 0 when the difference between producer and consumer page count was equal to the total page count. Fix this by conditional expanding of producer value (vs unconditional). This allowed to eliminate normalizaton against total page count, which was the cause of this bug. Misc: replace open-coded constants with common defines. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/qed/qed_chain.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 733fad7dfbed..6d15040c642c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -207,28 +207,34 @@ static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u32 prod = p_chain->u.chain16.prod_idx; + u32 cons = p_chain->u.chain16.cons_idx; u16 used; - used = (u16) (((u32)0x10000 + - (u32)p_chain->u.chain16.prod_idx) - - (u32)p_chain->u.chain16.cons_idx); + if (prod < cons) + prod += (u32)U16_MAX + 1; + + used = (u16)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - - p_chain->u.chain16.cons_idx / p_chain->elem_per_page; + used -= prod / elem_per_page - cons / elem_per_page; return (u16)(p_chain->capacity - used); } static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u64 prod = p_chain->u.chain32.prod_idx; + u64 cons = p_chain->u.chain32.cons_idx; u32 used; - used = (u32) (((u64)0x100000000ULL + - (u64)p_chain->u.chain32.prod_idx) - - (u64)p_chain->u.chain32.cons_idx); + if (prod < cons) + prod += (u64)U32_MAX + 1; + + used = (u32)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - - p_chain->u.chain32.cons_idx / p_chain->elem_per_page; + used -= (u32)(prod / elem_per_page - cons / elem_per_page); return p_chain->capacity - used; } From 164d9a15652f2f03aad22091709756b4b9fea8d8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:30 +0300 Subject: [PATCH 093/174] net: qed: fix async event callbacks unregistering [ Upstream commit 31333c1a2521ff4b4ceb0c29de492549cd4a8de3 ] qed_spq_unregister_async_cb() should be called before qed_rdma_info_free() to avoid crash-spawning uses-after-free. Instead of calling it from each subsystem exit code, do it in one place on PF down. Fixes: 291d57f67d24 ("qed: Fix rdma_info structure allocation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 9 +++++++-- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 -- drivers/net/ethernet/qlogic/qed/qed_roce.c | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 0bf91df80d47..ecd14474a603 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1368,6 +1368,8 @@ static void qed_dbg_user_data_free(struct qed_hwfn *p_hwfn) void qed_resc_free(struct qed_dev *cdev) { + struct qed_rdma_info *rdma_info; + struct qed_hwfn *p_hwfn; int i; if (IS_VF(cdev)) { @@ -1385,7 +1387,8 @@ void qed_resc_free(struct qed_dev *cdev) qed_llh_free(cdev); for_each_hwfn(cdev, i) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + p_hwfn = cdev->hwfns + i; + rdma_info = p_hwfn->p_rdma_info; qed_cxt_mngr_free(p_hwfn); qed_qm_info_free(p_hwfn); @@ -1404,8 +1407,10 @@ void qed_resc_free(struct qed_dev *cdev) qed_ooo_free(p_hwfn); } - if (QED_IS_RDMA_PERSONALITY(p_hwfn)) + if (QED_IS_RDMA_PERSONALITY(p_hwfn) && rdma_info) { + qed_spq_unregister_async_cb(p_hwfn, rdma_info->proto); qed_rdma_info_free(p_hwfn); + } qed_iov_free(p_hwfn); qed_l2_free(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 65ec16a31658..2b3102a2fe5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2832,8 +2832,6 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn) if (rc) return rc; - qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); - return qed_iwarp_ll2_stop(p_hwfn); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index e49fada85410..83817bb50e9f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -113,7 +113,6 @@ void qed_roce_stop(struct qed_hwfn *p_hwfn) break; } } - qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ROCE); } static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, From d0717a95a06b5671551c70aacc5d784992f70c8b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:31 +0300 Subject: [PATCH 094/174] net: qede: stop adding events on an already destroyed workqueue [ Upstream commit 4079c7f7a2a00ab403c177ce723b560de59139c3 ] Set rdma_wq pointer to NULL after destroying the workqueue and check for it when adding new events to fix crashes on driver unload. Fixes: cee9fbd8e2e9 ("qede: Add qedr framework") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 2d873ae8a234..668ccc9d49f8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -105,6 +105,7 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); + edev->rdma_info.rdma_wq = NULL; } int qede_rdma_dev_add(struct qede_dev *edev, bool recovery) @@ -325,7 +326,7 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (edev->rdma_info.exp_recovery) return; - if (!edev->rdma_info.qedr_dev) + if (!edev->rdma_info.qedr_dev || !edev->rdma_info.rdma_wq) return; /* We don't want the cleanup flow to start while we're allocating and From 32cf4ff4ec523a4551155978498512b186c47943 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:32 +0300 Subject: [PATCH 095/174] net: qed: fix NVMe login fails over VFs [ Upstream commit ccd7c7ce167a21dbf2b698ffcf00f11d96d44f9b ] 25ms sleep cycles in waiting for PF response are excessive and may lead to different timeout failures. Start to wait with short udelays, and in most cases polling will end here. If the time was not sufficient, switch to msleeps. usleep_range() may go far beyond 100us depending on platform and tick configuration, hence atomic udelays for consistency. Also add explicit DMA barriers since 'done' always comes from a shared request-response DMA pool, and note that in the comment nearby. Fixes: 1408cc1fa48c ("qed: Introduce VFs") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 856051f50eb7..adc2c8f3d48e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -81,12 +81,17 @@ static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status) mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); } +#define QED_VF_CHANNEL_USLEEP_ITERATIONS 90 +#define QED_VF_CHANNEL_USLEEP_DELAY 100 +#define QED_VF_CHANNEL_MSLEEP_ITERATIONS 10 +#define QED_VF_CHANNEL_MSLEEP_DELAY 25 + static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) { union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request; struct ustorm_trigger_vf_zone trigger; struct ustorm_vf_zone *zone_data; - int rc = 0, time = 100; + int iter, rc = 0; zone_data = (struct ustorm_vf_zone *)PXP_VF_BAR0_START_USDM_ZONE_B; @@ -126,11 +131,19 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) REG_WR(p_hwfn, (uintptr_t)&zone_data->trigger, *((u32 *)&trigger)); /* When PF would be done with the response, it would write back to the - * `done' address. Poll until then. + * `done' address from a coherent DMA zone. Poll until then. */ - while ((!*done) && time) { - msleep(25); - time--; + + iter = QED_VF_CHANNEL_USLEEP_ITERATIONS; + while (!*done && iter--) { + udelay(QED_VF_CHANNEL_USLEEP_DELAY); + dma_rmb(); + } + + iter = QED_VF_CHANNEL_MSLEEP_ITERATIONS; + while (!*done && iter--) { + msleep(QED_VF_CHANNEL_MSLEEP_DELAY); + dma_rmb(); } if (!*done) { From 7180c8fc4a03f8120f8ece1c165b797f1f752ec6 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:33 +0300 Subject: [PATCH 096/174] net: qed: fix excessive QM ILT lines consumption [ Upstream commit d434d02f7e7c24c721365fd594ed781acb18e0da ] This is likely a copy'n'paste mistake. The amount of ILT lines to reserve for a single VF was being multiplied by the total VFs count. This led to a huge redundancy in reservation and potential lines drainouts. Fixes: 1408cc1fa48c ("qed: Introduce VFs") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 8e1bdf58b9e7..1d6dfba0c034 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -396,7 +396,7 @@ static void qed_cxt_qm_iids(struct qed_hwfn *p_hwfn, vf_tids += segs[NUM_TASK_PF_SEGMENTS].count; } - iids->vf_cids += vf_cids * p_mngr->vf_count; + iids->vf_cids = vf_cids; iids->tids += vf_tids * p_mngr->vf_count; DP_VERBOSE(p_hwfn, QED_MSG_ILT, From f490e70fa7eb9a237ff112d2ffe100f107e41bb2 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:34 +0300 Subject: [PATCH 097/174] net: qede: fix PTP initialization on recovery [ Upstream commit 1c85f394c2206ea3835f43534d5675f0574e1b70 ] Currently PTP cyclecounter and timecounter are initialized only on the first probing and are cleaned up during removal. This means that PTP becomes non-functional after device recovery. Fix this by unconditional PTP initialization on probing and clearing Tx pending bit on exiting. Fixes: ccc67ef50b90 ("qede: Error recovery process") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 29 ++++++++------------ drivers/net/ethernet/qlogic/qede/qede_ptp.h | 2 +- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 1da6b5bda80a..361a1d759b0b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1158,7 +1158,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, /* PTP not supported on VFs */ if (!is_vf) - qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL)); + qede_ptp_enable(edev); edev->ops->register_ops(cdev, &qede_ll_ops, edev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index f815435cf106..2d3b2fa92df5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -411,6 +411,7 @@ void qede_ptp_disable(struct qede_dev *edev) if (ptp->tx_skb) { dev_kfree_skb_any(ptp->tx_skb); ptp->tx_skb = NULL; + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); } /* Disable PTP in HW */ @@ -422,7 +423,7 @@ void qede_ptp_disable(struct qede_dev *edev) edev->ptp = NULL; } -static int qede_ptp_init(struct qede_dev *edev, bool init_tc) +static int qede_ptp_init(struct qede_dev *edev) { struct qede_ptp *ptp; int rc; @@ -443,25 +444,19 @@ static int qede_ptp_init(struct qede_dev *edev, bool init_tc) /* Init work queue for Tx timestamping */ INIT_WORK(&ptp->work, qede_ptp_task); - /* Init cyclecounter and timecounter. This is done only in the first - * load. If done in every load, PTP application will fail when doing - * unload / load (e.g. MTU change) while it is running. - */ - if (init_tc) { - memset(&ptp->cc, 0, sizeof(ptp->cc)); - ptp->cc.read = qede_ptp_read_cc; - ptp->cc.mask = CYCLECOUNTER_MASK(64); - ptp->cc.shift = 0; - ptp->cc.mult = 1; + /* Init cyclecounter and timecounter */ + memset(&ptp->cc, 0, sizeof(ptp->cc)); + ptp->cc.read = qede_ptp_read_cc; + ptp->cc.mask = CYCLECOUNTER_MASK(64); + ptp->cc.shift = 0; + ptp->cc.mult = 1; - timecounter_init(&ptp->tc, &ptp->cc, - ktime_to_ns(ktime_get_real())); - } + timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); - return rc; + return 0; } -int qede_ptp_enable(struct qede_dev *edev, bool init_tc) +int qede_ptp_enable(struct qede_dev *edev) { struct qede_ptp *ptp; int rc; @@ -482,7 +477,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) edev->ptp = ptp; - rc = qede_ptp_init(edev, init_tc); + rc = qede_ptp_init(edev); if (rc) goto err1; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h index 691a14c4b2c5..89c7f3cf3ee2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.h +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h @@ -41,7 +41,7 @@ void qede_ptp_rx_ts(struct qede_dev *edev, struct sk_buff *skb); void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb); int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req); void qede_ptp_disable(struct qede_dev *edev); -int qede_ptp_enable(struct qede_dev *edev, bool init_tc); +int qede_ptp_enable(struct qede_dev *edev); int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts); static inline void qede_ptp_record_rx_ts(struct qede_dev *edev, From 065f225ef975513e5ba0edaaa7c7d0b74bbc0b75 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:35 +0300 Subject: [PATCH 098/174] net: qede: fix use-after-free on recovery and AER handling [ Upstream commit ec6c80590bde6b5dfa4970fffa3572f1acd313ca ] Set edev->cdev pointer to NULL after calling remove() callback to avoid using of already freed object. Fixes: ccc67ef50b90 ("qede: Error recovery process") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 361a1d759b0b..2c3d654c8454 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1247,6 +1247,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) if (system_state == SYSTEM_POWER_OFF) return; qed_ops->common->remove(cdev); + edev->cdev = NULL; /* Since this can happen out-of-sync with other flows, * don't release the netdevice until after slowpath stop From 61f488765dffabd91087072c21bd424299e17125 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Wed, 24 Jun 2020 01:51:31 +0530 Subject: [PATCH 099/174] cxgb4: move handling L2T ARP failures to caller [ Upstream commit 11d8cd5c9f3b46f397f889cefdb66795518aaebd ] Move code handling L2T ARP failures to the only caller. Fixes following sparse warning: skbuff.h:2091:29: warning: context imbalance in 'handle_failed_resolution' - unexpected unlock Fixes: 749cb5fe48bb ("cxgb4: Replace arpq_head/arpq_tail with SKB double link-list code") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 52 +++++++++++------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index e6fe2870137b..a440c1cf0b61 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -506,41 +506,20 @@ u64 cxgb4_select_ntuple(struct net_device *dev, } EXPORT_SYMBOL(cxgb4_select_ntuple); -/* - * Called when address resolution fails for an L2T entry to handle packets - * on the arpq head. If a packet specifies a failure handler it is invoked, - * otherwise the packet is sent to the device. - */ -static void handle_failed_resolution(struct adapter *adap, struct l2t_entry *e) -{ - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&e->arpq)) != NULL) { - const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); - - spin_unlock(&e->lock); - if (cb->arp_err_handler) - cb->arp_err_handler(cb->handle, skb); - else - t4_ofld_send(adap, skb); - spin_lock(&e->lock); - } -} - /* * Called when the host's neighbor layer makes a change to some entry that is * loaded into the HW L2 table. */ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) { - struct l2t_entry *e; - struct sk_buff_head *arpq = NULL; - struct l2t_data *d = adap->l2t; unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; - int ifidx = neigh->dev->ifindex; - int hash = addr_hash(d, addr, addr_len, ifidx); + int hash, ifidx = neigh->dev->ifindex; + struct sk_buff_head *arpq = NULL; + struct l2t_data *d = adap->l2t; + struct l2t_entry *e; + hash = addr_hash(d, addr, addr_len, ifidx); read_lock_bh(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) if (!addreq(e, addr) && e->ifindex == ifidx) { @@ -573,8 +552,25 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) write_l2e(adap, e, 0); } - if (arpq) - handle_failed_resolution(adap, e); + if (arpq) { + struct sk_buff *skb; + + /* Called when address resolution fails for an L2T + * entry to handle packets on the arpq head. If a + * packet specifies a failure handler it is invoked, + * otherwise the packet is sent to the device. + */ + while ((skb = __skb_dequeue(&e->arpq)) != NULL) { + const struct l2t_skb_cb *cb = L2T_SKB_CB(skb); + + spin_unlock(&e->lock); + if (cb->arp_err_handler) + cb->arp_err_handler(cb->handle, skb); + else + t4_ofld_send(adap, skb); + spin_lock(&e->lock); + } + } spin_unlock_bh(&e->lock); } From 1e812023f47884be63a4db24ff2e25001280d1c1 Mon Sep 17 00:00:00 2001 From: yu kuai Date: Thu, 4 Jun 2020 20:42:06 +0800 Subject: [PATCH 100/174] ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram() [ Upstream commit 586745f1598ccf71b0a5a6df2222dee0a865954e ] if of_find_device_by_node() succeed, imx_suspend_alloc_ocram() doesn't have a corresponding put_device(). Thus add a jump target to fix the exception handling for this function implementation. Fixes: 1579c7b9fe01 ("ARM: imx53: Set DDR pins to high impedance when in suspend to RAM.") Signed-off-by: yu kuai Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/pm-imx5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index f057df813f83..e9962b48e30c 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -295,14 +295,14 @@ static int __init imx_suspend_alloc_ocram( if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, size); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } phys = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -312,6 +312,8 @@ static int __init imx_suspend_alloc_ocram( if (virt_out) *virt_out = virt; +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); From a3a86515d017081673dcb1ec42a5f38f5024a6f4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 23 Jun 2020 10:41:22 +0200 Subject: [PATCH 101/174] scsi: lpfc: Avoid another null dereference in lpfc_sli4_hba_unset() [ Upstream commit 46da547e21d6cefceec3fb3dba5ebbca056627fc ] Commit cdb42becdd40 ("scsi: lpfc: Replace io_channels for nvme and fcp with general hdw_queues per cpu") has introduced static checker warnings for potential null dereferences in 'lpfc_sli4_hba_unset()' and commit 1ffdd2c0440d ("scsi: lpfc: resolve static checker warning in lpfc_sli4_hba_unset") has tried to fix it. However, yet another potential null dereference is remaining. This commit fixes it. This bug was discovered and resolved using Coverity Static Analysis Security Testing (SAST) by Synopsys, Inc. Link: https://lore.kernel.org/r/20200623084122.30633-1-sjpark@amazon.com Fixes: 1ffdd2c0440d ("scsi: lpfc: resolve static checker warning inlpfc_sli4_hba_unset") Fixes: cdb42becdd40 ("scsi: lpfc: Replace io_channels for nvme and fcp with general hdw_queues per cpu") Reviewed-by: James Smart Signed-off-by: SeongJae Park Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 14d9f41977f1..95abffd9ad10 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11542,7 +11542,8 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) lpfc_sli4_xri_exchange_busy_wait(phba); /* per-phba callback de-registration for hotplug event */ - lpfc_cpuhp_remove(phba); + if (phba->pport) + lpfc_cpuhp_remove(phba); /* Disable PCI subsystem interrupt */ lpfc_sli4_disable_intr(phba); From 56ee0ed97df5cace405e86a9632ad648b5b8f00b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 15 Jun 2020 14:27:19 +0300 Subject: [PATCH 102/174] usb: gadget: udc: Potential Oops in error handling code [ Upstream commit e55f3c37cb8d31c7e301f46396b2ac6a19eb3a7c ] If this is in "transceiver" mode the the ->qwork isn't required and is a NULL pointer. This can lead to a NULL dereference when we call destroy_workqueue(udc->qwork). Fixes: 3517c31a8ece ("usb: gadget: mv_udc: use devm_xxx for probe") Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/mv_udc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c index cafde053788b..80a1b52c656e 100644 --- a/drivers/usb/gadget/udc/mv_udc_core.c +++ b/drivers/usb/gadget/udc/mv_udc_core.c @@ -2313,7 +2313,8 @@ static int mv_udc_probe(struct platform_device *pdev) return 0; err_create_workqueue: - destroy_workqueue(udc->qwork); + if (udc->qwork) + destroy_workqueue(udc->qwork); err_destroy_dma: dma_pool_destroy(udc->dtd_pool); err_free_dma: From f9dc5e708ab8a6e0593d9064308939d8a34e3f91 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 18 Jun 2020 21:11:17 +0900 Subject: [PATCH 103/174] usb: renesas_usbhs: getting residue from callback_result [ Upstream commit ea0efd687b01355cd799c8643d0c636ba4859ffc ] This driver assumed that dmaengine_tx_status() could return the residue even if the transfer was completed. However, this was not correct usage [1] and this caused to break getting the residue after the commit 24461d9792c2 ("dmaengine: virt-dma: Fix access after free in vchan_complete()") actually. So, this is possible to get wrong received size if the usb controller gets a short packet. For example, g_zero driver causes "bad OUT byte" errors. The usb-dmac driver will support the callback_result, so this driver can use it to get residue correctly. Note that even if the usb-dmac driver has not supported the callback_result yet, this patch doesn't cause any side-effects. [1] https://lore.kernel.org/dmaengine/20200616165550.GP2324254@vkoul-mobl/ Reported-by: Hien Dang Fixes: 24461d9792c2 ("dmaengine: virt-dma: Fix access after free in vchan_complete()") Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1592482277-19563-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/renesas_usbhs/fifo.c | 23 +++++++++++++---------- drivers/usb/renesas_usbhs/fifo.h | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 86637cd066cf..05cdad13933b 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -803,7 +803,8 @@ static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map) return info->dma_map_ctrl(chan->device->dev, pkt, map); } -static void usbhsf_dma_complete(void *arg); +static void usbhsf_dma_complete(void *arg, + const struct dmaengine_result *result); static void usbhsf_dma_xfer_preparing(struct usbhs_pkt *pkt) { struct usbhs_pipe *pipe = pkt->pipe; @@ -813,6 +814,7 @@ static void usbhsf_dma_xfer_preparing(struct usbhs_pkt *pkt) struct dma_chan *chan; struct device *dev = usbhs_priv_to_dev(priv); enum dma_transfer_direction dir; + dma_cookie_t cookie; fifo = usbhs_pipe_to_fifo(pipe); if (!fifo) @@ -827,11 +829,11 @@ static void usbhsf_dma_xfer_preparing(struct usbhs_pkt *pkt) if (!desc) return; - desc->callback = usbhsf_dma_complete; - desc->callback_param = pipe; + desc->callback_result = usbhsf_dma_complete; + desc->callback_param = pkt; - pkt->cookie = dmaengine_submit(desc); - if (pkt->cookie < 0) { + cookie = dmaengine_submit(desc); + if (cookie < 0) { dev_err(dev, "Failed to submit dma descriptor\n"); return; } @@ -1152,12 +1154,10 @@ static size_t usbhs_dma_calc_received_size(struct usbhs_pkt *pkt, struct dma_chan *chan, int dtln) { struct usbhs_pipe *pipe = pkt->pipe; - struct dma_tx_state state; size_t received_size; int maxp = usbhs_pipe_get_maxpacket(pipe); - dmaengine_tx_status(chan, pkt->cookie, &state); - received_size = pkt->length - state.residue; + received_size = pkt->length - pkt->dma_result->residue; if (dtln) { received_size -= USBHS_USB_DMAC_XFER_SIZE; @@ -1363,13 +1363,16 @@ static int usbhsf_irq_ready(struct usbhs_priv *priv, return 0; } -static void usbhsf_dma_complete(void *arg) +static void usbhsf_dma_complete(void *arg, + const struct dmaengine_result *result) { - struct usbhs_pipe *pipe = arg; + struct usbhs_pkt *pkt = arg; + struct usbhs_pipe *pipe = pkt->pipe; struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); struct device *dev = usbhs_priv_to_dev(priv); int ret; + pkt->dma_result = result; ret = usbhsf_pkt_handler(pipe, USBHSF_PKT_DMA_DONE); if (ret < 0) dev_err(dev, "dma_complete run_error %d : %d\n", diff --git a/drivers/usb/renesas_usbhs/fifo.h b/drivers/usb/renesas_usbhs/fifo.h index c3d3cc35cee0..4a7dc23ce3d3 100644 --- a/drivers/usb/renesas_usbhs/fifo.h +++ b/drivers/usb/renesas_usbhs/fifo.h @@ -50,7 +50,7 @@ struct usbhs_pkt { struct usbhs_pkt *pkt); struct work_struct work; dma_addr_t dma; - dma_cookie_t cookie; + const struct dmaengine_result *dma_result; void *buf; int length; int trans; From 8d93603bf60c7a6760ec79d24aa3987b22a68411 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Jun 2020 01:53:10 -0700 Subject: [PATCH 104/174] nvme: don't protect ns mutation with ns->head->lock [ Upstream commit e164471dcf19308d154adb69e7760d8ba426a77f ] Right now ns->head->lock is protecting namespace mutation which is wrong and unneeded. Move it to only protect against head mutations. While we're at it, remove unnecessary ns->head reference as we already have head pointer. The problem with this is that the head->lock spans mpath disk node I/O that may block under some conditions (if for example the controller is disconnecting or the path became inaccessible), The locking scheme does not allow any other path to enable itself, preventing blocked I/O to complete and forward-progress from there. This is a preparation patch for the fix in a subsequent patch where the disk I/O will also be done outside the head->lock. Fixes: 0d0b660f214d ("nvme: add ANA support") Signed-off-by: Anton Eidelman Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/multipath.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 56caddeabb5e..772eb05e57af 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -413,11 +413,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - lockdep_assert_held(&ns->head->lock); - if (!head->disk) return; + mutex_lock(&head->lock); if (!(head->disk->flags & GENHD_FL_UP)) device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); @@ -430,9 +429,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) __nvme_find_path(head, node); srcu_read_unlock(&head->srcu, srcu_idx); } + mutex_unlock(&head->lock); - synchronize_srcu(&ns->head->srcu); - kblockd_schedule_work(&ns->head->requeue_work); + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); } static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, @@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, struct nvme_ns *ns) { - mutex_lock(&ns->head->lock); ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); if (nvme_state_is_live(ns->ana_state)) nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -661,10 +659,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); mutex_unlock(&ns->ctrl->ana_lock); } else { - mutex_lock(&ns->head->lock); ns->ana_state = NVME_ANA_OPTIMIZED; nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); } } From ae6d80f62c739d1b0a9a3f3cfd17c45293bb3a33 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 10 Jun 2020 21:51:11 +0100 Subject: [PATCH 105/174] netfilter: ipset: fix unaligned atomic access [ Upstream commit 715028460082d07a7ec6fcd87b14b46784346a72 ] When using ip_set with counters and comment, traffic causes the kernel to panic on 32-bit ARM: Alignment trap: not handling instruction e1b82f9f at [] Unhandled fault: alignment exception (0x221) at 0xea08133c PC is at ip_set_match_extensions+0xe0/0x224 [ip_set] The problem occurs when we try to update the 64-bit counters - the faulting address above is not 64-bit aligned. The problem occurs due to the way elements are allocated, for example: set->dsize = ip_set_elem_len(set, tb, 0, 0); map = ip_set_alloc(sizeof(*map) + elements * set->dsize); If the element has a requirement for a member to be 64-bit aligned, and set->dsize is not a multiple of 8, but is a multiple of four, then every odd numbered elements will be misaligned - and hitting an atomic64_add() on that element will cause the kernel to panic. ip_set_elem_len() must return a size that is rounded to the maximum alignment of any extension field stored in the element. This change ensures that is the case. Fixes: 95ad1f4a9358 ("netfilter: ipset: Fix extension alignment") Signed-off-by: Russell King Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 75da200aa5d8..133a3f1b6f56 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -382,6 +382,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; + if (align < ip_set_extensions[id].align) + align = ip_set_extensions[id].align; len = ALIGN(len, ip_set_extensions[id].align); set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; From b78bae5ab399b2e73705dcacc85ef2b613e439b3 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 24 Jun 2020 18:14:55 -0700 Subject: [PATCH 106/174] net: bcmgenet: use hardware padding of runt frames [ Upstream commit 20d1f2d1b024f6be199a3bedf1578a1d21592bc5 ] When commit 474ea9cafc45 ("net: bcmgenet: correctly pad short packets") added the call to skb_padto() it should have been located before the nr_frags parameter was read since that value could be changed when padding packets with lengths between 55 and 59 bytes (inclusive). The use of a stale nr_frags value can cause corruption of the pad data when tx-scatter-gather is enabled. This corruption of the pad can cause invalid checksum computation when hardware offload of tx-checksum is also enabled. Since the original reason for the padding was corrected by commit 7dd399130efb ("net: bcmgenet: fix skb_len in bcmgenet_xmit_single()") we can remove the software padding all together and make use of hardware padding of short frames as long as the hardware also always appends the FCS value to the frame. Fixes: 474ea9cafc45 ("net: bcmgenet: correctly pad short packets") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3d3b1005d076..03f82786c0b9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1591,11 +1591,6 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - if (skb_padto(skb, ETH_ZLEN)) { - ret = NETDEV_TX_OK; - goto out; - } - /* Retain how many bytes will be sent on the wire, without TSB inserted * by transmit checksum offload */ @@ -1644,6 +1639,9 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) len_stat = (size << DMA_BUFLENGTH_SHIFT) | (priv->hw_params->qtag_mask << DMA_TX_QTAG_SHIFT); + /* Note: if we ever change from DMA_TX_APPEND_CRC below we + * will need to restore software padding of "runt" packets + */ if (!i) { len_stat |= DMA_TX_APPEND_CRC | DMA_SOP; if (skb->ip_summed == CHECKSUM_PARTIAL) From 0935afc39797b1305ae8fce4161c77bd94f068ff Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 09:24:17 +0800 Subject: [PATCH 107/174] clk: sifive: allocate sufficient memory for struct __prci_data [ Upstream commit d0a5fdf4cc83dabcdea668f971b8a2e916437711 ] The (struct __prci_data).hw_clks.hws is an array with dynamic elements. Using struct_size(pd, hw_clks.hws, ARRAY_SIZE(__prci_init_clocks)) instead of sizeof(*pd) to get the correct memory size of struct __prci_data for sifive/fu540-prci. After applying this modifications, the kernel runs smoothly with CONFIG_SLAB_FREELIST_RANDOM enabled on the HiFive unleashed board. Fixes: 30b8e27e3b58 ("clk: sifive: add a driver for the SiFive FU540 PRCI IP block") Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- drivers/clk/sifive/fu540-prci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sifive/fu540-prci.c b/drivers/clk/sifive/fu540-prci.c index 6282ee2f361c..a8901f90a61a 100644 --- a/drivers/clk/sifive/fu540-prci.c +++ b/drivers/clk/sifive/fu540-prci.c @@ -586,7 +586,10 @@ static int sifive_fu540_prci_probe(struct platform_device *pdev) struct __prci_data *pd; int r; - pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); + pd = devm_kzalloc(dev, + struct_size(pd, hw_clks.hws, + ARRAY_SIZE(__prci_init_clocks)), + GFP_KERNEL); if (!pd) return -ENOMEM; From 4a6e6413059e8cbacb99ce797ec447612f407e43 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 9 Jun 2020 15:15:54 -0500 Subject: [PATCH 108/174] i2c: fsi: Fix the port number field in status register [ Upstream commit 502035e284cc7e9efef22b01771d822d49698ab9 ] The port number field in the status register was not correct, so fix it. Fixes: d6ffb6300116 ("i2c: Add FSI-attached I2C master algorithm") Signed-off-by: Eddie James Signed-off-by: Joel Stanley Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-fsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c index e0c256922d4f..977d6f524649 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c @@ -98,7 +98,7 @@ #define I2C_STAT_DAT_REQ BIT(25) #define I2C_STAT_CMD_COMP BIT(24) #define I2C_STAT_STOP_ERR BIT(23) -#define I2C_STAT_MAX_PORT GENMASK(19, 16) +#define I2C_STAT_MAX_PORT GENMASK(22, 16) #define I2C_STAT_ANY_INT BIT(15) #define I2C_STAT_SCL_IN BIT(11) #define I2C_STAT_SDA_IN BIT(10) From 5cee9e67596f7d694e94d7def7cd9d65a7599f66 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sat, 13 Jun 2020 11:41:09 +0100 Subject: [PATCH 109/174] i2c: core: check returned size of emulated smbus block read [ Upstream commit 40e05200593af06633f64ab0effff052eee6f076 ] If the i2c bus driver ignores the I2C_M_RECV_LEN flag (as some of them do), it is possible for an I2C_SMBUS_BLOCK_DATA read issued on some random device to return an arbitrary value in the first byte (and nothing else). When this happens, i2c_smbus_xfer_emulated() will happily write past the end of the supplied data buffer, thus causing Bad Things to happen. To prevent this, check the size before copying the data block and return an error if it is too large. Fixes: 209d27c3b167 ("i2c: Emulate SMBus block read over I2C") Signed-off-by: Mans Rullgard [wsa: use better errno] Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/i2c-core-smbus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index 3ac426a8ab5a..c2ae8c8cd429 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -495,6 +495,13 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, break; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: + if (msg[1].buf[0] > I2C_SMBUS_BLOCK_MAX) { + dev_err(&adapter->dev, + "Invalid block size returned: %d\n", + msg[1].buf[0]); + status = -EPROTO; + goto cleanup; + } for (i = 0; i < msg[1].buf[0] + 1; i++) data->block[i] = msg[1].buf[i]; break; From bfd4981fe3d64b49662920925fead26cf37c4834 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Jun 2020 17:00:24 +0100 Subject: [PATCH 110/174] afs: Fix storage of cell names [ Upstream commit 719fdd32921fb7e3208db8832d32ae1c2d68900f ] The cell name stored in the afs_cell struct is a 64-char + NUL buffer - when it needs to be able to handle up to AFS_MAXCELLNAME (256 chars) + NUL. Fix this by changing the array to a pointer and allocating the string. Found using Coverity. Fixes: 989782dcdc91 ("afs: Overhaul cell database management") Reported-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/cell.c | 9 +++++++++ fs/afs/internal.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 78ba5f932287..296b489861a9 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -154,10 +154,17 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, return ERR_PTR(-ENOMEM); } + cell->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!cell->name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + cell->net = net; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); + cell->name[i] = 0; atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); @@ -203,6 +210,7 @@ parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: + kfree(cell->name); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -483,6 +491,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); key_put(cell->anonymous_key); + kfree(cell->name); kfree(cell); _leave(" [destroyed]"); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 555ad7c9afcb..7fe88d918b23 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -397,7 +397,7 @@ struct afs_cell { struct afs_vlserver_list __rcu *vl_servers; u8 name_len; /* Length of name */ - char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ + char *name; /* Cell name, case-flattened and NUL-padded */ }; /* From 3dc7138943b1af6e9bf0bcabec623e5cc1041306 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 17 Jun 2020 09:29:19 +0200 Subject: [PATCH 111/174] sched/deadline: Initialize ->dl_boosted [ Upstream commit ce9bc3b27f2a21a7969b41ffb04df8cf61bd1592 ] syzbot reported the following warning triggered via SYSC_sched_setattr(): WARNING: CPU: 0 PID: 6973 at kernel/sched/deadline.c:593 setup_new_dl_entity /kernel/sched/deadline.c:594 [inline] WARNING: CPU: 0 PID: 6973 at kernel/sched/deadline.c:593 enqueue_dl_entity /kernel/sched/deadline.c:1370 [inline] WARNING: CPU: 0 PID: 6973 at kernel/sched/deadline.c:593 enqueue_task_dl+0x1c17/0x2ba0 /kernel/sched/deadline.c:1441 This happens because the ->dl_boosted flag is currently not initialized by __dl_clear_params() (unlike the other flags) and setup_new_dl_entity() rightfully complains about it. Initialize dl_boosted to 0. Fixes: 2d3d891d3344 ("sched/deadline: Add SCHED_DEADLINE inheritance logic") Reported-by: syzbot+5ac8bac25f95e8b221e7@syzkaller.appspotmail.com Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Daniel Wagner Link: https://lkml.kernel.org/r/20200617072919.818409-1-juri.lelli@redhat.com Signed-off-by: Sasha Levin --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 08bdee0480b3..4cb00538a207 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2693,6 +2693,7 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_bw = 0; dl_se->dl_density = 0; + dl_se->dl_boosted = 0; dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; dl_se->dl_non_contending = 0; From 83bdf7f8b7127f3e52960ca1c690a8cd1252b9ca Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 19 Nov 2018 16:32:01 +0100 Subject: [PATCH 112/174] sched/core: Fix PI boosting between RT and DEADLINE tasks [ Upstream commit 740797ce3a124b7dd22b7fb832d87bc8fba1cf6f ] syzbot reported the following warning: WARNING: CPU: 1 PID: 6351 at kernel/sched/deadline.c:628 enqueue_task_dl+0x22da/0x38a0 kernel/sched/deadline.c:1504 At deadline.c:628 we have: 623 static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) 624 { 625 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 626 struct rq *rq = rq_of_dl_rq(dl_rq); 627 628 WARN_ON(dl_se->dl_boosted); 629 WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline)); [...] } Which means that setup_new_dl_entity() has been called on a task currently boosted. This shouldn't happen though, as setup_new_dl_entity() is only called when the 'dynamic' deadline of the new entity is in the past w.r.t. rq_clock and boosted tasks shouldn't verify this condition. Digging through the PI code I noticed that what above might in fact happen if an RT tasks blocks on an rt_mutex hold by a DEADLINE task. In the first branch of boosting conditions we check only if a pi_task 'dynamic' deadline is earlier than mutex holder's and in this case we set mutex holder to be dl_boosted. However, since RT 'dynamic' deadlines are only initialized if such tasks get boosted at some point (or if they become DEADLINE of course), in general RT 'dynamic' deadlines are usually equal to 0 and this verifies the aforementioned condition. Fix it by checking that the potential donor task is actually (even if temporary because in turn boosted) running at DEADLINE priority before using its 'dynamic' deadline value. Fixes: 2d3d891d3344 ("sched/deadline: Add SCHED_DEADLINE inheritance logic") Reported-by: syzbot+119ba87189432ead09b4@syzkaller.appspotmail.com Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Daniel Bristot de Oliveira Tested-by: Daniel Wagner Link: https://lkml.kernel.org/r/20181119153201.GB2119@localhost.localdomain Signed-off-by: Sasha Levin --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 361cbc2dc966..7238ef445daf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4447,7 +4447,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) */ if (dl_prio(prio)) { if (!dl_prio(p->normal_prio) || - (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { + (pi_task && dl_prio(pi_task->prio) && + dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; queue_flag |= ENQUEUE_REPLENISH; } else From 4dfc238a2441668703081d5ca60d98d531efa5e4 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 4 Jun 2020 22:06:43 -0500 Subject: [PATCH 113/174] sata_rcar: handle pm_runtime_get_sync failure cases [ Upstream commit eea1238867205b9e48a67c1a63219529a73c46fd ] Calling pm_runtime_get_sync increments the counter even in case of failure, causing incorrect ref count. Call pm_runtime_put if pm_runtime_get_sync fails. Signed-off-by: Navid Emamdoost Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/sata_rcar.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 3495e1733a8e..c35b7b993133 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -905,7 +905,7 @@ static int sata_rcar_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) - goto err_pm_disable; + goto err_pm_put; host = ata_host_alloc(dev, 1); if (!host) { @@ -935,7 +935,6 @@ static int sata_rcar_probe(struct platform_device *pdev) err_pm_put: pm_runtime_put(dev); -err_pm_disable: pm_runtime_disable(dev); return ret; } @@ -989,8 +988,10 @@ static int sata_rcar_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } if (priv->type == RCAR_GEN3_SATA) { sata_rcar_init_module(priv); @@ -1015,8 +1016,10 @@ static int sata_rcar_restore(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put(dev); return ret; + } sata_rcar_setup_port(host); From f7f181582f1b863c2f49e78205c65059819948f6 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 5 Jun 2020 09:41:49 +0800 Subject: [PATCH 114/174] ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function [ Upstream commit f650ef61e040bcb175dd8762164b00a5d627f20e ] BUG: KASAN: use-after-free in ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 Read of size 1 at addr ffff88803b8cd003 by task syz-executor.6/12621 CPU: 1 PID: 12621 Comm: syz-executor.6 Not tainted 4.19.95 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xac/0xee lib/dump_stack.c:118 print_address_description+0x60/0x223 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report mm/kasan/report.c:409 [inline] kasan_report.cold+0xae/0x2d8 mm/kasan/report.c:393 ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 ata_scsi_translate+0x2da/0x680 drivers/ata/libata-scsi.c:2035 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4360 [inline] ata_scsi_queuecmd+0x2e4/0x790 drivers/ata/libata-scsi.c:4409 scsi_dispatch_cmd+0x2ee/0x6c0 drivers/scsi/scsi_lib.c:1867 scsi_queue_rq+0xfd7/0x1990 drivers/scsi/scsi_lib.c:2170 blk_mq_dispatch_rq_list+0x1e1/0x19a0 block/blk-mq.c:1186 blk_mq_do_dispatch_sched+0x147/0x3d0 block/blk-mq-sched.c:108 blk_mq_sched_dispatch_requests+0x427/0x680 block/blk-mq-sched.c:204 __blk_mq_run_hw_queue+0xbc/0x200 block/blk-mq.c:1308 __blk_mq_delay_run_hw_queue+0x3c0/0x460 block/blk-mq.c:1376 blk_mq_run_hw_queue+0x152/0x310 block/blk-mq.c:1413 blk_mq_sched_insert_request+0x337/0x6c0 block/blk-mq-sched.c:397 blk_execute_rq_nowait+0x124/0x320 block/blk-exec.c:64 blk_execute_rq+0xc5/0x112 block/blk-exec.c:101 sg_scsi_ioctl+0x3b0/0x6a0 block/scsi_ioctl.c:507 sg_ioctl+0xd37/0x23f0 drivers/scsi/sg.c:1106 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:501 [inline] do_vfs_ioctl+0xae6/0x1030 fs/ioctl.c:688 ksys_ioctl+0x76/0xa0 fs/ioctl.c:705 __do_sys_ioctl fs/ioctl.c:712 [inline] __se_sys_ioctl fs/ioctl.c:710 [inline] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:710 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb0e9602c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb0e96036d4 RCX: 000000000045c479 RDX: 0000000020000040 RSI: 0000000000000001 RDI: 0000000000000003 RBP: 000000000076bfc0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000046d R14: 00000000004c6e1a R15: 000000000076bfcc Allocated by task 12577: set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc mm/kasan/kasan.c:553 [inline] kasan_kmalloc+0xbf/0xe0 mm/kasan/kasan.c:531 __kmalloc+0xf3/0x1e0 mm/slub.c:3749 kmalloc include/linux/slab.h:520 [inline] load_elf_phdrs+0x118/0x1b0 fs/binfmt_elf.c:441 load_elf_binary+0x2de/0x4610 fs/binfmt_elf.c:737 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 12577: set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x129/0x170 mm/kasan/kasan.c:521 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kfree+0x8b/0x1a0 mm/slub.c:3904 load_elf_binary+0x1be7/0x4610 fs/binfmt_elf.c:1118 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88803b8ccf00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 259 bytes inside of 512-byte region [ffff88803b8ccf00, ffff88803b8cd100) The buggy address belongs to the page: page:ffffea0000ee3300 count:1 mapcount:0 mapping:ffff88806cc03080 index:0xffff88803b8cc780 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 ffffea0001104080 0000000200000002 ffff88806cc03080 raw: ffff88803b8cc780 00000000800c000b 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803b8ccf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8ccf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803b8cd000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803b8cd080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8cd100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc You can refer to "https://www.lkml.org/lkml/2019/1/17/474" reproduce this error. The exception code is "bd_len = p[3];", "p" value is ffff88803b8cd000 which belongs to the cache kmalloc-512 of size 512. The "page_address(sg_page(scsi_sglist(scmd)))" maybe from sg_scsi_ioctl function "buffer" which allocated by kzalloc, so "buffer" may not page aligned. This also looks completely buggy on highmem systems and really needs to use a kmap_atomic. --Christoph Hellwig To address above bugs, Paolo Bonzini advise to simpler to just make a char array of size CACHE_MPAGE_LEN+8+8+4-2(or just 64 to make it easy), use sg_copy_to_buffer to copy from the sglist into the buffer, and workthere. Signed-off-by: Ye Bin Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 5af34a3201ed..5596c9b6ebf2 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3978,12 +3978,13 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; - const u8 *p; u8 pg, spg; unsigned six_byte, pg_len, hdr_len, bd_len; int len; u16 fp = (u16)-1; u8 bp = 0xff; + u8 buffer[64]; + const u8 *p = buffer; VPRINTK("ENTER\n"); @@ -4017,12 +4018,14 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) if (!scsi_sg_count(scmd) || scsi_sglist(scmd)->length < len) goto invalid_param_len; - p = page_address(sg_page(scsi_sglist(scmd))); - /* Move past header and block descriptors. */ if (len < hdr_len) goto invalid_param_len; + if (!sg_copy_to_buffer(scsi_sglist(scmd), scsi_sg_count(scmd), + buffer, sizeof(buffer))) + goto invalid_param_len; + if (six_byte) bd_len = p[3]; else From 81616542adcc777d82e1679c429c87bb44251dd1 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Fri, 5 Jun 2020 20:37:44 +0300 Subject: [PATCH 115/174] drm/amd/display: Use kfree() to free rgb_user in calculate_user_regamma_ramp() [ Upstream commit 43a562774fceba867e8eebba977d7d42f8a2eac7 ] Use kfree() instead of kvfree() to free rgb_user in calculate_user_regamma_ramp() because the memory is allocated with kcalloc(). Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 207435fa4f2c..51d07a4561ce 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1862,7 +1862,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, kfree(rgb_regamma); rgb_regamma_alloc_fail: - kvfree(rgb_user); + kfree(rgb_user); rgb_user_alloc_fail: return ret; } From b8403f7e45e57ee705bcc2fe99d34ea44703ec3a Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 11 Jun 2020 18:32:35 +0000 Subject: [PATCH 116/174] riscv/atomic: Fix sign extension for RV64I [ Upstream commit 6c58f25e6938c073198af8b1e1832f83f8f0df33 ] The argument passed to cmpxchg is not guaranteed to be sign extended, but lr.w sign extends on RV64I. This makes cmpxchg fail on clang built kernels when __old is negative. To fix this, we just cast __old to long which sign extends on RV64I. With this fix, clang built RISC-V kernels now boot. Link: https://github.com/ClangBuiltLinux/linux/issues/867 Signed-off-by: Nathan Huckleberry Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b..262e5bbb2776 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ From 07b8b2d463710afa9b9d275177d8801b8de60606 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 28 May 2020 15:21:04 +0800 Subject: [PATCH 117/174] hwrng: ks-sa - Fix runtime PM imbalance on error [ Upstream commit 95459261c99f1621d90bc628c2a48e60b7cf9a88 ] pm_runtime_get_sync() increments the runtime PM usage counter even the call returns an error code. Thus a pairing decrement is needed on the error handling path to keep the counter balanced. Signed-off-by: Dinghao Liu Reviewed-by: Alexander Sverdlin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/char/hw_random/ks-sa-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c index a67430010aa6..5c7d3dfcfdd0 100644 --- a/drivers/char/hw_random/ks-sa-rng.c +++ b/drivers/char/hw_random/ks-sa-rng.c @@ -208,6 +208,7 @@ static int ks_sa_rng_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "Failed to enable SA power-domain\n"); + pm_runtime_put_noidle(dev); pm_runtime_disable(dev); return ret; } From 855dbf10c9a7eec8ee37f304a7704c19003c24fc Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 15 Jun 2020 10:29:23 -0500 Subject: [PATCH 118/174] ibmvnic: Harden device login requests [ Upstream commit dff515a3e71dc8ab3b9dcc2e23a9b5fca88b3c18 ] The VNIC driver's "login" command sequence is the final step in the driver's initialization process with device firmware, confirming the available device queue resources to be utilized by the driver. Under high system load, firmware may not respond to the request in a timely manner or may abort the request. In such cases, the driver should reattempt the login command sequence. In case of a device error, the number of retries is bounded. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5a42ddeecfe5..4f503b9a674c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -779,12 +779,13 @@ static int ibmvnic_login(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long timeout = msecs_to_jiffies(30000); int retry_count = 0; + int retries = 10; bool retry; int rc; do { retry = false; - if (retry_count > IBMVNIC_MAX_QUEUES) { + if (retry_count > retries) { netdev_warn(netdev, "Login attempts exceeded\n"); return -1; } @@ -799,11 +800,23 @@ static int ibmvnic_login(struct net_device *netdev) if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_warn(netdev, "Login timed out\n"); - return -1; + netdev_warn(netdev, "Login timed out, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + continue; } - if (adapter->init_done_rc == PARTIALSUCCESS) { + if (adapter->init_done_rc == ABORTED) { + netdev_warn(netdev, "Login aborted, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + /* FW or device may be busy, so + * wait a bit before retrying login + */ + msleep(500); + } else if (adapter->init_done_rc == PARTIALSUCCESS) { retry_count++; release_sub_crqs(adapter, 1); From 6fd916e249357a65a4924795613eb1bf69514209 Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Mon, 15 Jun 2020 11:50:29 -0400 Subject: [PATCH 119/174] net: alx: fix race condition in alx_remove [ Upstream commit e89df5c4322c1bf495f62d74745895b5fd2a4393 ] There is a race condition exist during termination. The path is alx_stop and then alx_remove. An alx_schedule_link_check could be called before alx_stop by interrupt handler and invoke alx_link_check later. Alx_stop frees the napis, and alx_remove cancels any pending works. If any of the work is scheduled before termination and invoked before alx_remove, a null-ptr-deref occurs because both expect alx->napis[i]. This patch fix the race condition by moving cancel_work_sync functions before alx_free_napis inside alx_stop. Because interrupt handler can call alx_schedule_link_check again, alx_free_irq is moved before cancel_work_sync calls too. Signed-off-by: Zekun Shen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/alx/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d4bbcdfd691a..aa693c8e285a 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1249,8 +1249,12 @@ out_disable_adv_intr: static void __alx_stop(struct alx_priv *alx) { - alx_halt(alx); alx_free_irq(alx); + + cancel_work_sync(&alx->link_check_wk); + cancel_work_sync(&alx->reset_wk); + + alx_halt(alx); alx_free_rings(alx); alx_free_napis(alx); } @@ -1858,9 +1862,6 @@ static void alx_remove(struct pci_dev *pdev) struct alx_priv *alx = pci_get_drvdata(pdev); struct alx_hw *hw = &alx->hw; - cancel_work_sync(&alx->link_check_wk); - cancel_work_sync(&alx->reset_wk); - /* restore permanent mac address */ alx_set_macaddr(hw, hw->perm_addr); From 8dba9173a37a53197971c39f752d07e6bc1a50d0 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Fri, 12 Jun 2020 15:01:54 -0500 Subject: [PATCH 120/174] test_objagg: Fix potential memory leak in error handling [ Upstream commit a6379f0ad6375a707e915518ecd5c2270afcd395 ] In case of failure of check_expect_hints_stats(), the resources allocated by objagg_hints_get should be freed. The patch fixes this issue. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- lib/test_objagg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 72c1abfa154d..da137939a410 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -979,10 +979,10 @@ err_check_expect_stats2: err_world2_obj_get: for (i--; i >= 0; i--) world_obj_put(&world2, objagg, hints_case->key_ids[i]); - objagg_hints_put(hints); - objagg_destroy(objagg2); i = hints_case->key_ids_count; + objagg_destroy(objagg2); err_check_expect_hints_stats: + objagg_hints_put(hints); err_hints_get: err_check_expect_stats: err_world_obj_get: From 7851257375ae77ab0a2a779f6bed5becd2f0c4d6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Jun 2020 03:28:17 +0300 Subject: [PATCH 121/174] pinctrl: qcom: spmi-gpio: fix warning about irq chip reusage [ Upstream commit 5e50311556c9f409a85740e3cb4c4511e7e27da0 ] Fix the following warnings caused by reusage of the same irq_chip instance for all spmi-gpio gpio_irq_chip instances. Instead embed irq_chip into pmic_gpio_state struct. gpio gpiochip2: (c440000.qcom,spmi:pmic@2:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip3: (c440000.qcom,spmi:pmic@4:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip4: (c440000.qcom,spmi:pmic@a:gpio@c000): detected irqchip that is shared with multiple gpiochips: please fix the driver. Signed-off-by: Dmitry Baryshkov Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200604002817.667160-1-dmitry.baryshkov@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index f1fece5b9c06..3769ad08eadf 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -170,6 +170,7 @@ struct pmic_gpio_state { struct regmap *map; struct pinctrl_dev *ctrl; struct gpio_chip chip; + struct irq_chip irq; }; static const struct pinconf_generic_params pmic_gpio_bindings[] = { @@ -917,16 +918,6 @@ static int pmic_gpio_populate(struct pmic_gpio_state *state, return 0; } -static struct irq_chip pmic_gpio_irq_chip = { - .name = "spmi-gpio", - .irq_ack = irq_chip_ack_parent, - .irq_mask = irq_chip_mask_parent, - .irq_unmask = irq_chip_unmask_parent, - .irq_set_type = irq_chip_set_type_parent, - .irq_set_wake = irq_chip_set_wake_parent, - .flags = IRQCHIP_MASK_ON_SUSPEND, -}; - static int pmic_gpio_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec, unsigned long *hwirq, @@ -1053,8 +1044,16 @@ static int pmic_gpio_probe(struct platform_device *pdev) if (!parent_domain) return -ENXIO; + state->irq.name = "spmi-gpio", + state->irq.irq_ack = irq_chip_ack_parent, + state->irq.irq_mask = irq_chip_mask_parent, + state->irq.irq_unmask = irq_chip_unmask_parent, + state->irq.irq_set_type = irq_chip_set_type_parent, + state->irq.irq_set_wake = irq_chip_set_wake_parent, + state->irq.flags = IRQCHIP_MASK_ON_SUSPEND, + girq = &state->chip.irq; - girq->chip = &pmic_gpio_irq_chip; + girq->chip = &state->irq; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_level_irq; girq->fwnode = of_node_to_fwnode(state->dev->of_node); From 453cfe187723d5e63003d213742df2d6797e937d Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Thu, 4 Jun 2020 23:19:35 +0530 Subject: [PATCH 122/174] pinctrl: tegra: Use noirq suspend/resume callbacks [ Upstream commit 782b6b69847f34dda330530493ea62b7de3fd06a ] Use noirq suspend/resume callbacks as other drivers which implement noirq suspend/resume callbacks (Ex:- PCIe) depend on pinctrl driver to configure the signals used by their respective devices in the noirq phase. Signed-off-by: Vidya Sagar Reviewed-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200604174935.26560-1-vidyas@nvidia.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/tegra/pinctrl-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c index e9a7cbb9aa33..01bcef2c01bc 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.c +++ b/drivers/pinctrl/tegra/pinctrl-tegra.c @@ -685,8 +685,8 @@ static int tegra_pinctrl_resume(struct device *dev) } const struct dev_pm_ops tegra_pinctrl_pm = { - .suspend = &tegra_pinctrl_suspend, - .resume = &tegra_pinctrl_resume + .suspend_noirq = &tegra_pinctrl_suspend, + .resume_noirq = &tegra_pinctrl_resume }; static bool gpio_node_has_range(const char *compatible) From 64f7b10a91a4177e7d620b545a00c7c86d209770 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:19:34 +0100 Subject: [PATCH 123/174] s390/ptrace: pass invalid syscall numbers to tracing [ Upstream commit 00332c16b1604242a56289ff2b26e283dbad0812 ] tracing expects to see invalid syscalls, so pass it through. The syscall path in entry.S checks the syscall number before looking up the handler, so it is still safe. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/ptrace.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bc85987727f0..c544b7a11ebb 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -368,9 +368,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ad71132374f0..5a2b1501d998 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -844,11 +844,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ clear_pt_regs_flag(regs, PIF_SYSCALL); From 7c17909a889d05abf3401a13823834d076651dd5 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Mar 2020 16:44:50 +0100 Subject: [PATCH 124/174] s390/ptrace: fix setting syscall number [ Upstream commit 873e5a763d604c32988c4a78913a8dab3862d2f9 ] When strace wants to update the syscall number, it sets GPR2 to the desired number and updates the GPR via PTRACE_SETREGSET. It doesn't update regs->int_code which would cause the old syscall executed on syscall restart. As we cannot change the ptrace ABI and don't have a field for the interruption code, check whether the tracee is in a syscall and the last instruction was svc. In that case assume that the tracer wants to update the syscall number and copy the GPR2 value to regs->int_code. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5a2b1501d998..5aa786063eb3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -324,6 +324,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -335,7 +354,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -353,7 +374,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -719,6 +744,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } From 68a3cbc4466058eb61752a494606e2d192dae7cc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Jun 2020 12:25:24 -0700 Subject: [PATCH 125/174] s390/vdso: Use $(LD) instead of $(CC) to link vDSO [ Upstream commit 2b2a25845d534ac6d55086e35c033961fdd83a26 ] Currently, the VDSO is being linked through $(CC). This does not match how the rest of the kernel links objects, which is through the $(LD) variable. When clang is built in a default configuration, it first attempts to use the target triple's default linker, which is just ld. However, the user can override this through the CLANG_DEFAULT_LINKER cmake define so that clang uses another linker by default, such as LLVM's own linker, ld.lld. This can be useful to get more optimized links across various different projects. However, this is problematic for the s390 vDSO because ld.lld does not have any s390 emulatiom support: https://github.com/llvm/llvm-project/blob/llvmorg-10.0.1-rc1/lld/ELF/Driver.cpp#L132-L150 Thus, if a user is using a toolchain with ld.lld as the default, they will see an error, even if they have specified ld.bfd through the LD make variable: $ make -j"$(nproc)" -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- LLVM=1 \ LD=s390x-linux-gnu-ld \ defconfig arch/s390/kernel/vdso64/ ld.lld: error: unknown emulation: elf64_s390 clang-11: error: linker command failed with exit code 1 (use -v to see invocation) Normally, '-fuse-ld=bfd' could be used to get around this; however, this can be fragile, depending on paths and variable naming. The cleaner solution for the kernel is to take advantage of the fact that $(LD) can be invoked directly, which bypasses the heuristics of $(CC) and respects the user's choice. Similar changes have been done for ARM, ARM64, and MIPS. Link: https://lkml.kernel.org/r/20200602192523.32758-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1041 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [heiko.carstens@de.ibm.com: add --build-id flag] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso64/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From a9a3b33b20aac7308e72cb886d08128c75689883 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 24 Mar 2020 12:10:27 +0000 Subject: [PATCH 126/174] s390/vdso: fix vDSO clock_getres() [ Upstream commit 478237a595120a18e9b52fd2c57a6e8b7a01e411 ] clock_getres in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; and hrtimer_resolution depends on the enablement of the high resolution timers that can happen either at compile or at run time. Fix the s390 vdso implementation of clock_getres keeping a copy of hrtimer_resolution in vdso data and using that directly. Link: https://lkml.kernel.org/r/20200324121027.21665-1-vincenzo.frascino@arm.com Signed-off-by: Vincenzo Frascino Acked-by: Martin Schwidefsky [heiko.carstens@de.ibm.com: use llgf for proper zero extension] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/include/asm/vdso.h | 1 + arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/time.c | 1 + arch/s390/kernel/vdso64/clock_getres.S | 10 +++++----- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 169d7604eb80..f3ba84fa9bd1 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index b6628586ab70..a65cb4924bdb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); @@ -87,7 +88,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad..8ea9db599d38 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -310,6 +310,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres From cbed4eb0a92fa990b83f9b106c69e905d5c80ece Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 18:29:11 +0100 Subject: [PATCH 127/174] arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n [ Upstream commit e575fb9e76c8e33440fb859572a8b7d430f053d6 ] When I squashed the 'allnoconfig' compiler warning about the set_sve_default_vl() function being defined but not used in commit 1e570f512cbd ("arm64/sve: Eliminate data races on sve_default_vl"), I accidentally broke the build for configs where ARM64_SVE is enabled, but SYSCTL is not. Fix this by only compiling the SVE sysctl support if both CONFIG_SVE=y and CONFIG_SYSCTL=y. Cc: Dave Martin Reported-by: Qian Cai Link: https://lore.kernel.org/r/20200616131808.GA1040@lca.pw Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1765e5284994..04b982a2799e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -338,7 +338,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -384,9 +384,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) From 803d114e8f19cb5e9e40b5c26d8ea33829916d09 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 14 Jun 2020 23:43:40 +0900 Subject: [PATCH 128/174] kbuild: improve cc-option to clean up all temporary files [ Upstream commit f2f02ebd8f3833626642688b2d2c6a7b3c141fa9 ] When cc-option and friends evaluate compiler flags, the temporary file $$TMP is created as an output object, and automatically cleaned up. The actual file path of $$TMP is ..tmp, here is the process ID of $(shell ...) invoked from cc-option. (Please note $$$$ is the escape sequence of $$). Such garbage files are cleaned up in most cases, but some compiler flags create additional output files. For example, -gsplit-dwarf creates a .dwo file. When CONFIG_DEBUG_INFO_SPLIT=y, you will see a bunch of ..dwo files left in the top of build directories. You may not notice them unless you do 'ls -a', but the garbage files will increase every time you run 'make'. This commit changes the temporary object path to .tmp_/tmp, and removes .tmp_ directory when exiting. Separate build artifacts such as *.dwo will be cleaned up all together because their file paths are usually determined based on the base name of the object. Another example is -ftest-coverage, which outputs the coverage data into .gcno Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Kbuild.include | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index d1dd4a6b6adb..7da10afc92c6 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -82,20 +82,21 @@ cc-cross-prefix = $(firstword $(foreach c, $(1), \ $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) # output directory for tests below -TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) +TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ # try-run # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) # Exit code chooses option. "$$TMP" serves as a temporary file and is # automatically cleaned up. try-run = $(shell set -e; \ - TMP="$(TMPOUT).$$$$.tmp"; \ - TMPO="$(TMPOUT).$$$$.o"; \ + TMP=$(TMPOUT)/tmp; \ + TMPO=$(TMPOUT)/tmp.o; \ + mkdir -p $(TMPOUT); \ + trap "rm -rf $(TMPOUT)" EXIT; \ if ($(1)) >/dev/null 2>&1; \ then echo "$(2)"; \ else echo "$(3)"; \ - fi; \ - rm -f "$$TMP" "$$TMPO") + fi) # as-option # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) From 8ed391a3dbc497f83a5fc6eabdd86784a3172b3c Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 24 Apr 2020 12:30:46 -0700 Subject: [PATCH 129/174] recordmcount: support >64k sections [ Upstream commit 4ef57b21d6fb49d2b25c47e4cff467a0c2c8b6b7 ] When compiling a kernel with Clang and LTO, we need to run recordmcount on vmlinux.o with a large number of sections, which currently fails as the program doesn't understand extended section indexes. This change adds support for processing binaries with >64k sections. Link: https://lkml.kernel.org/r/20200424193046.160744-1-samitolvanen@google.com Link: https://lore.kernel.org/lkml/CAK7LNARbZhoaA=Nnuw0=gBrkuKbr_4Ng_Ei57uafujZf7Xazgw@mail.gmail.com/ Cc: Kees Cook Reviewed-by: Matt Helsley Signed-off-by: Sami Tolvanen Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- scripts/recordmcount.h | 98 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 6 deletions(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 74eab03e31d4..f9b19524da11 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -29,6 +29,11 @@ #undef has_rel_mcount #undef tot_relsize #undef get_mcountsym +#undef find_symtab +#undef get_shnum +#undef set_shnum +#undef get_shstrndx +#undef get_symindex #undef get_sym_str_and_relp #undef do_func #undef Elf_Addr @@ -58,6 +63,11 @@ # define __has_rel_mcount __has64_rel_mcount # define has_rel_mcount has64_rel_mcount # define tot_relsize tot64_relsize +# define find_symtab find_symtab64 +# define get_shnum get_shnum64 +# define set_shnum set_shnum64 +# define get_shstrndx get_shstrndx64 +# define get_symindex get_symindex64 # define get_sym_str_and_relp get_sym_str_and_relp_64 # define do_func do64 # define get_mcountsym get_mcountsym_64 @@ -91,6 +101,11 @@ # define __has_rel_mcount __has32_rel_mcount # define has_rel_mcount has32_rel_mcount # define tot_relsize tot32_relsize +# define find_symtab find_symtab32 +# define get_shnum get_shnum32 +# define set_shnum set_shnum32 +# define get_shstrndx get_shstrndx32 +# define get_symindex get_symindex32 # define get_sym_str_and_relp get_sym_str_and_relp_32 # define do_func do32 # define get_mcountsym get_mcountsym_32 @@ -173,6 +188,67 @@ static int MIPS_is_fake_mcount(Elf_Rel const *rp) return is_fake; } +static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab, + Elf32_Word const *symtab_shndx) +{ + unsigned long offset; + int index; + + if (sym->st_shndx != SHN_XINDEX) + return w2(sym->st_shndx); + + offset = (unsigned long)sym - (unsigned long)symtab; + index = offset / sizeof(*sym); + + return w(symtab_shndx[index]); +} + +static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) +{ + if (shdr0 && !ehdr->e_shnum) + return w(shdr0->sh_size); + + return w2(ehdr->e_shnum); +} + +static void set_shnum(Elf_Ehdr *ehdr, Elf_Shdr *shdr0, unsigned int new_shnum) +{ + if (new_shnum >= SHN_LORESERVE) { + ehdr->e_shnum = 0; + shdr0->sh_size = w(new_shnum); + } else + ehdr->e_shnum = w2(new_shnum); +} + +static int get_shstrndx(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0) +{ + if (ehdr->e_shstrndx != SHN_XINDEX) + return w2(ehdr->e_shstrndx); + + return w(shdr0->sh_link); +} + +static void find_symtab(Elf_Ehdr *const ehdr, Elf_Shdr const *shdr0, + unsigned const nhdr, Elf32_Word **symtab, + Elf32_Word **symtab_shndx) +{ + Elf_Shdr const *relhdr; + unsigned k; + + *symtab = NULL; + *symtab_shndx = NULL; + + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { + if (relhdr->sh_type == SHT_SYMTAB) + *symtab = (void *)ehdr + relhdr->sh_offset; + else if (relhdr->sh_type == SHT_SYMTAB_SHNDX) + *symtab_shndx = (void *)ehdr + relhdr->sh_offset; + + if (*symtab && *symtab_shndx) + break; + } +} + /* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */ static int append_func(Elf_Ehdr *const ehdr, Elf_Shdr *const shstr, @@ -188,10 +264,12 @@ static int append_func(Elf_Ehdr *const ehdr, char const *mc_name = (sizeof(Elf_Rela) == rel_entsize) ? ".rela__mcount_loc" : ".rel__mcount_loc"; - unsigned const old_shnum = w2(ehdr->e_shnum); uint_t const old_shoff = _w(ehdr->e_shoff); uint_t const old_shstr_sh_size = _w(shstr->sh_size); uint_t const old_shstr_sh_offset = _w(shstr->sh_offset); + Elf_Shdr *const shdr0 = (Elf_Shdr *)(old_shoff + (void *)ehdr); + unsigned int const old_shnum = get_shnum(ehdr, shdr0); + unsigned int const new_shnum = 2 + old_shnum; /* {.rel,}__mcount_loc */ uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size); uint_t new_e_shoff; @@ -201,6 +279,8 @@ static int append_func(Elf_Ehdr *const ehdr, t += (_align & -t); /* word-byte align */ new_e_shoff = t; + set_shnum(ehdr, shdr0, new_shnum); + /* body for new shstrtab */ if (ulseek(sb.st_size, SEEK_SET) < 0) return -1; @@ -255,7 +335,6 @@ static int append_func(Elf_Ehdr *const ehdr, return -1; ehdr->e_shoff = _w(new_e_shoff); - ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ if (ulseek(0, SEEK_SET) < 0) return -1; if (uwrite(ehdr, sizeof(*ehdr)) < 0) @@ -434,6 +513,8 @@ static int find_secsym_ndx(unsigned const txtndx, uint_t *const recvalp, unsigned int *sym_index, Elf_Shdr const *const symhdr, + Elf32_Word const *symtab, + Elf32_Word const *symtab_shndx, Elf_Ehdr const *const ehdr) { Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset) @@ -445,7 +526,7 @@ static int find_secsym_ndx(unsigned const txtndx, for (symp = sym0, t = nsym; t; --t, ++symp) { unsigned int const st_bind = ELF_ST_BIND(symp->st_info); - if (txtndx == w2(symp->st_shndx) + if (txtndx == get_symindex(symp, symtab, symtab_shndx) /* avoid STB_WEAK */ && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { /* function symbols on ARM have quirks, avoid them */ @@ -516,21 +597,23 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, return totrelsz; } - /* Overall supervision for Elf32 ET_REL file. */ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) { Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + (void *)ehdr); - unsigned const nhdr = w2(ehdr->e_shnum); - Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)]; + unsigned const nhdr = get_shnum(ehdr, shdr0); + Elf_Shdr *const shstr = &shdr0[get_shstrndx(ehdr, shdr0)]; char const *const shstrtab = (char const *)(_w(shstr->sh_offset) + (void *)ehdr); Elf_Shdr const *relhdr; unsigned k; + Elf32_Word *symtab; + Elf32_Word *symtab_shndx; + /* Upper bound on space: assume all relevant relocs are for mcount. */ unsigned totrelsz; @@ -561,6 +644,8 @@ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, return -1; } + find_symtab(ehdr, shdr0, nhdr, &symtab, &symtab_shndx); + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { char const *const txtname = has_rel_mcount(relhdr, shdr0, shstrtab, fname); @@ -577,6 +662,7 @@ static int do_func(Elf_Ehdr *const ehdr, char const *const fname, result = find_secsym_ndx(w(relhdr->sh_info), txtname, &recval, &recsym, &shdr0[symsec_sh_link], + symtab, symtab_shndx, ehdr); if (result) goto out; From d14eb5d8f0f4a2783e2125e0ba8a6732b345cc35 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 12 May 2020 17:02:33 +0900 Subject: [PATCH 130/174] kprobes: Suppress the suspicious RCU warning on kprobes [ Upstream commit 6743ad432ec92e680cd0d9db86cb17b949cf5a43 ] Anders reported that the lockdep warns that suspicious RCU list usage in register_kprobe() (detected by CONFIG_PROVE_RCU_LIST.) This is because get_kprobe() access kprobe_table[] by hlist_for_each_entry_rcu() without rcu_read_lock. If we call get_kprobe() from the breakpoint handler context, it is run with preempt disabled, so this is not a problem. But in other cases, instead of rcu_read_lock(), we locks kprobe_mutex so that the kprobe_table[] is not updated. So, current code is safe, but still not good from the view point of RCU. Joel suggested that we can silent that warning by passing lockdep_is_held() to the last argument of hlist_for_each_entry_rcu(). Add lockdep_is_held(&kprobe_mutex) at the end of the hlist_for_each_entry_rcu() to suppress the warning. Link: http://lkml.kernel.org/r/158927055350.27680.10261450713467997503.stgit@devnote2 Reported-by: Anders Roxell Suggested-by: Joel Fernandes Reviewed-by: Joel Fernandes (Google) Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 195ecb955fcc..950a5cfd262c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -326,7 +326,8 @@ struct kprobe *get_kprobe(void *addr) struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each_entry_rcu(p, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist, + lockdep_is_held(&kprobe_mutex)) { if (p->addr == addr) return p; } From 72647ea37700b7baefbc4df15371e3e75cb8c301 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 5 Jun 2020 16:58:36 +0200 Subject: [PATCH 131/174] blktrace: break out of blktrace setup on concurrent calls [ Upstream commit 1b0b283648163dae2a214ca28ed5a99f62a77319 ] We use one blktrace per request_queue, that means one per the entire disk. So we cannot run one blktrace on say /dev/vda and then /dev/vda1, or just two calls on /dev/vda. We check for concurrent setup only at the very end of the blktrace setup though. If we try to run two concurrent blktraces on the same block device the second one will fail, and the first one seems to go on. However when one tries to kill the first one one will see things like this: The kernel will show these: ``` debugfs: File 'dropped' in directory 'nvme1n1' already present! debugfs: File 'msg' in directory 'nvme1n1' already present! debugfs: File 'trace0' in directory 'nvme1n1' already present! `` And userspace just sees this error message for the second call: ``` blktrace /dev/nvme1n1 BLKTRACESETUP(2) /dev/nvme1n1 failed: 5/Input/output error ``` The first userspace process #1 will also claim that the files were taken underneath their nose as well. The files are taken away form the first process given that when the second blktrace fails, it will follow up with a BLKTRACESTOP and BLKTRACETEARDOWN. This means that even if go-happy process #1 is waiting for blktrace data, we *have* been asked to take teardown the blktrace. This can easily be reproduced with break-blktrace [0] run_0005.sh test. Just break out early if we know we're already going to fail, this will prevent trying to create the files all over again, which we know still exist. [0] https://github.com/mcgrof/break-blktrace Signed-off-by: Luis Chamberlain Signed-off-by: Jan Kara Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- kernel/trace/blktrace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index a677aa84ccb6..eaee960153e1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -3,6 +3,9 @@ * Copyright (C) 2006 Jens Axboe * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -495,6 +498,16 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, */ strreplace(buts->name, '/', '_'); + /* + * bdev can be NULL, as with scsi-generic, this is a helpful as + * we can be. + */ + if (q->blk_trace) { + pr_warn("Concurrent blktraces are not allowed on %s\n", + buts->name); + return -EBUSY; + } + bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; From 26b0956cb374171832b730996a59411649d7047d Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Wed, 17 Jun 2020 14:18:37 +0800 Subject: [PATCH 132/174] block: update hctx map when use multiple maps [ Upstream commit fe35ec58f0d339221643287bbb7cee15c93a5389 ] There is an issue when tune the number for read and write queues, if the total queue count was not changed. The hctx->type cannot be updated, since __blk_mq_update_nr_hw_queues will return directly if the total queue count has not been changed. Reproduce: dmesg | grep "default/read/poll" [ 2.607459] nvme nvme0: 48/0/0 default/read/poll queues cat /sys/kernel/debug/block/nvme0n1/hctx*/type | sort | uniq -c 48 default tune the write queues to 24: echo 24 > /sys/module/nvme/parameters/write_queues echo 1 > /sys/block/nvme0n1/device/reset_controller dmesg | grep "default/read/poll" [ 433.547235] nvme nvme0: 24/24/0 default/read/poll queues cat /sys/kernel/debug/block/nvme0n1/hctx*/type | sort | uniq -c 48 default The driver's hardware queue mapping is not same as block layer. Signed-off-by: Weiping Zhang Reviewed-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0550366e25d8..f1b930a300a3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3279,7 +3279,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) nr_hw_queues = nr_cpu_ids; - if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues) + if (nr_hw_queues < 1) + return; + if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; list_for_each_entry(q, &set->tag_list, tag_set_list) From f06a6294e113e7f530c0942a886a10243a762b77 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 16 Jun 2020 19:33:06 +0530 Subject: [PATCH 133/174] RISC-V: Don't allow write+exec only page mapping request in mmap [ Upstream commit e0d17c842c0f824fd4df9f4688709fc6907201e1 ] As per the table 4.4 of version "20190608-Priv-MSU-Ratified" of the RISC-V instruction set manual[0], the PTE permission bit combination of "write+exec only" is reserved for future use. Hence, don't allow such mapping request in mmap call. An issue is been reported by David Abdurachmanov, that while running stress-ng with "sysbadaddr" argument, RCU stalls are observed on RISC-V specific kernel. This issue arises when the stress-sysbadaddr request for pages with "write+exec only" permission bits and then passes the address obtain from this mmap call to various system call. For the riscv kernel, the mmap call should fail for this particular combination of permission bits since it's not valid. [0]: http://dabbelt.com/~palmer/keep/riscv-isa-manual/riscv-privileged-20190608-1.pdf Signed-off-by: Yash Shah Reported-by: David Abdurachmanov [Palmer: Refer to the latest ISA specification at the only link I could find, and update the terminology.] Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/sys_riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85c..12f8a7fce78b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) + if (unlikely(!(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } From 0211e0d7f185b4ff51960196b035654f1a2c068a Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Thu, 11 Jun 2020 11:08:45 -0700 Subject: [PATCH 134/174] ALSA: hda: Add NVIDIA codec IDs 9a & 9d through a0 to patch table commit adb36a8203831e40494a92095dacd566b2ad4a69 upstream. These IDs are for upcoming NVIDIA chips with audio functions that are largely similar to the existing ones. Signed-off-by: Aaron Plattner Cc: Link: https://lore.kernel.org/r/20200611180845.39942-1-aplattner@nvidia.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index d41c91468ab3..e78c4367b6c8 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4146,6 +4146,11 @@ HDA_CODEC_ENTRY(0x10de0095, "GPU 95 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi), From 1a1bc3ae6935bfda9bf2c37a71f0712225b89b3f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jun 2020 15:21:50 +0200 Subject: [PATCH 135/174] ALSA: hda/realtek - Add quirk for MSI GE63 laptop commit a0b03952a797591d4b6d6fa7b9b7872e27783729 upstream. MSI GE63 laptop with ALC1220 codec requires the very same quirk (ALC1220_FIXUP_CLEVO_P950) as other MSI devices for the proper sound output. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208057 Cc: Link: https://lore.kernel.org/r/20200616132150.8778-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 459a7d61326e..45315b14abe6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2460,6 +2460,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), From 6ee4d61403d25e2f34f9428acfa99537a6b45639 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 17 Jun 2020 18:29:02 +0800 Subject: [PATCH 136/174] ALSA: hda/realtek: Add mute LED and micmute LED support for HP systems commit b2c22910fe5aae10b7e17b0721e63a3edf0c9553 upstream. There are two more HP systems control mute LED from HDA codec and need to expose micmute led class so SoF can control micmute LED. Add quirks to support them. Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20200617102906.16156-2-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 45315b14abe6..34868459104d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7436,6 +7436,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), From 3c4f9a5541bb481b11904fc4c9b1c755fc3cd8f9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 11 Jun 2020 21:51:50 -0700 Subject: [PATCH 137/174] ACPI: sysfs: Fix pm_profile_attr type commit e6d701dca9893990d999fd145e3e07223c002b06 upstream. When running a kernel with Clang's Control Flow Integrity implemented, there is a violation that happens when accessing /sys/firmware/acpi/pm_profile: $ cat /sys/firmware/acpi/pm_profile 0 $ dmesg ... [ 17.352564] ------------[ cut here ]------------ [ 17.352568] CFI failure (target: acpi_show_profile+0x0/0x8): [ 17.352572] WARNING: CPU: 3 PID: 497 at kernel/cfi.c:29 __cfi_check_fail+0x33/0x40 [ 17.352573] Modules linked in: [ 17.352575] CPU: 3 PID: 497 Comm: cat Tainted: G W 5.7.0-microsoft-standard+ #1 [ 17.352576] RIP: 0010:__cfi_check_fail+0x33/0x40 [ 17.352577] Code: 48 c7 c7 50 b3 85 84 48 c7 c6 50 0a 4e 84 e8 a4 d8 60 00 85 c0 75 02 5b c3 48 c7 c7 dc 5e 49 84 48 89 de 31 c0 e8 7d 06 eb ff <0f> 0b 5b c3 00 00 cc cc 00 00 cc cc 00 85 f6 74 25 41 b9 ea ff ff [ 17.352577] RSP: 0018:ffffaa6dc3c53d30 EFLAGS: 00010246 [ 17.352578] RAX: 331267e0c06cee00 RBX: ffffffff83d85890 RCX: ffffffff8483a6f8 [ 17.352579] RDX: ffff9cceabbb37c0 RSI: 0000000000000082 RDI: ffffffff84bb9e1c [ 17.352579] RBP: ffffffff845b2bc8 R08: 0000000000000001 R09: ffff9cceabbba200 [ 17.352579] R10: 000000000000019d R11: 0000000000000000 R12: ffff9cc947766f00 [ 17.352580] R13: ffffffff83d6bd50 R14: ffff9ccc6fa80000 R15: ffffffff845bd328 [ 17.352582] FS: 00007fdbc8d13580(0000) GS:ffff9cce91ac0000(0000) knlGS:0000000000000000 [ 17.352582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.352583] CR2: 00007fdbc858e000 CR3: 00000005174d0000 CR4: 0000000000340ea0 [ 17.352584] Call Trace: [ 17.352586] ? rev_id_show+0x8/0x8 [ 17.352587] ? __cfi_check+0x45bac/0x4b640 [ 17.352589] ? kobj_attr_show+0x73/0x80 [ 17.352590] ? sysfs_kf_seq_show+0xc1/0x140 [ 17.352592] ? ext4_seq_options_show.cfi_jt+0x8/0x8 [ 17.352593] ? seq_read+0x180/0x600 [ 17.352595] ? sysfs_create_file_ns.cfi_jt+0x10/0x10 [ 17.352596] ? tlbflush_read_file+0x8/0x8 [ 17.352597] ? __vfs_read+0x6b/0x220 [ 17.352598] ? handle_mm_fault+0xa23/0x11b0 [ 17.352599] ? vfs_read+0xa2/0x130 [ 17.352599] ? ksys_read+0x6a/0xd0 [ 17.352601] ? __do_sys_getpgrp+0x8/0x8 [ 17.352602] ? do_syscall_64+0x72/0x120 [ 17.352603] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 17.352604] ---[ end trace 7b1fa81dc897e419 ]--- When /sys/firmware/acpi/pm_profile is read, sysfs_kf_seq_show is called, which in turn calls kobj_attr_show, which gets the ->show callback member by calling container_of on attr (casting it to struct kobj_attribute) then calls it. There is a CFI violation because pm_profile_attr is of type struct device_attribute but kobj_attr_show calls ->show expecting it to be from struct kobj_attribute. CFI checking ensures that function pointer types match when doing indirect calls. Fix pm_profile_attr to be defined in terms of kobj_attribute so there is no violation or mismatch. Fixes: 362b646062b2 ("ACPI: Export FADT pm_profile integer value to userspace") Link: https://github.com/ClangBuiltLinux/linux/issues/1051 Reported-by: yuu ichii Signed-off-by: Nathan Chancellor Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 3a89909b50a6..76c668c05fa0 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -938,13 +938,13 @@ static void __exit interrupt_stats_exit(void) } static ssize_t -acpi_show_profile(struct device *dev, struct device_attribute *attr, +acpi_show_profile(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", acpi_gbl_FADT.preferred_profile); } -static const struct device_attribute pm_profile_attr = +static const struct kobj_attribute pm_profile_attr = __ATTR(pm_profile, S_IRUGO, acpi_show_profile, NULL); static ssize_t hotplug_enabled_show(struct kobject *kobj, From 824d0b6225f3fa2992704478a8df520537cfcb56 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 15 Jun 2020 04:43:32 -0600 Subject: [PATCH 138/174] ACPI: configfs: Disallow loading ACPI tables when locked down commit 75b0cea7bf307f362057cc778efe89af4c615354 upstream. Like other vectors already patched, this one here allows the root user to load ACPI tables, which enables arbitrary physical address writes, which in turn makes it possible to disable lockdown. Prevents this by checking the lockdown status before allowing a new ACPI table to be installed. The link in the trailer shows a PoC of how this might be used. Link: https://git.zx2c4.com/american-unsigned-language/tree/american-unsigned-language-2.sh Cc: 5.4+ # 5.4+ Signed-off-by: Jason A. Donenfeld Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_configfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c index 57d9d574d4dd..01738d8e888e 100644 --- a/drivers/acpi/acpi_configfs.c +++ b/drivers/acpi/acpi_configfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "acpica/accommon.h" #include "acpica/actables.h" @@ -28,7 +29,10 @@ static ssize_t acpi_table_aml_write(struct config_item *cfg, { const struct acpi_table_header *header = data; struct acpi_table *table; - int ret; + int ret = security_locked_down(LOCKDOWN_ACPI_TABLES); + + if (ret) + return ret; table = container_of(cfg, struct acpi_table, cfg); From 0236040fcf97d7d9e807d61e08f745678885271d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 19 Jun 2020 07:43:49 +0800 Subject: [PATCH 139/174] erofs: fix partially uninitialized misuse in z_erofs_onlinepage_fixup commit 3c597282887fd55181578996dca52ce697d985a5 upstream. Hongyu reported "id != index" in z_erofs_onlinepage_fixup() with specific aarch64 environment easily, which wasn't shown before. After digging into that, I found that high 32 bits of page->private was set to 0xaaaaaaaa rather than 0 (due to z_erofs_onlinepage_init behavior with specific compiler options). Actually we only use low 32 bits to keep the page information since page->private is only 4 bytes on most 32-bit platforms. However z_erofs_onlinepage_fixup() uses the upper 32 bits by mistake. Let's fix it now. Reported-and-tested-by: Hongyu Jin Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20200618234349.22553-1-hsiangkao@aol.com Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/zdata.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index faf950189bd7..568d5a493876 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -148,22 +148,22 @@ static inline void z_erofs_onlinepage_init(struct page *page) static inline void z_erofs_onlinepage_fixup(struct page *page, uintptr_t index, bool down) { - unsigned long *p, o, v, id; -repeat: - p = &page_private(page); - o = READ_ONCE(*p); + union z_erofs_onlinepage_converter u = { .v = &page_private(page) }; + int orig, orig_index, val; - id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; - if (id) { +repeat: + orig = atomic_read(u.o); + orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (orig_index) { if (!index) return; - DBG_BUGON(id != index); + DBG_BUGON(orig_index != index); } - v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | - ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); - if (cmpxchg(p, o, v) != o) + val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); + if (atomic_cmpxchg(u.o, orig, val) != orig) goto repeat; } From 5774f9fa563bb51294dd3262b4fac1f4dc6f0c3c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 16 Jun 2020 15:33:07 +0800 Subject: [PATCH 140/174] KVM: X86: Fix MSR range of APIC registers in X2APIC mode commit bf10bd0be53282183f374af23577b18b5fbf7801 upstream. Only MSR address range 0x800 through 0x8ff is architecturally reserved and dedicated for accessing APIC registers in x2APIC mode. Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") Signed-off-by: Xiaoyao Li Message-Id: <20200616073307.16440-1-xiaoyao.li@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fff279fb173b..eed1866ae4d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2753,7 +2753,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -3057,7 +3057,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); break; case MSR_IA32_TSCDEADLINE: From 8ccc6ac51eac8d0735328a54a7bf44ec6d82a18c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 14:58:29 -0700 Subject: [PATCH 141/174] KVM: nVMX: Plumb L2 GPA through to PML emulation commit 2dbebf7ae1ed9a420d954305e2c9d5ed39ec57c3 upstream. Explicitly pass the L2 GPA to kvm_arch_write_log_dirty(), which for all intents and purposes is vmx_write_pml_buffer(), instead of having the latter pull the GPA from vmcs.GUEST_PHYSICAL_ADDRESS. If the dirty bit update is the result of KVM emulation (rare for L2), then the GPA in the VMCS may be stale and/or hold a completely unrelated GPA. Fixes: c5f983f6e8455 ("nVMX: Implement emulated Page Modification Logging") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200622215832.22090-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/paging_tmpl.h | 7 ++++--- arch/x86/kvm/vmx/vmx.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7d91a3f5b26a..742de9d97ba1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1160,7 +1160,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a3824ae9a634..aab02ea2d2cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1819,10 +1819,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, * Emulate arch specific page modification logging for the * nested hypervisor */ -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa) { if (kvm_x86_ops->write_log_dirty) - return kvm_x86_ops->write_log_dirty(vcpu); + return kvm_x86_ops->write_log_dirty(vcpu, l2_gpa); return 0; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d55674f44a18..6f2208cf30df 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -209,7 +209,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4e3f137ffa8c..a20fc1ba607f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -220,7 +220,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte) static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, - int write_fault) + gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; @@ -245,7 +245,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) + if (kvm_arch_write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; @@ -442,7 +442,8 @@ retry_walk: (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, + addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5fac01865a2d..2e6d400ce0a0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7272,11 +7272,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; + gpa_t dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7295,7 +7295,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) return 1; } - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + gpa &= ~0xFFFull; dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, From 96a80133559f1585814fccf110c6ca17ef17b9d7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 17:51:35 -0700 Subject: [PATCH 142/174] KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL commit bf09fb6cba4f7099620cc9ed32d94c27c4af992e upstream. Remove support for context switching between the guest's and host's desired UMWAIT_CONTROL. Propagating the guest's value to hardware isn't required for correct functionality, e.g. KVM intercepts reads and writes to the MSR, and the latency effects of the settings controlled by the MSR are not architecturally visible. As a general rule, KVM should not allow the guest to control power management settings unless explicitly enabled by userspace, e.g. see KVM_CAP_X86_DISABLE_EXITS. E.g. Intel's SDM explicitly states that C0.2 can improve the performance of SMT siblings. A devious guest could disable C0.2 so as to improve the performance of their workloads at the detriment to workloads running in the host or on other VMs. Wholesale removal of UMWAIT_CONTROL context switching also fixes a race condition where updates from the host may cause KVM to enter the guest with the incorrect value. Because updates are are propagated to all CPUs via IPI (SMP function callback), the value in hardware may be stale with respect to the cached value and KVM could enter the guest with the wrong value in hardware. As above, the guest can't observe the bad value, but it's a weird and confusing wart in the implementation. Removal also fixes the unnecessary usage of VMX's atomic load/store MSR lists. Using the lists is only necessary for MSRs that are required for correct functionality immediately upon VM-Enter/VM-Exit, e.g. EFER on old hardware, or for MSRs that need to-the-uop precision, e.g. perf related MSRs. For UMWAIT_CONTROL, the effects are only visible in the kernel via TPAUSE/delay(), and KVM doesn't do any form of delay in vcpu_vmx_run(). Using the atomic lists is undesirable as they are more expensive than direct RDMSR/WRMSR. Furthermore, even if giving the guest control of the MSR is legitimate, e.g. in pass-through scenarios, it's not clear that the benefits would outweigh the overhead. E.g. saving and restoring an MSR across a VMX roundtrip costs ~250 cycles, and if the guest diverged from the host that cost would be paid on every run of the guest. In other words, if there is a legitimate use case then it should be enabled by a new per-VM capability. Note, KVM still needs to emulate MSR_IA32_UMWAIT_CONTROL so that it can correctly expose other WAITPKG features to the guest, e.g. TPAUSE, UMWAIT and UMONITOR. Fixes: 6e3ba4abcea56 ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL") Cc: stable@vger.kernel.org Cc: Jingqi Liu Cc: Tao Xu Signed-off-by: Sean Christopherson Message-Id: <20200623005135.10414-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/umwait.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 18 ------------------ arch/x86/kvm/vmx/vmx.h | 2 -- 3 files changed, 26 deletions(-) diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index c222f283b456..32b4dc9030aa 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,12 +17,6 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); -u32 get_umwait_control_msr(void) -{ - return umwait_control_cached; -} -EXPORT_SYMBOL_GPL(get_umwait_control_msr); - /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2e6d400ce0a0..10e6471896cd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6427,23 +6427,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6533,7 +6516,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 295c5f83842e..a1919ec7fd10 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,8 +14,6 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; -extern u32 get_umwait_control_msr(void); - #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 From 3ceaf206b706c1a81ac48d76ced9028e460b27b0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Jun 2020 20:15:09 -0700 Subject: [PATCH 143/174] x86/cpu: Use pinning mask for CR4 bits needing to be 0 commit a13b9d0b97211579ea63b96c606de79b963c0f47 upstream. The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older kernels should enforce this bit to zero, and newer kernels need to enforce it depending on boot-time configuration (e.g. "nofsgsbase"). To support a pinned bit being either 1 or 0, use an explicit mask in combination with the expected pinned bit values. [1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kicks-ass.net Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 650df6d21049..9b3f25e14608 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,6 +366,9 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -390,20 +393,20 @@ EXPORT_SYMBOL(native_write_cr0); void native_write_cr4(unsigned long val) { - unsigned long bits_missing = 0; + unsigned long bits_changed = 0; set_register: asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { - bits_missing = ~val & cr4_pinned_bits; - val |= bits_missing; + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; goto set_register; } - /* Warn after we've set the missing bits. */ - WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", - bits_missing); + /* Warn after we've corrected the changed bits. */ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); } } EXPORT_SYMBOL(native_write_cr4); @@ -415,7 +418,7 @@ void cr4_init(void) if (boot_cpu_has(X86_FEATURE_PCID)) cr4 |= X86_CR4_PCIDE; if (static_branch_likely(&cr_pinning)) - cr4 |= cr4_pinned_bits; + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; __write_cr4(cr4); @@ -430,10 +433,7 @@ void cr4_init(void) */ static void __init setup_cr_pinning(void) { - unsigned long mask; - - mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); - cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; static_key_enable(&cr_pinning.key); } From df13086490db6155f43986dda4e006c5644cfe22 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 18 Jun 2020 11:20:02 +0100 Subject: [PATCH 144/174] x86/asm/64: Align start of __clear_user() loop to 16-bytes commit bb5570ad3b54e7930997aec76ab68256d5236d94 upstream. x86 CPUs can suffer severe performance drops if a tight loop, such as the ones in __clear_user(), straddles a 16-byte instruction fetch window, or worse, a 64-byte cacheline. This issues was discovered in the SUSE kernel with the following commit, 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") which increased the code object size from 10 bytes to 15 bytes and caused the 8-byte copy loop in __clear_user() to be split across a 64-byte cacheline. Aligning the start of the loop to 16-bytes makes this fit neatly inside a single instruction fetch window again and restores the performance of __clear_user() which is used heavily when reading from /dev/zero. Here are some numbers from running libmicro's read_z* and pread_z* microbenchmarks which read from /dev/zero: Zen 1 (Naples) libmicro-file 5.7.0-rc6 5.7.0-rc6 5.7.0-rc6 revert-1153933703d9+ align16+ Time mean95-pread_z100k 9.9195 ( 0.00%) 5.9856 ( 39.66%) 5.9938 ( 39.58%) Time mean95-pread_z10k 1.1378 ( 0.00%) 0.7450 ( 34.52%) 0.7467 ( 34.38%) Time mean95-pread_z1k 0.2623 ( 0.00%) 0.2251 ( 14.18%) 0.2252 ( 14.15%) Time mean95-pread_zw100k 9.9974 ( 0.00%) 6.0648 ( 39.34%) 6.0756 ( 39.23%) Time mean95-read_z100k 9.8940 ( 0.00%) 5.9885 ( 39.47%) 5.9994 ( 39.36%) Time mean95-read_z10k 1.1394 ( 0.00%) 0.7483 ( 34.33%) 0.7482 ( 34.33%) Note that this doesn't affect Haswell or Broadwell microarchitectures which seem to avoid the alignment issue by executing the loop straight out of the Loop Stream Detector (verified using perf events). Fixes: 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") Signed-off-by: Matt Fleming Signed-off-by: Borislav Petkov Cc: # v4.19+ Link: https://lkml.kernel.org/r/20200618102002.30034-1-matt@codeblueprint.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/usercopy_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index fff28c6f73a2..b0dfac3d3df7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" + " .align 16\n" "0: movq $0,(%[dst])\n" " addq $8,%[dst]\n" " decl %%ecx ; jnz 0b\n" From 0a4dfc69ea7ebe6ed0f0af9bfa10f2096dfa4c49 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Jun 2020 13:33:05 +0100 Subject: [PATCH 145/174] btrfs: fix bytes_may_use underflow when running balance and scrub in parallel commit 6bd335b469f945f75474c11e3f577f85409f39c3 upstream. When balance and scrub are running in parallel it is possible to end up with an underflow of the bytes_may_use counter of the data space_info object, which triggers a warning like the following: [134243.793196] BTRFS info (device sdc): relocating block group 1104150528 flags data [134243.806891] ------------[ cut here ]------------ [134243.807561] WARNING: CPU: 1 PID: 26884 at fs/btrfs/space-info.h:125 btrfs_add_reserved_bytes+0x1da/0x280 [btrfs] [134243.808819] Modules linked in: btrfs blake2b_generic xor (...) [134243.815779] CPU: 1 PID: 26884 Comm: kworker/u8:8 Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 [134243.816944] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [134243.818389] Workqueue: writeback wb_workfn (flush-btrfs-108483) [134243.819186] RIP: 0010:btrfs_add_reserved_bytes+0x1da/0x280 [btrfs] [134243.819963] Code: 0b f2 85 (...) [134243.822271] RSP: 0018:ffffa4160aae7510 EFLAGS: 00010287 [134243.822929] RAX: 000000000000c000 RBX: ffff96159a8c1000 RCX: 0000000000000000 [134243.823816] RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffff96158067a810 [134243.824742] RBP: ffff96158067a800 R08: 0000000000000001 R09: 0000000000000000 [134243.825636] R10: ffff961501432a40 R11: 0000000000000000 R12: 000000000000c000 [134243.826532] R13: 0000000000000001 R14: ffffffffffff4000 R15: ffff96158067a810 [134243.827432] FS: 0000000000000000(0000) GS:ffff9615baa00000(0000) knlGS:0000000000000000 [134243.828451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [134243.829184] CR2: 000055bd7e414000 CR3: 00000001077be004 CR4: 00000000003606e0 [134243.830083] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [134243.830975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [134243.831867] Call Trace: [134243.832211] find_free_extent+0x4a0/0x16c0 [btrfs] [134243.832846] btrfs_reserve_extent+0x91/0x180 [btrfs] [134243.833487] cow_file_range+0x12d/0x490 [btrfs] [134243.834080] fallback_to_cow+0x82/0x1b0 [btrfs] [134243.834689] ? release_extent_buffer+0x121/0x170 [btrfs] [134243.835370] run_delalloc_nocow+0x33f/0xa30 [btrfs] [134243.836032] btrfs_run_delalloc_range+0x1ea/0x6d0 [btrfs] [134243.836725] ? find_lock_delalloc_range+0x221/0x250 [btrfs] [134243.837450] writepage_delalloc+0xe8/0x150 [btrfs] [134243.838059] __extent_writepage+0xe8/0x4c0 [btrfs] [134243.838674] extent_write_cache_pages+0x237/0x530 [btrfs] [134243.839364] extent_writepages+0x44/0xa0 [btrfs] [134243.839946] do_writepages+0x23/0x80 [134243.840401] __writeback_single_inode+0x59/0x700 [134243.841006] writeback_sb_inodes+0x267/0x5f0 [134243.841548] __writeback_inodes_wb+0x87/0xe0 [134243.842091] wb_writeback+0x382/0x590 [134243.842574] ? wb_workfn+0x4a2/0x6c0 [134243.843030] wb_workfn+0x4a2/0x6c0 [134243.843468] process_one_work+0x26d/0x6a0 [134243.843978] worker_thread+0x4f/0x3e0 [134243.844452] ? process_one_work+0x6a0/0x6a0 [134243.844981] kthread+0x103/0x140 [134243.845400] ? kthread_create_worker_on_cpu+0x70/0x70 [134243.846030] ret_from_fork+0x3a/0x50 [134243.846494] irq event stamp: 0 [134243.846892] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [134243.847682] hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 [134243.848687] softirqs last enabled at (0): [] copy_process+0x74f/0x2020 [134243.849913] softirqs last disabled at (0): [<0000000000000000>] 0x0 [134243.850698] ---[ end trace bd7c03622e0b0a96 ]--- [134243.851335] ------------[ cut here ]------------ When relocating a data block group, for each extent allocated in the block group we preallocate another extent with the same size for the data relocation inode (we do it at prealloc_file_extent_cluster()). We reserve space by calling btrfs_check_data_free_space(), which ends up incrementing the data space_info's bytes_may_use counter, and then call btrfs_prealloc_file_range() to allocate the extent, which always decrements the bytes_may_use counter by the same amount. The expectation is that writeback of the data relocation inode always follows a NOCOW path, by writing into the preallocated extents. However, when starting writeback we might end up falling back into the COW path, because the block group that contains the preallocated extent was turned into RO mode by a scrub running in parallel. The COW path then calls the extent allocator which ends up calling btrfs_add_reserved_bytes(), and this function decrements the bytes_may_use counter of the data space_info object by an amount corresponding to the size of the allocated extent, despite we haven't previously incremented it. When the counter currently has a value smaller then the allocated extent we reset the counter to 0 and emit a warning, otherwise we just decrement it and slowly mess up with this counter which is crucial for space reservation, the end result can be granting reserved space to tasks when there isn't really enough free space, and having the tasks fail later in critical places where error handling consists of a transaction abort or hitting a BUG_ON(). Fix this by making sure that if we fallback to the COW path for a data relocation inode, we increment the bytes_may_use counter of the data space_info object. The COW path will then decrement it at btrfs_add_reserved_bytes() on success or through its error handling part by a call to extent_clear_unlock_delalloc() (which ends up calling btrfs_clear_delalloc_extent() that does the decrement operation) in case of an error. Test case btrfs/061 from fstests could sporadically trigger this. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 127cdecbe872..48c8e5a4c912 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1328,6 +1328,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode)); + const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 range_start = start; @@ -1358,18 +1360,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, * data space info, which we incremented in the step above. * * If we need to fallback to cow and the inode corresponds to a free - * space cache inode, we must also increment bytes_may_use of the data - * space_info for the same reason. Space caches always get a prealloc + * space cache inode or an inode of the data relocation tree, we must + * also increment bytes_may_use of the data space_info for the same + * reason. Space caches and relocated data extents always get a prealloc * extent for them, however scrub or balance may have set the block - * group that contains that extent to RO mode. + * group that contains that extent to RO mode and therefore force COW + * when starting writeback. */ count = count_range_bits(io_tree, &range_start, end, range_bytes, EXTENT_NORESERVE, 0); - if (count > 0 || is_space_ino) { - const u64 bytes = is_space_ino ? range_bytes : count; + if (count > 0 || is_space_ino || is_reloc_ino) { + u64 bytes = count; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_space_info *sinfo = fs_info->data_sinfo; + if (is_space_ino || is_reloc_ino) + bytes = range_bytes; + spin_lock(&sinfo->lock); btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); spin_unlock(&sinfo->lock); From 53a0816610478ef21f72fa3898c96930203255b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Jun 2020 13:32:55 +0100 Subject: [PATCH 146/174] btrfs: fix data block group relocation failure due to concurrent scrub commit 432cd2a10f1c10cead91fe706ff5dc52f06d642a upstream. When running relocation of a data block group while scrub is running in parallel, it is possible that the relocation will fail and abort the current transaction with an -EINVAL error: [134243.988595] BTRFS info (device sdc): found 14 extents, stage: move data extents [134243.999871] ------------[ cut here ]------------ [134244.000741] BTRFS: Transaction aborted (error -22) [134244.001692] WARNING: CPU: 0 PID: 26954 at fs/btrfs/ctree.c:1071 __btrfs_cow_block+0x6a7/0x790 [btrfs] [134244.003380] Modules linked in: btrfs blake2b_generic xor raid6_pq (...) [134244.012577] CPU: 0 PID: 26954 Comm: btrfs Tainted: G W 5.6.0-rc7-btrfs-next-58 #5 [134244.014162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [134244.016184] RIP: 0010:__btrfs_cow_block+0x6a7/0x790 [btrfs] [134244.017151] Code: 48 c7 c7 (...) [134244.020549] RSP: 0018:ffffa41607863888 EFLAGS: 00010286 [134244.021515] RAX: 0000000000000000 RBX: ffff9614bdfe09c8 RCX: 0000000000000000 [134244.022822] RDX: 0000000000000001 RSI: ffffffffb3d63980 RDI: 0000000000000001 [134244.024124] RBP: ffff961589e8c000 R08: 0000000000000000 R09: 0000000000000001 [134244.025424] R10: ffffffffc0ae5955 R11: 0000000000000000 R12: ffff9614bd530d08 [134244.026725] R13: ffff9614ced41b88 R14: ffff9614bdfe2a48 R15: 0000000000000000 [134244.028024] FS: 00007f29b63c08c0(0000) GS:ffff9615ba600000(0000) knlGS:0000000000000000 [134244.029491] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [134244.030560] CR2: 00007f4eb339b000 CR3: 0000000130d6e006 CR4: 00000000003606f0 [134244.031997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [134244.033153] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [134244.034484] Call Trace: [134244.034984] btrfs_cow_block+0x12b/0x2b0 [btrfs] [134244.035859] do_relocation+0x30b/0x790 [btrfs] [134244.036681] ? do_raw_spin_unlock+0x49/0xc0 [134244.037460] ? _raw_spin_unlock+0x29/0x40 [134244.038235] relocate_tree_blocks+0x37b/0x730 [btrfs] [134244.039245] relocate_block_group+0x388/0x770 [btrfs] [134244.040228] btrfs_relocate_block_group+0x161/0x2e0 [btrfs] [134244.041323] btrfs_relocate_chunk+0x36/0x110 [btrfs] [134244.041345] btrfs_balance+0xc06/0x1860 [btrfs] [134244.043382] ? btrfs_ioctl_balance+0x27c/0x310 [btrfs] [134244.045586] btrfs_ioctl_balance+0x1ed/0x310 [btrfs] [134244.045611] btrfs_ioctl+0x1880/0x3760 [btrfs] [134244.049043] ? do_raw_spin_unlock+0x49/0xc0 [134244.049838] ? _raw_spin_unlock+0x29/0x40 [134244.050587] ? __handle_mm_fault+0x11b3/0x14b0 [134244.051417] ? ksys_ioctl+0x92/0xb0 [134244.052070] ksys_ioctl+0x92/0xb0 [134244.052701] ? trace_hardirqs_off_thunk+0x1a/0x1c [134244.053511] __x64_sys_ioctl+0x16/0x20 [134244.054206] do_syscall_64+0x5c/0x280 [134244.054891] entry_SYSCALL_64_after_hwframe+0x49/0xbe [134244.055819] RIP: 0033:0x7f29b51c9dd7 [134244.056491] Code: 00 00 00 (...) [134244.059767] RSP: 002b:00007ffcccc1dd08 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [134244.061168] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f29b51c9dd7 [134244.062474] RDX: 00007ffcccc1dda0 RSI: 00000000c4009420 RDI: 0000000000000003 [134244.063771] RBP: 0000000000000003 R08: 00005565cea4b000 R09: 0000000000000000 [134244.065032] R10: 0000000000000541 R11: 0000000000000202 R12: 00007ffcccc2060a [134244.066327] R13: 00007ffcccc1dda0 R14: 0000000000000002 R15: 00007ffcccc1dec0 [134244.067626] irq event stamp: 0 [134244.068202] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [134244.069351] hardirqs last disabled at (0): [] copy_process+0x74f/0x2020 [134244.070909] softirqs last enabled at (0): [] copy_process+0x74f/0x2020 [134244.072392] softirqs last disabled at (0): [<0000000000000000>] 0x0 [134244.073432] ---[ end trace bd7c03622e0b0a99 ]--- The -EINVAL error comes from the following chain of function calls: __btrfs_cow_block() <-- aborts the transaction btrfs_reloc_cow_block() replace_file_extents() get_new_location() <-- returns -EINVAL When relocating a data block group, for each allocated extent of the block group, we preallocate another extent (at prealloc_file_extent_cluster()), associated with the data relocation inode, and then dirty all its pages. These preallocated extents have, and must have, the same size that extents from the data block group being relocated have. Later before we start the relocation stage that updates pointers (bytenr field of file extent items) to point to the the new extents, we trigger writeback for the data relocation inode. The expectation is that writeback will write the pages to the previously preallocated extents, that it follows the NOCOW path. That is generally the case, however, if a scrub is running it may have turned the block group that contains those extents into RO mode, in which case writeback falls back to the COW path. However in the COW path instead of allocating exactly one extent with the expected size, the allocator may end up allocating several smaller extents due to free space fragmentation - because we tell it at cow_file_range() that the minimum allocation size can match the filesystem's sector size. This later breaks the relocation's expectation that an extent associated to a file extent item in the data relocation inode has the same size as the respective extent pointed by a file extent item in another tree - in this case the extent to which the relocation inode poins to is smaller, causing relocation.c:get_new_location() to return -EINVAL. For example, if we are relocating a data block group X that has a logical address of X and the block group has an extent allocated at the logical address X + 128KiB with a size of 64KiB: 1) At prealloc_file_extent_cluster() we allocate an extent for the data relocation inode with a size of 64KiB and associate it to the file offset 128KiB (X + 128KiB - X) of the data relocation inode. This preallocated extent was allocated at block group Z; 2) A scrub running in parallel turns block group Z into RO mode and starts scrubing its extents; 3) Relocation triggers writeback for the data relocation inode; 4) When running delalloc (btrfs_run_delalloc_range()), we try first the NOCOW path because the data relocation inode has BTRFS_INODE_PREALLOC set in its flags. However, because block group Z is in RO mode, the NOCOW path (run_delalloc_nocow()) falls back into the COW path, by calling cow_file_range(); 5) At cow_file_range(), in the first iteration of the while loop we call btrfs_reserve_extent() to allocate a 64KiB extent and pass it a minimum allocation size of 4KiB (fs_info->sectorsize). Due to free space fragmentation, btrfs_reserve_extent() ends up allocating two extents of 32KiB each, each one on a different iteration of that while loop; 6) Writeback of the data relocation inode completes; 7) Relocation proceeds and ends up at relocation.c:replace_file_extents(), with a leaf which has a file extent item that points to the data extent from block group X, that has a logical address (bytenr) of X + 128KiB and a size of 64KiB. Then it calls get_new_location(), which does a lookup in the data relocation tree for a file extent item starting at offset 128KiB (X + 128KiB - X) and belonging to the data relocation inode. It finds a corresponding file extent item, however that item points to an extent that has a size of 32KiB, which doesn't match the expected size of 64KiB, resuling in -EINVAL being returned from this function and propagated up to __btrfs_cow_block(), which aborts the current transaction. To fix this make sure that at cow_file_range() when we call the allocator we pass it a minimum allocation size corresponding the desired extent size if the inode belongs to the data relocation tree, otherwise pass it the filesystem's sector size as the minimum allocation size. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48c8e5a4c912..0984601418e5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -975,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode, u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@ -1025,10 +1026,26 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(BTRFS_I(inode), start, start + num_bytes - 1, 0); + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; + while (num_bytes > 0) { cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; From 863a197f7f102bfbd42f79b40c50947e9019f011 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 10:38:44 +0100 Subject: [PATCH 147/174] btrfs: check if a log root exists before locking the log_mutex on unlink commit e7a79811d0db136dc2d336b56d54cf1b774ce972 upstream. This brings back an optimization that commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") removed, but in a different form. So it's almost equivalent to a revert. That commit removed an optimization where we avoid locking a root's log_mutex when there is no log tree created in the current transaction. The affected code path is triggered through unlink operations. That commit was based on the assumption that the optimization was not necessary because we used to have the following checks when the patch was authored: int btrfs_del_dir_entries_in_log(...) { (...) if (dir->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); (...) } int btrfs_del_inode_ref_in_log(...) { (...) if (inode->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); (...) } However before that patch was merged, another patch was merged first which replaced those checks because they were buggy. That other patch corresponds to commit 803f0f64d17769 ("Btrfs: fix fsync not persisting dentry deletions due to inode evictions"). The assumption that if the logged_trans field of an inode had a smaller value then the current transaction's generation (transid) meant that the inode was not logged in the current transaction was only correct if the inode was not evicted and reloaded in the current transaction. So the corresponding bug fix changed those checks and replaced them with the following helper function: static bool inode_logged(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { if (inode->logged_trans == trans->transid) return true; if (inode->last_trans == trans->transid && test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) && !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags)) return true; return false; } So if we have a subvolume without a log tree in the current transaction (because we had no fsyncs), every time we unlink an inode we can end up trying to lock the log_mutex of the root through join_running_log_trans() twice, once for the inode being unlinked (by btrfs_del_inode_ref_in_log()) and once for the parent directory (with btrfs_del_dir_entries_in_log()). This means if we have several unlink operations happening in parallel for inodes in the same subvolume, and the those inodes and/or their parent inode were changed in the current transaction, we end up having a lot of contention on the log_mutex. The test robots from intel reported a -30.7% performance regression for a REAIM test after commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans"). So just bring back the optimization to join_running_log_trans() where we check first if a log root exists before trying to lock the log_mutex. This is done by checking for a bit that is set on the root when a log tree is created and removed when a log tree is freed (at transaction commit time). Commit e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") was merged in the 5.4 merge window while commit 803f0f64d17769 ("Btrfs: fix fsync not persisting dentry deletions due to inode evictions") was merged in the 5.3 merge window. But the first commit was actually authored before the second commit (May 23 2019 vs June 19 2019). Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/20200611090233.GL12456@shao2-debian/ Fixes: e678934cbe5f02 ("btrfs: Remove unnecessary check from join_running_log_trans") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/tree-log.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d2c277c6e0a..36cd210ee2ef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -940,6 +940,8 @@ enum { BTRFS_ROOT_DEAD_RELOC_TREE, /* Mark dead root stored on device whose cleanup needs to be resumed */ BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used only for subvolume roots. */ + BTRFS_ROOT_HAS_LOG_TREE, }; /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7d464b049507..f46afbff668e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -167,6 +167,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ret) goto out; + set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); root->log_start_pid = current->pid; } @@ -193,6 +194,9 @@ static int join_running_log_trans(struct btrfs_root *root) { int ret = -ENOENT; + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state)) + return ret; + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; @@ -3327,6 +3331,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) if (root->log_root) { free_log_tree(trans, root->log_root); root->log_root = NULL; + clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); } return 0; } From a79c3a99ac8187d79ce7cc6be8329454d2321ef5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Jun 2020 18:48:58 +0100 Subject: [PATCH 148/174] btrfs: fix failure of RWF_NOWAIT write into prealloc extent beyond eof commit 4b1946284dd6641afdb9457101056d9e6ee6204c upstream. If we attempt to write to prealloc extent located after eof using a RWF_NOWAIT write, we always fail with -EAGAIN. We do actually check if we have an allocated extent for the write at the start of btrfs_file_write_iter() through a call to check_can_nocow(), but later when we go into the actual direct IO write path we simply return -EAGAIN if the write starts at or beyond EOF. Trivial to reproduce: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foo $ chattr +C /mnt/foo $ xfs_io -d -c "pwrite -S 0xab 0 64K" /mnt/foo wrote 65536/65536 bytes at offset 0 64 KiB, 16 ops; 0.0004 sec (135.575 MiB/sec and 34707.1584 ops/sec) $ xfs_io -c "falloc -k 64K 1M" /mnt/foo $ xfs_io -d -c "pwrite -N -V 1 -S 0xfe -b 64K 64K 64K" /mnt/foo pwrite: Resource temporarily unavailable On xfs and ext4 the write succeeds, as expected. Fix this by removing the wrong check at btrfs_direct_IO(). Fixes: edf064e7c6fec3 ("btrfs: nowait aio support") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0984601418e5..280c45c91ddc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8857,9 +8857,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } ret = btrfs_delalloc_reserve_space(inode, &data_reserved, offset, count); From fbca1aee13974bade598c9d0906ef89d1bdc63c3 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 25 Jun 2020 20:29:52 -0700 Subject: [PATCH 149/174] mm/slab: use memzero_explicit() in kzfree() commit 8982ae527fbef170ef298650c15d55a9ccd33973 upstream. The kzfree() function is normally used to clear some sensitive information, like encryption keys, in the buffer before freeing it back to the pool. Memset() is currently used for buffer clearing. However unlikely, there is still a non-zero probability that the compiler may choose to optimize away the memory clearing especially if LTO is being used in the future. To make sure that this optimization will never happen, memzero_explicit(), which is introduced in v3.18, is now used in kzfree() to future-proof it. Link: http://lkml.kernel.org/r/20200616154311.12314-2-longman@redhat.com Fixes: 3ef0e5ba4673 ("slab: introduce kzfree()") Signed-off-by: Waiman Long Acked-by: Michal Hocko Cc: David Howells Cc: Jarkko Sakkinen Cc: James Morris Cc: "Serge E. Hallyn" Cc: Joe Perches Cc: Matthew Wilcox Cc: David Rientjes Cc: Johannes Weiner Cc: Dan Carpenter Cc: "Jason A . Donenfeld" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index ade6c257d4b4..8c1ffbf7de45 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1740,7 +1740,7 @@ void kzfree(const void *p) if (unlikely(ZERO_OR_NULL_PTR(mem))) return; ks = ksize(mem); - memset(mem, 0, ks); + memzero_explicit(mem, ks); kfree(mem); } EXPORT_SYMBOL(kzfree); From 7fa716a594a6ec6830222260f6502b25f3f86ed5 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 25 Jun 2020 20:29:30 -0700 Subject: [PATCH 150/174] ocfs2: avoid inode removal while nfsd is accessing it commit 4cd9973f9ff69e37dd0ba2bd6e6423f8179c329a upstream. Patch series "ocfs2: fix nfsd over ocfs2 issues", v2. This is a series of patches to fix issues on nfsd over ocfs2. patch 1 is to avoid inode removed while nfsd access it patch 2 & 3 is to fix a panic issue. This patch (of 4): When nfsd is getting file dentry using handle or parent dentry of some dentry, one cluster lock is used to avoid inode removed from other node, but it still could be removed from local node, so use a rw lock to avoid this. Link: http://lkml.kernel.org/r/20200616183829.87211-1-junxiao.bi@oracle.com Link: http://lkml.kernel.org/r/20200616183829.87211-2-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 17 ++++++++++++++++- fs/ocfs2/ocfs2.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8a2e284ccfcd..e2c34c704185 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb) +{ + ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + init_rwsem(&osb->nfs_sync_rwlock); +} + void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) { struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; @@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ex) + down_write(&osb->nfs_sync_rwlock); + else + down_read(&osb->nfs_sync_rwlock); + if (ocfs2_mount_local(osb)) return 0; @@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) if (!ocfs2_mount_local(osb)) ocfs2_cluster_unlock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE); + if (ex) + up_write(&osb->nfs_sync_rwlock); + else + up_read(&osb->nfs_sync_rwlock); } int ocfs2_trim_fs_lock(struct ocfs2_super *osb, @@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); - ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + ocfs2_nfs_sync_lock_init(osb); ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); osb->cconn = conn; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9150cfa4df7d..9461bd3e1c0c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -394,6 +394,7 @@ struct ocfs2_super struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; + struct rw_semaphore nfs_sync_rwlock; struct ocfs2_lock_res osb_trim_fs_lockres; struct mutex obs_trim_fs_mutex; struct ocfs2_dlm_debug *osb_dlm_debug; From 4685df862c8b8fcc16d754f3135e2c7ee4baf5de Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 25 Jun 2020 20:29:33 -0700 Subject: [PATCH 151/174] ocfs2: load global_inode_alloc commit 7569d3c754e452769a5747eeeba488179e38a5da upstream. Set global_inode_alloc as OCFS2_FIRST_ONLINE_SYSTEM_INODE, that will make it load during mount. It can be used to test whether some global/system inodes are valid. One use case is that nfsd will test whether root inode is valid. Link: http://lkml.kernel.org/r/20200616183829.87211-3-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/ocfs2_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 0db4a7ec58a2..46700f9b6b4c 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -326,8 +326,8 @@ struct ocfs2_system_inode_info { enum { BAD_BLOCK_SYSTEM_INODE = 0, GLOBAL_INODE_ALLOC_SYSTEM_INODE, +#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE, -#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE HEARTBEAT_SYSTEM_INODE, GLOBAL_BITMAP_SYSTEM_INODE, USER_QUOTA_SYSTEM_INODE, From a8d82ebaee9747302d6ec9cbed0ced84943ebacc Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 25 Jun 2020 20:29:40 -0700 Subject: [PATCH 152/174] ocfs2: fix value of OCFS2_INVALID_SLOT commit 9277f8334ffc719fe922d776444d6e4e884dbf30 upstream. In the ocfs2 disk layout, slot number is 16 bits, but in ocfs2 implementation, slot number is 32 bits. Usually this will not cause any issue, because slot number is converted from u16 to u32, but OCFS2_INVALID_SLOT was defined as -1, when an invalid slot number from disk was obtained, its value was (u16)-1, and it was converted to u32. Then the following checking in get_local_system_inode will be always skipped: static struct inode **get_local_system_inode(struct ocfs2_super *osb, int type, u32 slot) { BUG_ON(slot == OCFS2_INVALID_SLOT); ... } Link: http://lkml.kernel.org/r/20200616183829.87211-5-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/ocfs2_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 46700f9b6b4c..dcef83c8796d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -290,7 +290,7 @@ #define OCFS2_MAX_SLOTS 255 /* Slot map indicator for an empty slot */ -#define OCFS2_INVALID_SLOT -1 +#define OCFS2_INVALID_SLOT ((u16)-1) #define OCFS2_VOL_UUID_LEN 16 #define OCFS2_MAX_VOL_LABEL_LEN 64 From ff6aff13a8cfc421418b902647ac84bf027bedb9 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 25 Jun 2020 20:29:37 -0700 Subject: [PATCH 153/174] ocfs2: fix panic on nfs server over ocfs2 commit e5a15e17a78d58f933d17cafedfcf7486a29f5b4 upstream. The following kernel panic was captured when running nfs server over ocfs2, at that time ocfs2_test_inode_bit() was checking whether one inode locating at "blkno" 5 was valid, that is ocfs2 root inode, its "suballoc_slot" was OCFS2_INVALID_SLOT(65535) and it was allocted from //global_inode_alloc, but here it wrongly assumed that it was got from per slot inode alloctor which would cause array overflow and trigger kernel panic. BUG: unable to handle kernel paging request at 0000000000001088 IP: [] _raw_spin_lock+0x18/0xf0 PGD 1e06ba067 PUD 1e9e7d067 PMD 0 Oops: 0002 [#1] SMP CPU: 6 PID: 24873 Comm: nfsd Not tainted 4.1.12-124.36.1.el6uek.x86_64 #2 Hardware name: Huawei CH121 V3/IT11SGCA1, BIOS 3.87 02/02/2018 RIP: _raw_spin_lock+0x18/0xf0 RSP: e02b:ffff88005ae97908 EFLAGS: 00010206 RAX: ffff88005ae98000 RBX: 0000000000001088 RCX: 0000000000000000 RDX: 0000000000020000 RSI: 0000000000000009 RDI: 0000000000001088 RBP: ffff88005ae97928 R08: 0000000000000000 R09: ffff880212878e00 R10: 0000000000007ff0 R11: 0000000000000000 R12: 0000000000001088 R13: ffff8800063c0aa8 R14: ffff8800650c27d0 R15: 000000000000ffff FS: 0000000000000000(0000) GS:ffff880218180000(0000) knlGS:ffff880218180000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000001088 CR3: 00000002033d0000 CR4: 0000000000042660 Call Trace: igrab+0x1e/0x60 ocfs2_get_system_file_inode+0x63/0x3a0 [ocfs2] ocfs2_test_inode_bit+0x328/0xa00 [ocfs2] ocfs2_get_parent+0xba/0x3e0 [ocfs2] reconnect_path+0xb5/0x300 exportfs_decode_fh+0xf6/0x2b0 fh_verify+0x350/0x660 [nfsd] nfsd4_putfh+0x4d/0x60 [nfsd] nfsd4_proc_compound+0x3d3/0x6f0 [nfsd] nfsd_dispatch+0xe0/0x290 [nfsd] svc_process_common+0x412/0x6a0 [sunrpc] svc_process+0x123/0x210 [sunrpc] nfsd+0xff/0x170 [nfsd] kthread+0xcb/0xf0 ret_from_fork+0x61/0x90 Code: 83 c2 02 0f b7 f2 e8 18 dc 91 ff 66 90 eb bf 0f 1f 40 00 55 48 89 e5 41 56 41 55 41 54 53 0f 1f 44 00 00 48 89 fb ba 00 00 02 00 0f c1 17 89 d0 45 31 e4 45 31 ed c1 e8 10 66 39 d0 41 89 c6 RIP _raw_spin_lock+0x18/0xf0 CR2: 0000000000001088 ---[ end trace 7264463cd1aac8f9 ]--- Kernel panic - not syncing: Fatal exception Link: http://lkml.kernel.org/r/20200616183829.87211-4-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/suballoc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 69c21a3843af..503e724d39f5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2827,9 +2827,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) goto bail; } - inode_alloc_inode = - ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, - suballoc_slot); + if (suballoc_slot == (u16)OCFS2_INVALID_SLOT) + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot); + else + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + INODE_ALLOC_SYSTEM_INODE, suballoc_slot); if (!inode_alloc_inode) { /* the error code could be inaccurate, but we are not able to * get the correct one. */ From 7a9e3e25a9d25b58d565f79e0da9ed494e9952ea Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 25 Jun 2020 20:30:19 -0700 Subject: [PATCH 154/174] mm/memcontrol.c: add missed css_put() commit 3a98990ae2150277ed34d3b248c60e68bf2244b2 upstream. We should put the css reference when memory allocation failed. Link: http://lkml.kernel.org/r/20200614122653.98829-1-songmuchun@bytedance.com Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management") Signed-off-by: Muchun Song Acked-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Qian Cai Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0d6f3ea86738..a3f4c35bb5fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2895,8 +2895,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, return; cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN); - if (!cw) + if (!cw) { + css_put(&memcg->css); return; + } cw->memcg = memcg; cw->cachep = cachep; From 73f79b420bd0e2f93ee8896dca07313b8ab3eb0a Mon Sep 17 00:00:00 2001 From: Jiping Ma Date: Mon, 11 May 2020 10:52:07 +0800 Subject: [PATCH 155/174] arm64: perf: Report the PC value in REGS_ABI_32 mode commit 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 upstream. A 32-bit perf querying the registers of a compat task using REGS_ABI_32 will receive zeroes from w15, when it expects to find the PC. Return the PC value for register dwarf register 15 when returning register values for a compat task to perf. Cc: Acked-by: Mark Rutland Signed-off-by: Jiping Ma Link: https://lore.kernel.org/r/1589165527-188401-1-git-send-email-jiping.ma2@windriver.com [will: Shuffled code and added a comment] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_regs.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 0bbac612146e..666b225aeb3a 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return 0; /* - * Compat (i.e. 32 bit) mode: - * - PC has been set in the pt_regs struct in kernel_entry, - * - Handle SP and LR here. + * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but + * we're stuck with it for ABI compatability reasons. + * + * For a 32-bit consumer inspecting a 32-bit task, then it will look at + * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). + * These correspond directly to a prefix of the registers saved in our + * 'struct pt_regs', with the exception of the PC, so we copy that down + * (x15 corresponds to SP_hyp in the architecture). + * + * So far, so good. + * + * The oddity arises when a 64-bit consumer looks at a 32-bit task and + * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return + * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and + * PC registers would normally live. The initial idea was to allow a + * 64-bit unwinder to unwind a 32-bit task and, although it's not clear + * how well that works in practice, somebody might be relying on it. + * + * At the time we make a sample, we don't know whether the consumer is + * 32-bit or 64-bit, so we have to cater for both possibilities. */ if (compat_user_mode(regs)) { if ((u32)idx == PERF_REG_ARM64_SP) return regs->compat_sp; if ((u32)idx == PERF_REG_ARM64_LR) return regs->compat_lr; + if (idx == 15) + return regs->pc; } if ((u32)idx == PERF_REG_ARM64_SP) From 90bd9c611f21b7461d199f2e331cdde1563c72e3 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:50 +0800 Subject: [PATCH 156/174] arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range commit 4fd6b5735c03c0955d93960d31f17d7144f5578f upstream. Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just corrrect the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too. Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mm datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 78cc25fa265d ("arm64: dts: imx8mm-evk: Add BD71847 PMIC") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/freescale/imx8mm-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts index 13137451b438..b9f8b7aac8ff 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts @@ -231,7 +231,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -239,7 +239,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; From c036eb65fdfc1a5d43f7edf3d6e6fb562bde5120 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:51 +0800 Subject: [PATCH 157/174] arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range commit cfb12c8952f617df58d73d24161e539a035d82b0 upstream. Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just corrrect the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too. Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mn datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 11c705d225d0..9ad1d43b8ce7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -268,7 +268,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -276,7 +276,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; From 9a59a88b3d3144136f78f5d83a23b80a47178811 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 20 Jun 2020 12:46:03 +0900 Subject: [PATCH 158/174] tracing: Fix event trigger to accept redundant spaces commit 6784beada631800f2c5afd567e5628c843362cee upstream. Fix the event trigger to accept redundant spaces in the trigger input. For example, these return -EINVAL echo " traceon" > events/ftrace/print/trigger echo "traceon if common_pid == 0" > events/ftrace/print/trigger echo "disable_event:kmem:kmalloc " > events/ftrace/print/trigger But these are hard to find what is wrong. To fix this issue, use skip_spaces() to remove spaces in front of actual tokens, and set NULL if there is no token. Link: http://lkml.kernel.org/r/159262476352.185015.5261566783045364186.stgit@devnote2 Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: 85f2b08268c0 ("tracing: Add basic event trigger framework") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index de840de87a18..e913d41a4194 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -216,11 +216,17 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) static int trigger_process_regex(struct trace_event_file *file, char *buff) { - char *command, *next = buff; + char *command, *next; struct event_command *p; int ret = -EINVAL; + next = buff = skip_spaces(buff); command = strsep(&next, ": \t"); + if (next) { + next = skip_spaces(next); + if (!*next) + next = NULL; + } command = (command[0] != '!') ? command : command + 1; mutex_lock(&trigger_cmd_mutex); @@ -630,8 +636,14 @@ event_trigger_callback(struct event_command *cmd_ops, int ret; /* separate the trigger from the filter (t:n [if filter]) */ - if (param && isdigit(param[0])) + if (param && isdigit(param[0])) { trigger = strsep(¶m, " \t"); + if (param) { + param = skip_spaces(param); + if (!*param) + param = NULL; + } + } trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); @@ -1368,6 +1380,11 @@ int event_enable_trigger_func(struct event_command *cmd_ops, trigger = strsep(¶m, " \t"); if (!trigger) return -EINVAL; + if (param) { + param = skip_spaces(param); + if (!*param) + param = NULL; + } system = strsep(&trigger, ":"); if (!trigger) From 0b3cc973f16fab7507297eaec1c0c262020b08c9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 22 Jun 2020 15:18:15 -0400 Subject: [PATCH 159/174] ring-buffer: Zero out time extend if it is nested and not absolute commit 097350d1c6e1f5808cae142006f18a0bbc57018d upstream. Currently the ring buffer makes events that happen in interrupts that preempt another event have a delta of zero. (Hopefully we can change this soon). But this is to deal with the races of updating a global counter with lockless and nesting functions updating deltas. With the addition of absolute time stamps, the time extend didn't follow this rule. A time extend can happen if two events happen longer than 2^27 nanoseconds appart, as the delta time field in each event is only 27 bits. If that happens, then a time extend is injected with 2^59 bits of nanoseconds to use (18 years). But if the 2^27 nanoseconds happen between two events, and as it is writing the event, an interrupt triggers, it will see the 2^27 difference as well and inject a time extend of its own. But a recent change made the time extend logic not take into account the nesting, and this can cause two time extend deltas to happen moving the time stamp much further ahead than the current time. This gets all reset when the ring buffer moves to the next page, but that can cause time to appear to go backwards. This was observed in a trace-cmd recording, and since the data is saved in a file, with trace-cmd report --debug, it was possible to see that this indeed did happen! bash-52501 110d... 81778.908247: sched_switch: bash:52501 [120] S ==> swapper/110:0 [120] [12770284:0x2e8:64] -0 110d... 81778.908757: sched_switch: swapper/110:0 [120] R ==> bash:52501 [120] [509947:0x32c:64] TIME EXTEND: delta:306454770 length:0 bash-52501 110.... 81779.215212: sched_swap_numa: src_pid=52501 src_tgid=52388 src_ngid=52501 src_cpu=110 src_nid=2 dst_pid=52509 dst_tgid=52388 dst_ngid=52501 dst_cpu=49 dst_nid=1 [0:0x378:48] TIME EXTEND: delta:306458165 length:0 bash-52501 110dNh. 81779.521670: sched_wakeup: migration/110:565 [0] success=1 CPU:110 [0:0x3b4:40] and at the next page, caused the time to go backwards: bash-52504 110d... 81779.685411: sched_switch: bash:52504 [120] S ==> swapper/110:0 [120] [8347057:0xfb4:64] CPU:110 [SUBBUFFER START] [81779379165886:0x1320000] -0 110dN.. 81779.379166: sched_wakeup: bash:52504 [120] success=1 CPU:110 [0:0x10:40] -0 110d... 81779.379167: sched_switch: swapper/110:0 [120] R ==> bash:52504 [120] [1168:0x3c:64] Link: https://lkml.kernel.org/r/20200622151815.345d1bf5@oasis.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: dc4e2801d400b ("ring-buffer: Redefine the unimplemented RINGBUF_TYPE_TIME_STAMP") Reported-by: Julia Lawall Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4bf050fcfe3b..9a2581fe7ed5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2358,7 +2358,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(info->add_timestamp)) { bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); - event = rb_add_time_stamp(event, info->delta, abs); + event = rb_add_time_stamp(event, abs ? info->delta : delta, abs); length -= RB_LEN_TIME_EXTEND; delta = 0; } From 176a3c488476d077114d54810e221634aff94659 Mon Sep 17 00:00:00 2001 From: Bernard Zhao Date: Sat, 20 Jun 2020 17:11:52 +0800 Subject: [PATCH 160/174] drm/amd: fix potential memleak in err branch commit b5b78a6c8d8cb9c307bc6b16a754603424459d6e upstream. The function kobject_init_and_add alloc memory like: kobject_init_and_add->kobject_add_varg->kobject_set_name_vargs ->kvasprintf_const->kstrdup_const->kstrdup->kmalloc_track_caller ->kmalloc_slab, in err branch this memory not free. If use kmemleak, this path maybe catched. These changes are to add kobject_put in kobject_init_and_add failed branch, fix potential memleak. Signed-off-by: Bernard Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 40e3fc0c6942..aa0a617b8d44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -312,6 +312,7 @@ struct kfd_process *kfd_create_process(struct file *filep) (int)process->lead_thread->pid); if (ret) { pr_warn("Creating procfs pid directory failed"); + kobject_put(process->kobj); goto out; } From 834a3aa2ceb4369f39b8b8b82dd68d278934623f Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 18 May 2020 22:16:46 +0200 Subject: [PATCH 161/174] drm: rcar-du: Fix build error commit 5f9af404eec82981c4345c9943be48422234e7ab upstream. Select DRM_KMS_HELPER dependency. Build error when DRM_KMS_HELPER is not selected: drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd48): undefined reference to `drm_atomic_helper_bridge_duplicate_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd50): undefined reference to `drm_atomic_helper_bridge_destroy_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xd70): undefined reference to `drm_atomic_helper_bridge_reset' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xdc8): undefined reference to `drm_atomic_helper_connector_reset' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xde0): undefined reference to `drm_helper_probe_single_connector_modes' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xe08): undefined reference to `drm_atomic_helper_connector_duplicate_state' drivers/gpu/drm/rcar-du/rcar_lvds.o:(.rodata+0xe10): undefined reference to `drm_atomic_helper_connector_destroy_state' Fixes: c6a27fa41fab ("drm: rcar-du: Convert LVDS encoder code to bridge driver") Cc: Signed-off-by: Daniel Gomez Reviewed-by: Emil Velikov Reviewed-by: Kieran Bingham Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rcar-du/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 1529849e217e..7cdba77b1420 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -23,6 +23,7 @@ config DRM_RCAR_DW_HDMI config DRM_RCAR_LVDS tristate "R-Car DU LVDS Encoder Support" depends on DRM && DRM_BRIDGE && OF + select DRM_KMS_HELPER select DRM_PANEL select OF_FLATTREE select OF_OVERLAY From 05124abe1fb0deb9df35445885f89043eddbba9a Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 22 Jun 2020 23:31:22 +0300 Subject: [PATCH 162/174] drm/radeon: fix fb_div check in ni_init_smc_spll_table() commit 35f760b44b1b9cb16a306bdcc7220fbbf78c4789 upstream. clk_s is checked twice in a row in ni_init_smc_spll_table(). fb_div should be checked instead. Fixes: 69e0b57a91ad ("drm/radeon/kms: add dpm support for cayman (v5)") Cc: stable@vger.kernel.org Signed-off-by: Denis Efremov Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ni_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index d9e62ca65ab8..bd2e577c701f 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -2128,7 +2128,7 @@ static int ni_init_smc_spll_table(struct radeon_device *rdev) if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT)) ret = -EINVAL; - if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT)) + if (fb_div & ~(SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_SHIFT)) ret = -EINVAL; if (clk_v & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_SHIFT)) From c6f88afa6ae1485f505a860700f52e6a2d32b7dd Mon Sep 17 00:00:00 2001 From: Wenhui Sheng Date: Thu, 18 Jun 2020 15:37:04 +0800 Subject: [PATCH 163/174] drm/amdgpu: add fw release for sdma v5_0 commit edfaf6fa73f15568d4337f208b2333f647c35810 upstream. sdma fw isn't released when module exit Reviewed-by: Hawking Zhang Signed-off-by: Wenhui Sheng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index bd715012185c..23fc16dc92b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1273,8 +1273,12 @@ static int sdma_v5_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + } return 0; } From 60bdb51d44faf3723aa037e501eb84d500fb4dfd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Jun 2020 13:19:58 +0300 Subject: [PATCH 164/174] Staging: rtl8723bs: prevent buffer overflow in update_sta_support_rate() commit b65a2d8c8614386f7e8d38ea150749f8a862f431 upstream. The "ie_len" variable is in the 0-255 range and it comes from the network. If it's over NDIS_802_11_LENGTH_RATES_EX (16) then that will lead to memory corruption. Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/20200603101958.GA1845750@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_wlan_util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c index ea3ea2a6b314..f6678ba6d4bc 100644 --- a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c +++ b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c @@ -1845,12 +1845,14 @@ int update_sta_support_rate(struct adapter *padapter, u8 *pvar_ie, uint var_ie_l pIE = (struct ndis_80211_var_ie *)rtw_get_ie(pvar_ie, _SUPPORTEDRATES_IE_, &ie_len, var_ie_len); if (!pIE) return _FAIL; + if (ie_len > sizeof(pmlmeinfo->FW_sta_info[cam_idx].SupportedRates)) + return _FAIL; memcpy(pmlmeinfo->FW_sta_info[cam_idx].SupportedRates, pIE->data, ie_len); supportRateNum = ie_len; pIE = (struct ndis_80211_var_ie *)rtw_get_ie(pvar_ie, _EXT_SUPPORTEDRATES_IE_, &ie_len, var_ie_len); - if (pIE) + if (pIE && (ie_len <= sizeof(pmlmeinfo->FW_sta_info[cam_idx].SupportedRates) - supportRateNum)) memcpy((pmlmeinfo->FW_sta_info[cam_idx].SupportedRates + supportRateNum), pIE->data, ie_len); return _SUCCESS; From c27d205baa8255ad152f26064ecf1fddd0258a5f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 1 Jun 2020 11:54:57 +0300 Subject: [PATCH 165/174] sunrpc: fixed rollback in rpc_gssd_dummy_populate() commit b7ade38165ca0001c5a3bd5314a314abbbfbb1b7 upstream. __rpc_depopulate(gssd_dentry) was lost on error path cc: stable@vger.kernel.org Fixes: commit 4b9a445e3eeb ("sunrpc: create a new dummy pipe for gssd to hold open") Signed-off-by: Vasily Averin Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/rpc_pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b71a39ded930..37792675ed57 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { + __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } From 7b99577ff376d58addf149eebe0ffff46351b3d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Jun 2020 11:32:34 -0400 Subject: [PATCH 166/174] SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment() commit 89a3c9f5b9f0bcaa9aea3e8b2a616fcaea9aad78 upstream. @subbuf is an output parameter of xdr_buf_subsegment(). A survey of call sites shows that @subbuf is always uninitialized before xdr_buf_segment() is invoked by callers. There are some execution paths through xdr_buf_subsegment() that do not set all of the fields in @subbuf, leaving some pointer fields containing garbage addresses. Subsequent processing of that buffer then results in a page fault. Signed-off-by: Chuck Lever Cc: Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 451ca7ec321c..7ef37054071f 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->head[0].iov_len; + subbuf->head[0].iov_base = buf->head[0].iov_base; subbuf->head[0].iov_len = 0; } @@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->page_len; + subbuf->pages = buf->pages; + subbuf->page_base = 0; subbuf->page_len = 0; } @@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->tail[0].iov_len; + subbuf->tail[0].iov_base = buf->tail[0].iov_base; subbuf->tail[0].iov_len = 0; } From 02917bef8f1b08cde7027446177c5cb288456f99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Jun 2020 15:04:15 -0400 Subject: [PATCH 167/174] pNFS/flexfiles: Fix list corruption if the mirror count changes commit 8b04013737341442ed914b336cde866b902664ae upstream. If the mirror count changes in the new layout we pick up inside ff_layout_pg_init_write(), then we can end up adding the request to the wrong mirror and corrupting the mirror->pg_list. Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5657b7f2611f..1741d902b0d8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -984,9 +984,8 @@ retry: goto out_mds; /* Use a direct mapping of ds_idx to pgio mirror_idx */ - if (WARN_ON_ONCE(pgio->pg_mirror_count != - FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) - goto out_mds; + if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)) + goto out_eagain; for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); @@ -1008,7 +1007,10 @@ retry: (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; return; - +out_eagain: + pnfs_generic_pg_cleanup(pgio); + pgio->pg_error = -EAGAIN; + return; out_mds: trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_RW, @@ -1018,6 +1020,7 @@ out_mds: pgio->pg_lseg = NULL; pgio->pg_maxretrans = 0; nfs_pageio_reset_write_mds(pgio); + pgio->pg_error = -EAGAIN; } static unsigned int From 4d35ca872ac3c7464f062231f6a55444dbbcc106 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 24 Jun 2020 13:54:08 -0400 Subject: [PATCH 168/174] NFSv4 fix CLOSE not waiting for direct IO compeletion commit d03727b248d0dae6199569a8d7b629a681154633 upstream. Figuring out the root case for the REMOVE/CLOSE race and suggesting the solution was done by Neil Brown. Currently what happens is that direct IO calls hold a reference on the open context which is decremented as an asynchronous task in the nfs_direct_complete(). Before reference is decremented, control is returned to the application which is free to close the file. When close is being processed, it decrements its reference on the open_context but since directIO still holds one, it doesn't sent a close on the wire. It returns control to the application which is free to do other operations. For instance, it can delete a file. Direct IO is finally releasing its reference and triggering an asynchronous close. Which races with the REMOVE. On the server, REMOVE can be processed before the CLOSE, failing the REMOVE with EACCES as the file is still opened. Signed-off-by: Olga Kornievskaia Suggested-by: Neil Brown CC: stable@vger.kernel.org Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 13 +++++++++---- fs/nfs/file.c | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6b0bf4ebd812..70cf8c5760c7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -367,8 +367,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode; - inode_dio_end(inode); - if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -380,7 +378,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) complete(&dreq->completion); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); } static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -510,8 +511,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; } @@ -923,8 +926,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 95dc90570786..7b3136753205 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); + inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; } From de1d70dad6f22b5d02c04e4d127463b51c727c2c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 Jun 2020 09:21:13 -0400 Subject: [PATCH 169/174] xprtrdma: Fix handling of RDMA_ERROR replies commit 7b2182ec381f8ea15c7eb1266d6b5d7da620ad93 upstream. The RPC client currently doesn't handle ERR_CHUNK replies correctly. rpcrdma_complete_rqst() incorrectly passes a negative number to xprt_complete_rqst() as the number of bytes copied. Instead, set task->tk_status to the error value, and return zero bytes copied. In these cases, return -EIO rather than -EREMOTEIO. The RPC client's finite state machine doesn't know what to do with -EREMOTEIO. Additional clean ups: - Don't double-count RDMA_ERROR replies - Remove a stale comment Signed-off-by: Chuck Lever Cc: Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ef5102b60589..c56e6cfc4a62 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1246,8 +1246,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, be32_to_cpup(p), be32_to_cpu(rep->rr_xid)); } - r_xprt->rx_stats.bad_reply_count++; - return -EREMOTEIO; + return -EIO; } /* Perform XID lookup, reconstruction of the RPC reply, and @@ -1284,13 +1283,11 @@ out: spin_unlock(&xprt->queue_lock); return; -/* If the incoming reply terminated a pending RPC, the next - * RPC call will post a replacement receive buffer as it is - * being marshaled. - */ out_badheader: trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; + rqst->rq_task->tk_status = status; + status = 0; goto out; } From a51e71cbf6e64b170ef01a78f94df01cb46acb9b Mon Sep 17 00:00:00 2001 From: Huaisheng Ye Date: Fri, 12 Jun 2020 23:59:11 +0800 Subject: [PATCH 170/174] dm writecache: correct uncommitted_block when discarding uncommitted entry commit 39495b12ef1cf602e6abd350dce2ef4199906531 upstream. When uncommitted entry has been discarded, correct wc->uncommitted_block for getting the exact number. Fixes: 48debafe4f2fe ("dm: add writecache target") Cc: stable@vger.kernel.org Signed-off-by: Huaisheng Ye Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-writecache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 0d6ca723257f..64d75bbefda1 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -805,6 +805,8 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ writecache_wait_for_ios(wc, WRITE); discarded_something = true; } + if (!writecache_entry_is_committed(wc, e)) + wc->uncommitted_blocks--; writecache_free_entry(wc, e); } From cc66553004f4589f76804fb56c19331a1c00a043 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 19 Jun 2020 11:51:34 -0400 Subject: [PATCH 171/174] dm writecache: add cond_resched to loop in persistent_memory_claim() commit d35bd764e6899a7bea71958f08d16cea5bfa1919 upstream. Add cond_resched() to a loop that fills in the mapper memory area because the loop can be executed many times. Fixes: 48debafe4f2fe ("dm: add writecache target") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-writecache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 64d75bbefda1..67eb4e972cc3 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -279,6 +279,8 @@ static int persistent_memory_claim(struct dm_writecache *wc) while (daa-- && i < p) { pages[i++] = pfn_t_to_page(pfn); pfn.val++; + if (!(i & 15)) + cond_resched(); } } while (i < p); wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); From ffd40b7962d463daa531a8110e5b708bcb5c6da7 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Fri, 21 Feb 2020 07:38:20 -0800 Subject: [PATCH 172/174] xfs: add agf freeblocks verify in xfs_agf_verify [ Upstream commit d0c7feaf87678371c2c09b3709400be416b2dc62 ] We recently used fuzz(hydra) to test XFS and automatically generate tmp.img(XFS v5 format, but some metadata is wrong) xfs_repair information(just one AG): agf_freeblks 0, counted 3224 in ag 0 agf_longest 536874136, counted 3224 in ag 0 sb_fdblocks 613, counted 3228 Test as follows: mount tmp.img tmpdir cp file1M tmpdir sync In 4.19-stable, sync will stuck, the reason is: xfs_mountfs xfs_check_summary_counts if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) || XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) && !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) return 0; -->just return, incore sb_fdblocks still be 613 xfs_initialize_perag_data cp file1M tmpdir -->ok(write file to pagecache) sync -->stuck(write pagecache to disk) xfs_map_blocks xfs_iomap_write_allocate while (count_fsb != 0) { nimaps = 0; while (nimaps == 0) { --> endless loop nimaps = 1; xfs_bmapi_write(..., &nimaps) --> nimaps becomes 0 again xfs_bmapi_write xfs_bmap_alloc xfs_bmap_btalloc xfs_alloc_vextent xfs_alloc_fix_freelist xfs_alloc_space_available -->fail(agf_freeblks is 0) In linux-next, sync not stuck, cause commit c2b3164320b5 ("xfs: use the latest extent at writeback delalloc conversion time") remove the above while, dmesg is as follows: [ 55.250114] XFS (loop0): page discard on page ffffea0008bc7380, inode 0x1b0c, offset 0. Users do not know why this page is discard, the better soultion is: 1. Like xfs_repair, make sure sb_fdblocks is equal to counted (xfs_initialize_perag_data did this, who is not called at this mount) 2. Add agf verify, if fail, will tell users to repair This patch use the second soultion. Signed-off-by: Zheng Bin Signed-off-by: Ren Xudong Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_alloc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 533b04aaf6f6..0a36f532cf86 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2598,6 +2598,13 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return __this_address; + if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) + return __this_address; + + if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || + be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) + return __this_address; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || @@ -2609,6 +2616,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return __this_address; + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && + be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) + return __this_address; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2622,6 +2633,11 @@ xfs_agf_verify( be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) return __this_address; + if (xfs_sb_version_hasreflink(&mp->m_sb) && + be32_to_cpu(agf->agf_refcount_blocks) > + be32_to_cpu(agf->agf_length)) + return __this_address; + if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) From a160afebd779bd2fdb0b21dd1680490b1e248d5d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 May 2020 10:22:44 +0200 Subject: [PATCH 173/174] Revert "tty: hvc: Fix data abort due to race in hvc_open" commit cf9c94456ebafc6d75a834e58dfdc8ae71a3acbc upstream. This reverts commit e2bd1dcbe1aa34ff5570b3427c530e4332ecf0fe. In discussion on the mailing list, it has been determined that this is not the correct type of fix for this issue. Revert it so that we can do this correctly. Reported-by: Jiri Slaby Reported-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200428032601.22127-1-rananta@codeaurora.org Cc: Raghavendra Rao Ananta Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_console.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index f8e43a6faea9..cdcc64ea2554 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -75,8 +75,6 @@ static LIST_HEAD(hvc_structs); */ static DEFINE_MUTEX(hvc_structs_mutex); -/* Mutex to serialize hvc_open */ -static DEFINE_MUTEX(hvc_open_mutex); /* * This value is used to assign a tty->index value to a hvc_struct based * upon order of exposure via hvc_probe(), when we can not match it to @@ -348,24 +346,16 @@ static int hvc_install(struct tty_driver *driver, struct tty_struct *tty) */ static int hvc_open(struct tty_struct *tty, struct file * filp) { - struct hvc_struct *hp; + struct hvc_struct *hp = tty->driver_data; unsigned long flags; int rc = 0; - mutex_lock(&hvc_open_mutex); - - hp = tty->driver_data; - if (!hp) { - rc = -EIO; - goto out; - } - spin_lock_irqsave(&hp->port.lock, flags); /* Check and then increment for fast path open. */ if (hp->port.count++ > 0) { spin_unlock_irqrestore(&hp->port.lock, flags); hvc_kick(); - goto out; + return 0; } /* else count == 0 */ spin_unlock_irqrestore(&hp->port.lock, flags); @@ -393,8 +383,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) /* Force wakeup of the polling thread */ hvc_kick(); -out: - mutex_unlock(&hvc_open_mutex); return rc; } From e75220890bf6b37c5f7b1dbd81d8292ed6d96643 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 30 Jun 2020 16:21:55 -0400 Subject: [PATCH 174/174] Linux 5.4.50 Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 72230ad23299..380e398b2995 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 49 +SUBLEVEL = 50 EXTRAVERSION = NAME = Kleptomaniac Octopus