From d8fe0af636c54074c293b8c553e05614420e91e3 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 18 Nov 2019 14:02:52 +0100 Subject: [PATCH 001/191] drm/mcde: dsi: Fix invalid pointer dereference if panel cannot be found [ Upstream commit c131280c03bd1c225c2e64e9ef75873ffca3d96e ] The "panel" pointer is not reset to NULL if of_drm_find_panel() returns an error. Therefore we later assume that a panel was found, and try to dereference the error pointer, resulting in: mcde-dsi a0351000.dsi: failed to find panel try bridge (4294966779) Unable to handle kernel paging request at virtual address fffffe03 PC is at drm_panel_bridge_add.part.0+0x10/0x5c LR is at mcde_dsi_bind+0x120/0x464 ... Reset "panel" to NULL to avoid this problem. Also change the format string of the error to %ld to print the negative errors correctly. The crash above then becomes: mcde-dsi a0351000.dsi: failed to find panel try bridge (-517) mcde-dsi a0351000.dsi: no panel or bridge ... Fixes: 5fc537bfd000 ("drm/mcde: Add new driver for ST-Ericsson MCDE") Signed-off-by: Stephan Gerhold Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20191118130252.170324-1-stephan@gerhold.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/mcde/mcde_dsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c index f9c9e32b299c..35bb825d1918 100644 --- a/drivers/gpu/drm/mcde/mcde_dsi.c +++ b/drivers/gpu/drm/mcde/mcde_dsi.c @@ -935,11 +935,13 @@ static int mcde_dsi_bind(struct device *dev, struct device *master, for_each_available_child_of_node(dev->of_node, child) { panel = of_drm_find_panel(child); if (IS_ERR(panel)) { - dev_err(dev, "failed to find panel try bridge (%lu)\n", + dev_err(dev, "failed to find panel try bridge (%ld)\n", PTR_ERR(panel)); + panel = NULL; + bridge = of_drm_find_bridge(child); if (IS_ERR(bridge)) { - dev_err(dev, "failed to find bridge (%lu)\n", + dev_err(dev, "failed to find bridge (%ld)\n", PTR_ERR(bridge)); return PTR_ERR(bridge); } From 6b49a5a9eb46ffa3b07ad14fe62e117a09787cae Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Nov 2019 15:15:26 -0800 Subject: [PATCH 002/191] nvme_fc: add module to ops template to allow module references [ Upstream commit 863fbae929c7a5b64e96b8a3ffb34a29eefb9f8f ] In nvme-fc: it's possible to have connected active controllers and as no references are taken on the LLDD, the LLDD can be unloaded. The controller would enter a reconnect state and as long as the LLDD resumed within the reconnect timeout, the controller would resume. But if a namespace on the controller is the root device, allowing the driver to unload can be problematic. To reload the driver, it may require new io to the boot device, and as it's no longer connected we get into a catch-22 that eventually fails, and the system locks up. Fix this issue by taking a module reference for every connected controller (which is what the core layer did to the transport module). Reference is cleared when the controller is removed. Acked-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 14 ++++++++++++-- drivers/nvme/target/fcloop.c | 1 + drivers/scsi/lpfc/lpfc_nvme.c | 2 ++ drivers/scsi/qla2xxx/qla_nvme.c | 1 + include/linux/nvme-fc-driver.h | 4 ++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 265f89e11d8b..3f102d9f39b8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -342,7 +342,8 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary) { + !template->max_dif_sgl_segments || !template->dma_boundary || + !template->module) { ret = -EINVAL; goto out_reghost_failed; } @@ -2015,6 +2016,7 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); + struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -2041,6 +2043,7 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); + module_put(lport->ops->module); } static void @@ -3056,10 +3059,15 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } + if (!try_module_get(lport->ops->module)) { + ret = -EUNATCH; + goto out_free_ctrl; + } + idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_free_ctrl; + goto out_mod_put; } ctrl->ctrl.opts = opts; @@ -3212,6 +3220,8 @@ out_free_queues: out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); +out_mod_put: + module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index b50b53db3746..1c50af6219f3 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -850,6 +850,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { + .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a227e36cbdc2..8e0f03ef346b 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1976,6 +1976,8 @@ out_unlock: /* Declare and initialization an instance of the FC NVME template. */ static struct nvme_fc_port_template lpfc_nvme_template = { + .module = THIS_MODULE, + /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 941aa53363f5..bfcd02fdf2b8 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -610,6 +610,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { + .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 10f81629b9ce..6d0d70f3219c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -270,6 +270,8 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * + * @module: The LLDD module using the interface + * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -383,6 +385,8 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { + struct module *module; + /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); From afde69ecda83083ac3267571e7d09ca63f36be28 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 21 Nov 2019 09:59:37 -0800 Subject: [PATCH 003/191] nvme-fc: fix double-free scenarios on hw queues [ Upstream commit c869e494ef8b5846d9ba91f1e922c23cd444f0c1 ] If an error occurs on one of the ios used for creating an association, the creating routine has error paths that are invoked by the command failure and the error paths will free up the controller resources created to that point. But... the io was ultimately determined by an asynchronous completion routine that detected the error and which unconditionally invokes the error_recovery path which calls delete_association. Delete association deletes all outstanding io then tears down the controller resources. So the create_association thread can be running in parallel with the error_recovery thread. What was seen was the LLDD received a call to delete a queue, causing the LLDD to do a free of a resource, then the transport called the delete queue again causing the driver to repeat the free call. The second free routine corrupted the allocator. The transport shouldn't be making the duplicate call, and the delete queue is just one of the resources being freed. To fix, it is realized that the create_association path is completely serialized with one command at a time. So the failed io completion will always be seen by the create_association path and as of the failure, there are no ios to terminate and there is no reason to be manipulating queue freeze states, etc. The serialized condition stays true until the controller is transitioned to the LIVE state. Thus the fix is to change the error recovery path to check the controller state and only invoke the teardown path if not already in the CONNECTING state. Reviewed-by: Himanshu Madhani Reviewed-by: Ewan D. Milne Signed-off-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3f102d9f39b8..59474bd0c728 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2910,10 +2910,22 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) static void __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); + /* + * if state is connecting - the error occurred as part of a + * reconnect attempt. The create_association error paths will + * clean up any outstanding io. + * + * if it's a different state - ensure all pending io is + * terminated. Given this can delay while waiting for the + * aborted io to return, we recheck adapter state below + * before changing state. + */ + if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { + nvme_stop_keep_alive(&ctrl->ctrl); - /* will block will waiting for io to terminate */ - nvme_fc_delete_association(ctrl); + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + } if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) From 731a0f0d0d50ae2b8a8e4ffe65923c05e616cd4f Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 4 Dec 2019 15:51:16 +0800 Subject: [PATCH 004/191] drm/amdgpu: add check before enabling/disabling broadcast mode [ Upstream commit 6e807535dae5dbbd53bcc5e81047a20bf5eb08ea ] When security violation from new vbios happens, data fabric is risky to stop working. So prevent the direct access to DF mmFabricConfigAccessControl from the new vbios and onwards. Signed-off-by: Guchun Chen Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 36 ++++++++++++++++------------ 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 5850c8e34caa..97d11d792351 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -261,23 +261,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, { u32 tmp; - /* Put DF on broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, true); + if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) { + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); - } else { - tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); - tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - tmp |= DF_V3_6_MGCG_DISABLE; - WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + if (enable) { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_DISABLE; + WREG32_SOC15(DF, 0, + mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); } - - /* Exit broadcast mode */ - adev->df_funcs->enable_broadcast_mode(adev, false); } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, From b8caa4267a222d3c46289662253b66d15e603627 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Dec 2019 22:07:49 -0500 Subject: [PATCH 005/191] drm/amdgpu: add header line for power profile on Arcturus [ Upstream commit 14891c316ca7e15d81dba78f30fb630e3f9ee2c9 ] So the output is consistent with other asics. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index d493a3f8c07a..b68bf8dcfa78 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1388,12 +1388,17 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, "VR", "COMPUTE", "CUSTOM"}; + static const char *title[] = { + "PROFILE_INDEX(NAME)"}; uint32_t i, size = 0; int16_t workload_type = 0; if (!smu->pm_enabled || !buf) return -EINVAL; + size += sprintf(buf + size, "%16s\n", + title[0]); + for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { /* * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT From 361d1d13fd2b41a785ba7c688f85778d62a8b115 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 28 Nov 2019 12:08:58 +0100 Subject: [PATCH 006/191] drm/amdgpu: add cache flush workaround to gfx8 emit_fence [ Upstream commit bf26da927a1cd57c9deb2db29ae8cf276ba8b17b ] The same workaround is used for gfx7. Both PAL and Mesa use it for gfx8 too, so port this commit to gfx_v8_0_ring_emit_fence_gfx. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 87dd55e9d72b..cc88ba76a8d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6184,7 +6184,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* EVENT_WRITE_EOP - flush caches, send int */ + /* Workaround for cache flush problems. First send a dummy EOP + * event down the pipe with seq one below. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | + DATA_SEL(1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq - 1)); + amdgpu_ring_write(ring, upper_32_bits(seq - 1)); + + /* Then send the real EOP event down the pipe: + * EVENT_WRITE_EOP - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | @@ -6926,7 +6942,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ - 8 + /* FENCE for VM_FLUSH */ + 12 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, the first COND_EXEC jump to the place just @@ -6938,7 +6954,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ - 8 + 8 + /* FENCE x2 */ + 12 + 12 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, From 225a3070cf4dfd16686dd2f2eca7512c4e758ca2 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Thu, 7 Nov 2019 13:06:48 -0500 Subject: [PATCH 007/191] drm/amd/display: Map DSC resources 1-to-1 if numbers of OPPs and DSCs are equal [ Upstream commit a1fc44b609b4e9c0941f0e4a1fc69d367af5ab69 ] [why] On ASICs where number of DSCs is the same as OPPs there's no need for DSC resource management. Mappping 1-to-1 fixes mode-set- or S3- -related issues for such platforms. [how] Map DSC resources 1-to-1 to pipes only if number of OPPs is the same as number of DSCs. This will still keep other ASICs working. A follow-up patch to fix mode-set issues on those ASICs will be required if testing shows issues with mode set. Signed-off-by: Nikola Cornij Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 78b2cc2e122f..3b7769a3e67e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1419,13 +1419,20 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state static void acquire_dsc(struct resource_context *res_ctx, const struct resource_pool *pool, - struct display_stream_compressor **dsc) + struct display_stream_compressor **dsc, + int pipe_idx) { int i; ASSERT(*dsc == NULL); *dsc = NULL; + if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { + *dsc = pool->dscs[pipe_idx]; + res_ctx->is_dsc_acquired[pipe_idx] = true; + return; + } + /* Find first free DSC */ for (i = 0; i < pool->res_cap->num_dsc; i++) if (!res_ctx->is_dsc_acquired[i]) { @@ -1468,7 +1475,7 @@ static enum dc_status add_dsc_to_stream_resource(struct dc *dc, if (pipe_ctx->stream != dc_stream) continue; - acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc); + acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc, i); /* The number of DSCs can be less than the number of pipes */ if (!pipe_ctx->stream_res.dsc) { @@ -1669,7 +1676,7 @@ static bool dcn20_split_stream_for_odm( next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (next_odm_pipe->stream->timing.flags.DSC == 1) { - acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc); + acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); ASSERT(next_odm_pipe->stream_res.dsc); if (next_odm_pipe->stream_res.dsc == NULL) return false; From b3abd3c9d993be875d205d25a904468c31f7e81a Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 7 Nov 2019 17:18:20 -0500 Subject: [PATCH 008/191] drm/amd/display: Fixed kernel panic when booting with DP-to-HDMI dongle [ Upstream commit a51d9f8fe756beac51ce26ef54195da00a260d13 ] [Why] In dc_link_is_dp_sink_present, if dal_ddc_open fails, then dal_gpio_destroy_ddc is called, destroying pin_data and pin_clock. They are created only on dc_construct, and next aux access will cause a panic. [How] Instead of calling dal_gpio_destroy_ddc, call dal_ddc_close. Signed-off-by: David Galiffi Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 067f5579f452..793aa8e8ec9a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -373,7 +373,7 @@ bool dc_link_is_dp_sink_present(struct dc_link *link) if (GPIO_RESULT_OK != dal_ddc_open( ddc, GPIO_MODE_INPUT, GPIO_DDC_CONFIG_TYPE_MODE_I2C)) { - dal_gpio_destroy_ddc(&ddc); + dal_ddc_close(ddc); return present; } From d950e9dc11dd38a64d91bdc57ae051ffdb907980 Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Thu, 7 Nov 2019 16:30:04 -0500 Subject: [PATCH 009/191] drm/amd/display: Change the delay time before enabling FEC [ Upstream commit 28fa24ad14e8f7d23c62283eaf9c79b4fd165c16 ] [why] DP spec requires 1000 symbols delay between the end of link training and enabling FEC in the stream. Currently we are using 1 miliseconds delay which is not accurate. [how] One lane RBR should have the maximum time for transmitting 1000 LL codes which is 6.173 us. So using 7 microseconds delay instead of 1 miliseconds. Signed-off-by: Leo (Hanghong) Ma Reviewed-by: Harry Wentland Reviewed-by: Nikola Cornij Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5a583707d198..0ab890c927ec 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3492,7 +3492,14 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) if (link_enc->funcs->fec_set_enable && link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) { if (link->fec_state == dc_link_fec_ready && enable) { - msleep(1); + /* Accord to DP spec, FEC enable sequence can first + * be transmitted anytime after 1000 LL codes have + * been transmitted on the link after link training + * completion. Using 1 lane RBR should have the maximum + * time for transmitting 1000 LL codes which is 6.173 us. + * So use 7 microseconds delay instead. + */ + udelay(7); link_enc->funcs->fec_set_enable(link_enc, true); link->fec_state = dc_link_fec_enabled; } else if (link->fec_state == dc_link_fec_enabled && !enable) { From baa4cd587a100318d0657eab94b87cb46128aabd Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Mon, 11 Nov 2019 18:03:59 -0500 Subject: [PATCH 010/191] drm/amd/display: Reset steer fifo before unblanking the stream [ Upstream commit 87de6cb2f28153bc74d0a001ca099c29453e145f ] [why] During mode transition steer fifo could overflow. Quite often it recovers by itself, but sometimes it doesn't. [how] Add steer fifo reset before unblanking the stream. Also add a short delay when resetting dig resync fifo to make sure register writes don't end up back-to-back, in which case the HW might miss the reset request. Signed-off-by: Nikola Cornij Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../drm/amd/display/dc/dcn20/dcn20_stream_encoder.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index 5ab9d6240498..e95025b1d14d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -492,15 +492,23 @@ void enc2_stream_encoder_dp_unblank( DP_VID_N_MUL, n_multiply); } - /* set DIG_START to 0x1 to reset FIFO */ + /* make sure stream is disabled before resetting steer fifo */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false); + REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000); + /* set DIG_START to 0x1 to reset FIFO */ REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); + udelay(1); /* write 0 to take the FIFO out of reset */ REG_UPDATE(DIG_FE_CNTL, DIG_START, 0); - /* switch DP encoder to CRTC data */ + /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen + * that it overflows during mode transition, and sometimes doesn't recover. + */ + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1); + udelay(10); REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); From 7376dbde76af6c3ca7e905b56f0cef1e395a6ad1 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 15 Nov 2019 12:04:25 -0500 Subject: [PATCH 011/191] drm/amd/display: update dispclk and dppclk vco frequency [ Upstream commit 44ce6c3dc8479bb3ed68df13b502b0901675e7d6 ] Value obtained from DV is not allowing 8k60 CTA mode with DSC to pass, after checking real value being used in hw, find out that correct value is 3600, which will allow that mode. Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index de182185fe1f..b0e5e64df212 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -258,7 +258,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .vmm_page_size_bytes = 4096, .dram_clock_change_latency_us = 23.84, .return_bus_width_bytes = 64, - .dispclk_dppclk_vco_speed_mhz = 3550, + .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, .xfc_xbuf_latency_tolerance_us = 4, .use_urgent_burst_bw = 1, From 29cfb7940e63b314f4842a3d12e219c056dccacf Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Sat, 7 Dec 2019 01:51:54 +0900 Subject: [PATCH 012/191] nvme/pci: Fix write and poll queue types [ Upstream commit 3f68baf706ec68c4120867c25bc439c845fe3e17 ] The number of poll or write queues should never be negative. Use unsigned types so that it's not possible to break have the driver not allocate any queues. Reviewed-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 869f462e6b6e..29d7427c2b19 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -68,14 +68,14 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); -static int write_queues; -module_param(write_queues, int, 0644); +static unsigned int write_queues; +module_param(write_queues, uint, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); -static int poll_queues; -module_param(poll_queues, int, 0644); +static unsigned int poll_queues; +module_param(poll_queues, uint, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; From 7a6cec43ba680a20d6ec94d35dee625be3d486c8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 6 Dec 2019 08:11:17 +0900 Subject: [PATCH 013/191] nvme/pci: Fix read queue count [ Upstream commit 7e4c6b9a5d22485acf009b3c3510a370f096dd54 ] If nvme.write_queues equals the number of CPUs, the driver had decreased the number of interrupts available such that there could only be one read queue even if the controller could support more. Remove the interrupt count reduction in this case. The driver wouldn't request more IRQs than it wants queues anyway. Reviewed-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 29d7427c2b19..14d513087a14 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2060,7 +2060,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) .priv = dev, }; unsigned int irq_queues, this_p_queues; - unsigned int nr_cpus = num_possible_cpus(); /* * Poll queues don't need interrupts, but we need at least one IO @@ -2071,10 +2070,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) this_p_queues = nr_io_queues - 1; irq_queues = 1; } else { - if (nr_cpus < nr_io_queues - this_p_queues) - irq_queues = nr_cpus + 1; - else - irq_queues = nr_io_queues - this_p_queues + 1; + irq_queues = nr_io_queues - this_p_queues + 1; } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; From f7a667db91266dba9c868618e8020aa19daf0b7d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 11 Nov 2019 11:21:15 +0800 Subject: [PATCH 014/191] iio: st_accel: Fix unused variable warning [ Upstream commit 0163c1c521ff8b09cd8ca395003cc00178161d77 ] drivers/iio/accel/st_accel_core.c:1005:44: warning: mount_matrix_ext_info defined but not used [-Wunused-const-variable=] Using stub helper while CONFIG_ACPI is disabled to fix it. Suggested-by: Ladislav Michl Signed-off-by: YueHaibing Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/accel/st_accel_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 2e37f8a6d8cf..be661396095c 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -993,6 +993,7 @@ static const struct iio_trigger_ops st_accel_trigger_ops = { #define ST_ACCEL_TRIGGER_OPS NULL #endif +#ifdef CONFIG_ACPI static const struct iio_mount_matrix * get_mount_matrix(const struct iio_dev *indio_dev, const struct iio_chan_spec *chan) @@ -1013,7 +1014,6 @@ static const struct iio_chan_spec_ext_info mount_matrix_ext_info[] = { static int apply_acpi_orientation(struct iio_dev *indio_dev, struct iio_chan_spec *channels) { -#ifdef CONFIG_ACPI struct st_sensor_data *adata = iio_priv(indio_dev); struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; struct acpi_device *adev; @@ -1141,10 +1141,14 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev, out: kfree(buffer.pointer); return ret; -#else /* !CONFIG_ACPI */ - return 0; -#endif } +#else /* !CONFIG_ACPI */ +static int apply_acpi_orientation(struct iio_dev *indio_dev, + struct iio_chan_spec *channels) +{ + return 0; +} +#endif /* * st_accel_get_settings() - get sensor settings from device name From 8db2bb9681b5c426b0fe2d3fde505ba779cf00aa Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Dec 2019 09:55:46 +0100 Subject: [PATCH 015/191] iio: adc: max9611: Fix too short conversion time delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9fd229c478fbf77c41c8528aa757ef14210365f6 ] As of commit b9ddd5091160793e ("iio: adc: max9611: Fix temperature reading in probe"), max9611 initialization sometimes fails on the Salvator-X(S) development board with: max9611 4-007f: Invalid value received from ADC 0x8000: aborting max9611: probe of 4-007f failed with error -5 The max9611 driver tests communications with the chip by reading the die temperature during the probe function, which returns an invalid value. According to the datasheet, the typical ADC conversion time is 2 ms, but no minimum or maximum values are provided. Maxim Technical Support confirmed this was tested with temperature Ta=25 degreeC, and promised to inform me if a maximum/minimum value is available (they didn't get back to me, so I assume it is not). However, the driver assumes a 1 ms conversion time. Usually the usleep_range() call returns after more than 1.8 ms, hence it succeeds. When it returns earlier, the data register may be read too early, and the previous measurement value will be returned. After boot, this is the temperature POR (power-on reset) value, causing the failure above. Fix this by increasing the delay from 1000-2000 µs to 3000-3300 µs. Note that this issue has always been present, but it was exposed by the aformentioned commit. Fixes: 69780a3bbc0b1e7e ("iio: adc: Add Maxim max9611 ADC driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Jacopo Mondi Reviewed-by: Wolfram Sang Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/max9611.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index da073d72f649..e480529b3f04 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -89,6 +89,12 @@ #define MAX9611_TEMP_SCALE_NUM 1000000 #define MAX9611_TEMP_SCALE_DIV 2083 +/* + * Conversion time is 2 ms (typically) at Ta=25 degreeC + * No maximum value is known, so play it safe. + */ +#define MAX9611_CONV_TIME_US_RANGE 3000, 3300 + struct max9611_dev { struct device *dev; struct i2c_client *i2c_client; @@ -236,11 +242,9 @@ static int max9611_read_single(struct max9611_dev *max9611, return ret; } - /* - * need a delay here to make register configuration - * stabilize. 1 msec at least, from empirical testing. - */ - usleep_range(1000, 2000); + /* need a delay here to make register configuration stabilize. */ + + usleep_range(MAX9611_CONV_TIME_US_RANGE); ret = i2c_smbus_read_word_swapped(max9611->i2c_client, reg_addr); if (ret < 0) { @@ -507,7 +511,7 @@ static int max9611_init(struct max9611_dev *max9611) MAX9611_REG_CTRL2, 0); return ret; } - usleep_range(1000, 2000); + usleep_range(MAX9611_CONV_TIME_US_RANGE); return 0; } From eae25dfc30249cf540a045365587a32e6bc9ca6a Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 31 Oct 2019 23:34:18 +0200 Subject: [PATCH 016/191] PM / devfreq: Fix devfreq_notifier_call returning errno [ Upstream commit e876e710ede23f670494331e062d643928e4142a ] Notifier callbacks shouldn't return negative errno but one of the NOTIFY_OK/DONE/BAD values. The OPP core will ignore return values from notifiers but returning a value that matches NOTIFY_STOP_MASK will stop the notification chain. Fix by always returning NOTIFY_OK. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/devfreq.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 3a1484e7a3ae..e5c2afdc7b7f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -551,26 +551,28 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type, void *devp) { struct devfreq *devfreq = container_of(nb, struct devfreq, nb); - int ret; + int err = -EINVAL; mutex_lock(&devfreq->lock); devfreq->scaling_min_freq = find_available_min_freq(devfreq); - if (!devfreq->scaling_min_freq) { - mutex_unlock(&devfreq->lock); - return -EINVAL; - } + if (!devfreq->scaling_min_freq) + goto out; devfreq->scaling_max_freq = find_available_max_freq(devfreq); - if (!devfreq->scaling_max_freq) { - mutex_unlock(&devfreq->lock); - return -EINVAL; - } + if (!devfreq->scaling_max_freq) + goto out; - ret = update_devfreq(devfreq); + err = update_devfreq(devfreq); + +out: mutex_unlock(&devfreq->lock); + if (err) + dev_err(devfreq->dev.parent, + "failed to update frequency from OPP notifier (%d)\n", + err); - return ret; + return NOTIFY_OK; } /** From 43894f3ae380f18e206d2dd496633fbe5f0a6806 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 31 Oct 2019 23:34:19 +0200 Subject: [PATCH 017/191] PM / devfreq: Set scaling_max_freq to max on OPP notifier error [ Upstream commit e7cc792d00049c874010b398a27c3cc7bc8fef34 ] The devfreq_notifier_call functions will update scaling_min_freq and scaling_max_freq when the OPP table is updated. If fetching the maximum frequency fails then scaling_max_freq remains set to zero which is confusing. Set to ULONG_MAX instead so we don't need special handling for this case in other places. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/devfreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index e5c2afdc7b7f..e185c8846916 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -560,8 +560,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type, goto out; devfreq->scaling_max_freq = find_available_max_freq(devfreq); - if (!devfreq->scaling_max_freq) + if (!devfreq->scaling_max_freq) { + devfreq->scaling_max_freq = ULONG_MAX; goto out; + } err = update_devfreq(devfreq); From 33191a1bd6327e20b074dae2d9f997543092f485 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 14 Nov 2019 01:21:31 +0200 Subject: [PATCH 018/191] PM / devfreq: Don't fail devfreq_dev_release if not in list [ Upstream commit 42a6b25e67df6ee6675e8d1eaf18065bd73328ba ] Right now devfreq_dev_release will print a warning and abort the rest of the cleanup if the devfreq instance is not part of the global devfreq_list. But this is a valid scenario, for example it can happen if the governor can't be found or on any other init error that happens after device_register. Initialize devfreq->node to an empty list head in devfreq_add_device so that list_del becomes a safe noop inside devfreq_dev_release and we can continue the rest of the cleanup. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/devfreq/devfreq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index e185c8846916..ffd2d6b44dfb 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -588,11 +588,6 @@ static void devfreq_dev_release(struct device *dev) struct devfreq *devfreq = to_devfreq(dev); mutex_lock(&devfreq_list_lock); - if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) { - mutex_unlock(&devfreq_list_lock); - dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n"); - return; - } list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); @@ -647,6 +642,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.parent = dev; devfreq->dev.class = devfreq_class; devfreq->dev.release = devfreq_dev_release; + INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; From 79ce91d278498c3445a8d7007fa4dca82148edce Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 9 Dec 2019 15:04:43 +0000 Subject: [PATCH 019/191] afs: Fix afs_find_server lookups for ipv4 peers [ Upstream commit 9bd0160d12370a076e44f8d1320cde9c83f2c647 ] afs_find_server tries to find a server that has an address that matches the transport address of an rxrpc peer. The code assumes that the transport address is always ipv6, with ipv4 represented as ipv4 mapped addresses, but that's not the case. If the transport family is AF_INET, srx->transport.sin6.sin6_addr.s6_addr32[] will be beyond the actual ipv4 address and will always be 0, and all ipv4 addresses will be seen as matching. As a result, the first ipv4 address seen on any server will be considered a match, and the server returned may be the wrong one. One of the consequences is that callbacks received over ipv4 will only be correctly applied for the server that happens to have the first ipv4 address on the fs_addresses4 list. Callbacks over ipv4 from all other servers are dropped, causing the client to serve stale data. This is fixed by looking at the transport family, and comparing ipv4 addresses based on a sockaddr_in structure rather than a sockaddr_in6. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/afs/server.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index 64d440aaabc0..ca8115ba1724 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -32,18 +32,11 @@ static void afs_dec_servers_outstanding(struct afs_net *net) struct afs_server *afs_find_server(struct afs_net *net, const struct sockaddr_rxrpc *srx) { - const struct sockaddr_in6 *a = &srx->transport.sin6, *b; const struct afs_addr_list *alist; struct afs_server *server = NULL; unsigned int i; - bool ipv6 = true; int seq = 0, diff; - if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 || - srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 || - srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) - ipv6 = false; - rcu_read_lock(); do { @@ -52,7 +45,8 @@ struct afs_server *afs_find_server(struct afs_net *net, server = NULL; read_seqbegin_or_lock(&net->fs_addr_lock, &seq); - if (ipv6) { + if (srx->transport.family == AF_INET6) { + const struct sockaddr_in6 *a = &srx->transport.sin6, *b; hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { alist = rcu_dereference(server->addresses); for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { @@ -68,15 +62,16 @@ struct afs_server *afs_find_server(struct afs_net *net, } } } else { + const struct sockaddr_in *a = &srx->transport.sin, *b; hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { alist = rcu_dereference(server->addresses); for (i = 0; i < alist->nr_ipv4; i++) { - b = &alist->addrs[i].transport.sin6; - diff = ((u16 __force)a->sin6_port - - (u16 __force)b->sin6_port); + b = &alist->addrs[i].transport.sin; + diff = ((u16 __force)a->sin_port - + (u16 __force)b->sin_port); if (diff == 0) - diff = ((u32 __force)a->sin6_addr.s6_addr32[3] - - (u32 __force)b->sin6_addr.s6_addr32[3]); + diff = ((u32 __force)a->sin_addr.s_addr - + (u32 __force)b->sin_addr.s_addr); if (diff == 0) goto found; } From ec81b123abbf31bb48070e336082de2d30074c82 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 9 Dec 2019 15:04:45 +0000 Subject: [PATCH 020/191] afs: Fix SELinux setting security label on /afs [ Upstream commit bcbccaf2edcf1b76f73f890e968babef446151a4 ] Make the AFS dynamic root superblock R/W so that SELinux can set the security label on it. Without this, upgrades to, say, the Fedora filesystem-afs RPM fail if afs is mounted on it because the SELinux label can't be (re-)applied. It might be better to make it possible to bypass the R/O check for LSM label application through setxattr. Fixes: 4d673da14533 ("afs: Support the AFS dynamic root") Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: selinux@vger.kernel.org cc: linux-security-module@vger.kernel.org Signed-off-by: Sasha Levin --- fs/afs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/afs/super.c b/fs/afs/super.c index 488641b1a418..d9a6036b70b9 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -448,7 +448,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) /* allocate the root inode and dentry */ if (as->dyn_root) { inode = afs_iget_pseudo_dir(sb, true); - sb->s_flags |= SB_RDONLY; } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); From 438e26506d39bbd71604e16ebab92ac718aff5a4 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 6 Dec 2019 09:24:26 +0800 Subject: [PATCH 021/191] RDMA/cma: add missed unregister_pernet_subsys in init failure [ Upstream commit 44a7b6759000ac51b92715579a7bba9e3f9245c2 ] The driver forgets to call unregister_pernet_subsys() in the error path of cma_init(). Add the missed call to fix it. Fixes: 4be74b42a6d0 ("IB/cma: Separate port allocation to network namespaces") Signed-off-by: Chuhong Yuan Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20191206012426.12744-1-hslester96@gmail.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d78f67623f24..50052e9a1731 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4736,6 +4736,7 @@ err_ib: err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; From aff98343bd9a59dc64b9335197d58152636badb8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 2 Dec 2019 20:03:20 -0600 Subject: [PATCH 022/191] rxe: correctly calculate iCRC for unaligned payloads [ Upstream commit 2030abddec6884aaf5892f5724c48fc340e6826f ] If RoCE PDUs being sent or received contain pad bytes, then the iCRC is miscalculated, resulting in PDUs being emitted by RXE with an incorrect iCRC, as well as ingress PDUs being dropped due to erroneously detecting a bad iCRC in the PDU. The fix is to include the pad bytes, if any, in iCRC computations. Note: This bug has caused broken on-the-wire compatibility with actual hardware RoCE devices since the soft-RoCE driver was first put into the mainstream kernel. Fixing it will create an incompatibility with the original soft-RoCE devices, but is necessary to be compatible with real hardware devices. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Steve Wise Link: https://lore.kernel.org/r/20191203020319.15036-2-larrystevenwise@gmail.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- drivers/infiniband/sw/rxe/rxe_req.c | 6 ++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f9a492ed900b..831ad578a7b2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -389,7 +389,7 @@ void rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index c5d9b558fa90..e5031172c019 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1cbfbd98eb22..c4a8195bf670 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; From f327fb083c6836c8bbad077305bb74b848d901db Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Sat, 7 Dec 2019 03:22:46 +0000 Subject: [PATCH 023/191] scsi: lpfc: Fix memory leak on lpfc_bsg_write_ebuf_set func [ Upstream commit 9a1b0b9a6dab452fb0e39fe96880c4faf3878369 ] When phba->mbox_ext_buf_ctx.seqNum != phba->mbox_ext_buf_ctx.numBuf, dd_data should be freed before return SLI_CONFIG_HANDLED. When lpfc_sli_issue_mbox func return fails, pmboxq should be also freed in job_error tag. Link: https://lore.kernel.org/r/EDBAAA0BBBA2AC4E9C8B6B81DEEE1D6915E7A966@DGGEML525-MBS.china.huawei.com Signed-off-by: Bo Wu Reviewed-by: Zhiqiang Liu Reviewed-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_bsg.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 39a736b887b1..6c2b03415a2c 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -4489,12 +4489,6 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, phba->mbox_ext_buf_ctx.seqNum++; nemb_tp = phba->mbox_ext_buf_ctx.nembType; - dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); - if (!dd_data) { - rc = -ENOMEM; - goto job_error; - } - pbuf = (uint8_t *)dmabuf->virt; size = job->request_payload.payload_len; sg_copy_to_buffer(job->request_payload.sg_list, @@ -4531,6 +4525,13 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, "2968 SLI_CONFIG ext-buffer wr all %d " "ebuffers received\n", phba->mbox_ext_buf_ctx.numBuf); + + dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); + if (!dd_data) { + rc = -ENOMEM; + goto job_error; + } + /* mailbox command structure for base driver */ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmboxq) { @@ -4579,6 +4580,8 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, return SLI_CONFIG_HANDLED; job_error: + if (pmboxq) + mempool_free(pmboxq, phba->mbox_mem_pool); lpfc_bsg_dma_page_free(phba, dmabuf); kfree(dd_data); From 99a1bdf6ae39501f5678c29bff6cb80906227519 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Mon, 25 Nov 2019 19:56:51 +0300 Subject: [PATCH 024/191] scsi: qla2xxx: Use explicit LOGO in target mode [ Upstream commit 86196a8fa8a84af1395a28ea0548f2ce6ae9bc22 ] Target makes implicit LOGO on session teardown. LOGO ELS is not send on the wire and initiator is not aware that target no longer wants talking to it. Initiator keeps sending I/O requests, target responds with BA_RJT, they time out and then initiator sends ABORT TASK (ABTS-LS). Current behaviour incurs unneeded I/O timeout and can be fixed for some initiators by making explicit LOGO on session deletion. Link: https://lore.kernel.org/r/20191125165702.1013-3-r.bolshakov@yadro.com Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_iocb.c | 16 ++++++++++++---- drivers/scsi/qla2xxx/qla_target.c | 1 + drivers/scsi/qla2xxx/tcm_qla2xxx.c | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index d5386edddaf6..1eb3fe281cc3 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2401,6 +2401,7 @@ typedef struct fc_port { unsigned int id_changed:1; unsigned int scan_needed:1; unsigned int n2n_flag:1; + unsigned int explicit_logout:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 518eb954cf42..44dc97cebb06 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2405,11 +2405,19 @@ qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx) static void qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) { + u16 control_flags = LCF_COMMAND_LOGO; logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; - logio->control_flags = - cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); - if (!sp->fcport->keep_nport_handle) - logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); + + if (sp->fcport->explicit_logout) { + control_flags |= LCF_EXPL_LOGO|LCF_FREE_NPORT; + } else { + control_flags |= LCF_IMPL_LOGO; + + if (!sp->fcport->keep_nport_handle) + control_flags |= LCF_FREE_NPORT; + } + + logio->control_flags = cpu_to_le16(control_flags); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a9bd0f513316..950764ed4ab2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1104,6 +1104,7 @@ void qlt_free_session_done(struct work_struct *work) } } + sess->explicit_logout = 0; spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); sess->free_pending = 0; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index bab2073c1f72..abe7f79bb789 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -350,6 +350,7 @@ static void tcm_qla2xxx_close_session(struct se_session *se_sess) target_sess_cmd_list_set_waiting(se_sess); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + sess->explicit_logout = 1; tcm_qla2xxx_put_sess(sess); } From 8ad67d0ec1442cf643ba9fe34cf081a0fab5284d Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:53 +0300 Subject: [PATCH 025/191] scsi: qla2xxx: Drop superfluous INIT_WORK of del_work [ Upstream commit 600954e6f2df695434887dfc6a99a098859990cf ] del_work is already initialized inside qla2x00_alloc_fcport, there's no need to overwrite it. Indeed, it might prevent complete traversal of workqueue list. Fixes: a01c77d2cbc45 ("scsi: qla2xxx: Move session delete to driver work queue") Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-5-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 950764ed4ab2..18522ac79d9e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1265,7 +1265,6 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) "Scheduling sess %p for deletion %8phC\n", sess, sess->port_name); - INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work)); } From 7e56964da84f9f5100d148f050df2d538aa3b57c Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:56 +0300 Subject: [PATCH 026/191] scsi: qla2xxx: Don't call qlt_async_event twice [ Upstream commit 2c2f4bed9b6299e6430a65a29b5d27b8763fdf25 ] MBA_PORT_UPDATE generates duplicate log lines in target mode because qlt_async_event is called twice. Drop the calls within the case as the function will be called right after the switch statement. Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-8-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_isr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 9204e8467a4e..b3766b1879e3 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1061,8 +1061,6 @@ global_port_update: ql_dbg(ql_dbg_async, vha, 0x5011, "Asynchronous PORT UPDATE ignored %04x/%04x/%04x.\n", mb[1], mb[2], mb[3]); - - qlt_async_event(mb[0], vha, mb); break; } @@ -1079,8 +1077,6 @@ global_port_update: set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(VP_CONFIG_OK, &vha->vp_flags); - - qlt_async_event(mb[0], vha, mb); break; case MBA_RSCN_UPDATE: /* State Change Registration */ From 97345ea931e5fbd2a082c972109ef90e4a0f6f3a Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:57 +0300 Subject: [PATCH 027/191] scsi: qla2xxx: Fix PLOGI payload and ELS IOCB dump length [ Upstream commit 0334cdea1fba36fad8bdf9516f267ce01de625f7 ] The size of the buffer is hardcoded as 0x70 or 112 bytes, while the size of ELS IOCB is 0x40 and the size of PLOGI payload returned by Get Parameters command is 0x74. Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-9-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_iocb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 44dc97cebb06..bdf1994251b9 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2684,7 +2684,8 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x3073, "PLOGI ELS IOCB:\n"); ql_dump_buffer(ql_log_info, vha, 0x0109, - (uint8_t *)els_iocb, 0x70); + (uint8_t *)els_iocb, + sizeof(*els_iocb)); } else { els_iocb->control_flags = 1 << 13; els_iocb->tx_byte_count = @@ -2850,7 +2851,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_disc + ql_dbg_buffer, vha, 0x3073, "PLOGI buffer:\n"); ql_dump_buffer(ql_dbg_disc + ql_dbg_buffer, vha, 0x0109, - (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, 0x70); + (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, + sizeof(*elsio->u.els_plogi.els_plogi_pyld)); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { From 610ab9d567f8d6b9c8879029aeb90097f628ee0e Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:58 +0300 Subject: [PATCH 028/191] scsi: qla2xxx: Configure local loop for N2N target [ Upstream commit fd1de5830a5abaf444cc4312871e02c41e24fdc1 ] qla2x00_configure_local_loop initializes PLOGI payload for PLOGI ELS using Get Parameters mailbox command. In the case when the driver is running in target mode, the topology is N2N and the target port has higher WWPN, LOCAL_LOOP_UPDATE bit is cleared too early and PLOGI payload is not initialized by the Get Parameters command. That causes a failure of ELS IOCB carrying the PLOGI with 0x15 aka Data Underrun error. LOCAL_LOOP_UPDATE has to be set to initialize PLOGI payload. Fixes: 48acad099074 ("scsi: qla2xxx: Fix N2N link re-connect") Link: https://lore.kernel.org/r/20191125165702.1013-10-r.bolshakov@yadro.com Acked-by: Quinn Tran Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_init.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5d31e3d52b6b..4e424f1ce5de 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4927,14 +4927,8 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) set_bit(RSCN_UPDATE, &flags); clear_bit(LOCAL_LOOP_UPDATE, &flags); - } else if (ha->current_topology == ISP_CFG_N) { - clear_bit(RSCN_UPDATE, &flags); - if (qla_tgt_mode_enabled(vha)) { - /* allow the other side to start the login */ - clear_bit(LOCAL_LOOP_UPDATE, &flags); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - } - } else if (ha->current_topology == ISP_CFG_NL) { + } else if (ha->current_topology == ISP_CFG_NL || + ha->current_topology == ISP_CFG_N) { clear_bit(RSCN_UPDATE, &flags); set_bit(LOCAL_LOOP_UPDATE, &flags); } else if (!vha->flags.online || From 944e01086e0ef71771ce6db05d046621e1768b42 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:56:59 +0300 Subject: [PATCH 029/191] scsi: qla2xxx: Send Notify ACK after N2N PLOGI [ Upstream commit 5e6b01d84b9d20bcd77fc7c4733a2a4149bf220a ] qlt_handle_login schedules session for deletion even if a login is in progress. That causes login bouncing, i.e. a few logins are made before it settles down. Complete the first login by sending Notify Acknowledge IOCB via qlt_plogi_ack_unref if the session is pending login completion. Fixes: 9cd883f07a54 ("scsi: qla2xxx: Fix session cleanup for N2N") Cc: Krishna Kant Cc: Alexei Potashnik Link: https://lore.kernel.org/r/20191125165702.1013-11-r.bolshakov@yadro.com Acked-by: Quinn Tran Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 18522ac79d9e..74a378a91b71 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4803,6 +4803,7 @@ static int qlt_handle_login(struct scsi_qla_host *vha, switch (sess->disc_state) { case DSC_DELETED: + case DSC_LOGIN_PEND: qlt_plogi_ack_unref(vha, pla); break; From 7497032b17a8fb5e3e8eff05dc4e601a619bdfe4 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:57:00 +0300 Subject: [PATCH 030/191] scsi: qla2xxx: Don't defer relogin unconditonally [ Upstream commit dabc5ec915f3a2c657ecfb529cd3d4ec303a4412 ] qla2x00_configure_local_loop sets RELOGIN_NEEDED bit and calls qla24xx_fcport_handle_login to perform the login. This bit triggers a wake up of DPC later after a successful login. The deferred call is not needed if login succeeds, and it's set in qla24xx_fcport_handle_login in case of errors, hence it should be safe to drop. Link: https://lore.kernel.org/r/20191125165702.1013-12-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Acked-by: Quinn Tran Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4e424f1ce5de..80f276d67c14 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5045,7 +5045,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(&ha->plogi_els_payld.data, (void *)ha->init_cb, sizeof(ha->plogi_els_payld.data)); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } else { ql_dbg(ql_dbg_init, vha, 0x00d1, "PLOGI ELS param read fail.\n"); From 68204b46da46b8fae902285557fd9d7c524d9f18 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 25 Nov 2019 19:57:01 +0300 Subject: [PATCH 031/191] scsi: qla2xxx: Ignore PORT UPDATE after N2N PLOGI [ Upstream commit af22f0c7b052c5c203207f1e5ebd6aa65f87c538 ] PORT UPDATE asynchronous event is generated on the host that issues PLOGI ELS (in the case of higher WWPN). In that case, the event shouldn't be handled as it sets unwanted DPC flags (i.e. LOOP_RESYNC_NEEDED) that trigger link flap. Ignore the event if the host has higher WWPN, but handle otherwise. Cc: Quinn Tran Link: https://lore.kernel.org/r/20191125165702.1013-13-r.bolshakov@yadro.com Acked-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4d90cf101f5f..eac76e934cbe 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3920,6 +3920,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, vha->d_id.b24 = 0; vha->d_id.b.al_pa = 1; ha->flags.n2n_bigger = 1; + ha->flags.n2n_ae = 0; id.b.al_pa = 2; ql_dbg(ql_dbg_async, vha, 0x5075, @@ -3930,6 +3931,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, "Format 1: Remote login - Waiting for WWPN %8phC.\n", rptid_entry->u.f1.port_name); ha->flags.n2n_bigger = 0; + ha->flags.n2n_ae = 1; } qla24xx_post_newsess_work(vha, &id, rptid_entry->u.f1.port_name, @@ -3941,7 +3943,6 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, /* if our portname is higher then initiate N2N login */ set_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags); - ha->flags.n2n_ae = 1; return; break; case TOPO_FL: From 505eead5e56938501cf8a76b34128b0780c0dc8f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Dec 2019 12:45:09 +0300 Subject: [PATCH 032/191] scsi: iscsi: qla4xxx: fix double free in probe [ Upstream commit fee92f25777789d73e1936b91472e9c4644457c8 ] On this error path we call qla4xxx_mem_free() and then the caller also calls qla4xxx_free_adapter() which calls qla4xxx_mem_free(). It leads to a couple double frees: drivers/scsi/qla4xxx/ql4_os.c:8856 qla4xxx_probe_adapter() warn: 'ha->chap_dma_pool' double freed drivers/scsi/qla4xxx/ql4_os.c:8856 qla4xxx_probe_adapter() warn: 'ha->fw_ddb_dma_pool' double freed Fixes: afaf5a2d341d ("[SCSI] Initial Commit of qla4xxx") Link: https://lore.kernel.org/r/20191203094421.hw7ex7qr3j2rbsmx@kili.mountain Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla4xxx/ql4_os.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 8c674eca09f1..2323432a0edb 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4275,7 +4275,6 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha) return QLA_SUCCESS; mem_alloc_error_exit: - qla4xxx_mem_free(ha); return QLA_ERROR; } From 55c89290c7948e62ceac9eb3ffe6dd1555aa38d6 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 6 Dec 2019 09:11:18 +0800 Subject: [PATCH 033/191] scsi: libsas: stop discovering if oob mode is disconnected [ Upstream commit f70267f379b5e5e11bdc5d72a56bf17e5feed01f ] The discovering of sas port is driven by workqueue in libsas. When libsas is processing port events or phy events in workqueue, new events may rise up and change the state of some structures such as asd_sas_phy. This may cause some problems such as follows: ==>thread 1 ==>thread 2 ==>phy up ==>phy_up_v3_hw() ==>oob_mode = SATA_OOB_MODE; ==>phy down quickly ==>hisi_sas_phy_down() ==>sas_ha->notify_phy_event() ==>sas_phy_disconnected() ==>oob_mode = OOB_NOT_CONNECTED ==>workqueue wakeup ==>sas_form_port() ==>sas_discover_domain() ==>sas_get_port_device() ==>oob_mode is OOB_NOT_CONNECTED and device is wrongly taken as expander This at last lead to the panic when libsas trying to issue a command to discover the device. [183047.614035] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [183047.622896] Mem abort info: [183047.625762] ESR = 0x96000004 [183047.628893] Exception class = DABT (current EL), IL = 32 bits [183047.634888] SET = 0, FnV = 0 [183047.638015] EA = 0, S1PTW = 0 [183047.641232] Data abort info: [183047.644189] ISV = 0, ISS = 0x00000004 [183047.648100] CM = 0, WnR = 0 [183047.651145] user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000b7df67be [183047.657834] [0000000000000058] pgd=0000000000000000 [183047.662789] Internal error: Oops: 96000004 [#1] SMP [183047.667740] Process kworker/u16:2 (pid: 31291, stack limit = 0x00000000417c4974) [183047.675208] CPU: 0 PID: 3291 Comm: kworker/u16:2 Tainted: G W OE 4.19.36-vhulk1907.1.0.h410.eulerosv2r8.aarch64 #1 [183047.687015] Hardware name: N/A N/A/Kunpeng Desktop Board D920S10, BIOS 0.15 10/22/2019 [183047.695007] Workqueue: 0000:74:02.0_disco_q sas_discover_domain [183047.700999] pstate: 20c00009 (nzCv daif +PAN +UAO) [183047.705864] pc : prep_ata_v3_hw+0xf8/0x230 [hisi_sas_v3_hw] [183047.711510] lr : prep_ata_v3_hw+0xb0/0x230 [hisi_sas_v3_hw] [183047.717153] sp : ffff00000f28ba60 [183047.720541] x29: ffff00000f28ba60 x28: ffff8026852d7228 [183047.725925] x27: ffff8027dba3e0a8 x26: ffff8027c05fc200 [183047.731310] x25: 0000000000000000 x24: ffff8026bafa8dc0 [183047.736695] x23: ffff8027c05fc218 x22: ffff8026852d7228 [183047.742079] x21: ffff80007c2f2940 x20: ffff8027c05fc200 [183047.747464] x19: 0000000000f80800 x18: 0000000000000010 [183047.752848] x17: 0000000000000000 x16: 0000000000000000 [183047.758232] x15: ffff000089a5a4ff x14: 0000000000000005 [183047.763617] x13: ffff000009a5a50e x12: ffff8026bafa1e20 [183047.769001] x11: ffff0000087453b8 x10: ffff00000f28b870 [183047.774385] x9 : 0000000000000000 x8 : ffff80007e58f9b0 [183047.779770] x7 : 0000000000000000 x6 : 000000000000003f [183047.785154] x5 : 0000000000000040 x4 : ffffffffffffffe0 [183047.790538] x3 : 00000000000000f8 x2 : 0000000002000007 [183047.795922] x1 : 0000000000000008 x0 : 0000000000000000 [183047.801307] Call trace: [183047.803827] prep_ata_v3_hw+0xf8/0x230 [hisi_sas_v3_hw] [183047.809127] hisi_sas_task_prep+0x750/0x888 [hisi_sas_main] [183047.814773] hisi_sas_task_exec.isra.7+0x88/0x1f0 [hisi_sas_main] [183047.820939] hisi_sas_queue_command+0x28/0x38 [hisi_sas_main] [183047.826757] smp_execute_task_sg+0xec/0x218 [183047.831013] smp_execute_task+0x74/0xa0 [183047.834921] sas_discover_expander.part.7+0x9c/0x5f8 [183047.839959] sas_discover_root_expander+0x90/0x160 [183047.844822] sas_discover_domain+0x1b8/0x1e8 [183047.849164] process_one_work+0x1b4/0x3f8 [183047.853246] worker_thread+0x54/0x470 [183047.856981] kthread+0x134/0x138 [183047.860283] ret_from_fork+0x10/0x18 [183047.863931] Code: f9407a80 528000e2 39409281 72a04002 (b9405800) [183047.870097] kernel fault(0x1) notification starting on CPU 0 [183047.875828] kernel fault(0x1) notification finished on CPU 0 [183047.881559] Modules linked in: unibsp(OE) hns3(OE) hclge(OE) hnae3(OE) mem_drv(OE) hisi_sas_v3_hw(OE) hisi_sas_main(OE) [183047.892418] ---[ end trace 4cc26083fc11b783 ]--- [183047.897107] Kernel panic - not syncing: Fatal exception [183047.902403] kernel fault(0x5) notification starting on CPU 0 [183047.908134] kernel fault(0x5) notification finished on CPU 0 [183047.913865] SMP: stopping secondary CPUs [183047.917861] Kernel Offset: disabled [183047.921422] CPU features: 0x2,a2a00a38 [183047.925243] Memory Limit: none [183047.928372] kernel reboot(0x2) notification starting on CPU 0 [183047.934190] kernel reboot(0x2) notification finished on CPU 0 [183047.940008] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Link: https://lore.kernel.org/r/20191206011118.46909-1-yanaijie@huawei.com Reported-by: Gao Chuan Reviewed-by: John Garry Signed-off-by: Jason Yan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_discover.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index f47b4b281b14..d7302c2052f9 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -81,12 +81,21 @@ static int sas_get_port_device(struct asd_sas_port *port) else dev->dev_type = SAS_SATA_DEV; dev->tproto = SAS_PROTOCOL_SATA; - } else { + } else if (port->oob_mode == SAS_OOB_MODE) { struct sas_identify_frame *id = (struct sas_identify_frame *) dev->frame_rcvd; dev->dev_type = id->dev_type; dev->iproto = id->initiator_bits; dev->tproto = id->target_bits; + } else { + /* If the oob mode is OOB_NOT_CONNECTED, the port is + * disconnected due to race with PHY down. We cannot + * continue to discover this port + */ + sas_put_device(dev); + pr_warn("Port %016llx is disconnected when discovering\n", + SAS_ADDR(port->attached_sas_addr)); + return -ENODEV; } sas_init_dev(dev); From d45a91713822e18f66348a10edea35f85dcef9ae Mon Sep 17 00:00:00 2001 From: Bo Wu Date: Wed, 20 Nov 2019 13:26:17 +0000 Subject: [PATCH 034/191] scsi: iscsi: Avoid potential deadlock in iscsi_if_rx func [ Upstream commit bba340c79bfe3644829db5c852fdfa9e33837d6d ] In iscsi_if_rx func, after receiving one request through iscsi_if_recv_msg func, iscsi_if_send_reply will be called to try to reply to the request in a do-while loop. If the iscsi_if_send_reply function keeps returning -EAGAIN, a deadlock will occur. For example, a client only send msg without calling recvmsg func, then it will result in the watchdog soft lockup. The details are given as follows: sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ISCSI); retval = bind(sock_fd, (struct sock addr*) & src_addr, sizeof(src_addr); while (1) { state_msg = sendmsg(sock_fd, &msg, 0); //Note: recvmsg(sock_fd, &msg, 0) is not processed here. } close(sock_fd); watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [netlink_test:253305] Sample time: 4000897528 ns(HZ: 250) Sample stat: curr: user: 675503481560, nice: 321724050, sys: 448689506750, idle: 4654054240530, iowait: 40885550700, irq: 14161174020, softirq: 8104324140, st: 0 deta: user: 0, nice: 0, sys: 3998210100, idle: 0, iowait: 0, irq: 1547170, softirq: 242870, st: 0 Sample softirq: TIMER: 992 SCHED: 8 Sample irqstat: irq 2: delta 1003, curr: 3103802, arch_timer CPU: 7 PID: 253305 Comm: netlink_test Kdump: loaded Tainted: G OE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 40400005 (nZcv daif +PAN -UAO) pc : __alloc_skb+0x104/0x1b0 lr : __alloc_skb+0x9c/0x1b0 sp : ffff000033603a30 x29: ffff000033603a30 x28: 00000000000002dd x27: ffff800b34ced810 x26: ffff800ba7569f00 x25: 00000000ffffffff x24: 0000000000000000 x23: ffff800f7c43f600 x22: 0000000000480020 x21: ffff0000091d9000 x20: ffff800b34eff200 x19: ffff800ba7569f00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0001000101000100 x13: 0000000101010000 x12: 0101000001010100 x11: 0001010101010001 x10: 00000000000002dd x9 : ffff000033603d58 x8 : ffff800b34eff400 x7 : ffff800ba7569200 x6 : ffff800b34eff400 x5 : 0000000000000000 x4 : 00000000ffffffff x3 : 0000000000000000 x2 : 0000000000000001 x1 : ffff800b34eff2c0 x0 : 0000000000000300 Call trace: __alloc_skb+0x104/0x1b0 iscsi_if_rx+0x144/0x12bc [scsi_transport_iscsi] netlink_unicast+0x1e0/0x258 netlink_sendmsg+0x310/0x378 sock_sendmsg+0x4c/0x70 sock_write_iter+0x90/0xf0 __vfs_write+0x11c/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc Link: https://lore.kernel.org/r/EDBAAA0BBBA2AC4E9C8B6B81DEEE1D6915E3D4D2@dggeml505-mbx.china.huawei.com Signed-off-by: Bo Wu Reviewed-by: Zhiqiang Liu Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 417b868d8735..ed8d9709b9b9 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -24,6 +24,8 @@ #define ISCSI_TRANSPORT_VERSION "2.0-870" +#define ISCSI_SEND_MAX_ALLOWED 10 + #define CREATE_TRACE_POINTS #include @@ -3682,6 +3684,7 @@ iscsi_if_rx(struct sk_buff *skb) struct nlmsghdr *nlh; struct iscsi_uevent *ev; uint32_t group; + int retries = ISCSI_SEND_MAX_ALLOWED; nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || @@ -3712,6 +3715,10 @@ iscsi_if_rx(struct sk_buff *skb) break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); + if (err == -EAGAIN && --retries < 0) { + printk(KERN_WARNING "Send reply failed, error %d\n", err); + break; + } } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); } From 742d03aff89bd63f487219150c077fe446edeed2 Mon Sep 17 00:00:00 2001 From: Kay Friedrich Date: Wed, 27 Nov 2019 12:24:57 +0100 Subject: [PATCH 035/191] staging/wlan-ng: add CRC32 dependency in Kconfig [ Upstream commit 2740bd3351cd5a4351f458aabaa1c9b77de3867b ] wlan-ng uses the function crc32_le, but CRC32 wasn't a dependency of wlan-ng Co-developed-by: Michael Kupfer Signed-off-by: Michael Kupfer Signed-off-by: Kay Friedrich Link: https://lore.kernel.org/r/20191127112457.2301-1-kay.friedrich@fau.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/wlan-ng/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/wlan-ng/Kconfig b/drivers/staging/wlan-ng/Kconfig index ac136663fa8e..082c16a31616 100644 --- a/drivers/staging/wlan-ng/Kconfig +++ b/drivers/staging/wlan-ng/Kconfig @@ -4,6 +4,7 @@ config PRISM2_USB depends on WLAN && USB && CFG80211 select WIRELESS_EXT select WEXT_PRIV + select CRC32 help This is the wlan-ng prism 2.5/3 USB driver for a wide range of old USB wireless devices. From 29da513a3300f54981a6fbef1dfcb7c37978d42f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Oct 2019 10:52:52 +0200 Subject: [PATCH 036/191] drm/nouveau: Move the declaration of struct nouveau_conn_atom up a bit [ Upstream commit 37a68eab4cd92b507c9e8afd760fdc18e4fecac6 ] Place the declaration of struct nouveau_conn_atom above that of struct nouveau_connector. This commit makes no changes to the moved block what so ever, it just moves it up a bit. This is a preparation patch to fix some issues with connector handling on pre nv50 displays (which do not use atomic modesetting). Signed-off-by: Hans de Goede Reviewed-by: Lyude Paul Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_connector.h | 110 ++++++++++---------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index f43a8d63aef8..de9588420884 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -29,6 +29,7 @@ #include +#include #include #include #include @@ -44,6 +45,60 @@ struct dcb_output; struct nouveau_backlight; #endif +#define nouveau_conn_atom(p) \ + container_of((p), struct nouveau_conn_atom, state) + +struct nouveau_conn_atom { + struct drm_connector_state state; + + struct { + /* The enum values specifically defined here match nv50/gf119 + * hw values, and the code relies on this. + */ + enum { + DITHERING_MODE_OFF = 0x00, + DITHERING_MODE_ON = 0x01, + DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, + DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, + DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, + DITHERING_MODE_AUTO + } mode; + enum { + DITHERING_DEPTH_6BPC = 0x00, + DITHERING_DEPTH_8BPC = 0x02, + DITHERING_DEPTH_AUTO + } depth; + } dither; + + struct { + int mode; /* DRM_MODE_SCALE_* */ + struct { + enum { + UNDERSCAN_OFF, + UNDERSCAN_ON, + UNDERSCAN_AUTO, + } mode; + u32 hborder; + u32 vborder; + } underscan; + bool full; + } scaler; + + struct { + int color_vibrance; + int vibrant_hue; + } procamp; + + union { + struct { + bool dither:1; + bool scaler:1; + bool procamp:1; + }; + u8 mask; + } set; +}; + struct nouveau_connector { struct drm_connector base; enum dcb_connector_type type; @@ -121,61 +176,6 @@ extern int nouveau_ignorelid; extern int nouveau_duallink; extern int nouveau_hdmimhz; -#include -#define nouveau_conn_atom(p) \ - container_of((p), struct nouveau_conn_atom, state) - -struct nouveau_conn_atom { - struct drm_connector_state state; - - struct { - /* The enum values specifically defined here match nv50/gf119 - * hw values, and the code relies on this. - */ - enum { - DITHERING_MODE_OFF = 0x00, - DITHERING_MODE_ON = 0x01, - DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, - DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, - DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, - DITHERING_MODE_AUTO - } mode; - enum { - DITHERING_DEPTH_6BPC = 0x00, - DITHERING_DEPTH_8BPC = 0x02, - DITHERING_DEPTH_AUTO - } depth; - } dither; - - struct { - int mode; /* DRM_MODE_SCALE_* */ - struct { - enum { - UNDERSCAN_OFF, - UNDERSCAN_ON, - UNDERSCAN_AUTO, - } mode; - u32 hborder; - u32 vborder; - } underscan; - bool full; - } scaler; - - struct { - int color_vibrance; - int vibrant_hue; - } procamp; - - union { - struct { - bool dither:1; - bool scaler:1; - bool procamp:1; - }; - u8 mask; - } set; -}; - void nouveau_conn_attach_properties(struct drm_connector *); void nouveau_conn_reset(struct drm_connector *); struct drm_connector_state * From bcfa071bfeaa54fec44d819305d877d7b8a14b3c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Oct 2019 10:52:53 +0200 Subject: [PATCH 037/191] drm/nouveau: Fix drm-core using atomic code-paths on pre-nv50 hardware [ Upstream commit 64d17f25dcad518461ccf0c260544e1e379c5b35 ] We do not support atomic modesetting on pre-nv50 hardware, but until now our connector code was setting drm_connector->state on pre-nv50 hardware. This causes the core to enter atomic modesetting paths in at least: 1. drm_connector_get_encoder(), returning connector->state->best_encoder which is always 0, causing us to always report 0 as encoder_id in the drmModeConnector struct returned by drmModeGetConnector(). 2. drm_encoder_get_crtc(), returning NULL because uses_atomic get set, causing us to always report 0 as crtc_id in the drmModeEncoder struct returned by drmModeGetEncoder() Which in turn confuses userspace, at least plymouth thinks that the pipe has changed because of this and tries to reconfigure it unnecessarily. More in general we should not set drm_connector->state in the non-atomic code as this violates the drm-core's expectations. This commit fixes this by using a nouveau_conn_atom struct embedded in the nouveau_connector struct for property handling in the non-atomic case. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1706557 Signed-off-by: Hans de Goede Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_connector.c | 28 +++++++++++++++------ drivers/gpu/drm/nouveau/nouveau_connector.h | 6 +++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a442a955f98c..eb31c5b6c8e9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -245,14 +245,22 @@ nouveau_conn_atomic_duplicate_state(struct drm_connector *connector) void nouveau_conn_reset(struct drm_connector *connector) { + struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_conn_atom *asyc; - if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL)))) - return; + if (drm_drv_uses_atomic_modeset(connector->dev)) { + if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL)))) + return; + + if (connector->state) + nouveau_conn_atomic_destroy_state(connector, + connector->state); + + __drm_atomic_helper_connector_reset(connector, &asyc->state); + } else { + asyc = &nv_connector->properties_state; + } - if (connector->state) - nouveau_conn_atomic_destroy_state(connector, connector->state); - __drm_atomic_helper_connector_reset(connector, &asyc->state); asyc->dither.mode = DITHERING_MODE_AUTO; asyc->dither.depth = DITHERING_DEPTH_AUTO; asyc->scaler.mode = DRM_MODE_SCALE_NONE; @@ -276,8 +284,14 @@ void nouveau_conn_attach_properties(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - struct nouveau_conn_atom *armc = nouveau_conn_atom(connector->state); struct nouveau_display *disp = nouveau_display(dev); + struct nouveau_connector *nv_connector = nouveau_connector(connector); + struct nouveau_conn_atom *armc; + + if (drm_drv_uses_atomic_modeset(connector->dev)) + armc = nouveau_conn_atom(connector->state); + else + armc = &nv_connector->properties_state; /* Init DVI-I specific properties. */ if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) @@ -749,9 +763,9 @@ static int nouveau_connector_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct nouveau_conn_atom *asyc = nouveau_conn_atom(connector->state); struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder; + struct nouveau_conn_atom *asyc = &nv_connector->properties_state; struct drm_encoder *encoder = to_drm_encoder(nv_encoder); int ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index de9588420884..de84fb4708c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -118,6 +118,12 @@ struct nouveau_connector { #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT struct nouveau_backlight *backlight; #endif + /* + * Our connector property code expects a nouveau_conn_atom struct + * even on pre-nv50 where we do not support atomic. This embedded + * version gets used in the non atomic modeset case. + */ + struct nouveau_conn_atom properties_state; }; static inline struct nouveau_connector *nouveau_connector( From 0f7cb06912f63c9f9585d694e77585008e1731db Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 10 Dec 2019 12:15:44 +1000 Subject: [PATCH 038/191] drm/nouveau/kms/nv50-: fix panel scaling [ Upstream commit 3d1890ef8023e61934e070021b06cc9f417260c0 ] Under certain circumstances, encoder atomic_check() can be entered without adjusted_mode having been reset to the same as mode, which confuses the scaling logic and can lead to a misprogrammed display. Fix this by checking against the user-provided mode directly. Link: https://bugs.freedesktop.org/show_bug.cgi?id=108615 Link: https://gitlab.freedesktop.org/xorg/driver/xf86-video-nouveau/issues/464 Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index b5b1a34f896f..d735ea7e2d88 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -326,9 +326,9 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder, * same size as the native one (e.g. different * refresh rate) */ - if (adjusted_mode->hdisplay == native_mode->hdisplay && - adjusted_mode->vdisplay == native_mode->vdisplay && - adjusted_mode->type & DRM_MODE_TYPE_DRIVER) + if (mode->hdisplay == native_mode->hdisplay && + mode->vdisplay == native_mode->vdisplay && + mode->type & DRM_MODE_TYPE_DRIVER) break; mode = native_mode; asyc->scaler.full = true; From ae6e5f8d5108326390a444f4e04ee5c5053e5543 Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Tue, 3 Dec 2019 23:34:56 -0800 Subject: [PATCH 039/191] usb: gadget: fix wrong endpoint desc [ Upstream commit e5b5da96da50ef30abb39cb9f694e99366404d24 ] Gadget driver should always use config_ep_by_speed() to initialize usb_ep struct according to usb device's operating speed. Otherwise, usb_ep struct may be wrong if usb devcie's operating speed is changed. The key point in this patch is that we want to make sure the desc pointer in usb_ep struct will be set to NULL when gadget is disconnected. This will force it to call config_ep_by_speed() to correctly initialize usb_ep struct based on the new operating speed when gadget is re-connected later. Reviewed-by: Peter Chen Signed-off-by: EJ Hsu Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_ecm.c | 6 +++++- drivers/usb/gadget/function/f_rndis.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 6ce044008cf6..460d5d7c984f 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -621,8 +621,12 @@ static void ecm_disable(struct usb_function *f) DBG(cdev, "ecm deactivated\n"); - if (ecm->port.in_ep->enabled) + if (ecm->port.in_ep->enabled) { gether_disconnect(&ecm->port); + } else { + ecm->port.in_ep->desc = NULL; + ecm->port.out_ep->desc = NULL; + } usb_ep_disable(ecm->notify); ecm->notify->desc = NULL; diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index d48df36622b7..0d8e4a364ca6 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -618,6 +618,7 @@ static void rndis_disable(struct usb_function *f) gether_disconnect(&rndis->port); usb_ep_disable(rndis->notify); + rndis->notify->desc = NULL; } /*-------------------------------------------------------------------------*/ From b1954fda6b2799ca631f956917499933b876baae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Dec 2019 20:58:56 -0700 Subject: [PATCH 040/191] net: make socket read/write_iter() honor IOCB_NOWAIT [ Upstream commit ebfcd8955c0b52eb793bcbc9e71140e3d0cdb228 ] The socket read/write helpers only look at the file O_NONBLOCK. not the iocb IOCB_NOWAIT flag. This breaks users like preadv2/pwritev2 and io_uring that rely on not having the file itself marked nonblocking, but rather the iocb itself. Cc: netdev@vger.kernel.org Acked-by: David Miller Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index d7a106028f0e..ca8de9e1582d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -955,7 +955,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) .msg_iocb = iocb}; ssize_t res; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (iocb->ki_pos != 0) @@ -980,7 +980,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos != 0) return -ESPIPE; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (sock->type == SOCK_SEQPACKET) From e4086478da1e75ce839db1f75fec9650d86b4e52 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 9 Dec 2019 15:04:45 +0000 Subject: [PATCH 041/191] afs: Fix mountpoint parsing [ Upstream commit 158d58335393af3956a9c06f0816ee75ed1f1447 ] Each AFS mountpoint has strings that define the target to be mounted. This is required to end in a dot that is supposed to be stripped off. The string can include suffixes of ".readonly" or ".backup" - which are supposed to come before the terminal dot. To add to the confusion, the "fs lsmount" afs utility does not show the terminal dot when displaying the string. The kernel mount source string parser, however, assumes that the terminal dot marks the suffix and that the suffix is always "" and is thus ignored. In most cases, there is no suffix and this is not a problem - but if there is a suffix, it is lost and this affects the ability to mount the correct volume. The command line mount command, on the other hand, is expected not to include a terminal dot - so the problem doesn't arise there. Fix this by making sure that the dot exists and then stripping it when passing the string to the mount configuration. Fixes: bec5eb614130 ("AFS: Implement an autocell mount capability [ver #2]") Reported-by: Jonathan Billings Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Jonathan Billings Signed-off-by: Sasha Levin --- fs/afs/mntpt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index f532d6d3bd28..79bc5f1338ed 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -126,7 +126,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) if (src_as->cell) ctx->cell = afs_get_cell(src_as->cell); - if (size > PAGE_SIZE - 1) + if (size < 2 || size > PAGE_SIZE - 1) return -EINVAL; page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); @@ -140,7 +140,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) } buf = kmap(page); - ret = vfs_parse_fs_string(fc, "source", buf, size); + ret = -EINVAL; + if (buf[size - 1] == '.') + ret = vfs_parse_fs_string(fc, "source", buf, size - 1); kunmap(page); put_page(page); if (ret < 0) From 57a21cdbec1a61c954d9682564cfe25955705eb6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Dec 2019 08:56:04 +0000 Subject: [PATCH 042/191] afs: Fix creation calls in the dynamic root to fail with EOPNOTSUPP [ Upstream commit 1da4bd9f9d187f53618890d7b66b9628bbec3c70 ] Fix the lookup method on the dynamic root directory such that creation calls, such as mkdir, open(O_CREAT), symlink, etc. fail with EOPNOTSUPP rather than failing with some odd error (such as EEXIST). lookup() itself tries to create automount directories when it is invoked. These are cached locally in RAM and not committed to storage. Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Jonathan Billings Signed-off-by: Sasha Levin --- fs/afs/dynroot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 4150280509ff..7503899c0a1b 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -136,6 +136,9 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr ASSERTCMP(d_inode(dentry), ==, NULL); + if (flags & LOOKUP_CREATE) + return ERR_PTR(-EOPNOTSUPP); + if (dentry->d_name.len >= AFSNAMEMAX) { _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); From aae93615aa8ccdfd2752756e6d9a21c519ba6fe8 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 27 Nov 2019 17:57:50 +0100 Subject: [PATCH 043/191] raid5: need to set STRIPE_HANDLE for batch head [ Upstream commit a7ede3d16808b8f3915c8572d783530a82b2f027 ] With commit 6ce220dd2f8ea71d6afc29b9a7524c12e39f374a ("raid5: don't set STRIPE_HANDLE to stripe which is in batch list"), we don't want to set STRIPE_HANDLE flag for sh which is already in batch list. However, the stripe which is the head of batch list should set this flag, otherwise panic could happen inside init_stripe at BUG_ON(sh->batch_head), it is reproducible with raid5 on top of nvdimm devices per Xiao oberserved. Thanks for Xiao's effort to verify the change. Fixes: 6ce220dd2f8ea ("raid5: don't set STRIPE_HANDLE to stripe which is in batch list") Reported-by: Xiao Ni Tested-by: Xiao Ni Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 12a8ce83786e..36cd7c2fbf40 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5726,7 +5726,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) do_flush = false; } - if (!sh->batch_head) + if (!sh->batch_head || sh == sh->batch_head) set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); if ((!sh->batch_head || sh == sh->batch_head) && From 25432fa3ac169ea8200f7e9b55aa2407d4458f15 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Tue, 10 Dec 2019 10:42:25 +0800 Subject: [PATCH 044/191] md: raid1: check rdev before reference in raid1_sync_request func [ Upstream commit 028288df635f5a9addd48ac4677b720192747944 ] In raid1_sync_request func, rdev should be checked before reference. Signed-off-by: Zhiqiang Liu Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index bb29aeefcbd0..c7137f50bd1d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2781,7 +2781,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, write_targets++; } } - if (bio->bi_end_io) { + if (rdev && bio->bi_end_io) { atomic_inc(&rdev->nr_pending); bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; bio_set_dev(bio, rdev->bdev); From 9c320bb692623aac4decbf6f2b1ce4cd340b9bb2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 28 Nov 2019 10:26:41 +0100 Subject: [PATCH 045/191] s390/cpum_sf: Adjust sampling interval to avoid hitting sample limits [ Upstream commit 39d4a501a9ef55c57b51e3ef07fc2aeed7f30b3b ] Function perf_event_ever_overflow() and perf_event_account_interrupt() are called every time samples are processed by the interrupt handler. However function perf_event_account_interrupt() has checks to avoid being flooded with interrupts (more then 1000 samples are received per task_tick). Samples are then dropped and a PERF_RECORD_THROTTLED is added to the perf data. The perf subsystem limit calculation is: maximum sample frequency := 100000 --> 1 samples per 10 us task_tick = 10ms = 10000us --> 1000 samples per task_tick The work flow is measurement_alert() uses SDBT head and each SBDT points to 511 SDB pages, each with 126 sample entries. After processing 8 SBDs and for each valid sample calling: perf_event_overflow() perf_event_account_interrupts() there is a considerable amount of samples being dropped, especially when the sample frequency is very high and near the 100000 limit. To avoid the high amount of samples being dropped near the end of a task_tick time frame, increment the sampling interval in case of dropped events. The CPU Measurement sampling facility on the s390 supports only intervals, specifiing how many CPU cycles have to be executed before a sample is generated. Increase the interval when the samples being generated hit the task_tick limit. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7511b71d2931..47515c96032e 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1325,6 +1325,22 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", From 217c8169c6aab357dc11041a0706ea46b6b3f2fb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 29 Nov 2019 15:24:25 +0100 Subject: [PATCH 046/191] s390/cpum_sf: Avoid SBD overflow condition in irq handler [ Upstream commit 0539ad0b22877225095d8adef0c376f52cc23834 ] The s390 CPU Measurement sampling facility has an overflow condition which fires when all entries in a SBD are used. The measurement alert interrupt is triggered and reads out all samples in this SDB. It then tests the successor SDB, if this SBD is not full, the interrupt handler does not read any samples at all from this SDB The design waits for the hardware to fill this SBD and then trigger another meassurement alert interrupt. This scheme works nicely until an perf_event_overflow() function call discards the sample due to a too high sampling rate. The interrupt handler has logic to read out a partially filled SDB when the perf event overflow condition in linux common code is met. This causes the CPUM sampling measurement hardware and the PMU device driver to operate on the same SBD's trailer entry. This should not happen. This can be seen here using this trace: cpumsf_pmu_add: tear:0xb5286000 hw_perf_event_update: sdbt 0xb5286000 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 1. interrupt hw_perf_event_update: sdbt 0xb5286008 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 2. interrupt ... this goes on fine until... hw_perf_event_update: sdbt 0xb5286068 full 1 over 0 flush_all:0 perf_push_sample1: overflow one or more samples read from the IRQ handler are rejected by perf_event_overflow() and the IRQ handler advances to the next SDB and modifies the trailer entry of a partially filled SDB. hw_perf_event_update: sdbt 0xb5286070 full 0 over 0 flush_all:1 timestamp: 14:32:52.519953 Next time the IRQ handler is called for this SDB the trailer entry shows an overflow count of 19 missed entries. hw_perf_event_update: sdbt 0xb5286070 full 1 over 19 flush_all:1 timestamp: 14:32:52.970058 Remove access to a follow on SDB when event overflow happened. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_sf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 47515c96032e..fdb8083e7870 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1313,12 +1313,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ From e0f34320f4c1741596eb080592a49c609183c0e4 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 12 Dec 2019 11:12:12 +0200 Subject: [PATCH 047/191] RDMA/counter: Prevent auto-binding a QP which are not tracked with res [ Upstream commit 33df2f1929df4a1cb13303e344fbf8a75f0dc41f ] Some QPs (e.g. XRC QP) are not tracked in kernel, in this case they have an invalid res and should not be bound to any dynamically-allocated counter in auto mode. This fixes below call trace: BUG: kernel NULL pointer dereference, address: 0000000000000390 PGD 80000001a7233067 P4D 80000001a7233067 PUD 1a7215067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 24822 Comm: ibv_xsrq_pingpo Not tainted 5.4.0-rc5+ #21 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 RIP: 0010:rdma_counter_bind_qp_auto+0x142/0x270 [ib_core] Code: e1 48 85 c0 48 89 c2 0f 84 bc 00 00 00 49 8b 06 48 39 42 48 75 d6 40 3a aa 90 00 00 00 75 cd 49 8b 86 00 01 00 00 48 8b 4a 28 <8b> 80 90 03 00 00 39 81 90 03 00 00 75 b4 85 c0 74 b0 48 8b 04 24 RSP: 0018:ffffc900003f39c0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: ffff88820020ec00 RSI: 0000000000000004 RDI: ffffffffffffffc0 RBP: 0000000000000001 R08: ffff888224149ff0 R09: ffffc900003f3968 R10: ffffffffffffffff R11: ffff8882249c5848 R12: ffffffffffffffff R13: ffff88821d5aca50 R14: ffff8881f7690800 R15: ffff8881ff890000 FS: 00007fe53a3e1740(0000) GS:ffff888237b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000390 CR3: 00000001a7292006 CR4: 00000000003606a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: _ib_modify_qp+0x3a4/0x3f0 [ib_core] ? lookup_get_idr_uobject.part.8+0x23/0x40 [ib_uverbs] modify_qp+0x322/0x3e0 [ib_uverbs] ib_uverbs_modify_qp+0x43/0x70 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xb1/0xf0 [ib_uverbs] ib_uverbs_run_method+0x6be/0x760 [ib_uverbs] ? uverbs_disassociate_api+0xd0/0xd0 [ib_uverbs] ib_uverbs_cmd_verbs+0x18d/0x3a0 [ib_uverbs] ? get_acl+0x1a/0x120 ? __alloc_pages_nodemask+0x15d/0x2c0 ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs] do_vfs_ioctl+0xa5/0x610 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x48/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 99fa331dc862 ("RDMA/counter: Add "auto" configuration mode support") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Reviewed-by: Ido Kalir Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-2-leon@kernel.org Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/core/counters.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 680ad27f497d..023478107f0e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -282,6 +282,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; + if (!qp->res.valid) + return 0; + if (!rdma_is_port_valid(dev, port)) return -EINVAL; From c251d5f5b125c651413dc3bf230a8f25d161adb6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 11:12:13 +0200 Subject: [PATCH 048/191] IB/mlx4: Follow mirror sequence of device add during device removal [ Upstream commit 89f988d93c62384758b19323c886db917a80c371 ] Current code device add sequence is: ib_register_device() ib_mad_init() init_sriov_init() register_netdev_notifier() Therefore, the remove sequence should be, unregister_netdev_notifier() close_sriov() mad_cleanup() ib_unregister_device() However it is not above. Hence, make do above remove sequence. Fixes: fa417f7b520ee ("IB/mlx4: Add support for IBoE") Signed-off-by: Parav Pandit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-3-leon@kernel.org Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..907d99822bf0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3008,16 +3008,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->ib_active = false; flush_workqueue(wq); - mlx4_ib_close_sriov(ibdev); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); - mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + mlx4_ib_close_sriov(ibdev); + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); From e56db866ceac1fbbbbda57d305764b9d957819e1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 12 Dec 2019 11:12:14 +0200 Subject: [PATCH 049/191] IB/mlx5: Fix steering rule of drop and count [ Upstream commit ed9085fed9d95d5921582e3c8474f3736c5d2782 ] There are two flow rule destinations: QP and packet. While users are setting DROP packet rule, the QP should not be set as a destination. Fixes: 3b3233fbf02e ("IB/mlx5: Add flow counters binding support") Signed-off-by: Maor Gottlieb Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-4-leon@kernel.org Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..e1cfbedefcbc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3548,10 +3548,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); - if (dst) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec, @@ -3564,6 +3560,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); @@ -3604,10 +3605,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + if (!dest_num) rule_dst = NULL; - dest_num = 0; - } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; From ec177a46e9d0d35f81b254a616ff03fe757f14f6 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 10 Dec 2019 14:53:05 +0000 Subject: [PATCH 050/191] xen-blkback: prevent premature module unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa2ac657f9783f0891b2935490afe9a7fd29d3fa ] Objects allocated by xen_blkif_alloc come from the 'blkif_cache' kmem cache. This cache is destoyed when xen-blkif is unloaded so it is necessary to wait for the deferred free routine used for such objects to complete. This necessity was missed in commit 14855954f636 "xen-blkback: allow module to be cleanly unloaded". This patch fixes the problem by taking/releasing extra module references in xen_blkif_alloc/free() respectively. Signed-off-by: Paul Durrant Reviewed-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/block/xen-blkback/xenbus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index b90dbcd99c03..c4cd68116e7f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -171,6 +171,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->domid = domid; atomic_set(&blkif->refcnt, 1); init_completion(&blkif->drain_complete); + + /* + * Because freeing back to the cache may be deferred, it is not + * safe to unload the module (and hence destroy the cache) until + * this has completed. To prevent premature unloading, take an + * extra module reference here and release only when the object + * has been freed back to the cache. + */ + __module_get(THIS_MODULE); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); return blkif; @@ -320,6 +329,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) /* Make sure everything is drained before shutting down */ kmem_cache_free(xen_blkif_cachep, blkif); + module_put(THIS_MODULE); } int __init xen_blkif_interface_init(void) From 33fa919df6643a97e696bfce022f5a3b0a8205ff Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Dec 2019 15:17:50 +0100 Subject: [PATCH 051/191] xen/balloon: fix ballooned page accounting without hotplug enabled [ Upstream commit c673ec61ade89bf2f417960f986bc25671762efb ] When CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not defined reserve_additional_memory() will set balloon_stats.target_pages to a wrong value in case there are still some ballooned pages allocated via alloc_xenballooned_pages(). This will result in balloon_process() no longer be triggered when ballooned pages are freed in batches. Reported-by: Nicholas Tsirakis Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/balloon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 5bae515c8e25..bed90d612e48 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -395,7 +395,8 @@ static struct notifier_block xen_memory_nb = { #else static enum bp_state reserve_additional_memory(void) { - balloon_stats.target_pages = balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages + + balloon_stats.target_unpopulated; return BP_ECANCELED; } #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ From 0360ce1eafbd00fb5169d8c19438554cde79cbb9 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 25 Sep 2019 15:39:12 +0100 Subject: [PATCH 052/191] PM / hibernate: memory_bm_find_bit(): Tighten node optimisation [ Upstream commit da6043fe85eb5ec621e34a92540735dcebbea134 ] When looking for a bit by number we make use of the cached result from the preceding lookup to speed up operation. Firstly we check if the requested pfn is within the cached zone and if not lookup the new zone. We then check if the offset for that pfn falls within the existing cached node. This happens regardless of whether the node is within the zone we are now scanning. With certain memory layouts it is possible for this to false trigger creating a temporary alias for the pfn to a different bit. This leads the hibernation code to free memory which it was never allocated with the expected fallout. Ensure the zone we are scanning matches the cached zone before considering the cached node. Deep thanks go to Andrea for many, many, many hours of hacking and testing that went into cornering this bug. Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Andy Whitcroft Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/snapshot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 83105874f255..26b9168321e7 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -734,8 +734,15 @@ zone_found: * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ + + /* + * If the zone we wish to scan is the the current zone and the + * pfn falls into the current node then we do not need to walk + * the tree. + */ node = bm->cur.node; - if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + if (zone == bm->cur.zone && + ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; From 65e8768eb2642c697029b29673399b5418b2939e Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 19 Dec 2019 14:12:15 +0800 Subject: [PATCH 053/191] ALSA: hda/realtek - Add Bass Speaker and fixed dac for bass speaker [ Upstream commit e79c22695abd3b75a6aecf4ea4b9607e8d82c49c ] Dell has new platform which has dual speaker connecting. They want dual speaker which use same dac for output. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/229c7efa2b474a16b7d8a916cd096b68@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e1229dbad6b2..dfcd0e611068 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5896,6 +5896,8 @@ enum { ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, ALC294_FIXUP_ASUS_INTSPK_GPIO, + ALC289_FIXUP_DELL_SPK2, + ALC289_FIXUP_DUAL_SPK, }; static const struct hda_fixup alc269_fixups[] = { @@ -6993,6 +6995,21 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC }, + [ALC289_FIXUP_DELL_SPK2] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x17, 0x90170130 }, /* bass spk */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE + }, + [ALC289_FIXUP_DUAL_SPK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC289_FIXUP_DELL_SPK2 + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7065,6 +7082,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From f1f7ec8e5b5408b8723f2f29847dcc3488a4246c Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 30 Dec 2019 11:11:18 +0800 Subject: [PATCH 054/191] ALSA: hda/realtek - Enable the bass speaker of ASUS UX431FLC [ Upstream commit 48e01504cf5315cbe6de9b7412e792bfcc3dd9e1 ] ASUS reported that there's an bass speaker in addition to internal speaker and it uses DAC 0x02. It was not enabled in the commit 436e25505f34 ("ALSA: hda/realtek - Enable internal speaker of ASUS UX431FLC") which only enables the amplifier and the front speaker. This commit enables the bass speaker on top of the aforementioned work to improve the acoustic experience. Fixes: 436e25505f34 ("ALSA: hda/realtek - Enable internal speaker of ASUS UX431FLC") Signed-off-by: Chris Chiu Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20191230031118.95076-1-chiu@endlessm.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 38 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dfcd0e611068..e849cf681e23 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5893,11 +5893,12 @@ enum { ALC256_FIXUP_ASUS_HEADSET_MIC, ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, - ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, - ALC294_FIXUP_ASUS_INTSPK_GPIO, ALC289_FIXUP_DELL_SPK2, ALC289_FIXUP_DUAL_SPK, + ALC294_FIXUP_SPK2_TO_DAC1, + ALC294_FIXUP_ASUS_DUAL_SPK, + }; static const struct hda_fixup alc269_fixups[] = { @@ -6968,16 +6969,6 @@ static const struct hda_fixup alc269_fixups[] = { { } } }, - [ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC] = { - .type = HDA_FIXUP_PINS, - .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x411111f0 }, /* disable confusing internal speaker */ - { 0x19, 0x04a11150 }, /* use as headset mic, without its own jack detect */ - { } - }, - .chained = true, - .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC - }, [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6988,13 +6979,6 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, - [ALC294_FIXUP_ASUS_INTSPK_GPIO] = { - .type = HDA_FIXUP_FUNC, - /* The GPIO must be pulled to initialize the AMP */ - .v.func = alc_fixup_gpio4, - .chained = true, - .chain_id = ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC - }, [ALC289_FIXUP_DELL_SPK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -7010,6 +6994,20 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DELL_SPK2 }, + [ALC294_FIXUP_SPK2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC + }, + [ALC294_FIXUP_ASUS_DUAL_SPK] = { + .type = HDA_FIXUP_FUNC, + /* The GPIO must be pulled to initialize the AMP */ + .v.func = alc_fixup_gpio4, + .chained = true, + .chain_id = ALC294_FIXUP_SPK2_TO_DAC1 + }, + }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7171,7 +7169,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), - SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_GPIO), + SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), From eef2e98832a1cbea9a66b46b5a42d38fc96bccf2 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 18 Oct 2019 15:38:47 +0800 Subject: [PATCH 055/191] PCI: Add a helper to check Power Resource Requirements _PR3 existence [ Upstream commit 52525b7a3cf82adec5c6cf0ecbd23ff228badc94 ] A driver may want to know the existence of _PR3, to choose different runtime suspend behavior. A user will be add in next patch. This is mostly the same as nouveau_pr3_present(). Signed-off-by: Kai-Heng Feng Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20191018073848.14590-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 18 ++++++++++++++++++ include/linux/pci.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a97e2571a527..fcfaadc774ee 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5854,6 +5854,24 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, return 0; } +#ifdef CONFIG_ACPI +bool pci_pr3_present(struct pci_dev *pdev) +{ + struct acpi_device *adev; + + if (acpi_disabled) + return false; + + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return false; + + return adev->power.flags.power_resources && + acpi_has_method(adev->handle, "_PR3"); +} +EXPORT_SYMBOL_GPL(pci_pr3_present); +#endif + /** * pci_add_dma_alias - Add a DMA devfn alias for a device * @dev: the PCI device for which alias is added diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..1d15c5d49cdd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2310,9 +2310,11 @@ struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus); void pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *)); +bool pci_pr3_present(struct pci_dev *pdev); #else static inline struct irq_domain * pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } +static bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_EEH From 891f29feeb36415475588d834a26a9cb0c44a062 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 18 Oct 2019 15:38:48 +0800 Subject: [PATCH 056/191] ALSA: hda: Allow HDA to be runtime suspended when dGPU is not bound to a driver [ Upstream commit bacd861452d2be86a4df341b12e32db7dac8021e ] Nvidia proprietary driver doesn't support runtime power management, so when a user only wants to use the integrated GPU, it's a common practice to let dGPU not to bind any driver, and let its upstream port to be runtime suspended. At the end of runtime suspension the port uses platform power management to disable power through _OFF method of power resource, which is listed by _PR3. After commit b516ea586d71 ("PCI: Enable NVIDIA HDA controllers"), when the dGPU comes with an HDA function, the HDA won't be suspended if the dGPU is unbound, so the power resource can't be turned off by its upstream port driver. Commit 37a3a98ef601 ("ALSA: hda - Enable runtime PM only for discrete GPU") only allows HDA to be runtime suspended once GPU is bound, to keep APU's HDA working. However, HDA on dGPU isn't that useful if dGPU is not bound to any driver. So let's relax the runtime suspend requirement for dGPU's HDA function, to disable the power source to save lots of power. BugLink: https://bugs.launchpad.net/bugs/1840835 Fixes: b516ea586d71 ("PCI: Enable NVIDIA HDA controllers") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20191018073848.14590-2-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_intel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 86a416cdeb29..4e757aa9d322 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1280,11 +1280,17 @@ static void init_vga_switcheroo(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); struct pci_dev *p = get_bound_vga(chip->pci); + struct pci_dev *parent; if (p) { dev_info(chip->card->dev, "Handle vga_switcheroo audio client\n"); hda->use_vga_switcheroo = 1; - chip->bus.keep_power = 1; /* cleared in either gpu_bound op or codec probe */ + + /* cleared in either gpu_bound op or codec probe, or when its + * upstream port has _PR3 (i.e. dGPU). + */ + parent = pci_upstream_bridge(p); + chip->bus.keep_power = parent ? !pci_pr3_present(parent) : 1; chip->driver_caps |= AZX_DCAPS_PM_RUNTIME; pci_dev_put(p); } From 9538659160cceadfeb5910e0d570c1f1df18e4b7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Oct 2019 16:25:20 +0200 Subject: [PATCH 057/191] PCI: Fix missing inline for pci_pr3_present() [ Upstream commit 46b4bff6572b0552b1ee062043621e4b252638d8 ] The inline prefix was missing in the dummy function pci_pr3_present() definition. Fix it. Reported-by: kbuild test robot Fixes: 52525b7a3cf8 ("PCI: Add a helper to check Power Resource Requirements _PR3 existence") Link: https://lore.kernel.org/r/201910212111.qHm6OcWx%lkp@intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci.h b/include/linux/pci.h index 1d15c5d49cdd..be529d311122 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2314,7 +2314,7 @@ bool pci_pr3_present(struct pci_dev *pdev); #else static inline struct irq_domain * pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } -static bool pci_pr3_present(struct pci_dev *pdev) { return false; } +static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_EEH From d53678610b863f7aa599f9c080b59674cb3aab07 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 29 Nov 2019 15:40:27 +0100 Subject: [PATCH 058/191] ALSA: hda - fixup for the bass speaker on Lenovo Carbon X1 7th gen [ Upstream commit d2cd795c4ece1a24fda170c35eeb4f17d9826cbb ] The auto-parser assigns the bass speaker to DAC3 (NID 0x06) which is without the volume control. I do not see a reason to use DAC2, because the shared output to all speakers produces the sufficient and well balanced sound. The stereo support is enough for this purpose (laptop). Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20191129144027.14765-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e849cf681e23..62a471b5fc87 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5547,6 +5547,16 @@ static void alc295_fixup_disable_dac3(struct hda_codec *codec, } } +/* force NID 0x17 (Bass Speaker) to DAC1 to share it with the main speaker */ +static void alc285_fixup_speaker2_to_dac1(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + hda_nid_t conn[1] = { 0x02 }; + snd_hda_override_conn_list(codec, 0x17, 1, conn); + } +} + /* Hook to update amp GPIO4 for automute */ static void alc280_hp_gpio4_automute_hook(struct hda_codec *codec, struct hda_jack_callback *jack) @@ -5849,6 +5859,7 @@ enum { ALC225_FIXUP_DISABLE_MIC_VREF, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC295_FIXUP_DISABLE_DAC3, + ALC285_FIXUP_SPEAKER2_TO_DAC1, ALC280_FIXUP_HP_HEADSET_MIC, ALC221_FIXUP_HP_FRONT_MIC, ALC292_FIXUP_TPT460, @@ -6652,6 +6663,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_disable_dac3, }, + [ALC285_FIXUP_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -7241,6 +7256,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -7425,6 +7441,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DELL_SPK_NOISE, .name = "dell-spk-noise"}, {.id = ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc225-dell1"}, {.id = ALC295_FIXUP_DISABLE_DAC3, .name = "alc295-disable-dac3"}, + {.id = ALC285_FIXUP_SPEAKER2_TO_DAC1, .name = "alc285-speaker2-to-dac1"}, {.id = ALC280_FIXUP_HP_HEADSET_MIC, .name = "alc280-hp-headset"}, {.id = ALC221_FIXUP_HP_FRONT_MIC, .name = "alc221-hp-mic"}, {.id = ALC298_FIXUP_SPK_VOLUME, .name = "alc298-spk-volume"}, From d56c69c5ef98acd80d1a06467a06cc3e2d1f28b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2019 12:59:33 -0800 Subject: [PATCH 059/191] tcp: fix data-race in tcp_recvmsg() [ Upstream commit a5a7daa52edb5197a3b696afee13ef174dc2e993 ] Reading tp->recvmsg_inq after socket lock is released raises a KCSAN warning [1] Replace has_tss & has_cmsg by cmsg_flags and make sure to not read tp->recvmsg_inq a second time. [1] BUG: KCSAN: data-race in tcp_chrono_stop / tcp_recvmsg write to 0xffff888126adef24 of 2 bytes by interrupt on cpu 0: tcp_chrono_set net/ipv4/tcp_output.c:2309 [inline] tcp_chrono_stop+0x14c/0x280 net/ipv4/tcp_output.c:2338 tcp_clean_rtx_queue net/ipv4/tcp_input.c:3165 [inline] tcp_ack+0x274f/0x3170 net/ipv4/tcp_input.c:3688 tcp_rcv_established+0x37e/0xf50 net/ipv4/tcp_input.c:5696 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 tcp_v4_rcv+0x19dc/0x1bb0 net/ipv4/tcp_ipv4.c:1942 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5214 napi_skb_finish net/core/dev.c:5677 [inline] napi_gro_receive+0x28f/0x330 net/core/dev.c:5710 read to 0xffff888126adef25 of 1 bytes by task 7275 on cpu 1: tcp_recvmsg+0x77b/0x1a30 net/ipv4/tcp.c:2187 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 sock_read_iter+0x15f/0x1e0 net/socket.c:967 call_read_iter include/linux/fs.h:1889 [inline] new_sync_read+0x389/0x4f0 fs/read_write.c:414 __vfs_read+0xb1/0xc0 fs/read_write.c:427 vfs_read fs/read_write.c:461 [inline] vfs_read+0x143/0x2c0 fs/read_write.c:446 ksys_read+0xd5/0x1b0 fs/read_write.c:587 __do_sys_read fs/read_write.c:597 [inline] __se_sys_read fs/read_write.c:595 [inline] __x64_sys_read+0x4c/0x60 fs/read_write.c:595 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 7275 Comm: sshd Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: b75eba76d3d7 ("tcp: send in-queue bytes in cmsg upon read") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d8876f0e9672..e537a4b6531b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1958,8 +1958,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, struct sk_buff *skb, *last; u32 urg_hole = 0; struct scm_timestamping_internal tss; - bool has_tss = false; - bool has_cmsg; + int cmsg_flags; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); @@ -1974,7 +1973,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (sk->sk_state == TCP_LISTEN) goto out; - has_cmsg = tp->recvmsg_inq; + cmsg_flags = tp->recvmsg_inq ? 1 : 0; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2157,8 +2156,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); - has_tss = true; - has_cmsg = true; + cmsg_flags |= 2; } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; @@ -2183,10 +2181,10 @@ found_fin_ok: release_sock(sk); - if (has_cmsg) { - if (has_tss) + if (cmsg_flags) { + if (cmsg_flags & 2) tcp_recv_timestamp(msg, sk, &tss); - if (tp->recvmsg_inq) { + if (cmsg_flags & 1) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } From d7af03159b88e72324639d216871aaf12c203b9e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 30 Nov 2019 17:50:26 -0800 Subject: [PATCH 060/191] shmem: pin the file in shmem_fault() if mmap_sem is dropped [ Upstream commit 8897c1b1a1795cab23d5ac13e4e23bf0b5f4e0c6 ] syzbot found the following crash: BUG: KASAN: use-after-free in perf_trace_lock_acquire+0x401/0x530 include/trace/events/lock.h:13 Read of size 8 at addr ffff8880a5cf2c50 by task syz-executor.0/26173 CPU: 0 PID: 26173 Comm: syz-executor.0 Not tainted 5.3.0-rc6 #146 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: perf_trace_lock_acquire+0x401/0x530 include/trace/events/lock.h:13 trace_lock_acquire include/trace/events/lock.h:13 [inline] lock_acquire+0x2de/0x410 kernel/locking/lockdep.c:4411 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:338 [inline] shmem_fault+0x5ec/0x7b0 mm/shmem.c:2034 __do_fault+0x111/0x540 mm/memory.c:3083 do_shared_fault mm/memory.c:3535 [inline] do_fault mm/memory.c:3613 [inline] handle_pte_fault mm/memory.c:3840 [inline] __handle_mm_fault+0x2adf/0x3f20 mm/memory.c:3964 handle_mm_fault+0x1b5/0x6b0 mm/memory.c:4001 do_user_addr_fault arch/x86/mm/fault.c:1441 [inline] __do_page_fault+0x536/0xdd0 arch/x86/mm/fault.c:1506 do_page_fault+0x38/0x590 arch/x86/mm/fault.c:1530 page_fault+0x39/0x40 arch/x86/entry/entry_64.S:1202 It happens if the VMA got unmapped under us while we dropped mmap_sem and inode got freed. Pinning the file if we drop mmap_sem fixes the issue. Link: http://lkml.kernel.org/r/20190927083908.rhifa4mmaxefc24r@box Signed-off-by: Kirill A. Shutemov Reported-by: syzbot+03ee87124ee05af991bd@syzkaller.appspotmail.com Acked-by: Johannes Weiner Reviewed-by: Matthew Wilcox (Oracle) Cc: Hillf Danton Cc: Hugh Dickins Cc: Josef Bacik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7a22e3e03d11..6074714fdbd4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2022,16 +2022,14 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) shmem_falloc->waitq && vmf->pgoff >= shmem_falloc->start && vmf->pgoff < shmem_falloc->next) { + struct file *fpin; wait_queue_head_t *shmem_falloc_waitq; DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); ret = VM_FAULT_NOPAGE; - if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { - /* It's polite to up mmap_sem if we can */ - up_read(&vma->vm_mm->mmap_sem); + fpin = maybe_unlock_mmap_for_io(vmf, NULL); + if (fpin) ret = VM_FAULT_RETRY; - } shmem_falloc_waitq = shmem_falloc->waitq; prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, @@ -2049,6 +2047,9 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) spin_lock(&inode->i_lock); finish_wait(shmem_falloc_waitq, &shmem_fault_wait); spin_unlock(&inode->i_lock); + + if (fpin) + fput(fpin); return ret; } spin_unlock(&inode->i_lock); From d25bf5a341468935906d87e9aa7c95c2175651c9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 9 Oct 2019 13:48:09 +0200 Subject: [PATCH 061/191] taskstats: fix data-race [ Upstream commit 0b8d616fb5a8ffa307b1d3af37f55c15dae14f28 ] When assiging and testing taskstats in taskstats_exit() there's a race when setting up and reading sig->stats when a thread-group with more than one thread exits: write to 0xffff8881157bbe10 of 8 bytes by task 7951 on cpu 0: taskstats_tgid_alloc kernel/taskstats.c:567 [inline] taskstats_exit+0x6b7/0x717 kernel/taskstats.c:596 do_exit+0x2c2/0x18e0 kernel/exit.c:864 do_group_exit+0xb4/0x1c0 kernel/exit.c:983 get_signal+0x2a2/0x1320 kernel/signal.c:2734 do_signal+0x3b/0xc00 arch/x86/kernel/signal.c:815 exit_to_usermode_loop+0x250/0x2c0 arch/x86/entry/common.c:159 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x2d7/0x2f0 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffff8881157bbe10 of 8 bytes by task 7949 on cpu 1: taskstats_tgid_alloc kernel/taskstats.c:559 [inline] taskstats_exit+0xb2/0x717 kernel/taskstats.c:596 do_exit+0x2c2/0x18e0 kernel/exit.c:864 do_group_exit+0xb4/0x1c0 kernel/exit.c:983 __do_sys_exit_group kernel/exit.c:994 [inline] __se_sys_exit_group kernel/exit.c:992 [inline] __x64_sys_exit_group+0x2e/0x30 kernel/exit.c:992 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by using smp_load_acquire() and smp_store_release(). Reported-by: syzbot+c5d03165a1bd1dead0c1@syzkaller.appspotmail.com Fixes: 34ec12349c8a ("taskstats: cleanup ->signal->stats allocation") Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Marco Elver Reviewed-by: Will Deacon Reviewed-by: Andrea Parri Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20191009114809.8643-1-christian.brauner@ubuntu.com Signed-off-by: Sasha Levin --- kernel/taskstats.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 13a0f2e6ebc2..e2ac0e37c4ae 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -554,25 +554,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; - struct taskstats *stats; + struct taskstats *stats_new, *stats; - if (sig->stats || thread_group_empty(tsk)) - goto ret; + /* Pairs with smp_store_release() below. */ + stats = smp_load_acquire(&sig->stats); + if (stats || thread_group_empty(tsk)) + return stats; /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; + stats = sig->stats; + if (!stats) { + /* + * Pairs with smp_store_release() above and order the + * kmem_cache_zalloc(). + */ + smp_store_release(&sig->stats, stats_new); + stats = stats_new; + stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); - if (stats) - kmem_cache_free(taskstats_cache, stats); -ret: - return sig->stats; + if (stats_new) + kmem_cache_free(taskstats_cache, stats_new); + + return stats; } /* Send pid data out on exit */ From d8acc0f2c2989850e0e2e9122b225c89382bc2ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Dec 2019 16:12:24 +0100 Subject: [PATCH 062/191] ALSA: hda - Downgrade error message for single-cmd fallback [ Upstream commit 475feec0c41ad71cb7d02f0310e56256606b57c5 ] We made the error message for the CORB/RIRB communication clearer by upgrading to dev_WARN() so that user can notice better. But this struck us like a boomerang: now it caught syzbot and reported back as a fatal issue although it's not really any too serious bug that worth for stopping the whole system. OK, OK, let's be softy, downgrade it to the standard dev_err() again. Fixes: dd65f7e19c69 ("ALSA: hda - Show the fatal CORB/RIRB error more clearly") Reported-by: syzbot+b3028ac3933f5c466389@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20191216151224.30013-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 6387c7e90918..76b507058cb4 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -884,7 +884,7 @@ static int azx_rirb_get_response(struct hdac_bus *bus, unsigned int addr, return -EAGAIN; /* give a chance to retry */ } - dev_WARN(chip->card->dev, + dev_err(chip->card->dev, "azx_get_response timeout, switching to single_cmd mode: last cmd=0x%08x\n", bus->last_cmd[addr]); chip->single_cmd = 1; From 2922cf593f854ae2fe65e8666bb0330fcbdbddde Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 18 Dec 2019 00:59:29 +0100 Subject: [PATCH 063/191] netfilter: nft_tproxy: Fix port selector on Big Endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8cb4ec44de42b99b92399b4d1daf3dc430ed0186 ] On Big Endian architectures, u16 port value was extracted from the wrong parts of u32 sreg_port, just like commit 10596608c4d62 ("netfilter: nf_tables: fix mismatch in big-endian system") describes. Fixes: 4ed8eb6570a49 ("netfilter: nf_tables: Add native tproxy support") Signed-off-by: Phil Sutter Acked-by: Florian Westphal Acked-by: Máté Eckl Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_tproxy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index f92a82c73880..95980154ef02 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -50,7 +50,7 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr); if (priv->sreg_port) - tport = regs->data[priv->sreg_port]; + tport = nft_reg_load16(®s->data[priv->sreg_port]); if (!tport) tport = hp->dest; @@ -117,7 +117,7 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr); if (priv->sreg_port) - tport = regs->data[priv->sreg_port]; + tport = nft_reg_load16(®s->data[priv->sreg_port]); if (!tport) tport = hp->dest; From 943cd69efac437d82a7aea0659fccbcc071730de Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 28 Dec 2019 07:05:48 +0800 Subject: [PATCH 064/191] block: add bio_truncate to fix guard_bio_eod [ Upstream commit 85a8ce62c2eabe28b9d76ca4eecf37922402df93 ] Some filesystem, such as vfat, may send bio which crosses device boundary, and the worse thing is that the IO request starting within device boundaries can contain more than one segment past EOD. Commit dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD errors") tries to fix this issue by returning -EIO for this situation. However, this way lets fs user code lose chance to handle -EIO, then sync_inodes_sb() may hang for ever. Also the current truncating on last segment is dangerous by updating the last bvec, given bvec table becomes not immutable any more, and fs bio users may not retrieve the truncated pages via bio_for_each_segment_all() in its .end_io callback. Fixes this issue by supporting multi-segment truncating. And the approach is simpler: - just update bio size since block layer can make correct bvec with the updated bio size. Then bvec table becomes really immutable. - zero all truncated segments for read bio Cc: Carlos Maiolino Cc: linux-fsdevel@vger.kernel.org Fixed-by: dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD errors") Reported-by: syzbot+2b9e54155c8c25d8d165@syzkaller.appspotmail.com Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bio.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/buffer.c | 25 +------------------------ include/linux/bio.h | 1 + 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/block/bio.c b/block/bio.c index 43df756b68c4..c822ceb7c4de 100644 --- a/block/bio.c +++ b/block/bio.c @@ -535,6 +535,45 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) } EXPORT_SYMBOL(zero_fill_bio_iter); +void bio_truncate(struct bio *bio, unsigned new_size) +{ + struct bio_vec bv; + struct bvec_iter iter; + unsigned int done = 0; + bool truncated = false; + + if (new_size >= bio->bi_iter.bi_size) + return; + + if (bio_data_dir(bio) != READ) + goto exit; + + bio_for_each_segment(bv, bio, iter) { + if (done + bv.bv_len > new_size) { + unsigned offset; + + if (!truncated) + offset = new_size - done; + else + offset = 0; + zero_user(bv.bv_page, offset, bv.bv_len - offset); + truncated = true; + } + done += bv.bv_len; + } + + exit: + /* + * Don't touch bvec table here and make it really immutable, since + * fs bio user has to retrieve all pages via bio_for_each_segment_all + * in its .end_bio() callback. + * + * It is enough to truncate bio by updating .bi_size since we can make + * correct bvec with the updated .bi_size for drivers. + */ + bio->bi_iter.bi_size = new_size; +} + /** * bio_put - release a reference to a bio * @bio: bio to release reference to diff --git a/fs/buffer.c b/fs/buffer.c index 86a38b979323..7744488f7bde 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2994,8 +2994,6 @@ static void end_bio_bh_io_sync(struct bio *bio) void guard_bio_eod(int op, struct bio *bio) { sector_t maxsector; - struct bio_vec *bvec = bio_last_bvec_all(bio); - unsigned truncated_bytes; struct hd_struct *part; rcu_read_lock(); @@ -3021,28 +3019,7 @@ void guard_bio_eod(int op, struct bio *bio) if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) return; - /* Uhhuh. We've got a bio that straddles the device size! */ - truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); - - /* - * The bio contains more than one segment which spans EOD, just return - * and let IO layer turn it into an EIO - */ - if (truncated_bytes > bvec->bv_len) - return; - - /* Truncate the bio.. */ - bio->bi_iter.bi_size -= truncated_bytes; - bvec->bv_len -= truncated_bytes; - - /* ..and clear the end of the buffer for reads */ - if (op == REQ_OP_READ) { - struct bio_vec bv; - - mp_bvec_last_segment(bvec, &bv); - zero_user(bv.bv_page, bv.bv_offset + bv.bv_len, - truncated_bytes); - } + bio_truncate(bio, maxsector << 9); } static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, diff --git a/include/linux/bio.h b/include/linux/bio.h index 3cdb84cdc488..853d92ceee64 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -470,6 +470,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); +void bio_truncate(struct bio *bio, unsigned new_size); static inline void zero_fill_bio(struct bio *bio) { From 173fa52f7fd25519deb286173d80ed742007b28e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:50:22 -0800 Subject: [PATCH 065/191] mm: drop mmap_sem before calling balance_dirty_pages() in write fault [ Upstream commit 89b15332af7c0312a41e50846819ca6613b58b4c ] One of our services is observing hanging ps/top/etc under heavy write IO, and the task states show this is an mmap_sem priority inversion: A write fault is holding the mmap_sem in read-mode and waiting for (heavily cgroup-limited) IO in balance_dirty_pages(): balance_dirty_pages+0x724/0x905 balance_dirty_pages_ratelimited+0x254/0x390 fault_dirty_shared_page.isra.96+0x4a/0x90 do_wp_page+0x33e/0x400 __handle_mm_fault+0x6f0/0xfa0 handle_mm_fault+0xe4/0x200 __do_page_fault+0x22b/0x4a0 page_fault+0x45/0x50 Somebody tries to change the address space, contending for the mmap_sem in write-mode: call_rwsem_down_write_failed_killable+0x13/0x20 do_mprotect_pkey+0xa8/0x330 SyS_mprotect+0xf/0x20 do_syscall_64+0x5b/0x100 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 The waiting writer locks out all subsequent readers to avoid lock starvation, and several threads can be seen hanging like this: call_rwsem_down_read_failed+0x14/0x30 proc_pid_cmdline_read+0xa0/0x480 __vfs_read+0x23/0x140 vfs_read+0x87/0x130 SyS_read+0x42/0x90 do_syscall_64+0x5b/0x100 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 To fix this, do what we do for cache read faults already: drop the mmap_sem before calling into anything IO bound, in this case the balance_dirty_pages() function, and return VM_FAULT_RETRY. Link: http://lkml.kernel.org/r/20190924194238.GA29030@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Kirill A. Shutemov Cc: Josef Bacik Cc: Hillf Danton Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/filemap.c | 21 --------------------- mm/internal.h | 21 +++++++++++++++++++++ mm/memory.c | 42 +++++++++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 34 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 85b7d087eb45..1f5731768222 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2329,27 +2329,6 @@ EXPORT_SYMBOL(generic_file_read_iter); #ifdef CONFIG_MMU #define MMAP_LOTSAMISS (100) -static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, - struct file *fpin) -{ - int flags = vmf->flags; - - if (fpin) - return fpin; - - /* - * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or - * anything, so we only pin the file and drop the mmap_sem if only - * FAULT_FLAG_ALLOW_RETRY is set. - */ - if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == - FAULT_FLAG_ALLOW_RETRY) { - fpin = get_file(vmf->vma->vm_file); - up_read(&vmf->vma->vm_mm->mmap_sem); - } - return fpin; -} - /* * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem * @vmf - the vm_fault for this fault. diff --git a/mm/internal.h b/mm/internal.h index 0d5f720c75ab..7dd7fbb577a9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -362,6 +362,27 @@ vma_address(struct page *page, struct vm_area_struct *vma) return max(start, vma->vm_start); } +static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, + struct file *fpin) +{ + int flags = vmf->flags; + + if (fpin) + return fpin; + + /* + * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or + * anything, so we only pin the file and drop the mmap_sem if only + * FAULT_FLAG_ALLOW_RETRY is set. + */ + if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == + FAULT_FLAG_ALLOW_RETRY) { + fpin = get_file(vmf->vma->vm_file); + up_read(&vmf->vma->vm_mm->mmap_sem); + } + return fpin; +} + #else /* !CONFIG_MMU */ static inline void clear_page_mlock(struct page *page) { } static inline void mlock_vma_page(struct page *page) { } diff --git a/mm/memory.c b/mm/memory.c index b1ca51a079f2..cb7c940cf800 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2227,10 +2227,11 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) * * The function expects the page to be locked and unlocks it. */ -static void fault_dirty_shared_page(struct vm_area_struct *vma, - struct page *page) +static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct address_space *mapping; + struct page *page = vmf->page; bool dirtied; bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; @@ -2245,16 +2246,30 @@ static void fault_dirty_shared_page(struct vm_area_struct *vma, mapping = page_rmapping(page); unlock_page(page); - if ((dirtied || page_mkwrite) && mapping) { - /* - * Some device drivers do not set page.mapping - * but still dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - if (!page_mkwrite) file_update_time(vma->vm_file); + + /* + * Throttle page dirtying rate down to writeback speed. + * + * mapping may be NULL here because some device drivers do not + * set page.mapping but still dirty their pages + * + * Drop the mmap_sem before waiting on IO, if we can. The file + * is pinning the mapping, as per above. + */ + if ((dirtied || page_mkwrite) && mapping) { + struct file *fpin; + + fpin = maybe_unlock_mmap_for_io(vmf, NULL); + balance_dirty_pages_ratelimited(mapping); + if (fpin) { + fput(fpin); + return VM_FAULT_RETRY; + } + } + + return 0; } /* @@ -2497,6 +2512,7 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; + vm_fault_t ret = VM_FAULT_WRITE; get_page(vmf->page); @@ -2520,10 +2536,10 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) wp_page_reuse(vmf); lock_page(vmf->page); } - fault_dirty_shared_page(vma, vmf->page); + ret |= fault_dirty_shared_page(vmf); put_page(vmf->page); - return VM_FAULT_WRITE; + return ret; } /* @@ -3567,7 +3583,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) return ret; } - fault_dirty_shared_page(vma, vmf->page); + ret |= fault_dirty_shared_page(vmf); return ret; } From 60a6c5d810bce7d55637a1f00c7e236dfce22fcd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 18 Dec 2019 20:26:06 +0100 Subject: [PATCH 066/191] ALSA: ice1724: Fix sleep-in-atomic in Infrasonic Quartet support code commit 0aec96f5897ac16ad9945f531b4bef9a2edd2ebd upstream. Jia-Ju Bai reported a possible sleep-in-atomic scenario in the ice1724 driver with Infrasonic Quartet support code: namely, ice->set_rate callback gets called inside ice->reg_lock spinlock, while the callback in quartet.c holds ice->gpio_mutex. This patch fixes the invalid call: it simply moves the calls of ice->set_rate and ice->set_mclk callbacks outside the spinlock. Reported-by: Jia-Ju Bai Cc: Link: https://lore.kernel.org/r/5d43135e-73b9-a46a-2155-9e91d0dcdf83@gmail.com Link: https://lore.kernel.org/r/20191218192606.12866-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ice1712/ice1724.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index e62c11816683..f360b33a1042 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -647,6 +647,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, unsigned long flags; unsigned char mclk_change; unsigned int i, old_rate; + bool call_set_rate = false; if (rate > ice->hw_rates->list[ice->hw_rates->count - 1]) return -EINVAL; @@ -670,7 +671,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, * setting clock rate for internal clock mode */ old_rate = ice->get_rate(ice); if (force || (old_rate != rate)) - ice->set_rate(ice, rate); + call_set_rate = true; else if (rate == ice->cur_rate) { spin_unlock_irqrestore(&ice->reg_lock, flags); return 0; @@ -678,12 +679,14 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, } ice->cur_rate = rate; + spin_unlock_irqrestore(&ice->reg_lock, flags); + + if (call_set_rate) + ice->set_rate(ice, rate); /* setting master clock */ mclk_change = ice->set_mclk(ice, rate); - spin_unlock_irqrestore(&ice->reg_lock, flags); - if (mclk_change && ice->gpio.i2s_mclk_changed) ice->gpio.i2s_mclk_changed(ice); if (ice->gpio.set_pro_rate) From f47e52124334c8e25e2a82e2ac65609af078c4a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 20 Dec 2019 10:31:34 +0100 Subject: [PATCH 067/191] ALSA: usb-audio: fix set_format altsetting sanity check commit 0141254b0a74b37aa7eb13d42a56adba84d51c73 upstream. Make sure to check the return value of usb_altnum_to_altsetting() to avoid dereferencing a NULL pointer when the requested alternate settings is missing. The format altsetting number may come from a quirk table and there does not seem to be any other validation of it (the corresponding index is checked however). Fixes: b099b9693d23 ("ALSA: usb-audio: Avoid superfluous usb_set_interface() calls") Cc: stable # 4.18 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20191220093134.1248-1-johan@kernel.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index ff5ab24f3bd1..624bcef9fb63 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -506,9 +506,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt) if (WARN_ON(!iface)) return -EINVAL; alts = usb_altnum_to_altsetting(iface, fmt->altsetting); - altsd = get_iface_desc(alts); - if (WARN_ON(altsd->bAlternateSetting != fmt->altsetting)) + if (WARN_ON(!alts)) return -EINVAL; + altsd = get_iface_desc(alts); if (fmt == subs->cur_audiofmt) return 0; From 56f395fb0a7ce9b7d55026fe6a5e94fa1b84b374 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 18 Dec 2019 21:26:50 +0800 Subject: [PATCH 068/191] ALSA: usb-audio: set the interface format after resume on Dell WD19 commit 92adc96f8eecd9522a907c197cc3d62e405539fe upstream. Recently we found the headset-mic on the Dell Dock WD19 doesn't work anymore after s3 (s2i or deep), this problem could be workarounded by closing (pcm_close) the app and then reopening (pcm_open) the app, so this bug is not easy to be detected by users. When problem happens, retire_capture_urb() could still be called periodically, but the size of captured data is always 0, it could be a firmware bug on the dock. Anyway I found after resuming, the snd_usb_pcm_prepare() will be called, and if we forcibly run set_format() to set the interface and its endpoint, the capture size will be normal again. This problem and workaound also apply to playback. To fix it in the kernel, add a quirk to let set_format() run forcibly once after resume. Signed-off-by: Hui Wang Cc: Link: https://lore.kernel.org/r/20191218132650.6303-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/card.h | 1 + sound/usb/pcm.c | 21 +++++++++++++++++++-- sound/usb/quirks-table.h | 3 ++- sound/usb/quirks.c | 11 +++++++++++ sound/usb/usbaudio.h | 3 ++- 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index 2991b9986f66..395403a2d33f 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -145,6 +145,7 @@ struct snd_usb_substream { struct snd_usb_endpoint *sync_endpoint; unsigned long flags; bool need_setup_ep; /* (re)configure EP at prepare? */ + bool need_setup_fmt; /* (re)configure fmt after resume? */ unsigned int speed; /* USB_SPEED_XXX */ u64 formats; /* format bitmasks (all or'ed) */ diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 624bcef9fb63..a04c727dcd19 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -510,11 +510,11 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt) return -EINVAL; altsd = get_iface_desc(alts); - if (fmt == subs->cur_audiofmt) + if (fmt == subs->cur_audiofmt && !subs->need_setup_fmt) return 0; /* close the old interface */ - if (subs->interface >= 0 && subs->interface != fmt->iface) { + if (subs->interface >= 0 && (subs->interface != fmt->iface || subs->need_setup_fmt)) { if (!subs->stream->chip->keep_iface) { err = usb_set_interface(subs->dev, subs->interface, 0); if (err < 0) { @@ -528,6 +528,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt) subs->altset_idx = 0; } + if (subs->need_setup_fmt) + subs->need_setup_fmt = false; + /* set interface */ if (iface->cur_altsetting != alts) { err = snd_usb_select_mode_quirk(subs, fmt); @@ -1735,6 +1738,13 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea subs->data_endpoint->retire_data_urb = retire_playback_urb; subs->running = 0; return 0; + case SNDRV_PCM_TRIGGER_SUSPEND: + if (subs->stream->chip->setup_fmt_after_resume_quirk) { + stop_endpoints(subs, true); + subs->need_setup_fmt = true; + return 0; + } + break; } return -EINVAL; @@ -1767,6 +1777,13 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream subs->data_endpoint->retire_data_urb = retire_capture_urb; subs->running = 1; return 0; + case SNDRV_PCM_TRIGGER_SUSPEND: + if (subs->stream->chip->setup_fmt_after_resume_quirk) { + stop_endpoints(subs, true); + subs->need_setup_fmt = true; + return 0; + } + break; } return -EINVAL; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 70c338f3ae24..d187aa6d50db 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3466,7 +3466,8 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .vendor_name = "Dell", .product_name = "WD19 Dock", .profile_name = "Dell-WD15-Dock", - .ifnum = QUIRK_NO_INTERFACE + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_SETUP_FMT_AFTER_RESUME } }, /* MOTU Microbook II */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 349e1e52996d..a81c2066499f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -508,6 +508,16 @@ static int create_standard_mixer_quirk(struct snd_usb_audio *chip, return snd_usb_create_mixer(chip, quirk->ifnum, 0); } + +static int setup_fmt_after_resume_quirk(struct snd_usb_audio *chip, + struct usb_interface *iface, + struct usb_driver *driver, + const struct snd_usb_audio_quirk *quirk) +{ + chip->setup_fmt_after_resume_quirk = 1; + return 1; /* Continue with creating streams and mixer */ +} + /* * audio-interface quirks * @@ -546,6 +556,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk, [QUIRK_AUDIO_ALIGN_TRANSFER] = create_align_transfer_quirk, [QUIRK_AUDIO_STANDARD_MIXER] = create_standard_mixer_quirk, + [QUIRK_SETUP_FMT_AFTER_RESUME] = setup_fmt_after_resume_quirk, }; if (quirk->type < QUIRK_TYPE_COUNT) { diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index feb30f9c1716..e360680f45f3 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -33,7 +33,7 @@ struct snd_usb_audio { wait_queue_head_t shutdown_wait; unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */ unsigned int tx_length_quirk:1; /* Put length specifier in transfers */ - + unsigned int setup_fmt_after_resume_quirk:1; /* setup the format to interface after resume */ int num_interfaces; int num_suspended_intf; int sample_rate_read_error; @@ -98,6 +98,7 @@ enum quirk_type { QUIRK_AUDIO_EDIROL_UAXX, QUIRK_AUDIO_ALIGN_TRANSFER, QUIRK_AUDIO_STANDARD_MIXER, + QUIRK_SETUP_FMT_AFTER_RESUME, QUIRK_TYPE_COUNT }; From 0844feca25b3a3d34d509ea936e6d52ecd0be32c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 23 Dec 2019 23:18:16 +0100 Subject: [PATCH 069/191] ALSA: hda - Apply sync-write workaround to old Intel platforms, too commit c366b3dbbab14b28d044b94eb9ce77c23482ea35 upstream. Klaus Ethgen reported occasional high CPU usages in his system that seem caused by HD-audio driver. The perf output revealed that it's in the unsolicited event handling in the workqueue, and the problem seems triggered by some communication stall between the controller and the codec at the runtime or system resume. Actually a similar phenomenon was seen in the past for other Intel platforms, and we already applied the workaround to enforce sync-write for CORB/RIRB verbs for Skylake and newer chipsets (commit 2756d9143aa5 "ALSA: hda - Fix intermittent CORB/RIRB stall on Intel chips"). Fortunately, the same workaround is applicable to the old chipset, and the experiment showed the positive effect. Based on the experiment result, this patch enables the sync-write workaround for all Intel chipsets. The only reason I hesitated to apply this workaround was about the possibly slightly higher CPU usage. But if the lack of sync causes a much severer problem even for quite old chip, we should think this would be necessary for all Intel chips. Reported-by: Klaus Ethgen Cc: Link: https://lore.kernel.org/r/20191223171833.GA17053@chua Link: https://lore.kernel.org/r/20191223221816.32572-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4e757aa9d322..f6cbb831b86a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -280,12 +280,13 @@ enum { /* quirks for old Intel chipsets */ #define AZX_DCAPS_INTEL_ICH \ - (AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE) + (AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE |\ + AZX_DCAPS_SYNC_WRITE) /* quirks for Intel PCH */ #define AZX_DCAPS_INTEL_PCH_BASE \ (AZX_DCAPS_NO_ALIGN_BUFSIZE | AZX_DCAPS_COUNT_LPIB_DELAY |\ - AZX_DCAPS_SNOOP_TYPE(SCH)) + AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE) /* PCH up to IVB; no runtime PM; bind with i915 gfx */ #define AZX_DCAPS_INTEL_PCH_NOPM \ @@ -300,13 +301,13 @@ enum { #define AZX_DCAPS_INTEL_HASWELL \ (/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_COUNT_LPIB_DELAY |\ AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\ - AZX_DCAPS_SNOOP_TYPE(SCH)) + AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE) /* Broadwell HDMI can't use position buffer reliably, force to use LPIB */ #define AZX_DCAPS_INTEL_BROADWELL \ (/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_POSFIX_LPIB |\ AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\ - AZX_DCAPS_SNOOP_TYPE(SCH)) + AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE) #define AZX_DCAPS_INTEL_BAYTRAIL \ (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_I915_COMPONENT) From 52702a539c33111fe35cc84593317cb2a9eb5f2b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 9 Dec 2019 15:56:15 +0800 Subject: [PATCH 070/191] ALSA: hda/realtek - Add headset Mic no shutup for ALC283 commit 66c5d718e5a6f80153b5e8d6ad8ba8e9c3320839 upstream. Chrome machine had humming noise from external speaker plugin at codec D3 state. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/2692449396954c6c968f5b75e2660358@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62a471b5fc87..252888f426de 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -501,6 +501,7 @@ static void alc_shutup_pins(struct hda_codec *codec) struct alc_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: case 0x10ec0298: From 55ab031c0a7db9617ab437f95bda85386696d2cf Mon Sep 17 00:00:00 2001 From: Stefan Mavrodiev Date: Tue, 17 Dec 2019 14:46:32 +0200 Subject: [PATCH 071/191] drm/sun4i: hdmi: Remove duplicate cleanup calls commit 57177d214ee0816c4436c23d6c933ccb32c571f1 upstream. When the HDMI unbinds drm_connector_cleanup() and drm_encoder_cleanup() are called. This also happens when the connector and the encoder are destroyed. This double call triggers a NULL pointer exception. The patch fixes this by removing the cleanup calls in the unbind function. Cc: Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Stefan Mavrodiev Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20191217124632.20820-1-stefan@olimex.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index eb8071a4d6d0..9c3bdfd20337 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -683,8 +683,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, struct sun4i_hdmi *hdmi = dev_get_drvdata(dev); cec_unregister_adapter(hdmi->cec_adap); - drm_connector_cleanup(&hdmi->connector); - drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); i2c_put_adapter(hdmi->ddc_i2c); clk_disable_unprepare(hdmi->mod_clk); From 7514bbe97516963d6071c37bd06eadd9ad463d1d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Dec 2019 09:35:01 -0500 Subject: [PATCH 072/191] drm/amdgpu/smu: add metrics table lock commit 073d5eef9e043c2b7e3ef12bc6c879b1d248e831 upstream. This table is used for lots of things, add it's own lock. Bug: https://gitlab.freedesktop.org/drm/amd/issues/900 Reviewed-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 1 + drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 58c091ab67b2..a066e9297777 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -844,6 +844,7 @@ static int smu_sw_init(void *handle) smu->smu_baco.platform_support = false; mutex_init(&smu->sensor_lock); + mutex_init(&smu->metrics_lock); smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 23171a4d9a31..5ad9a7878f6b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -345,6 +345,7 @@ struct smu_context const struct pptable_funcs *ppt_funcs; struct mutex mutex; struct mutex sensor_lock; + struct mutex metrics_lock; uint64_t pool_size; struct smu_table_context smu_table; From 881b399da3a160273905010a154e07bdeb003109 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Dec 2019 09:49:52 -0500 Subject: [PATCH 073/191] drm/amdgpu/smu: add metrics table lock for arcturus (v2) commit 1da87c9f67c98d552679974dbfc1f0f65b6a0a53 upstream. To protect access to the metrics table. v2: unlock on error Bug: https://gitlab.freedesktop.org/drm/amd/issues/900 Reviewed-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index b68bf8dcfa78..08a717a34bd6 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -910,18 +910,21 @@ static int arcturus_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; + mutex_lock(&smu->metrics_lock); if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { pr_info("Failed to export SMU metrics table!\n"); + mutex_unlock(&smu->metrics_lock); return ret; } smu_table->metrics_time = jiffies; } memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t)); + mutex_unlock(&smu->metrics_lock); return ret; } From 86164784cfbe7a23f91d2cf6e0dba82d61f38c32 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Dec 2019 09:50:42 -0500 Subject: [PATCH 074/191] drm/amdgpu/smu: add metrics table lock for navi (v2) commit e0e384c398d4638e54b6d2098f0ceaafdab870ee upstream. To protect access to the metrics table. v2: unlock on error Bug: https://gitlab.freedesktop.org/drm/amd/issues/900 Reviewed-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 328e258a6895..7d913a06ebac 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -547,17 +547,20 @@ static int navi10_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; + mutex_lock(&smu->metrics_lock); if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { pr_info("Failed to export SMU metrics table!\n"); + mutex_unlock(&smu->metrics_lock); return ret; } smu_table->metrics_time = jiffies; } memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t)); + mutex_unlock(&smu->metrics_lock); return ret; } From f72e33675f3fd2d5808efd101d737b1517154bf3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Dec 2019 09:51:40 -0500 Subject: [PATCH 075/191] drm/amdgpu/smu: add metrics table lock for vega20 (v2) commit 1c455101c6d10c99b310d6bcf613244c97854012 upstream. To protect access to the metrics table. v2: unlock on error Bug: https://gitlab.freedesktop.org/drm/amd/issues/900 Reviewed-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 92c393f613d3..3c3f719971f7 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -1691,17 +1691,20 @@ static int vega20_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; + mutex_lock(&smu->metrics_lock); if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { pr_info("Failed to export SMU metrics table!\n"); + mutex_unlock(&smu->metrics_lock); return ret; } smu_table->metrics_time = jiffies; } memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t)); + mutex_unlock(&smu->metrics_lock); return ret; } From adbe05523e3411fbc6d013fb044d6e4e45f19479 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 5 Dec 2019 10:23:18 -0800 Subject: [PATCH 076/191] MIPS: BPF: Disable MIPS32 eBPF JIT commit f8fffebdea752a25757b906f3dffecf1a59a6194 upstream. Commit 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.") enabled our eBPF JIT for MIPS32 kernels, whereas it has previously only been availailable for MIPS64. It was my understanding at the time that the BPF test suite was passing & JITing a comparable number of tests to our cBPF JIT [1], but it turns out that was not the case. The eBPF JIT has a number of problems on MIPS32: - Most notably various code paths still result in emission of MIPS64 instructions which will cause reserved instruction exceptions & kernel panics when run on MIPS32 CPUs. - The eBPF JIT doesn't account for differences between the O32 ABI used by MIPS32 kernels versus the N64 ABI used by MIPS64 kernels. Notably arguments beyond the first 4 are passed on the stack in O32, and this is entirely unhandled when JITing a BPF_CALL instruction. Stack space must be reserved for arguments even if they all fit in registers, and the callee is free to assume that stack space has been reserved for its use - with the eBPF JIT this is not the case, so calling any function can result in clobbering values on the stack & unpredictable behaviour. Function arguments in eBPF are always 64-bit values which is also entirely unhandled - the JIT still uses a single (32-bit) register per argument. As a result all function arguments are always passed incorrectly when JITing a BPF_CALL instruction, leading to kernel crashes or strange behavior. - The JIT attempts to bail our on use of ALU64 instructions or 64-bit memory access instructions. The code doing this at the start of build_one_insn() incorrectly checks whether BPF_OP() equals BPF_DW, when it should really be checking BPF_SIZE() & only doing so when BPF_CLASS() is one of BPF_{LD,LDX,ST,STX}. This results in false positives that cause more bailouts than intended, and that in turns hides some of the problems described above. - The kernel's cBPF->eBPF translation makes heavy use of 64-bit eBPF instructions that the MIPS32 eBPF JIT bails out on, leading to most cBPF programs not being JITed at all. Until these problems are resolved, revert the enabling of the eBPF JIT on MIPS32 done by commit 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture."). Note that this does not undo the changes made to the eBPF JIT by that commit, since they are a useful starting point to providing MIPS32 support - they're just not nearly complete. [1] https://lore.kernel.org/linux-mips/MWHPR2201MB13583388481F01A422CE7D66D4410@MWHPR2201MB1358.namprd22.prod.outlook.com/ Signed-off-by: Paul Burton Fixes: 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.") Cc: Daniel Borkmann Cc: Hassan Naveed Cc: Tony Ambardar Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Cc: # v5.2+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a0bd9bdb5f83..7fba770f1d48 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -46,7 +46,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if (!CPU_MICROMIPS) + select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS) select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT From 2e0bee36699da7ef7ee7f0e6facef305c2de281b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 6 Dec 2019 11:07:41 +0300 Subject: [PATCH 077/191] MIPS: BPF: eBPF JIT: check for MIPS ISA compliance in Kconfig commit f596cf0d8062cb5d0a4513a8b3afca318c13be10 upstream. It is completely wrong to check for compile-time MIPS ISA revision in the body of bpf_int_jit_compile() as it may lead to get MIPS JIT fully omitted by the CC while the rest system will think that the JIT is actually present and works [1]. We can check if the selected CPU really supports MIPS eBPF JIT at configure time and avoid such situations when kernel can be built without both JIT and interpreter, but with CONFIG_BPF_SYSCALL=y. [1] https://lore.kernel.org/linux-mips/09d713a59665d745e21d021deeaebe0a@dlink.ru/ Fixes: 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.") Cc: # v5.2+ Signed-off-by: Alexander Lobakin Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Hassan Naveed Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Cc: bpf@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 2 +- arch/mips/net/ebpf_jit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 7fba770f1d48..e5c2d47608fe 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -46,7 +46,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS) + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 46b76751f3a5..a2405d5f7d1e 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1803,7 +1803,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || MIPS_ISA_REV < 2) + if (!prog->jit_requested) return prog; tmp = bpf_jit_blind_constants(prog); From 5b004a238460113276319536534928c58d95e599 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 1 Jan 2020 20:50:38 -0800 Subject: [PATCH 078/191] MIPS: Avoid VDSO ABI breakage due to global register variable commit bbcc5672b0063b0e9d65dc8787a4f09c3b5bb5cc upstream. Declaring __current_thread_info as a global register variable has the effect of preventing GCC from saving & restoring its value in cases where the ABI would typically do so. To quote GCC documentation: > If the register is a call-saved register, call ABI is affected: the > register will not be restored in function epilogue sequences after the > variable has been assigned. Therefore, functions cannot safely return > to callers that assume standard ABI. When our position independent VDSO is built for the n32 or n64 ABIs all functions it exposes should be preserving the value of $gp/$28 for their caller, but in the presence of the __current_thread_info global register variable GCC stops doing so & simply clobbers $gp/$28 when calculating the address of the GOT. In cases where the VDSO returns success this problem will typically be masked by the caller in libc returning & restoring $gp/$28 itself, but that is by no means guaranteed. In cases where the VDSO returns an error libc will typically contain a fallback path which will now fail (typically with a bad memory access) if it attempts anything which relies upon the value of $gp/$28 - eg. accessing anything via the GOT. One fix for this would be to move the declaration of __current_thread_info inside the current_thread_info() function, demoting it from global register variable to local register variable & avoiding inadvertently creating a non-standard calling ABI for the VDSO. Unfortunately this causes issues for clang, which doesn't support local register variables as pointed out by commit fe92da0f355e ("MIPS: Changed current_thread_info() to an equivalent supported by both clang and GCC") which introduced the global register variable before we had a VDSO to worry about. Instead, fix this by continuing to use the global register variable for the kernel proper but declare __current_thread_info as a simple extern variable when building the VDSO. It should never be referenced, and will cause a link error if it is. This resolves the calling convention issue for the VDSO without having any impact upon the build of the kernel itself for either clang or gcc. Signed-off-by: Paul Burton Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Reported-by: Jason A. Donenfeld Reviewed-by: Jason A. Donenfeld Tested-by: Jason A. Donenfeld Cc: Arnd Bergmann Cc: Christian Brauner Cc: Vincenzo Frascino Cc: # v4.4+ Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/thread_info.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4993db40482c..ee26f9a4575d 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,8 +49,26 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { From f868e597a3d9e66c61cb6a23f64dc9ec8361113c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 7 Dec 2019 23:43:23 +0100 Subject: [PATCH 079/191] media: pulse8-cec: fix lost cec_transmit_attempt_done() call commit e5a52a1d15c79bb48a430fb263852263ec1d3f11 upstream. The periodic PING command could interfere with the result of a CEC transmit, causing a lost cec_transmit_attempt_done() call. Signed-off-by: Hans Verkuil Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/pulse8-cec/pulse8-cec.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index ac88ade94cda..59609556d969 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -116,6 +116,7 @@ struct pulse8 { unsigned int vers; struct completion cmd_done; struct work_struct work; + u8 work_result; struct delayed_work ping_eeprom_work; struct cec_msg rx_msg; u8 data[DATA_SIZE]; @@ -137,8 +138,10 @@ static void pulse8_irq_work_handler(struct work_struct *work) { struct pulse8 *pulse8 = container_of(work, struct pulse8, work); + u8 result = pulse8->work_result; - switch (pulse8->data[0] & 0x3f) { + pulse8->work_result = 0; + switch (result & 0x3f) { case MSGCODE_FRAME_DATA: cec_received_msg(pulse8->adap, &pulse8->rx_msg); break; @@ -172,12 +175,12 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, pulse8->escape = false; } else if (data == MSGEND) { struct cec_msg *msg = &pulse8->rx_msg; + u8 msgcode = pulse8->buf[0]; if (debug) dev_info(pulse8->dev, "received: %*ph\n", pulse8->idx, pulse8->buf); - pulse8->data[0] = pulse8->buf[0]; - switch (pulse8->buf[0] & 0x3f) { + switch (msgcode & 0x3f) { case MSGCODE_FRAME_START: msg->len = 1; msg->msg[0] = pulse8->buf[1]; @@ -186,14 +189,20 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, if (msg->len == CEC_MAX_MSG_SIZE) break; msg->msg[msg->len++] = pulse8->buf[1]; - if (pulse8->buf[0] & MSGCODE_FRAME_EOM) + if (msgcode & MSGCODE_FRAME_EOM) { + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); + break; + } break; case MSGCODE_TRANSMIT_SUCCEEDED: case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_ACK: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); break; case MSGCODE_HIGH_ERROR: From e572db9a4dfe78bfa4a972832609de40907f21cb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 4 Dec 2019 08:52:08 +0100 Subject: [PATCH 080/191] media: cec: CEC 2.0-only bcast messages were ignored commit cec935ce69fc386f13959578deb40963ebbb85c3 upstream. Some messages are allowed to be a broadcast message in CEC 2.0 only, and should be ignored by CEC 1.4 devices. Unfortunately, the check was wrong, causing such messages to be marked as invalid under CEC 2.0. Signed-off-by: Hans Verkuil Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-adap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 5ef7daeb8cbd..7b46d91f4768 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1083,11 +1083,11 @@ void cec_received_msg_ts(struct cec_adapter *adap, valid_la = false; else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED)) valid_la = false; - else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4)) + else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST)) valid_la = false; else if (cec_msg_is_broadcast(msg) && - adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 && - !(dir_fl & BCAST2_0)) + adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 && + !(dir_fl & BCAST1_4)) valid_la = false; } if (valid_la && min_len) { From 9a7130220ab48b251987c3ec5843116285e67b22 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 7 Dec 2019 23:48:09 +0100 Subject: [PATCH 081/191] media: cec: avoid decrementing transmit_queue_sz if it is 0 commit 95c29d46ab2a517e4c26d0a07300edca6768db17 upstream. WARN if transmit_queue_sz is 0 but do not decrement it. The CEC adapter will become unresponsive if it goes below 0 since then it thinks there are 4 billion messages in the queue. Obviously this should not happen, but a driver bug could cause this. Signed-off-by: Hans Verkuil Cc: # for v4.12 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-adap.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 7b46d91f4768..fb889720109f 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -378,7 +378,8 @@ static void cec_data_cancel(struct cec_data *data, u8 tx_status) } else { list_del_init(&data->list); if (!(data->msg.tx_status & CEC_TX_STATUS_OK)) - data->adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + data->adap->transmit_queue_sz--; } if (data->msg.tx_status & CEC_TX_STATUS_OK) { @@ -430,6 +431,14 @@ static void cec_flush(struct cec_adapter *adap) * need to do anything special in that case. */ } + /* + * If something went wrong and this counter isn't what it should + * be, then this will reset it back to 0. Warn if it is not 0, + * since it indicates a bug, either in this framework or in a + * CEC driver. + */ + if (WARN_ON(adap->transmit_queue_sz)) + adap->transmit_queue_sz = 0; } /* @@ -520,7 +529,8 @@ int cec_thread_func(void *_adap) data = list_first_entry(&adap->transmit_queue, struct cec_data, list); list_del_init(&data->list); - adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + adap->transmit_queue_sz--; /* Make this the current transmitting message */ adap->transmitting = data; From 80d9e63714a43a162ce79abe33dd18a2111f3c4e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 11 Dec 2019 12:47:57 +0100 Subject: [PATCH 082/191] media: cec: check 'transmit_in_progress', not 'transmitting' commit ac479b51f3f4aaa852b5d3f00ecfb9290230cf64 upstream. Currently wait_event_interruptible_timeout is called in cec_thread_func() when adap->transmitting is set. But if the adapter is unconfigured while transmitting, then adap->transmitting is set to NULL. But the hardware is still actually transmitting the message, and that's indicated by adap->transmit_in_progress and we should wait until that is finished or times out before transmitting new messages. As the original commit says: adap->transmitting is the userspace view, adap->transmit_in_progress reflects the hardware state. However, if adap->transmitting is NULL and adap->transmit_in_progress is true, then wait_event_interruptible is called (no timeout), which can get stuck indefinitely if the CEC driver is flaky and never marks the transmit-in-progress as 'done'. So test against transmit_in_progress when deciding whether to use the timeout variant or not, instead of testing against adap->transmitting. Signed-off-by: Hans Verkuil Fixes: 32804fcb612b ("media: cec: keep track of outstanding transmits") Cc: # for v4.19 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/cec/cec-adap.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index fb889720109f..b14c09cd9593 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -463,7 +463,7 @@ int cec_thread_func(void *_adap) bool timeout = false; u8 attempts; - if (adap->transmitting) { + if (adap->transmit_in_progress) { int err; /* @@ -498,7 +498,7 @@ int cec_thread_func(void *_adap) goto unlock; } - if (adap->transmitting && timeout) { + if (adap->transmit_in_progress && timeout) { /* * If we timeout, then log that. Normally this does * not happen and it is an indication of a faulty CEC @@ -507,14 +507,18 @@ int cec_thread_func(void *_adap) * so much traffic on the bus that the adapter was * unable to transmit for CEC_XFER_TIMEOUT_MS (2.1s). */ - pr_warn("cec-%s: message %*ph timed out\n", adap->name, - adap->transmitting->msg.len, - adap->transmitting->msg.msg); + if (adap->transmitting) { + pr_warn("cec-%s: message %*ph timed out\n", adap->name, + adap->transmitting->msg.len, + adap->transmitting->msg.msg); + /* Just give up on this. */ + cec_data_cancel(adap->transmitting, + CEC_TX_STATUS_TIMEOUT); + } else { + pr_warn("cec-%s: transmit timed out\n", adap->name); + } adap->transmit_in_progress = false; adap->tx_timeouts++; - /* Just give up on this. */ - cec_data_cancel(adap->transmitting, - CEC_TX_STATUS_TIMEOUT); goto unlock; } From e84c5b76173b3b49b054aa340066d5001ab20b70 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Sat, 4 Jan 2020 12:59:33 -0800 Subject: [PATCH 083/191] mm/memory_hotplug: shrink zones when offlining memory commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream. We currently try to shrink a single zone when removing memory. We use the zone of the first page of the memory we are removing. If that memmap was never initialized (e.g., memory was never onlined), we will read garbage and can trigger kernel BUGs (due to a stale pointer): BUG: unable to handle page fault for address: 000000000000353d #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:clear_zone_contiguous+0x5/0x10 Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840 RSP: 0018:ffffad2400043c98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000 RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40 RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000 R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680 FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __remove_pages+0x4b/0x640 arch_remove_memory+0x63/0x8d try_remove_memory+0xdb/0x130 __remove_memory+0xa/0x11 acpi_memory_device_remove+0x70/0x100 acpi_bus_trim+0x55/0x90 acpi_device_hotplug+0x227/0x3a0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x221/0x550 worker_thread+0x50/0x3b0 kthread+0x105/0x140 ret_from_fork+0x3a/0x50 Modules linked in: CR2: 000000000000353d Instead, shrink the zones when offlining memory or when onlining failed. Introduce and use remove_pfn_range_from_zone(() for that. We now properly shrink the zones, even if we have DIMMs whereby - Some memory blocks fall into no zone (never onlined) - Some memory blocks fall into multiple zones (offlined+re-onlined) - Multiple memory blocks that fall into different zones Drop the zone parameter (with a potential dubious value) from __remove_pages() and __remove_section(). Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Aneesh Kumar K.V" Cc: Pavel Tatashin Cc: Greg Kroah-Hartman Cc: Dan Williams Cc: Logan Gunthorpe Cc: [5.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 4 +--- arch/ia64/mm/init.c | 4 +--- arch/powerpc/mm/mem.c | 3 +-- arch/s390/mm/init.c | 4 +--- arch/sh/mm/init.c | 4 +--- arch/x86/mm/init_32.c | 4 +--- arch/x86/mm/init_64.c | 4 +--- include/linux/memory_hotplug.h | 7 +++++-- mm/memory_hotplug.c | 31 ++++++++++++++++--------------- mm/memremap.c | 2 +- 10 files changed, 29 insertions(+), 38 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 60c929f3683b..d10247fab0fd 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1069,7 +1069,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; /* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1078,7 +1077,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. */ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index bf9df2625bc8..a6dd80a2c939 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..97e5922cb52e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -130,10 +130,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); int ret; - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3c..c1d96e588152 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -291,10 +291,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index dfdbaa50946e..d1b1ff2be17a 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 930edeb41ec3..0a74407ef92e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a6b5c653727b..b8541d77452c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); - struct zone *zone = page_zone(page); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index f46ea71b4ffd..451efd4499cc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -125,8 +125,8 @@ static inline bool movable_node_is_enabled(void) extern void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); -extern void __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap); +extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap); /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, @@ -345,6 +345,9 @@ extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); +extern void remove_pfn_range_from_zone(struct zone *zone, + unsigned long start_pfn, + unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f307bd82d750..fab540685279 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -465,8 +465,9 @@ static void update_pgdat_span(struct pglist_data *pgdat) pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; } -static void __remove_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages) +void __ref remove_pfn_range_from_zone(struct zone *zone, + unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; unsigned long flags; @@ -481,28 +482,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn, return; #endif + clear_zone_contiguous(zone); + pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); + + set_zone_contiguous(zone); } -static void __remove_section(struct zone *zone, unsigned long pfn, - unsigned long nr_pages, unsigned long map_offset, - struct vmem_altmap *altmap) +static void __remove_section(unsigned long pfn, unsigned long nr_pages, + unsigned long map_offset, + struct vmem_altmap *altmap) { struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); if (WARN_ON_ONCE(!valid_section(ms))) return; - __remove_zone(zone, pfn, nr_pages); sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap); } /** - * __remove_pages() - remove sections of pages from a zone - * @zone: zone from which pages need to be removed + * __remove_pages() - remove sections of pages * @pfn: starting pageframe (must be aligned to start of a section) * @nr_pages: number of pages to remove (must be multiple of section size) * @altmap: alternative device page map or %NULL if default memmap is used @@ -512,16 +515,14 @@ static void __remove_section(struct zone *zone, unsigned long pfn, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -void __remove_pages(struct zone *zone, unsigned long pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) +void __remove_pages(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) { unsigned long map_offset = 0; unsigned long nr, start_sec, end_sec; map_offset = vmem_altmap_offset(altmap); - clear_zone_contiguous(zone); - if (check_pfn_span(pfn, nr_pages, "remove")) return; @@ -533,13 +534,11 @@ void __remove_pages(struct zone *zone, unsigned long pfn, cond_resched(); pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); - __remove_section(zone, pfn, pfns, map_offset, altmap); + __remove_section(pfn, pfns, map_offset, altmap); pfn += pfns; nr_pages -= pfns; map_offset = 0; } - - set_zone_contiguous(zone); } int set_online_page_callback(online_page_callback_t callback) @@ -867,6 +866,7 @@ failed_addition: (unsigned long long) pfn << PAGE_SHIFT, (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); + remove_pfn_range_from_zone(zone, pfn, nr_pages); mem_hotplug_done(); return ret; } @@ -1602,6 +1602,7 @@ static int __ref __offline_pages(unsigned long start_pfn, writeback_set_ratelimit(); memory_notify(MEM_OFFLINE, &arg); + remove_pfn_range_from_zone(zone, start_pfn, nr_pages); mem_hotplug_done(); return 0; diff --git a/mm/memremap.c b/mm/memremap.c index 03ccbdfeb697..c51c6bd2fe34 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *pgmap) mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - __remove_pages(page_zone(first_page), PHYS_PFN(res->start), + __remove_pages(PHYS_PFN(res->start), PHYS_PFN(resource_size(res)), NULL); } else { arch_remove_memory(nid, res->start, resource_size(res), From cdc57bac994b100b3fc501f224cb1269812e8f31 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Sat, 4 Jan 2020 12:59:36 -0800 Subject: [PATCH 084/191] mm/zsmalloc.c: fix the migrated zspage statistics. commit ac8f05da5174c560de122c499ce5dfb5d0dfbee5 upstream. When zspage is migrated to the other zone, the zone page state should be updated as well, otherwise the NR_ZSPAGE for each zone shows wrong counts including proc/zoneinfo in practice. Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat") Signed-off-by: Chanho Min Signed-off-by: Jinsuk Choi Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2b2b9aae8a3c..22d17ecfe7df 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, zs_pool_dec_isolated(pool); } + if (page_zone(newpage) != page_zone(page)) { + dec_zone_page_state(page, NR_ZSPAGES); + inc_zone_page_state(newpage, NR_ZSPAGES); + } + reset_page(page); put_page(page); page = newpage; From a54454d5d6fd70dd7401994ebd859f7e4c961d6d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 4 Jan 2020 12:59:43 -0800 Subject: [PATCH 085/191] memcg: account security cred as well to kmemcg commit 84029fd04c201a4c7e0b07ba262664900f47c6f5 upstream. The cred_jar kmem_cache is already memcg accounted in the current kernel but cred->security is not. Account cred->security to kmemcg. Recently we saw high root slab usage on our production and on further inspection, we found a buggy application leaking processes. Though that buggy application was contained within its memcg but we observe much more system memory overhead, couple of GiBs, during that period. This overhead can adversely impact the isolation on the system. One source of high overhead we found was cred->security objects, which have a lifetime of at least the life of the process which allocated them. Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Chris Down Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/cred.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index c0a4c12d38b2..9ed51b70ed80 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void) new->magic = CRED_MAGIC; #endif - if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; return new; @@ -282,7 +282,7 @@ struct cred *prepare_creds(void) new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; validate_creds(new); return new; @@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) #ifdef CONFIG_SECURITY new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; put_cred(old); From 366b5dce6dcd8abe01953d58953d7bd4cea5d153 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sat, 4 Jan 2020 12:59:46 -0800 Subject: [PATCH 086/191] mm: move_pages: return valid node id in status if the page is already on the target node commit e0153fc2c7606f101392b682e720a7a456d6c766 upstream. Felix Abecassis reports move_pages() would return random status if the pages are already on the target node by the below test program: int main(void) { const long node_id = 1; const long page_size = sysconf(_SC_PAGESIZE); const int64_t num_pages = 8; unsigned long nodemask = 1 << node_id; long ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)); if (ret < 0) return (EXIT_FAILURE); void **pages = malloc(sizeof(void*) * num_pages); for (int i = 0; i < num_pages; ++i) { pages[i] = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS, -1, 0); if (pages[i] == MAP_FAILED) return (EXIT_FAILURE); } ret = set_mempolicy(MPOL_DEFAULT, NULL, 0); if (ret < 0) return (EXIT_FAILURE); int *nodes = malloc(sizeof(int) * num_pages); int *status = malloc(sizeof(int) * num_pages); for (int i = 0; i < num_pages; ++i) { nodes[i] = node_id; status[i] = 0xd0; /* simulate garbage values */ } ret = move_pages(0, num_pages, pages, nodes, status, MPOL_MF_MOVE); printf("move_pages: %ld\n", ret); for (int i = 0; i < num_pages; ++i) printf("status[%d] = %d\n", i, status[i]); } Then running the program would return nonsense status values: $ ./move_pages_bug move_pages: 0 status[0] = 208 status[1] = 208 status[2] = 208 status[3] = 208 status[4] = 208 status[5] = 208 status[6] = 208 status[7] = 208 This is because the status is not set if the page is already on the target node, but move_pages() should return valid status as long as it succeeds. The valid status may be errno or node id. We can't simply initialize status array to zero since the pages may be not on node 0. Fix it by updating status with node id which the page is already on. Link: http://lkml.kernel.org/r/1575584353-125392-1-git-send-email-yang.shi@linux.alibaba.com Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move") Signed-off-by: Yang Shi Reported-by: Felix Abecassis Tested-by: Felix Abecassis Suggested-by: Michal Hocko Reviewed-by: John Hubbard Acked-by: Christoph Lameter Acked-by: Michal Hocko Reviewed-by: Vlastimil Babka Cc: Mel Gorman Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 4fe45d1428c8..45d3303e0022 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1516,9 +1516,11 @@ static int do_move_pages_to_node(struct mm_struct *mm, /* * Resolves the given address to a struct page, isolates it from the LRU and * puts it to the given pagelist. - * Returns -errno if the page cannot be found/isolated or 0 when it has been - * queued or the page doesn't need to be migrated because it is already on - * the target node + * Returns: + * errno - if the page cannot be found/isolated + * 0 - when it doesn't have to be migrated because it is already on the + * target node + * 1 - when it has been queued */ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, int node, struct list_head *pagelist, bool migrate_all) @@ -1557,7 +1559,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, if (PageHuge(page)) { if (PageHead(page)) { isolate_huge_page(page, pagelist); - err = 0; + err = 1; } } else { struct page *head; @@ -1567,7 +1569,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, if (err) goto out_putpage; - err = 0; + err = 1; list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_cache(head), @@ -1644,8 +1646,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, */ err = add_page_for_migration(mm, addr, current_node, &pagelist, flags & MPOL_MF_MOVE_ALL); - if (!err) + + if (!err) { + /* The page is already on the target node */ + err = store_status(status, i, current_node, 1); + if (err) + goto out_flush; continue; + } else if (err > 0) { + /* The page is successfully queued for migration */ + continue; + } err = store_status(status, i, err, 1); if (err) From 7123ee7b3feeaf414bfe84fa388e4168d62e07fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 4 Jan 2020 13:00:09 -0800 Subject: [PATCH 087/191] mm/oom: fix pgtables units mismatch in Killed process message commit 941f762bcb276259a78e7931674668874ccbda59 upstream. pr_err() expects kB, but mm_pgtables_bytes() returns the number of bytes. As everything else is printed in kB, I chose to fix the value rather than the string. Before: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name ... [ 1878] 1000 1878 217253 151144 1269760 0 0 python ... Out of memory: Killed process 1878 (python) total-vm:869012kB, anon-rss:604572kB, file-rss:4kB, shmem-rss:0kB, UID:1000 pgtables:1269760kB oom_score_adj:0 After: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name ... [ 1436] 1000 1436 217253 151890 1294336 0 0 python ... Out of memory: Killed process 1436 (python) total-vm:869012kB, anon-rss:607516kB, file-rss:44kB, shmem-rss:0kB, UID:1000 pgtables:1264kB oom_score_adj:0 Link: http://lkml.kernel.org/r/20191211202830.1600-1-idryomov@gmail.com Fixes: 70cb6d267790 ("mm/oom: add oom_score_adj and pgtables to Killed process message") Signed-off-by: Ilya Dryomov Reviewed-by: Andrew Morton Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Edward Chron Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 71e3acea7817..d58c481b3df8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -890,7 +890,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES)), from_kuid(&init_user_ns, task_uid(victim)), - mm_pgtables_bytes(mm), victim->signal->oom_score_adj); + mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj); task_unlock(victim); /* From d3f82a7c17de0918df0775ffac3be2f6c9b33dfe Mon Sep 17 00:00:00 2001 From: Gang He Date: Sat, 4 Jan 2020 13:00:22 -0800 Subject: [PATCH 088/191] ocfs2: fix the crash due to call ocfs2_get_dlm_debug once less commit b73eba2a867e10b9b4477738677341f3307c07bb upstream. Because ocfs2_get_dlm_debug() function is called once less here, ocfs2 file system will trigger the system crash, usually after ocfs2 file system is unmounted. This system crash is caused by a generic memory corruption, these crash backtraces are not always the same, for exapmle, ocfs2: Unmounting device (253,16) on (node 172167785) general protection fault: 0000 [#1] SMP PTI CPU: 3 PID: 14107 Comm: fence_legacy Kdump: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:__kmalloc+0xa5/0x2a0 Code: 00 00 4d 8b 07 65 4d 8b RSP: 0018:ffffaa1fc094bbe8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: d310a8800d7a3faf RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000dc0 RDI: ffff96e68fc036c0 RBP: d310a8800d7a3faf R08: ffff96e6ffdb10a0 R09: 00000000752e7079 R10: 000000000001c513 R11: 0000000004091041 R12: 0000000000000dc0 R13: 0000000000000039 R14: ffff96e68fc036c0 R15: ffff96e68fc036c0 FS: 00007f699dfba540(0000) GS:ffff96e6ffd80000(0000) knlGS:00000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f3a9d9b768 CR3: 000000002cd1c000 CR4: 00000000000006e0 Call Trace: ext4_htree_store_dirent+0x35/0x100 [ext4] htree_dirblock_to_tree+0xea/0x290 [ext4] ext4_htree_fill_tree+0x1c1/0x2d0 [ext4] ext4_readdir+0x67c/0x9d0 [ext4] iterate_dir+0x8d/0x1a0 __x64_sys_getdents+0xab/0x130 do_syscall_64+0x60/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f699d33a9fb This regression problem was introduced by commit e581595ea29c ("ocfs: no need to check return value of debugfs_create functions"). Link: http://lkml.kernel.org/r/20191225061501.13587-1-ghe@suse.com Fixes: e581595ea29c ("ocfs: no need to check return value of debugfs_create functions") Signed-off-by: Gang He Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: [5.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6e774c5ea13b..8a2e284ccfcd 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb) debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root, &dlm_debug->d_filter_secs); + ocfs2_get_dlm_debug(dlm_debug); } static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) From b578c35ed97434ab8f17efce69b2722ebf94abc8 Mon Sep 17 00:00:00 2001 From: Aleksandr Yashkin Date: Mon, 23 Dec 2019 18:38:16 +0500 Subject: [PATCH 089/191] pstore/ram: Write new dumps to start of recycled zones commit 9e5f1c19800b808a37fb9815a26d382132c26c3d upstream. The ram_core.c routines treat przs as circular buffers. When writing a new crash dump, the old buffer needs to be cleared so that the new dump doesn't end up in the wrong place (i.e. at the end). The solution to this problem is to reset the circular buffer state before writing a new Oops dump. Signed-off-by: Aleksandr Yashkin Signed-off-by: Nikolay Merinov Signed-off-by: Ariel Gilman Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8caff834f002..d9d805632573 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) prz = cxt->dprzs[cxt->dump_write_cnt]; + /* + * Since this is a new crash dump, we need to reset the buffer in + * case it still has an old dump present. Without this, the new dump + * will get appended, which would seriously confuse anything trying + * to check dump file contents. Specifically, ramoops_read_kmsg_hdr() + * expects to find a dump header in the beginning of buffer data, so + * we must to reset the buffer values, in order to ensure that the + * header will be written to the beginning of the buffer. + */ + persistent_ram_zap(prz); + /* Build header and append record contents. */ hlen = ramoops_write_kmsg_hdr(prz, record); if (!hlen) From 50d18b655bb0b7dce04d0594e9bf9390ff1a4bdf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Dec 2019 11:48:10 -0800 Subject: [PATCH 090/191] pstore/ram: Fix error-path memory leak in persistent_ram_new() callers commit 8df955a32a73315055e0cd187cbb1cea5820394b upstream. For callers that allocated a label for persistent_ram_new(), if the call fails, they must clean up the allocation. Suggested-by: Navid Emamdoost Fixes: 1227daa43bce ("pstore/ram: Clarify resource reservation labels") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/20191211191353.14385-1-navid.emamdoost@gmail.com Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index d9d805632573..487ee39b438a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -588,6 +588,7 @@ static int ramoops_init_przs(const char *name, dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", name, record_size, (unsigned long long)*paddr, err); + kfree(label); while (i > 0) { i--; @@ -633,6 +634,7 @@ static int ramoops_init_prz(const char *name, dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", name, sz, (unsigned long long)*paddr, err); + kfree(label); return err; } From 3346358055dfc6e45b1c5c8ff29f39c0e9b9fde2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Dec 2019 14:39:28 +0100 Subject: [PATCH 091/191] gcc-plugins: make it possible to disable CONFIG_GCC_PLUGINS again commit a5b0dc5a46c221725c43bd9b01570239a4cd78b1 upstream. I noticed that randconfig builds with gcc no longer produce a lot of ccache hits, unlike with clang, and traced this back to plugins now being enabled unconditionally if they are supported. I am now working around this by adding export CCACHE_COMPILERCHECK=/usr/bin/size -A %compiler% to my top-level Makefile. This changes the heuristic that ccache uses to determine whether the plugins are the same after a 'make clean'. However, it also seems that being able to just turn off the plugins is generally useful, at least for build testing it adds noticeable overhead but does not find a lot of bugs additional bugs, and may be easier for ccache users than my workaround. Fixes: 9f671e58159a ("security: Create "kernel hardening" config area") Signed-off-by: Arnd Bergmann Acked-by: Ard Biesheuvel Reviewed-by: Masahiro Yamada Link: https://lore.kernel.org/r/20191211133951.401933-1-arnd@arndb.de Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/Kconfig | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index d33de0b9f4f5..e3569543bdac 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS An arch should select this symbol if it supports building with GCC plugins. -config GCC_PLUGINS - bool +menuconfig GCC_PLUGINS + bool "GCC plugins" depends on HAVE_GCC_PLUGINS depends on PLUGIN_HOSTCC != "" default y @@ -25,8 +25,7 @@ config GCC_PLUGINS See Documentation/core-api/gcc-plugins.rst for details. -menu "GCC plugins" - depends on GCC_PLUGINS +if GCC_PLUGINS config GCC_PLUGIN_CYC_COMPLEXITY bool "Compute the cyclomatic complexity of a function" if EXPERT @@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK bool depends on GCC_PLUGINS && ARM -endmenu +endif From 72893303a6d2fb8d65c00f463bc443c3eab72808 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 22 Dec 2019 20:45:28 +0200 Subject: [PATCH 092/191] locks: print unsigned ino in /proc/locks commit 98ca480a8f22fdbd768e3dad07024c8d4856576c upstream. An ino is unsigned, so display it as such in /proc/locks. Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Signed-off-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index 6970f55daf54..44b6da032842 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, } if (inode) { /* userspace relies on this representation of dev_t */ - seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, + seq_printf(f, "%d %02x:%02x:%lu ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { From e0c77c3fa8aeb4811155c485e5be24805d6842c3 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Sat, 28 Dec 2019 22:24:49 -0800 Subject: [PATCH 093/191] selftests/seccomp: Zero out seccomp_notif commit 88c13f8bd71472fbab5338b01d99122908c77e53 upstream. The seccomp_notif structure should be zeroed out prior to calling the SECCOMP_IOCTL_NOTIF_RECV ioctl. Previously, the kernel did not check whether these structures were zeroed out or not, so these worked. This patch zeroes out the seccomp_notif data structure prior to calling the ioctl. Signed-off-by: Sargun Dhillon Reviewed-by: Tycho Andersen Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20191229062451.9467-1-sargun@sargun.me Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b505bb062d07..864793a611b1 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3267,6 +3267,7 @@ TEST(user_notification_signal) close(sk_pair[1]); + memset(&req, 0, sizeof(req)); EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); EXPECT_EQ(kill(pid, SIGUSR1), 0); @@ -3285,6 +3286,7 @@ TEST(user_notification_signal) EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); EXPECT_EQ(errno, ENOENT); + memset(&req, 0, sizeof(req)); EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); resp.id = req.id; From f75e60d239087a582ca3d828fb475422d8046b79 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Sat, 28 Dec 2019 22:24:50 -0800 Subject: [PATCH 094/191] seccomp: Check that seccomp_notif is zeroed out by the user commit 2882d53c9c6f3b8311d225062522f03772cf0179 upstream. This patch is a small change in enforcement of the uapi for SECCOMP_IOCTL_NOTIF_RECV ioctl. Specifically, the datastructure which is passed (seccomp_notif) must be zeroed out. Previously any of its members could be set to nonsense values, and we would ignore it. This ensures all fields are set to their zero value. Signed-off-by: Sargun Dhillon Reviewed-by: Christian Brauner Reviewed-by: Aleksa Sarai Acked-by: Tycho Andersen Link: https://lore.kernel.org/r/20191229062451.9467-2-sargun@sargun.me Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index dba52a7db5e8..614a557a0814 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1015,6 +1015,13 @@ static long seccomp_notify_recv(struct seccomp_filter *filter, struct seccomp_notif unotif; ssize_t ret; + /* Verify that we're not given garbage to keep struct extensible. */ + ret = check_zeroed_user(buf, sizeof(unotif)); + if (ret < 0) + return ret; + if (!ret) + return -EINVAL; + memset(&unotif, 0, sizeof(unotif)); ret = down_interruptible(&filter->notif->request); From de7cd9dfc578e8b1f6586ffbc0e24683c356d143 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 30 Dec 2019 12:35:03 -0800 Subject: [PATCH 095/191] samples/seccomp: Zero out members based on seccomp_notif_sizes commit 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 upstream. The sizes by which seccomp_notif and seccomp_notif_resp are allocated are based on the SECCOMP_GET_NOTIF_SIZES ioctl. This allows for graceful extension of these datastructures. If userspace zeroes out the datastructure based on its version, and it is lagging behind the kernel's version, it will end up sending trailing garbage. On the other hand, if it is ahead of the kernel version, it will write extra zero space, and potentially cause corruption. Signed-off-by: Sargun Dhillon Suggested-by: Tycho Andersen Link: https://lore.kernel.org/r/20191230203503.4925-1-sargun@sargun.me Fixes: fec7b6690541 ("samples: add an example of seccomp user trap") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- samples/seccomp/user-trap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c index 6d0125ca8af7..20291ec6489f 100644 --- a/samples/seccomp/user-trap.c +++ b/samples/seccomp/user-trap.c @@ -298,14 +298,14 @@ int main(void) req = malloc(sizes.seccomp_notif); if (!req) goto out_close; - memset(req, 0, sizeof(*req)); resp = malloc(sizes.seccomp_notif_resp); if (!resp) goto out_req; - memset(resp, 0, sizeof(*resp)); + memset(resp, 0, sizes.seccomp_notif_resp); while (1) { + memset(req, 0, sizes.seccomp_notif); if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) { perror("ioctl recv"); goto out_resp; From a61eb076ccaa22681ff225b4390b27f7a05e0b36 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 30 Dec 2019 12:38:11 -0800 Subject: [PATCH 096/191] selftests/seccomp: Catch garbage on SECCOMP_IOCTL_NOTIF_RECV commit e4ab5ccc357b978999328fadae164e098c26fa40 upstream. This adds logic to the user_notification_basic test to set a member of struct seccomp_notif to an invalid value to ensure that the kernel returns EINVAL if any of the struct seccomp_notif members are set to invalid values. Signed-off-by: Sargun Dhillon Suggested-by: Christian Brauner Link: https://lore.kernel.org/r/20191230203811.4996-1-sargun@sargun.me Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 864793a611b1..96bbda4f10fc 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3147,7 +3147,18 @@ TEST(user_notification_basic) EXPECT_GT(poll(&pollfd, 1, -1), 0); EXPECT_EQ(pollfd.revents, POLLIN); - EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* Test that we can't pass garbage to the kernel. */ + memset(&req, 0, sizeof(req)); + req.pid = -1; + errno = 0; + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + if (ret) { + req.pid = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + } pollfd.fd = listener; pollfd.events = POLLIN | POLLOUT; From a71b9dd7bf4b9184f6b72a937c6f9f6f19709306 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 5 Dec 2019 12:54:49 +0100 Subject: [PATCH 097/191] dmaengine: Fix access to uninitialized dma_slave_caps commit 53a256a9b925b47c7e67fc1f16ca41561a7b877c upstream. dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the stack, populates it using dma_get_slave_caps() and then accesses one of its members. However dma_get_slave_caps() may fail and this isn't accounted for, leading to a legitimate warning of gcc-4.9 (but not newer versions): In file included from drivers/spi/spi-bcm2835.c:19:0: drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse': >> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized] if (caps.descriptor_reuse) { Fix it, thereby also silencing the gcc-4.9 warning. The issue has been present for 4 years but surfaces only now that the first caller of dmaengine_desc_set_reuse() has been added in spi-bcm2835.c. Another user of reusable DMA descriptors has existed for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag directly instead of calling dmaengine_desc_set_reuse(). Nevertheless, tag this commit for stable in case there are out-of-tree users. Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE") Reported-by: kbuild test robot Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.3+ Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- include/linux/dmaengine.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8fcdee1c0cf9..dad4a68fa009 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) { struct dma_slave_caps caps; + int ret; - dma_get_slave_caps(tx->chan, &caps); + ret = dma_get_slave_caps(tx->chan, &caps); + if (ret) + return ret; if (caps.descriptor_reuse) { tx->flags |= DMA_CTRL_REUSE; From 61dc7d025d8061b1c92ab8aede836ee6942e9ad8 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 10 Dec 2019 17:55:45 +0100 Subject: [PATCH 098/191] dmaengine: dma-jz4780: Also break descriptor chains on JZ4725B commit a40c94be2336f3002563c9ae16572143ae3422e2 upstream. It turns out that the JZ4725B displays the same buggy behaviour as the JZ4740 that was described in commit f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor chains on JZ4740"). Work around it by using the same workaround previously used for the JZ4740. Fixes commit f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor chains on JZ4740") Cc: Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20191210165545.59690-1-paul@crapouillou.net Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index cafb1cc065bb..bf95f1d551c5 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -1004,7 +1004,8 @@ static const struct jz4780_dma_soc_data jz4740_dma_soc_data = { static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = { .nb_channels = 6, .transfer_ord_max = 5, - .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC | + JZ_SOC_DATA_BREAK_LINKS, }; static const struct jz4780_dma_soc_data jz4770_dma_soc_data = { From 842c4cd688268859f077f552fd60f0521abf52dc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Dec 2019 09:01:40 +0000 Subject: [PATCH 099/191] Btrfs: fix infinite loop during nocow writeback due to race commit de7999afedff02c6631feab3ea726a0e8f8c3d40 upstream. When starting writeback for a range that covers part of a preallocated extent, due to a race with writeback for another range that also covers another part of the same preallocated extent, we can end up in an infinite loop. Consider the following example where for inode 280 we have two dirty ranges: range A, from 294912 to 303103, 8192 bytes range B, from 348160 to 438271, 90112 bytes and we have the following file extent item layout for our inode: leaf 38895616 gen 24544 total ptrs 29 free space 13820 owner 5 (...) item 27 key (280 108 200704) itemoff 14598 itemsize 53 extent data disk bytenr 0 nr 0 type 1 (regular) extent data offset 0 nr 94208 ram 94208 item 28 key (280 108 294912) itemoff 14545 itemsize 53 extent data disk bytenr 10433052672 nr 81920 type 2 (prealloc) extent data offset 0 nr 81920 ram 81920 Then the following happens: 1) Writeback starts for range B (from 348160 to 438271), execution of run_delalloc_nocow() starts; 2) The first iteration of run_delalloc_nocow()'s whil loop leaves us at the extent item at slot 28, pointing to the prealloc extent item covering the range from 294912 to 376831. This extent covers part of our range; 3) An ordered extent is created against that extent, covering the file range from 348160 to 376831 (28672 bytes); 4) We adjust 'cur_offset' to 376832 and move on to the next iteration of the while loop; 5) The call to btrfs_lookup_file_extent() leaves us at the same leaf, pointing to slot 29, 1 slot after the last item (the extent item we processed in the previous iteration); 6) Because we are a slot beyond the last item, we call btrfs_next_leaf(), which releases the search path before doing a another search for the last key of the leaf (280 108 294912); 7) Right after btrfs_next_leaf() released the path, and before it did another search for the last key of the leaf, writeback for the range A (from 294912 to 303103) completes (it was previously started at some point); 8) Upon completion of the ordered extent for range A, the prealloc extent we previously found got split into two extent items, one covering the range from 294912 to 303103 (8192 bytes), with a type of regular extent (and no longer prealloc) and another covering the range from 303104 to 376831 (73728 bytes), with a type of prealloc and an offset of 8192 bytes. So our leaf now has the following layout: leaf 38895616 gen 24544 total ptrs 31 free space 13664 owner 5 (...) item 27 key (280 108 200704) itemoff 14598 itemsize 53 extent data disk bytenr 0 nr 0 type 1 extent data offset 0 nr 8192 ram 94208 item 28 key (280 108 208896) itemoff 14545 itemsize 53 extent data disk bytenr 10433142784 nr 86016 type 1 extent data offset 0 nr 86016 ram 86016 item 29 key (280 108 294912) itemoff 14492 itemsize 53 extent data disk bytenr 10433052672 nr 81920 type 1 extent data offset 0 nr 8192 ram 81920 item 30 key (280 108 303104) itemoff 14439 itemsize 53 extent data disk bytenr 10433052672 nr 81920 type 2 extent data offset 8192 nr 73728 ram 81920 9) After btrfs_next_leaf() returns, we have our path pointing to that same leaf and at slot 30, since it has a key we didn't have before and it's the first key greater then the key that was previously the last key of the leaf (key (280 108 294912)); 10) The extent item at slot 30 covers the range from 303104 to 376831 which is in our target range, so we process it, despite having already created an ordered extent against this extent for the file range from 348160 to 376831. This is because we skip to the next extent item only if its end is less than or equals to the start of our delalloc range, and not less than or equals to the current offset ('cur_offset'); 11) As a result we compute 'num_bytes' as: num_bytes = min(end + 1, extent_end) - cur_offset; = min(438271 + 1, 376832) - 376832 = 0 12) We then call create_io_em() for a 0 bytes range starting at offset 376832; 13) Then create_io_em() enters an infinite loop because its calls to btrfs_drop_extent_cache() do nothing due to the 0 length range passed to it. So no existing extent maps that cover the offset 376832 get removed, and therefore calls to add_extent_mapping() return -EEXIST, resulting in an infinite loop. This loop from create_io_em() is the following: do { btrfs_drop_extent_cache(BTRFS_I(inode), em->start, em->start + em->len - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); /* * The caller has taken lock_extent(), who could race with us * to add em? */ } while (ret == -EEXIST); Also, each call to btrfs_drop_extent_cache() triggers a warning because the start offset passed to it (376832) is smaller then the end offset (376832 - 1) passed to it by -1, due to the 0 length: [258532.052621] ------------[ cut here ]------------ [258532.052643] WARNING: CPU: 0 PID: 9987 at fs/btrfs/file.c:602 btrfs_drop_extent_cache+0x3f4/0x590 [btrfs] (...) [258532.052672] CPU: 0 PID: 9987 Comm: fsx Tainted: G W 5.4.0-rc7-btrfs-next-64 #1 [258532.052673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 [258532.052691] RIP: 0010:btrfs_drop_extent_cache+0x3f4/0x590 [btrfs] (...) [258532.052695] RSP: 0018:ffffb4be0153f860 EFLAGS: 00010287 [258532.052700] RAX: ffff975b445ee360 RBX: ffff975b44eb3e08 RCX: 0000000000000000 [258532.052700] RDX: 0000000000038fff RSI: 0000000000039000 RDI: ffff975b445ee308 [258532.052700] RBP: 0000000000038fff R08: 0000000000000000 R09: 0000000000000001 [258532.052701] R10: ffff975b513c5c10 R11: 00000000e3c0cfa9 R12: 0000000000039000 [258532.052703] R13: ffff975b445ee360 R14: 00000000ffffffef R15: ffff975b445ee308 [258532.052705] FS: 00007f86a821de80(0000) GS:ffff975b76a00000(0000) knlGS:0000000000000000 [258532.052707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [258532.052708] CR2: 00007fdacf0f3ab4 CR3: 00000001f9d26002 CR4: 00000000003606f0 [258532.052712] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [258532.052717] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [258532.052717] Call Trace: [258532.052718] ? preempt_schedule_common+0x32/0x70 [258532.052722] ? ___preempt_schedule+0x16/0x20 [258532.052741] create_io_em+0xff/0x180 [btrfs] [258532.052767] run_delalloc_nocow+0x942/0xb10 [btrfs] [258532.052791] btrfs_run_delalloc_range+0x30b/0x520 [btrfs] [258532.052812] ? find_lock_delalloc_range+0x221/0x250 [btrfs] [258532.052834] writepage_delalloc+0xe4/0x140 [btrfs] [258532.052855] __extent_writepage+0x110/0x4e0 [btrfs] [258532.052876] extent_write_cache_pages+0x21c/0x480 [btrfs] [258532.052906] extent_writepages+0x52/0xb0 [btrfs] [258532.052911] do_writepages+0x23/0x80 [258532.052915] __filemap_fdatawrite_range+0xd2/0x110 [258532.052938] btrfs_fdatawrite_range+0x1b/0x50 [btrfs] [258532.052954] start_ordered_ops+0x57/0xa0 [btrfs] [258532.052973] ? btrfs_sync_file+0x225/0x490 [btrfs] [258532.052988] btrfs_sync_file+0x225/0x490 [btrfs] [258532.052997] __x64_sys_msync+0x199/0x200 [258532.053004] do_syscall_64+0x5c/0x250 [258532.053007] entry_SYSCALL_64_after_hwframe+0x49/0xbe [258532.053010] RIP: 0033:0x7f86a7dfd760 (...) [258532.053014] RSP: 002b:00007ffd99af0368 EFLAGS: 00000246 ORIG_RAX: 000000000000001a [258532.053016] RAX: ffffffffffffffda RBX: 0000000000000ec9 RCX: 00007f86a7dfd760 [258532.053017] RDX: 0000000000000004 RSI: 000000000000836c RDI: 00007f86a8221000 [258532.053019] RBP: 0000000000021ec9 R08: 0000000000000003 R09: 00007f86a812037c [258532.053020] R10: 0000000000000001 R11: 0000000000000246 R12: 00000000000074a3 [258532.053021] R13: 00007f86a8221000 R14: 000000000000836c R15: 0000000000000001 [258532.053032] irq event stamp: 1653450494 [258532.053035] hardirqs last enabled at (1653450493): [] _raw_spin_unlock_irq+0x29/0x50 [258532.053037] hardirqs last disabled at (1653450494): [] trace_hardirqs_off_thunk+0x1a/0x20 [258532.053039] softirqs last enabled at (1653449852): [] __do_softirq+0x466/0x6bd [258532.053042] softirqs last disabled at (1653449845): [] irq_exit+0xec/0x120 [258532.053043] ---[ end trace 8476fce13d9ce20a ]--- Which results in flooding dmesg/syslog since btrfs_drop_extent_cache() uses WARN_ON() and not WARN_ON_ONCE(). So fix this issue by changing run_delalloc_nocow()'s loop to move to the next extent item when the current extent item ends at at offset less than or equals to the current offset instead of the start offset. Fixes: 80ff385665b7fc ("Btrfs: update nodatacow code v2") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5758f62e8d8..bc6e7d15577a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1439,10 +1439,10 @@ next_slot: disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); /* - * If extent we got ends before our range starts, skip - * to next extent + * If the extent we got ends before our current offset, + * skip to the next extent. */ - if (extent_end <= start) { + if (extent_end <= cur_offset) { path->slots[0]++; goto next_slot; } From 755d02fcf855cb38df1f97dab60c0ecf15671975 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Nov 2019 11:28:22 +0100 Subject: [PATCH 100/191] compat_ioctl: block: handle Persistent Reservations commit b2c0fcd28772f99236d261509bcd242135677965 upstream. These were added to blkdev_ioctl() in linux-5.5 but not blkdev_compat_ioctl, so add them now. Cc: # v4.4+ Fixes: bbd3e064362e ("block: add an API for Persistent Reservations") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman Fold in followup patch from Arnd with missing pr.h header include. Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 6ca015f92766..413bd5c5380b 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -401,6 +402,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKTRACETEARDOWN: /* compatible */ ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); return ret; + case IOC_PR_REGISTER: + case IOC_PR_RESERVE: + case IOC_PR_RELEASE: + case IOC_PR_PREEMPT: + case IOC_PR_PREEMPT_ABORT: + case IOC_PR_CLEAR: + return blkdev_ioctl(bdev, mode, cmd, + (unsigned long)compat_ptr(arg)); default: if (disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); From 17d3c07aaba71b23c10a6e1dce2320ac5fdfae3b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Nov 2019 11:28:22 +0100 Subject: [PATCH 101/191] compat_ioctl: block: handle BLKREPORTZONE/BLKRESETZONE commit 673bdf8ce0a387ef585c13b69a2676096c6edfe9 upstream. These were added to blkdev_ioctl() but not blkdev_compat_ioctl, so add them now. Cc: # v4.10+ Fixes: 3ed05a987e0f ("blk-zoned: implement ioctls") Reviewed-by: Damien Le Moal Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 413bd5c5380b..6490b2759bcb 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -355,6 +355,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: + case BLKREPORTZONE: + case BLKRESETZONE: return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: From 247aca0b6baef106ed99fbc43bd0a367c865ccce Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Nov 2019 11:28:22 +0100 Subject: [PATCH 102/191] compat_ioctl: block: handle BLKGETZONESZ/BLKGETNRZONES commit 21d37340912d74b1222d43c11aa9dd0687162573 upstream. These were added to blkdev_ioctl() in v4.20 but not blkdev_compat_ioctl, so add them now. Cc: # v4.20+ Fixes: 72cd87576d1d ("block: Introduce BLKGETZONESZ ioctl") Fixes: 65e4e3eee83d ("block: Introduce BLKGETNRZONES ioctl") Reviewed-by: Damien Le Moal Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 6490b2759bcb..7f053468b50d 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -357,6 +357,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKRRPART: case BLKREPORTZONE: case BLKRESETZONE: + case BLKGETZONESZ: + case BLKGETNRZONES: return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: From abaf57360e3b1868615eacb71af466baf8156ffb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 22 Dec 2019 23:37:40 +0100 Subject: [PATCH 103/191] bpf: Fix precision tracking for unbounded scalars commit f54c7898ed1c3c9331376c0337a5049c38f66497 upstream. Anatoly has been fuzzing with kBdysch harness and reported a hang in one of the outcomes. Upon closer analysis, it turns out that precise scalar value tracking is missing a few precision markings for unknown scalars: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = 0 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (35) if r0 >= 0xf72e goto pc+0 --> only follow fallthrough 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 2: (35) if r0 >= 0x80fe0000 goto pc+0 --> only follow fallthrough 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 3: (14) w0 -= -536870912 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0 4: (0f) r1 += r0 5: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0 5: (55) if r1 != 0x104c1500 goto pc+0 --> push other branch for later analysis R0_w=invP536870912 R1_w=inv273421568 R10=fp0 6: R0_w=invP536870912 R1_w=inv273421568 R10=fp0 6: (b7) r0 = 0 7: R0=invP0 R1=inv273421568 R10=fp0 7: (76) if w1 s>= 0xffffff00 goto pc+3 --> only follow goto 11: R0=invP0 R1=inv273421568 R10=fp0 11: (95) exit 6: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0 6: (b7) r0 = 0 propagating r0 7: safe processed 11 insns [...] In the analysis of the second path coming after the successful exit above, the path is being pruned at line 7. Pruning analysis found that both r0 are precise P0 and both R1 are non-precise scalars and given prior path with R1 as non-precise scalar succeeded, this one is therefore safe as well. However, problem is that given condition at insn 7 in the first run, we only followed goto and didn't push the other branch for later analysis, we've never walked the few insns in there and therefore dead-code sanitation rewrites it as goto pc-1, causing the hang depending on the skb address hitting these conditions. The issue is that R1 should have been marked as precise as well such that pruning enforces range check and conluded that new R1 is not in range of old R1. In insn 4, we mark R1 (skb) as unknown scalar via __mark_reg_unbounded() but not mark_reg_unbounded() and therefore regs->precise remains as false. Back in b5dc0163d8fd ("bpf: precise scalar_value tracking"), this was not the case since marking out of __mark_reg_unbounded() had this covered as well. Once in both are set as precise in 4 as they should have been, we conclude that given R1 was in prior fall-through path 0x104c1500 and now is completely unknown, the check at insn 7 concludes that we need to continue walking. Analysis after the fix: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = 0 1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (35) if r0 >= 0xf72e goto pc+0 2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 2: (35) if r0 >= 0x80fe0000 goto pc+0 3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0 3: (14) w0 -= -536870912 4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0 4: (0f) r1 += r0 5: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0 5: (55) if r1 != 0x104c1500 goto pc+0 R0_w=invP536870912 R1_w=invP273421568 R10=fp0 6: R0_w=invP536870912 R1_w=invP273421568 R10=fp0 6: (b7) r0 = 0 7: R0=invP0 R1=invP273421568 R10=fp0 7: (76) if w1 s>= 0xffffff00 goto pc+3 11: R0=invP0 R1=invP273421568 R10=fp0 11: (95) exit 6: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0 6: (b7) r0 = 0 7: R0_w=invP0 R1_w=invP(id=0) R10=fp0 7: (76) if w1 s>= 0xffffff00 goto pc+3 R0_w=invP0 R1_w=invP(id=0) R10=fp0 8: R0_w=invP0 R1_w=invP(id=0) R10=fp0 8: (a5) if r0 < 0x2007002a goto pc+0 9: R0_w=invP0 R1_w=invP(id=0) R10=fp0 9: (57) r0 &= -16316416 10: R0_w=invP0 R1_w=invP(id=0) R10=fp0 10: (a6) if w0 < 0x1201 goto pc+0 11: R0_w=invP0 R1_w=invP(id=0) R10=fp0 11: (95) exit 11: R0=invP0 R1=invP(id=0) R10=fp0 11: (95) exit processed 16 insns [...] Fixes: 6754172c208d ("bpf: fix precision tracking in presence of bpf2bpf calls") Reported-by: Anatoly Trosinenko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191222223740.25297-1-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e7cee5307e0..5c51021775af 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -852,7 +852,8 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void __mark_reg_not_init(struct bpf_reg_state *reg); +static void __mark_reg_not_init(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg); /* Mark the unknown part of a register (variable offset or scalar value) as * known to have the value @imm. @@ -890,7 +891,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); /* Something bad happened, let's kill all regs */ for (regno = 0; regno < MAX_BPF_REG; regno++) - __mark_reg_not_init(regs + regno); + __mark_reg_not_init(env, regs + regno); return; } __mark_reg_known_zero(regs + regno); @@ -999,7 +1000,8 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg) } /* Mark a register as having a completely unknown (scalar) value. */ -static void __mark_reg_unknown(struct bpf_reg_state *reg) +static void __mark_reg_unknown(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg) { /* * Clear type, id, off, and union(map_ptr, range) and @@ -1009,6 +1011,8 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg) reg->type = SCALAR_VALUE; reg->var_off = tnum_unknown; reg->frameno = 0; + reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? + true : false; __mark_reg_unbounded(reg); } @@ -1019,19 +1023,16 @@ static void mark_reg_unknown(struct bpf_verifier_env *env, verbose(env, "mark_reg_unknown(regs, %u)\n", regno); /* Something bad happened, let's kill all regs except FP */ for (regno = 0; regno < BPF_REG_FP; regno++) - __mark_reg_not_init(regs + regno); + __mark_reg_not_init(env, regs + regno); return; } - regs += regno; - __mark_reg_unknown(regs); - /* constant backtracking is enabled for root without bpf2bpf calls */ - regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? - true : false; + __mark_reg_unknown(env, regs + regno); } -static void __mark_reg_not_init(struct bpf_reg_state *reg) +static void __mark_reg_not_init(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg) { - __mark_reg_unknown(reg); + __mark_reg_unknown(env, reg); reg->type = NOT_INIT; } @@ -1042,10 +1043,10 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, verbose(env, "mark_reg_not_init(regs, %u)\n", regno); /* Something bad happened, let's kill all regs except FP */ for (regno = 0; regno < BPF_REG_FP; regno++) - __mark_reg_not_init(regs + regno); + __mark_reg_not_init(env, regs + regno); return; } - __mark_reg_not_init(regs + regno); + __mark_reg_not_init(env, regs + regno); } #define DEF_NOT_SUBREG (0) @@ -3066,7 +3067,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, } if (state->stack[spi].slot_type[0] == STACK_SPILL && state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { - __mark_reg_unknown(&state->stack[spi].spilled_ptr); + __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) state->stack[spi].slot_type[j] = STACK_MISC; goto mark; @@ -3706,7 +3707,7 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, if (!reg) continue; if (reg_is_pkt_pointer_any(reg)) - __mark_reg_unknown(reg); + __mark_reg_unknown(env, reg); } } @@ -3734,7 +3735,7 @@ static void release_reg_references(struct bpf_verifier_env *env, if (!reg) continue; if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(reg); + __mark_reg_unknown(env, reg); } } @@ -4357,7 +4358,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* Taint dst register if offset had invalid bounds derived from * e.g. dead branches. */ - __mark_reg_unknown(dst_reg); + __mark_reg_unknown(env, dst_reg); return 0; } @@ -4609,13 +4610,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* Taint dst register if offset had invalid bounds derived from * e.g. dead branches. */ - __mark_reg_unknown(dst_reg); + __mark_reg_unknown(env, dst_reg); return 0; } if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { - __mark_reg_unknown(dst_reg); + __mark_reg_unknown(env, dst_reg); return 0; } @@ -6746,7 +6747,7 @@ static void clean_func_state(struct bpf_verifier_env *env, /* since the register is unused, clear its state * to make further comparison simpler */ - __mark_reg_not_init(&st->regs[i]); + __mark_reg_not_init(env, &st->regs[i]); } for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { @@ -6754,7 +6755,7 @@ static void clean_func_state(struct bpf_verifier_env *env, /* liveness must not touch this stack slot anymore */ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; if (!(live & REG_LIVE_READ)) { - __mark_reg_not_init(&st->stack[i].spilled_ptr); + __mark_reg_not_init(env, &st->stack[i].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) st->stack[i].slot_type[j] = STACK_INVALID; } From 1ce0f1e6fa8824b1c09ef5f64d0be569e91e7dc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:44 -0800 Subject: [PATCH 104/191] ata: libahci_platform: Export again ahci_platform_able_phys() commit 84b032dbfdf1c139cd2b864e43959510646975f8 upstream. This reverts commit 6bb86fefa086faba7b60bb452300b76a47cde1a5 ("libahci_platform: Staticize ahci_platform_able_phys()") we are going to need ahci_platform_{enable,disable}_phys() in a subsequent commit for ahci_brcm.c in order to properly control the PHY initialization order. Also make sure the function prototypes are declared in include/linux/ahci_platform.h as a result. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci_platform.c | 6 ++++-- include/linux/ahci_platform.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 8befce036af8..129556fcf6be 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_ops); * RETURNS: * 0 on success otherwise a negative error code */ -static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) { int rc, i; @@ -74,6 +74,7 @@ disable_phys: } return rc; } +EXPORT_SYMBOL_GPL(ahci_platform_enable_phys); /** * ahci_platform_disable_phys - Disable PHYs @@ -81,7 +82,7 @@ disable_phys: * * This function disables all PHYs found in hpriv->phys. */ -static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) { int i; @@ -90,6 +91,7 @@ static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) phy_exit(hpriv->phys[i]); } } +EXPORT_SYMBOL_GPL(ahci_platform_disable_phys); /** * ahci_platform_enable_clks - Enable platform clocks diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 6782f0d45ebe..49e5383d4222 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -19,6 +19,8 @@ struct ahci_host_priv; struct platform_device; struct scsi_host_template; +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); From d9295852f8721ca714e4745f24dad0e617f19da0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:45 -0800 Subject: [PATCH 105/191] ata: ahci_brcm: Fix AHCI resources management commit c0cdf2ac4b5bf3e5ef2451ea29fb4104278cdabc upstream. The AHCI resources management within ahci_brcm.c is a little convoluted, largely because it historically had a dedicated clock that was managed within this file in the downstream tree. Once brough upstream though, the clock was left to be managed by libahci_platform.c which is entirely appropriate. This patch series ensures that the AHCI resources are fetched and enabled before any register access is done, thus avoiding bus errors on platforms which clock gate the controller by default. As a result we need to re-arrange the suspend() and resume() functions in order to avoid accessing registers after the clocks have been turned off respectively before the clocks have been turned on. Finally, we can refactor brcm_ahci_get_portmask() in order to fetch the number of ports from hpriv->mmio which is now accessible without jumping through hoops like we used to do. The commit pointed in the Fixes tag is both old and new enough not to require major headaches for backporting of this patch. Fixes: eba68f829794 ("ata: ahci_brcmstb: rename to support across Broadcom SoC's") Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_brcm.c | 105 +++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index f41744b9b38a..a8b2f3f7bbbc 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -213,19 +213,12 @@ static void brcm_sata_phys_disable(struct brcm_ahci_priv *priv) brcm_sata_phy_disable(priv, i); } -static u32 brcm_ahci_get_portmask(struct platform_device *pdev, +static u32 brcm_ahci_get_portmask(struct ahci_host_priv *hpriv, struct brcm_ahci_priv *priv) { - void __iomem *ahci; - struct resource *res; u32 impl; - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ahci"); - ahci = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ahci)) - return 0; - - impl = readl(ahci + HOST_PORTS_IMPL); + impl = readl(hpriv->mmio + HOST_PORTS_IMPL); if (fls(impl) > SATA_TOP_MAX_PHYS) dev_warn(priv->dev, "warning: more ports than PHYs (%#x)\n", @@ -233,9 +226,6 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev, else if (!impl) dev_info(priv->dev, "no ports found\n"); - devm_iounmap(&pdev->dev, ahci); - devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); - return impl; } @@ -347,11 +337,10 @@ static int brcm_ahci_suspend(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; - int ret; - ret = ahci_platform_suspend(dev); brcm_sata_phys_disable(priv); - return ret; + + return ahci_platform_suspend(dev); } static int brcm_ahci_resume(struct device *dev) @@ -359,11 +348,44 @@ static int brcm_ahci_resume(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; + int ret; + + /* Make sure clocks are turned on before re-configuration */ + ret = ahci_platform_enable_clks(hpriv); + if (ret) + return ret; brcm_sata_init(priv); brcm_sata_phys_enable(priv); brcm_sata_alpm_init(hpriv); - return ahci_platform_resume(dev); + + /* Since we had to enable clocks earlier on, we cannot use + * ahci_platform_resume() as-is since a second call to + * ahci_platform_enable_resources() would bump up the resources + * (regulators, clocks, PHYs) count artificially so we copy the part + * after ahci_platform_enable_resources(). + */ + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + + ret = ahci_platform_resume_host(dev); + if (ret) + goto out_disable_platform_phys; + + /* We resumed so update PM runtime state */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); + ahci_platform_disable_clks(hpriv); + return ret; } #endif @@ -416,38 +438,63 @@ static int brcm_ahci_probe(struct platform_device *pdev) priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; } + hpriv = ahci_platform_get_resources(pdev, 0); + if (IS_ERR(hpriv)) { + ret = PTR_ERR(hpriv); + goto out_reset; + } + + ret = ahci_platform_enable_clks(hpriv); + if (ret) + goto out_reset; + + /* Must be first so as to configure endianness including that + * of the standard AHCI register space. + */ brcm_sata_init(priv); - priv->port_mask = brcm_ahci_get_portmask(pdev, priv); - if (!priv->port_mask) - return -ENODEV; + /* Initializes priv->port_mask which is used below */ + priv->port_mask = brcm_ahci_get_portmask(hpriv, priv); + if (!priv->port_mask) { + ret = -ENODEV; + goto out_disable_clks; + } + /* Must be done before ahci_platform_enable_phys() */ brcm_sata_phys_enable(priv); - hpriv = ahci_platform_get_resources(pdev, 0); - if (IS_ERR(hpriv)) - return PTR_ERR(hpriv); hpriv->plat_data = priv; hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP; brcm_sata_alpm_init(hpriv); - ret = ahci_platform_enable_resources(hpriv); - if (ret) - return ret; - if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ) hpriv->flags |= AHCI_HFLAG_NO_NCQ; hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO; + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info, &ahci_platform_sht); if (ret) - return ret; + goto out_disable_platform_phys; dev_info(dev, "Broadcom AHCI SATA3 registered\n"); return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); +out_disable_clks: + ahci_platform_disable_clks(hpriv); +out_reset: + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_assert(priv->rcdev); + return ret; } static int brcm_ahci_remove(struct platform_device *pdev) @@ -457,12 +504,12 @@ static int brcm_ahci_remove(struct platform_device *pdev) struct brcm_ahci_priv *priv = hpriv->plat_data; int ret; + brcm_sata_phys_disable(priv); + ret = ata_platform_remove_one(pdev); if (ret) return ret; - brcm_sata_phys_disable(priv); - return 0; } From a3b6848e8de6c0251ba4a172812b2fa2fab9fda1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:47 -0800 Subject: [PATCH 106/191] ata: ahci_brcm: Add missing clock management during recovery commit bf0e5013bc2dcac205417e1252205dca39dfc005 upstream. The downstream implementation of ahci_brcm.c did contain clock management recovery, but until recently, did that outside of the libahci_platform helpers and this was unintentionally stripped out while forward porting the patch upstream. Add the missing clock management during recovery and sleep for 10 milliseconds per the design team recommendations to ensure the SATA PHY controller and AFE have been fully quiesced. Fixes: eb73390ae241 ("ata: ahci_brcm: Recover from failures to identify devices") Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_brcm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index a8b2f3f7bbbc..21043cc70045 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -275,6 +275,13 @@ static unsigned int brcm_ahci_read_id(struct ata_device *dev, /* Perform the SATA PHY reset sequence */ brcm_sata_phy_disable(priv, ap->port_no); + /* Reset the SATA clock */ + ahci_platform_disable_clks(hpriv); + msleep(10); + + ahci_platform_enable_clks(hpriv); + msleep(10); + /* Bring the PHY back on */ brcm_sata_phy_enable(priv, ap->port_no); From fc6e6f322eaf7f4744b758898a7b6ef65d3f67fa Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:46 -0800 Subject: [PATCH 107/191] ata: ahci_brcm: BCM7425 AHCI requires AHCI_HFLAG_DELAY_ENGINE commit 1a3d78cb6e20779a19388315bd8efefbd8d4a656 upstream. Set AHCI_HFLAG_DELAY_ENGINE for the BCM7425 AHCI controller thus making it conforming to the 'strict' AHCI implementation which this controller is based on. This solves long link establishment with specific hard drives (e.g.: Seagate ST1000VM002-9ZL1 SC12) that would otherwise have to complete the error recovery handling before finally establishing a succesful SATA link at the desired speed. We re-order the hpriv->flags assignment to also remove the NONCQ quirk since we can set the flag directly. Fixes: 9586114cf1e9 ("ata: ahci_brcmstb: add support MIPS-based platforms") Fixes: 423be77daabe ("ata: ahci_brcmstb: add quirk for broken ncq") Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci_brcm.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 21043cc70045..66a570d0da83 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -76,8 +76,7 @@ enum brcm_ahci_version { }; enum brcm_ahci_quirks { - BRCM_AHCI_QUIRK_NO_NCQ = BIT(0), - BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(1), + BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(0), }; struct brcm_ahci_priv { @@ -439,18 +438,27 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (!IS_ERR_OR_NULL(priv->rcdev)) reset_control_deassert(priv->rcdev); - if ((priv->version == BRCM_SATA_BCM7425) || - (priv->version == BRCM_SATA_NSP)) { - priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ; - priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; - } - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) { ret = PTR_ERR(hpriv); goto out_reset; } + hpriv->plat_data = priv; + hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP | AHCI_HFLAG_NO_WRITE_TO_RO; + + switch (priv->version) { + case BRCM_SATA_BCM7425: + hpriv->flags |= AHCI_HFLAG_DELAY_ENGINE; + /* fall through */ + case BRCM_SATA_NSP: + hpriv->flags |= AHCI_HFLAG_NO_NCQ; + priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; + break; + default: + break; + } + ret = ahci_platform_enable_clks(hpriv); if (ret) goto out_reset; @@ -470,15 +478,8 @@ static int brcm_ahci_probe(struct platform_device *pdev) /* Must be done before ahci_platform_enable_phys() */ brcm_sata_phys_enable(priv); - hpriv->plat_data = priv; - hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP; - brcm_sata_alpm_init(hpriv); - if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ) - hpriv->flags |= AHCI_HFLAG_NO_NCQ; - hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO; - ret = ahci_platform_enable_phys(hpriv); if (ret) goto out_disable_phys; From c9738c3a4e1378f26d92d1c061f08ff0c12116b5 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 13 Dec 2019 09:04:08 +0100 Subject: [PATCH 108/191] libata: Fix retrieving of active qcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8385d756e114f2df8568e508902d5f9850817ffb upstream. ata_qc_complete_multiple() is called with a mask of the still active tags. mv_sata doesn't have this information directly and instead calculates the still active tags from the started tags (ap->qc_active) and the finished tags as (ap->qc_active ^ done_mask) Since 28361c40368 the hw_tag and tag are no longer the same and the equation is no longer valid. In ata_exec_internal_sg() ap->qc_active is initialized as 1ULL << ATA_TAG_INTERNAL, but in hardware tag 0 is started and this will be in done_mask on completion. ap->qc_active ^ done_mask becomes 0x100000000 ^ 0x1 = 0x100000001 and thus tag 0 used as the internal tag will never be reported as completed. This is fixed by introducing ata_qc_get_active() which returns the active hardware tags and calling it where appropriate. This is tested on mv_sata, but sata_fsl and sata_nv suffer from the same problem. There is another case in sata_nv that most likely needs fixing as well, but this looks a little different, so I wasn't confident enough to change that. Fixes: 28361c403683 ("libata: add extra internal command") Cc: stable@vger.kernel.org Tested-by: Pali Rohár Signed-off-by: Sascha Hauer Signed-off-by: Greg Kroah-Hartman Add missing export of ata_qc_get_active(), as per Pali. Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 24 ++++++++++++++++++++++++ drivers/ata/sata_fsl.c | 2 +- drivers/ata/sata_mv.c | 2 +- drivers/ata/sata_nv.c | 2 +- include/linux/libata.h | 1 + 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 74c9b3032d46..84b183a6424e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5325,6 +5325,30 @@ void ata_qc_complete(struct ata_queued_cmd *qc) } } +/** + * ata_qc_get_active - get bitmask of active qcs + * @ap: port in question + * + * LOCKING: + * spin_lock_irqsave(host lock) + * + * RETURNS: + * Bitmask of active qcs + */ +u64 ata_qc_get_active(struct ata_port *ap) +{ + u64 qc_active = ap->qc_active; + + /* ATA_TAG_INTERNAL is sent to hw as tag 0 */ + if (qc_active & (1ULL << ATA_TAG_INTERNAL)) { + qc_active |= (1 << 0); + qc_active &= ~(1ULL << ATA_TAG_INTERNAL); + } + + return qc_active; +} +EXPORT_SYMBOL_GPL(ata_qc_get_active); + /** * ata_qc_complete_multiple - Complete multiple qcs successfully * @ap: port in question diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 8e9cb198fcd1..ca6c706e9c25 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1278,7 +1278,7 @@ static void sata_fsl_host_intr(struct ata_port *ap) i, ioread32(hcr_base + CC), ioread32(hcr_base + CA)); } - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); return; } else if ((ap->qc_active & (1ULL << ATA_TAG_INTERNAL))) { diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index ad385a113391..bde695a32097 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2827,7 +2827,7 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp } if (work_done) { - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); /* Update the software queue position index in hardware */ writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) | diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 56946012d113..7510303111fa 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -984,7 +984,7 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance) check_commands = 0; check_commands &= ~(1 << pos); } - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); } } diff --git a/include/linux/libata.h b/include/linux/libata.h index 207e7ee764ce..fa0c3dae2094 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1174,6 +1174,7 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); +extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, From 5654d5d25c17782e0a12554483bd345cd25b7bf6 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 26 Nov 2019 11:30:27 -0800 Subject: [PATCH 109/191] gpio: xtensa: fix driver build commit 634f0348fe336fce8f6cab1933139115e983ed2f upstream. Commit cad6fade6e78 ("xtensa: clean up WSR*/RSR*/get_sr/set_sr") removed {RSR,WSR}_CPENABLE from xtensa code, but did not fix up all users, breaking gpio-xtensa driver build. Update gpio-xtensa to use new xtensa_{get,set}_sr API. Cc: stable@vger.kernel.org # v5.0+ Fixes: cad6fade6e78 ("xtensa: clean up WSR*/RSR*/get_sr/set_sr") Signed-off-by: Max Filippov Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-xtensa.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-xtensa.c b/drivers/gpio/gpio-xtensa.c index 43d3fa5f511a..0fb2211f9573 100644 --- a/drivers/gpio/gpio-xtensa.c +++ b/drivers/gpio/gpio-xtensa.c @@ -44,15 +44,14 @@ static inline unsigned long enable_cp(unsigned long *cpenable) unsigned long flags; local_irq_save(flags); - RSR_CPENABLE(*cpenable); - WSR_CPENABLE(*cpenable | BIT(XCHAL_CP_ID_XTIOP)); - + *cpenable = xtensa_get_sr(cpenable); + xtensa_set_sr(*cpenable | BIT(XCHAL_CP_ID_XTIOP), cpenable); return flags; } static inline void disable_cp(unsigned long flags, unsigned long cpenable) { - WSR_CPENABLE(cpenable); + xtensa_set_sr(cpenable, cpenable); local_irq_restore(flags); } From 530f7c6df94e2190ae4499633910f36487da4d6c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 7 Dec 2019 16:20:18 +0000 Subject: [PATCH 110/191] gpiolib: fix up emulated open drain outputs commit 256efaea1fdc4e38970489197409a26125ee0aaa upstream. gpiolib has a corner case with open drain outputs that are emulated. When such outputs are outputting a logic 1, emulation will set the hardware to input mode, which will cause gpiod_get_direction() to report that it is in input mode. This is different from the behaviour with a true open-drain output. Unify the semantics here. Cc: Suggested-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 104ed299d5ea..99d19f80440e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -220,6 +220,14 @@ int gpiod_get_direction(struct gpio_desc *desc) chip = gpiod_to_chip(desc); offset = gpio_chip_hwgpio(desc); + /* + * Open drain emulation using input mode may incorrectly report + * input here, fix that up. + */ + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) && + test_bit(FLAG_IS_OUT, &desc->flags)) + return 0; + if (!chip->get_direction) return -ENOTSUPP; From b3757ec3d9bf8697508dbd8fae2f5860806611f7 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 23 Dec 2019 16:46:14 +0800 Subject: [PATCH 111/191] clocksource: riscv: add notrace to riscv_sched_clock commit 9d05c18e8d7de566ff68f221fcae65e78708dd1d upstream. When enabling ftrace graph tracer, it gets the tracing clock in ftrace_push_return_trace(). Eventually, it invokes riscv_sched_clock() to get the clock value. If riscv_sched_clock() isn't marked with 'notrace', it will call ftrace_push_return_trace() and cause infinite loop. The result of failure as follow: command: echo function_graph >current_tracer [ 46.176787] Unable to handle kernel paging request at virtual address ffffffe04fb38c48 [ 46.177309] Oops [#1] [ 46.177478] Modules linked in: [ 46.177770] CPU: 0 PID: 256 Comm: $d Not tainted 5.5.0-rc1 #47 [ 46.177981] epc: ffffffe00035e59a ra : ffffffe00035e57e sp : ffffffe03a7569b0 [ 46.178216] gp : ffffffe000d29b90 tp : ffffffe03a756180 t0 : ffffffe03a756968 [ 46.178430] t1 : ffffffe00087f408 t2 : ffffffe03a7569a0 s0 : ffffffe03a7569f0 [ 46.178643] s1 : ffffffe00087f408 a0 : 0000000ac054cda4 a1 : 000000000087f411 [ 46.178856] a2 : 0000000ac054cda4 a3 : 0000000000373ca0 a4 : ffffffe04fb38c48 [ 46.179099] a5 : 00000000153e22a8 a6 : 00000000005522ff a7 : 0000000000000005 [ 46.179338] s2 : ffffffe03a756a90 s3 : ffffffe00032811c s4 : ffffffe03a756a58 [ 46.179570] s5 : ffffffe000d29fe0 s6 : 0000000000000001 s7 : 0000000000000003 [ 46.179809] s8 : 0000000000000003 s9 : 0000000000000002 s10: 0000000000000004 [ 46.180053] s11: 0000000000000000 t3 : 0000003fc815749c t4 : 00000000000efc90 [ 46.180293] t5 : ffffffe000d29658 t6 : 0000000000040000 [ 46.180482] status: 0000000000000100 badaddr: ffffffe04fb38c48 cause: 000000000000000f Signed-off-by: Zong Li Reviewed-by: Steven Rostedt (VMware) [paul.walmsley@sifive.com: cleaned up patch description] Fixes: 92e0d143fdef ("clocksource/drivers/riscv_timer: Provide the sched_clock") Cc: stable@vger.kernel.org Signed-off-by: Paul Walmsley Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 470c7ef02ea4..4b04ffbe5e7e 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -41,7 +41,7 @@ static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs) return get_cycles64(); } -static u64 riscv_sched_clock(void) +static u64 notrace riscv_sched_clock(void) { return get_cycles64(); } From 927cc45771d9ecf10f179fb43049fc58f6f2f48c Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 23 Dec 2019 16:46:13 +0800 Subject: [PATCH 112/191] riscv: ftrace: correct the condition logic in function graph tracer commit 1d8f65798240b6577d8c44d20c8ea8f1d429e495 upstream. The condition should be logical NOT to assign the hook address to parent address. Because the return value 0 of function_graph_enter upon success. Fixes: e949b6db51dc (riscv/function_graph: Simplify with function_graph_enter()) Signed-off-by: Zong Li Reviewed-by: Steven Rostedt (VMware) Cc: stable@vger.kernel.org Signed-off-by: Paul Walmsley Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b94d8db5ddcc..c40fdcdeb950 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = return_hooker; } From 0426c59b638c5fbd32dab1a54f3ccbeab1c2bcd3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 11 Dec 2019 11:17:13 -0500 Subject: [PATCH 113/191] rseq/selftests: Fix: Namespace gettid() for compatibility with glibc 2.30 commit 8df34c56321479bfa1ec732c675b686c2b4df412 upstream. glibc 2.30 introduces gettid() in public headers, which clashes with the internal static definition within rseq selftests. Rename gettid() to rseq_gettid() to eliminate this symbol name clash. Reported-by: Tommi T. Rantala Signed-off-by: Mathieu Desnoyers Cc: Shuah Khan Cc: Tommi T. Rantala Cc: Thomas Gleixner Cc: Peter Zijlstra (Intel) Cc: "Paul E. McKenney" Cc: Boqun Feng Cc: "H . Peter Anvin" Cc: Paul Turner Cc: Dmitry Vyukov Cc: # v4.18+ Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/rseq/param_test.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index eec2663261f2..e8a657a5f48a 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -15,7 +15,7 @@ #include #include -static inline pid_t gettid(void) +static inline pid_t rseq_gettid(void) { return syscall(__NR_gettid); } @@ -373,11 +373,12 @@ void *test_percpu_spinlock_thread(void *arg) rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) - printf_verbose("tid %d: count %lld\n", (int) gettid(), i); + printf_verbose("tid %d: count %lld\n", + (int) rseq_gettid(), i); #endif } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && thread_data->reg && rseq_unregister_current_thread()) abort(); @@ -454,11 +455,12 @@ void *test_percpu_inc_thread(void *arg) } while (rseq_unlikely(ret)); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) - printf_verbose("tid %d: count %lld\n", (int) gettid(), i); + printf_verbose("tid %d: count %lld\n", + (int) rseq_gettid(), i); #endif } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && thread_data->reg && rseq_unregister_current_thread()) abort(); @@ -605,7 +607,7 @@ void *test_percpu_list_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); @@ -796,7 +798,7 @@ void *test_percpu_buffer_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); @@ -1011,7 +1013,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); From c5361afa0ebe53b35e7609c544ebf66bb27ec846 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Tue, 10 Dec 2019 09:15:16 +0000 Subject: [PATCH 114/191] tracing: Fix lock inversion in trace_event_enable_tgid_record() commit 3a53acf1d9bea11b57c1f6205e3fe73f9d8a3688 upstream. Task T2 Task T3 trace_options_core_write() subsystem_open() mutex_lock(trace_types_lock) mutex_lock(event_mutex) set_tracer_flag() trace_event_enable_tgid_record() mutex_lock(trace_types_lock) mutex_lock(event_mutex) This gives a circular dependency deadlock between trace_types_lock and event_mutex. To fix this invert the usage of trace_types_lock and event_mutex in trace_options_core_write(). This keeps the sequence of lock usage consistent. Link: http://lkml.kernel.org/r/0101016eef175e38-8ca71caf-a4eb-480d-a1e6-6f0bbc015495-000000@us-west-2.amazonses.com Cc: stable@vger.kernel.org Fixes: d914ba37d7145 ("tracing: Add support for recording tgid of tasks") Signed-off-by: Prateek Sood Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 8 ++++++++ kernel/trace/trace_events.c | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2fa72419bbd7..d8bd9b1d8bce 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4590,6 +4590,10 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { + if ((mask == TRACE_ITER_RECORD_TGID) || + (mask == TRACE_ITER_RECORD_CMD)) + lockdep_assert_held(&event_mutex); + /* do nothing if flag is already set */ if (!!(tr->trace_flags & mask) == !!enabled) return 0; @@ -4657,6 +4661,7 @@ static int trace_set_options(struct trace_array *tr, char *option) cmp += len; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = match_string(trace_options, -1, cmp); @@ -4667,6 +4672,7 @@ static int trace_set_options(struct trace_array *tr, char *option) ret = set_tracer_flag(tr, 1 << ret, !neg); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); /* * If the first trailing whitespace is replaced with '\0' by strstrip, @@ -7972,9 +7978,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = set_tracer_flag(tr, 1 << index, val); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (ret < 0) return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fba87d10f0c1..995061bb2dec 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -320,7 +320,8 @@ void trace_event_enable_cmd_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) @@ -334,7 +335,6 @@ void trace_event_enable_cmd_record(bool enable) clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } void trace_event_enable_tgid_record(bool enable) @@ -342,7 +342,8 @@ void trace_event_enable_tgid_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; @@ -356,7 +357,6 @@ void trace_event_enable_tgid_record(bool enable) &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } static int __ftrace_event_enable_disable(struct trace_event_file *file, From d50bc5758e28fda93e3fdc2c3fedb83700c5b724 Mon Sep 17 00:00:00 2001 From: Keita Suzuki Date: Wed, 11 Dec 2019 09:12:58 +0000 Subject: [PATCH 115/191] tracing: Avoid memory leak in process_system_preds() commit 79e65c27f09683fbb50c33acab395d0ddf5302d2 upstream. When failing in the allocation of filter_item, process_system_preds() goes to fail_mem, where the allocated filter is freed. However, this leads to memory leak of filter->filter_string and filter->prog, which is allocated before and in process_preds(). This bug has been detected by kmemleak as well. Fix this by changing kfree to __free_fiter. unreferenced object 0xffff8880658007c0 (size 32): comm "bash", pid 579, jiffies 4295096372 (age 17.752s) hex dump (first 32 bytes): 63 6f 6d 6d 6f 6e 5f 70 69 64 20 20 3e 20 31 30 common_pid > 10 00 00 00 00 00 00 00 00 65 73 00 00 00 00 00 00 ........es...... backtrace: [<0000000067441602>] kstrdup+0x2d/0x60 [<00000000141cf7b7>] apply_subsystem_event_filter+0x378/0x932 [<000000009ca32334>] subsystem_filter_write+0x5a/0x90 [<0000000072da2bee>] vfs_write+0xe1/0x240 [<000000004f14f473>] ksys_write+0xb4/0x150 [<00000000a968b4a0>] do_syscall_64+0x6d/0x1e0 [<000000001a189f40>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff888060c22d00 (size 64): comm "bash", pid 579, jiffies 4295096372 (age 17.752s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 e8 d7 41 80 88 ff ff ...........A.... 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000b8c1b109>] process_preds+0x243/0x1820 [<000000003972c7f0>] apply_subsystem_event_filter+0x3be/0x932 [<000000009ca32334>] subsystem_filter_write+0x5a/0x90 [<0000000072da2bee>] vfs_write+0xe1/0x240 [<000000004f14f473>] ksys_write+0xb4/0x150 [<00000000a968b4a0>] do_syscall_64+0x6d/0x1e0 [<000000001a189f40>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff888041d7e800 (size 512): comm "bash", pid 579, jiffies 4295096372 (age 17.752s) hex dump (first 32 bytes): 70 bc 85 97 ff ff ff ff 0a 00 00 00 00 00 00 00 p............... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000001e04af34>] process_preds+0x71a/0x1820 [<000000003972c7f0>] apply_subsystem_event_filter+0x3be/0x932 [<000000009ca32334>] subsystem_filter_write+0x5a/0x90 [<0000000072da2bee>] vfs_write+0xe1/0x240 [<000000004f14f473>] ksys_write+0xb4/0x150 [<00000000a968b4a0>] do_syscall_64+0x6d/0x1e0 [<000000001a189f40>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Link: http://lkml.kernel.org/r/20191211091258.11310-1-keitasuzuki.park@sslab.ics.keio.ac.jp Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 404a3add43c9c ("tracing: Only add filter list when needed") Signed-off-by: Keita Suzuki Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c9a74f82b14a..bf44f6bbd0c3 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1662,7 +1662,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); return -EINVAL; fail_mem: - kfree(filter); + __free_filter(filter); /* If any call succeeded, we still need to sync */ if (!fail) tracepoint_synchronize_unregister(); From 038840ff78f4d9c9699cf2eb7f0798152b671a51 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 11 Dec 2019 15:44:22 -0500 Subject: [PATCH 116/191] tracing: Have the histogram compare functions convert to u64 first commit 106f41f5a302cb1f36c7543fae6a05de12e96fa4 upstream. The compare functions of the histogram code would be specific for the size of the value being compared (byte, short, int, long long). It would reference the value from the array via the type of the compare, but the value was stored in a 64 bit number. This is fine for little endian machines, but for big endian machines, it would end up comparing zeros or all ones (depending on the sign) for anything but 64 bit numbers. To fix this, first derference the value as a u64 then convert it to the type being compared. Link: http://lkml.kernel.org/r/20191211103557.7bed6928@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 08d43a5fa063e ("tracing: Add lock-free tracing_map") Acked-by: Tom Zanussi Reported-by: Sven Schnelle Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/tracing_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 9a1c22310323..9e31bfc818ff 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -148,8 +148,8 @@ static int tracing_map_cmp_atomic64(void *val_a, void *val_b) #define DEFINE_TRACING_MAP_CMP_FN(type) \ static int tracing_map_cmp_##type(void *val_a, void *val_b) \ { \ - type a = *(type *)val_a; \ - type b = *(type *)val_b; \ + type a = (type)(*(u64 *)val_a); \ + type b = (type)(*(u64 *)val_b); \ \ return (a > b) ? 1 : ((a < b) ? -1 : 0); \ } From 09f80e915f1421d9e04c4b691f0f2f3358d1c1e9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 18 Dec 2019 08:44:27 +0100 Subject: [PATCH 117/191] tracing: Fix endianness bug in histogram trigger commit fe6e096a5bbf73a142f09c72e7aa2835026eb1a3 upstream. At least on PA-RISC and s390 synthetic histogram triggers are failing selftests because trace_event_raw_event_synth() always writes a 64 bit values, but the reader expects a field->size sized value. On little endian machines this doesn't hurt, but on big endian this makes the reader always read zero values. Link: http://lore.kernel.org/linux-trace-devel/20191218074427.96184-4-svens@linux.ibm.com Cc: stable@vger.kernel.org Fixes: 4b147936fa509 ("tracing: Add support for 'synthetic' events") Acked-by: Tom Zanussi Signed-off-by: Sven Schnelle Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 7482a1466ebf..c2783915600c 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -911,7 +911,26 @@ static notrace void trace_event_raw_event_synth(void *__data, strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { - entry->fields[n_u64] = var_ref_vals[var_ref_idx + i]; + struct synth_field *field = event->fields[i]; + u64 val = var_ref_vals[var_ref_idx + i]; + + switch (field->size) { + case 1: + *(u8 *)&entry->fields[n_u64] = (u8)val; + break; + + case 2: + *(u16 *)&entry->fields[n_u64] = (u16)val; + break; + + case 4: + *(u32 *)&entry->fields[n_u64] = (u32)val; + break; + + default: + entry->fields[n_u64] = val; + break; + } n_u64++; } } From 7aa117bfc27a15bb2656b34bc6322417065a828d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 18 Dec 2019 08:44:26 +0100 Subject: [PATCH 118/191] samples/trace_printk: Wait for IRQ work to finish commit 01f36a554e3ef32f9fc4b81a4437cf08fd0e4742 upstream. trace_printk schedules work via irq_work_queue(), but doesn't wait until it was processed. The kprobe_module.tc testcase does: :;: "Load module again, which means the event1 should be recorded";: modprobe trace-printk grep "event1:" trace so the grep which checks the trace file might run before the irq work was processed. Fix this by adding a irq_work_sync(). Link: http://lore.kernel.org/linux-trace-devel/20191218074427.96184-3-svens@linux.ibm.com Cc: stable@vger.kernel.org Fixes: af2a0750f3749 ("selftests/ftrace: Improve kprobe on module testcase to load/unload module") Signed-off-by: Sven Schnelle Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- samples/trace_printk/trace-printk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/trace_printk/trace-printk.c b/samples/trace_printk/trace-printk.c index 7affc3b50b61..cfc159580263 100644 --- a/samples/trace_printk/trace-printk.c +++ b/samples/trace_printk/trace-printk.c @@ -36,6 +36,7 @@ static int __init trace_printk_init(void) /* Kick off printing in irq context */ irq_work_queue(&irqwork); + irq_work_sync(&irqwork); trace_printk("This is a %s that will use trace_bprintk()\n", "static string"); From d1b69aabcd4ae7efefb38edf73eceefdf560b24a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 08:50:00 -0700 Subject: [PATCH 119/191] io_uring: use current task creds instead of allocating a new one commit 0b8c0ec7eedcd8f9f1a1f238d87f9b512b09e71a upstream. syzbot reports: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 9217 Comm: io_uring-sq Not tainted 5.4.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:creds_are_invalid kernel/cred.c:792 [inline] RIP: 0010:__validate_creds include/linux/cred.h:187 [inline] RIP: 0010:override_creds+0x9f/0x170 kernel/cred.c:550 Code: ac 25 00 81 fb 64 65 73 43 0f 85 a3 37 00 00 e8 17 ab 25 00 49 8d 7c 24 10 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 96 00 00 00 41 8b 5c 24 10 bf RSP: 0018:ffff88809c45fda0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000043736564 RCX: ffffffff814f3318 RDX: 0000000000000002 RSI: ffffffff814f3329 RDI: 0000000000000010 RBP: ffff88809c45fdb8 R08: ffff8880a3aac240 R09: ffffed1014755849 R10: ffffed1014755848 R11: ffff8880a3aac247 R12: 0000000000000000 R13: ffff888098ab1600 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd51c40664 CR3: 0000000092641000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: io_sq_thread+0x1c7/0xa20 fs/io_uring.c:3274 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Modules linked in: ---[ end trace f2e1a4307fbe2245 ]--- RIP: 0010:creds_are_invalid kernel/cred.c:792 [inline] RIP: 0010:__validate_creds include/linux/cred.h:187 [inline] RIP: 0010:override_creds+0x9f/0x170 kernel/cred.c:550 Code: ac 25 00 81 fb 64 65 73 43 0f 85 a3 37 00 00 e8 17 ab 25 00 49 8d 7c 24 10 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 96 00 00 00 41 8b 5c 24 10 bf RSP: 0018:ffff88809c45fda0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000043736564 RCX: ffffffff814f3318 RDX: 0000000000000002 RSI: ffffffff814f3329 RDI: 0000000000000010 RBP: ffff88809c45fdb8 R08: ffff8880a3aac240 R09: ffffed1014755849 R10: ffffed1014755848 R11: ffff8880a3aac247 R12: 0000000000000000 R13: ffff888098ab1600 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd51c40664 CR3: 0000000092641000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 which is caused by slab fault injection triggering a failure in prepare_creds(). We don't actually need to create a copy of the creds as we're not modifying it, we just need a reference on the current task creds. This avoids the failure case as well, and propagates the const throughout the stack. Fixes: 181e448d8709 ("io_uring: async workers should inherit the user creds") Reported-by: syzbot+5320383e16029ba057ff@syzkaller.appspotmail.com Signed-off-by: Jens Axboe [ only use the io_uring.c portion of the patch - gregkh] Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 74e786578c77..a60c6315a348 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -239,7 +239,7 @@ struct io_ring_ctx { struct user_struct *user; - struct cred *creds; + const struct cred *creds; struct completion ctx_done; @@ -3876,7 +3876,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) ctx->account_mem = account_mem; ctx->user = user; - ctx->creds = prepare_creds(); + ctx->creds = get_current_cred(); if (!ctx->creds) { ret = -ENOMEM; goto err; From 20170bfa3d9a58f788d5b4255c138ded5cefd7a1 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 4 Jan 2020 13:00:12 -0800 Subject: [PATCH 120/191] mm/gup: fix memory leak in __gup_benchmark_ioctl commit a7c46c0c0e3d62f2764cd08b90934cd2aaaf8545 upstream. In the implementation of __gup_benchmark_ioctl() the allocated pages should be released before returning in case of an invalid cmd. Release pages via kvfree(). [akpm@linux-foundation.org: rework code flow, return -EINVAL rather than -1] Link: http://lkml.kernel.org/r/20191211174653.4102-1-navid.emamdoost@gmail.com Fixes: 714a3a1ebafe ("mm/gup_benchmark.c: add additional pinning methods") Signed-off-by: Navid Emamdoost Reviewed-by: Andrew Morton Reviewed-by: Ira Weiny Reviewed-by: John Hubbard Cc: Keith Busch Cc: Kirill A. Shutemov Cc: Dave Hansen Cc: Dan Williams Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/gup_benchmark.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 7dd602d7f8db..ad9d5b1c4473 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -26,6 +26,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd, unsigned long i, nr_pages, addr, next; int nr; struct page **pages; + int ret = 0; if (gup->size > ULONG_MAX) return -EINVAL; @@ -63,7 +64,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd, NULL); break; default: - return -1; + kvfree(pages); + ret = -EINVAL; + goto out; } if (nr <= 0) @@ -85,7 +88,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, gup->put_delta_usec = ktime_us_delta(end_time, start_time); kvfree(pages); - return 0; +out: + return ret; } static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, From e0d2bf5a012974c2f718ce41dbff185666eae11f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 2 Jan 2020 05:31:22 -0800 Subject: [PATCH 121/191] apparmor: fix aa_xattrs_match() may sleep while holding a RCU lock commit 8c62ed27a12c00e3db1c9f04bc0f272bdbb06734 upstream. aa_xattrs_match() is unfortunately calling vfs_getxattr_alloc() from a context protected by an rcu_read_lock. This can not be done as vfs_getxattr_alloc() may sleep regardles of the gfp_t value being passed to it. Fix this by breaking the rcu_read_lock on the policy search when the xattr match feature is requested and restarting the search if a policy changes occur. Fixes: 8e51f9087f40 ("apparmor: Add support for attaching profiles via xattr, presence and value") Reported-by: Jia-Ju Bai Reported-by: Al Viro Signed-off-by: John Johansen Signed-off-by: Greg Kroah-Hartman --- security/apparmor/apparmorfs.c | 2 +- security/apparmor/domain.c | 80 ++++++++++++++++++---------------- security/apparmor/policy.c | 4 +- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 45d13b6462aa..90d21675c3ad 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -593,7 +593,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt) void __aa_bump_ns_revision(struct aa_ns *ns) { - ns->revision++; + WRITE_ONCE(ns->revision, ns->revision + 1); wake_up_interruptible(&ns->wait); } diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 9e0492795267..039ca71872ce 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -317,6 +317,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm, if (!bprm || !profile->xattr_count) return 0; + might_sleep(); /* transition from exec match to xattr set */ state = aa_dfa_null_transition(profile->xmatch, state); @@ -361,10 +362,11 @@ out: } /** - * __attach_match_ - find an attachment match + * find_attach - do attachment search for unconfined processes * @bprm - binprm structure of transitioning task - * @name - to match against (NOT NULL) + * @ns: the current namespace (NOT NULL) * @head - profile list to walk (NOT NULL) + * @name - to match against (NOT NULL) * @info - info message if there was an error (NOT NULL) * * Do a linear search on the profiles in the list. There is a matching @@ -374,12 +376,11 @@ out: * * Requires: @head not be shared or have appropriate locks held * - * Returns: profile or NULL if no match found + * Returns: label or NULL if no match found */ -static struct aa_profile *__attach_match(const struct linux_binprm *bprm, - const char *name, - struct list_head *head, - const char **info) +static struct aa_label *find_attach(const struct linux_binprm *bprm, + struct aa_ns *ns, struct list_head *head, + const char *name, const char **info) { int candidate_len = 0, candidate_xattrs = 0; bool conflict = false; @@ -388,6 +389,8 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm, AA_BUG(!name); AA_BUG(!head); + rcu_read_lock(); +restart: list_for_each_entry_rcu(profile, head, base.list) { if (profile->label.flags & FLAG_NULL && &profile->label == ns_unconfined(profile->ns)) @@ -413,16 +416,32 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm, perm = dfa_user_allow(profile->xmatch, state); /* any accepting state means a valid match. */ if (perm & MAY_EXEC) { - int ret; + int ret = 0; if (count < candidate_len) continue; - ret = aa_xattrs_match(bprm, profile, state); - /* Fail matching if the xattrs don't match */ - if (ret < 0) - continue; + if (bprm && profile->xattr_count) { + long rev = READ_ONCE(ns->revision); + if (!aa_get_profile_not0(profile)) + goto restart; + rcu_read_unlock(); + ret = aa_xattrs_match(bprm, profile, + state); + rcu_read_lock(); + aa_put_profile(profile); + if (rev != + READ_ONCE(ns->revision)) + /* policy changed */ + goto restart; + /* + * Fail matching if the xattrs don't + * match + */ + if (ret < 0) + continue; + } /* * TODO: allow for more flexible best match * @@ -445,43 +464,28 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm, candidate_xattrs = ret; conflict = false; } - } else if (!strcmp(profile->base.name, name)) + } else if (!strcmp(profile->base.name, name)) { /* * old exact non-re match, without conditionals such * as xattrs. no more searching required */ - return profile; + candidate = profile; + goto out; + } } - if (conflict) { - *info = "conflicting profile attachments"; + if (!candidate || conflict) { + if (conflict) + *info = "conflicting profile attachments"; + rcu_read_unlock(); return NULL; } - return candidate; -} - -/** - * find_attach - do attachment search for unconfined processes - * @bprm - binprm structure of transitioning task - * @ns: the current namespace (NOT NULL) - * @list: list to search (NOT NULL) - * @name: the executable name to match against (NOT NULL) - * @info: info message if there was an error - * - * Returns: label or NULL if no match found - */ -static struct aa_label *find_attach(const struct linux_binprm *bprm, - struct aa_ns *ns, struct list_head *list, - const char *name, const char **info) -{ - struct aa_profile *profile; - - rcu_read_lock(); - profile = aa_get_profile(__attach_match(bprm, name, list, info)); +out: + candidate = aa_get_newest_profile(candidate); rcu_read_unlock(); - return profile ? &profile->label : NULL; + return &candidate->label; } static const char *next_name(int xtype, const char *name) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index ade333074c8e..06355717ee84 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -1124,8 +1124,8 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj, if (!name) { /* remove namespace - can only happen if fqname[0] == ':' */ mutex_lock_nested(&ns->parent->lock, ns->level); - __aa_remove_ns(ns); __aa_bump_ns_revision(ns); + __aa_remove_ns(ns); mutex_unlock(&ns->parent->lock); } else { /* remove profile */ @@ -1137,9 +1137,9 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj, goto fail_ns_lock; } name = profile->base.hname; + __aa_bump_ns_revision(ns); __remove_profile(profile); __aa_labelset_update_subtree(ns); - __aa_bump_ns_revision(ns); mutex_unlock(&ns->lock); } From fdfd8f9ef2d3a35764e8fabc035bd0bbb382e318 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 20 Dec 2019 15:11:00 +0200 Subject: [PATCH 122/191] dmaengine: virt-dma: Fix access after free in vchan_complete() commit 24461d9792c2c706092805ff1b067628933441bd upstream. vchan_vdesc_fini() is freeing up 'vd' so the access to vd->tx_result is via already freed up memory. Move the vchan_vdesc_fini() after invoking the callback to avoid this. Fixes: 09d5b702b0f97 ("dmaengine: virt-dma: store result on dma descriptor") Signed-off-by: Peter Ujfalusi Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20191220131100.21804-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/virt-dma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index ec4adf4260a0..256fc662c500 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -104,9 +104,8 @@ static void vchan_complete(unsigned long arg) dmaengine_desc_get_callback(&vd->tx, &cb); list_del(&vd->node); - vchan_vdesc_fini(vd); - dmaengine_desc_callback_invoke(&cb, &vd->tx_result); + vchan_vdesc_fini(vd); } } From ebf3afa3cbd1778131af012531a0f7cc896c6621 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 Dec 2019 22:20:06 +0900 Subject: [PATCH 123/191] gen_initramfs_list.sh: fix 'bad variable name' error commit cc976614f59bd8e45de8ce988a6bcb5de711d994 upstream. Prior to commit 858805b336be ("kbuild: add $(BASH) to run scripts with bash-extension"), this shell script was almost always run by bash since bash is usually installed on the system by default. Now, this script is run by sh, which might be a symlink to dash. On such distributions, the following code emits an error: local dev=`LC_ALL=C ls -l "${location}"` You can reproduce the build error, for example by setting CONFIG_INITRAMFS_SOURCE="/dev". GEN usr/initramfs_data.cpio.gz ./usr/gen_initramfs_list.sh: 131: local: 1: bad variable name make[1]: *** [usr/Makefile:61: usr/initramfs_data.cpio.gz] Error 2 This is because `LC_ALL=C ls -l "${location}"` contains spaces. Surrounding it with double-quotes fixes the error. Fixes: 858805b336be ("kbuild: add $(BASH) to run scripts with bash-extension") Reported-by: Jory A. Pratt Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- usr/gen_initramfs_list.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr/gen_initramfs_list.sh b/usr/gen_initramfs_list.sh index 0aad760fcd8c..2bbac73e6477 100755 --- a/usr/gen_initramfs_list.sh +++ b/usr/gen_initramfs_list.sh @@ -128,7 +128,7 @@ parse() { str="${ftype} ${name} ${location} ${str}" ;; "nod") - local dev=`LC_ALL=C ls -l "${location}"` + local dev="`LC_ALL=C ls -l "${location}"`" local maj=`field 5 ${dev}` local min=`field 6 ${dev}` maj=${maj%,} From aea6343885a67cd0ff69cba720c7e8538e78cc40 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Nov 2019 13:13:54 +0000 Subject: [PATCH 124/191] ALSA: cs4236: fix error return comparison of an unsigned integer commit d60229d84846a8399257006af9c5444599f64361 upstream. The return from pnp_irq is an unsigned integer type resource_size_t and hence the error check for a positive non-error code is always going to be true. A check for a non-failure return from pnp_irq should in fact be for (resource_size_t)-1 rather than >= 0. Addresses-Coverity: ("Unsigned compared against 0") Fixes: a9824c868a2c ("[ALSA] Add CS4232 PnP BIOS support") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20191122131354.58042-1-colin.king@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/cs423x/cs4236.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 78dd213589b4..fa3c39cff5f8 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -278,7 +278,8 @@ static int snd_cs423x_pnp_init_mpu(int dev, struct pnp_dev *pdev) } else { mpu_port[dev] = pnp_port_start(pdev, 0); if (mpu_irq[dev] >= 0 && - pnp_irq_valid(pdev, 0) && pnp_irq(pdev, 0) >= 0) { + pnp_irq_valid(pdev, 0) && + pnp_irq(pdev, 0) != (resource_size_t)-1) { mpu_irq[dev] = pnp_irq(pdev, 0); } else { mpu_irq[dev] = -1; /* disable interrupt */ From d77ae7624782bb947cf71696d28aad2f476fd17e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Nov 2019 17:56:26 +0100 Subject: [PATCH 125/191] ALSA: pcm: Yet another missing check of non-cached buffer type commit 2406ff9b86aa1b77fe1a6d15f37195ac1fdb2a14 upstream. For non-x86 architectures, SNDRV_DMA_TYPE_DEV_UC should be treated equivalent with SNDRV_DMA_TYPE_DEV, where the default mmap handler still checks only about SNDRV_DMA_TYPE_DEV. Make the check more proper. Note that all existing users of *_UC buffer types are x86-only, so this doesn't fix any bug, but just for consistency. Fixes: 42e748a0b325 ("ALSA: memalloc: Add non-cached buffer type") Link: https://lore.kernel.org/r/20191108165626.5947-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index d4280568a41e..5c74ea2bb44b 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3408,7 +3408,8 @@ int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, #endif /* CONFIG_GENERIC_ALLOCATOR */ #ifndef CONFIG_X86 /* for avoiding warnings arch/x86/mm/pat.c */ if (IS_ENABLED(CONFIG_HAS_DMA) && !substream->ops->page && - substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV) + (substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV || + substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV_UC)) return dma_mmap_coherent(substream->dma_buffer.dev.dev, area, substream->runtime->dma_area, From bbfc067c211d70f219c11ceaa58f4d759a410489 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2019 11:09:21 +0100 Subject: [PATCH 126/191] ALSA: firewire-motu: Correct a typo in the clock proc string commit 0929249e3be3bb82ee6cfec0025f4dde952210b3 upstream. Just fix a typo of "S/PDIF" in the clock name string. Fixes: 4638ec6ede08 ("ALSA: firewire-motu: add proc node to show current statuc of clock and packet formats") Acked-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20191030100921.3826-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/motu/motu-proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/motu/motu-proc.c b/sound/firewire/motu/motu-proc.c index ea46fb4c1b5a..126a7bd187bb 100644 --- a/sound/firewire/motu/motu-proc.c +++ b/sound/firewire/motu/motu-proc.c @@ -16,7 +16,7 @@ static const char *const clock_names[] = { [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT] = "S/PDIF on optical interface", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A] = "S/PDIF on optical interface A", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B] = "S/PDIF on optical interface B", - [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PCIF on coaxial interface", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PDIF on coaxial interface", [SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR] = "AESEBU on XLR interface", [SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC] = "Word clock on BNC interface", }; From 858f090696ca51f520ed002c781acd0f7d2475e3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:51 -0700 Subject: [PATCH 127/191] scsi: lpfc: Fix rpi release when deleting vport commit 97acd0019d5dadd9c0e111c2083c889bfe548f25 upstream. A prior use-after-free mailbox fix solved it's problem by null'ing a ndlp pointer. However, further testing has shown that this change causes a later state change to occasionally be skipped, which results in a reference count never being decremented thus the rpi is never released, which causes a vport delete to never succeed. Revise the fix in the prior patch to no longer null the ndlp. Instead the RELEASE_RPI flag is set which will drive the release of the rpi. Given the new code was added at a deep indentation level, refactor the code block using a new routine that avoids the indentation issues. Fixes: 9b1640686470 ("scsi: lpfc: Fix use-after-free mailbox cmd completion") Link: https://lore.kernel.org/r/20190922035906.10977-6-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_hbadisc.c | 88 +++++++++++++++++++++----------- drivers/scsi/lpfc/lpfc_sli.c | 2 + 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 1286c658ba34..ee70d14e7a9d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4843,6 +4843,44 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } } +/* + * Sets the mailbox completion handler to be used for the + * unreg_rpi command. The handler varies based on the state of + * the port and what will be happening to the rpi next. + */ +static void +lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, LPFC_MBOXQ_t *mbox) +{ + unsigned long iflags; + + if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_nlp_logo_unreg; + + } else if (phba->sli_rev == LPFC_SLI_REV4 && + (!(vport->load_flag & FC_UNLOADING)) && + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= + LPFC_SLI_INTF_IF_TYPE_2) && + (kref_read(&ndlp->kref) > 0)) { + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); + mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; + } else { + if (vport->load_flag & FC_UNLOADING) { + if (phba->sli_rev == LPFC_SLI_REV4) { + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag |= NLP_RELEASE_RPI; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + lpfc_nlp_get(ndlp); + } + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } +} + /* * Free rpi associated with LPFC_NODELIST entry. * This routine is called from lpfc_freenode(), when we are removing @@ -4893,33 +4931,12 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_unreg_login(phba, vport->vpi, rpi, mbox); mbox->vport = vport; - if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = lpfc_nlp_logo_unreg; - } else { - if (phba->sli_rev == LPFC_SLI_REV4 && - (!(vport->load_flag & FC_UNLOADING)) && - (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) >= - LPFC_SLI_INTF_IF_TYPE_2) && - (kref_read(&ndlp->kref) > 0)) { - mbox->ctx_ndlp = lpfc_nlp_get(ndlp); - mbox->mbox_cmpl = - lpfc_sli4_unreg_rpi_cmpl_clr; - /* - * accept PLOGIs after unreg_rpi_cmpl - */ - acc_plogi = 0; - } else if (vport->load_flag & FC_UNLOADING) { - mbox->ctx_ndlp = NULL; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } else { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } - } + lpfc_set_unreg_login_mbx_cmpl(phba, vport, ndlp, mbox); + if (mbox->mbox_cmpl == lpfc_sli4_unreg_rpi_cmpl_clr) + /* + * accept PLOGIs after unreg_rpi_cmpl + */ + acc_plogi = 0; if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) && (!(vport->fc_flag & FC_OFFLINE_MODE))) @@ -5060,6 +5077,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mb, *nextmb; struct lpfc_dmabuf *mp; + unsigned long iflags; /* Cleanup node for NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, @@ -5141,8 +5159,20 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_cleanup_vports_rrqs(vport, ndlp); if (phba->sli_rev == LPFC_SLI_REV4) ndlp->nlp_flag |= NLP_RELEASE_RPI; - lpfc_unreg_rpi(vport, ndlp); - + if (!lpfc_unreg_rpi(vport, ndlp)) { + /* Clean up unregistered and non freed rpis */ + if ((ndlp->nlp_flag & NLP_RELEASE_RPI) && + !(ndlp->nlp_rpi == LPFC_RPI_ALLOC_ERROR)) { + lpfc_sli4_free_rpi(vport->phba, + ndlp->nlp_rpi); + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag &= ~NLP_RELEASE_RPI; + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + } return 0; } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 2b0e7b32c2df..8860f41af3ff 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2526,6 +2526,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } else { __lpfc_sli_rpi_release(vport, ndlp); } + if (vport->load_flag & FC_UNLOADING) + lpfc_nlp_put(ndlp); pmb->ctx_ndlp = NULL; } } From 66d9052e6c765f74fa4a1bce874fab29a6b9583d Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Thu, 19 Dec 2019 14:29:53 +0800 Subject: [PATCH 128/191] exit: panic before exit_mm() on global init exit commit 43cf75d96409a20ef06b756877a2e72b10a026fc upstream. Currently, when global init and all threads in its thread-group have exited we panic via: do_exit() -> exit_notify() -> forget_original_parent() -> find_child_reaper() This makes it hard to extract a useable coredump for global init from a kernel crashdump because by the time we panic exit_mm() will have already released global init's mm. This patch moves the panic futher up before exit_mm() is called. As was the case previously, we only panic when global init and all its threads in the thread-group have exited. Signed-off-by: chenqiwu Acked-by: Christian Brauner Acked-by: Oleg Nesterov [christian.brauner@ubuntu.com: fix typo, rewrite commit message] Link: https://lore.kernel.org/r/1576736993-10121-1-git-send-email-qiwuchen55@gmail.com Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index d351fd09e739..22dfaac9e48c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -517,10 +517,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father, } write_unlock_irq(&tasklist_lock); - if (unlikely(pid_ns == &init_pid_ns)) { - panic("Attempted to kill init! exitcode=0x%08x\n", - father->signal->group_exit_code ?: father->exit_code); - } list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); @@ -766,6 +762,14 @@ void __noreturn do_exit(long code) acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { + /* + * If the last thread of global init has exited, panic + * immediately to get a useable coredump. + */ + if (unlikely(is_global_init(tsk))) + panic("Attempted to kill init! exitcode=0x%08x\n", + tsk->signal->group_exit_code ?: (int)code); + #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); From 623e5ae074fe95e960ff588fd7d2ee2299c2704e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 6 Jan 2020 14:35:39 +0000 Subject: [PATCH 129/191] arm64: Revert support for execute-only user mappings commit 24cecc37746393432d994c0dbc251fb9ac7c5d72 upstream. The ARMv8 64-bit architecture supports execute-only user permissions by clearing the PTE_USER and PTE_UXN bits, practically making it a mostly privileged mapping but from which user running at EL0 can still execute. The downside, however, is that the kernel at EL1 inadvertently reading such mapping would not trip over the PAN (privileged access never) protection. Revert the relevant bits from commit cab15ce604e5 ("arm64: Introduce execute-only page access permissions") so that PROT_EXEC implies PROT_READ (and therefore PTE_USER) until the architecture gains proper support for execute-only user mappings. Fixes: cab15ce604e5 ("arm64: Introduce execute-only page access permissions") Cc: # 4.9.x- Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable-prot.h | 5 ++--- arch/arm64/include/asm/pgtable.h | 10 +++------- arch/arm64/mm/fault.c | 2 +- mm/mmap.c | 6 ------ 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 8dc6c5cdabe6..baf52baaa2a5 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -85,13 +85,12 @@ #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -100,7 +99,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 565aa45ef134..13ebe2bad79f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -96,12 +96,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -117,8 +113,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9fc6db0bcbad..d26e6cd28953 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -454,7 +454,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (kprobe_page_fault(regs, esr)) diff --git a/mm/mmap.c b/mm/mmap.c index a7d8c84d19b7..4390dbea4aa5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -90,12 +90,6 @@ static void unmap_region(struct mm_struct *mm, * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and - * MAP_PRIVATE: - * r: (no) no - * w: (no) no - * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, From f0629ee3922f10112584b1898491fecc74d98b3b Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 3 Jan 2020 11:02:48 +0800 Subject: [PATCH 130/191] ftrace: Avoid potential division by zero in function profiler commit e31f7939c1c27faa5d0e3f14519eaf7c89e8a69d upstream. The ftrace_profile->counter is unsigned long and do_div truncates it to 32 bits, which means it can test non-zero and be truncated to zero for division. Fix this issue by using div64_ul() instead. Link: http://lkml.kernel.org/r/20200103030248.14516-1-wenyang@linux.alibaba.com Cc: stable@vger.kernel.org Fixes: e330b3bcd8319 ("tracing: Show sample std dev in function profiling") Fixes: 34886c8bc590f ("tracing: add average time in function to function profiler") Signed-off-by: Wen Yang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f296d89be757..0708a41cfe2d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -524,8 +524,7 @@ static int function_stat_show(struct seq_file *m, void *v) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER - avg = rec->time; - do_div(avg, rec->counter); + avg = div64_ul(rec->time, rec->counter); if (tracing_thresh && (avg < tracing_thresh)) goto out; #endif @@ -551,7 +550,8 @@ static int function_stat_show(struct seq_file *m, void *v) * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, + rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); From accc08f40655159c0906c724d80d42099bec4eef Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 28 Dec 2019 15:55:36 +0200 Subject: [PATCH 131/191] spi: spi-fsl-dspi: Fix 16-bit word order in 32-bit XSPI mode commit ca59d5a51690d5b9340343dc36792a252e9414ae upstream. When used in Extended SPI mode on LS1021A, the DSPI controller wants to have the least significant 16-bit word written first to the TX FIFO. In fact, the LS1021A reference manual says: 33.5.2.4.2 Draining the TX FIFO When Extended SPI Mode (DSPIx_MCR[XSPI]) is enabled, if the frame size of SPI Data to be transmitted is more than 16 bits, then it causes two Data entries to be popped from TX FIFO simultaneously which are transferred to the shift register. The first of the two popped entries forms the 16 least significant bits of the SPI frame to be transmitted. So given the following TX buffer: +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ | 0x0 | 0x1 | 0x2 | 0x3 | 0x4 | 0x5 | 0x6 | 0x7 | 0x8 | 0x9 | 0xa | 0xb | +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ | 32-bit word 1 | 32-bit word 2 | 32-bit word 3 | +-----------------------+-----------------------+-----------------------+ The correct way that a little-endian system should transmit it on the wire when bits_per_word is 32 is: 0x03020100 0x07060504 0x0b0a0908 But it is actually transmitted as following, as seen with a scope: 0x01000302 0x05040706 0x09080b0a It appears that this patch has been submitted at least once before: https://lkml.org/lkml/2018/9/21/286 but in that case Chuanhua Han did not manage to explain the problem clearly enough and the patch did not get merged, leaving XSPI mode broken. Fixes: 8fcd151d2619 ("spi: spi-fsl-dspi: XSPI FIFO handling (in TCFQ mode)") Cc: Esben Haabendal Cc: Chuanhua Han Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20191228135536.14284-1-olteanv@gmail.com Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-fsl-dspi.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index bec758e978fb..d47bd26577b3 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -583,21 +583,14 @@ static void dspi_tcfq_write(struct fsl_dspi *dspi) dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT; if (dspi->devtype_data->xspi_mode && dspi->bits_per_word > 16) { - /* Write two TX FIFO entries first, and then the corresponding - * CMD FIFO entry. + /* Write the CMD FIFO entry first, and then the two + * corresponding TX FIFO entries. */ u32 data = dspi_pop_tx(dspi); - if (dspi->cur_chip->ctar_val & SPI_CTAR_LSBFE) { - /* LSB */ - tx_fifo_write(dspi, data & 0xFFFF); - tx_fifo_write(dspi, data >> 16); - } else { - /* MSB */ - tx_fifo_write(dspi, data >> 16); - tx_fifo_write(dspi, data & 0xFFFF); - } cmd_fifo_write(dspi); + tx_fifo_write(dspi, data & 0xFFFF); + tx_fifo_write(dspi, data >> 16); } else { /* Write one entry to both TX FIFO and CMD FIFO * simultaneously. From fe8adb0aae33905bdd6b682907d8c48b1857ec99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Sep 2019 21:57:07 +0200 Subject: [PATCH 132/191] drm/msm: include linux/sched/task.h commit 70082a52f96a45650dfc3d8cdcd2c42bdac9f6f0 upstream. Without this header file, compile-testing may run into a missing declaration: drivers/gpu/drm/msm/msm_gpu.c:444:4: error: implicit declaration of function 'put_task_struct' [-Werror,-Wimplicit-function-declaration] Fixes: 482f96324a4e ("drm/msm: Fix task dump in gpu recovery") Signed-off-by: Arnd Bergmann Reviewed-by: Jordan Crouse Signed-off-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_gpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index a052364a5d74..edd45f434ccd 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * Power Management: From 89dda19e77ce019582ce1d7f19396de6ad846091 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Sep 2019 10:26:53 +0300 Subject: [PATCH 133/191] PM / devfreq: Check NULL governor in available_governors_show commit d68adc8f85cd757bd33c8d7b2660ad6f16f7f3dc upstream. The governor is initialized after sysfs attributes become visible so in theory the governor field can be NULL here. Fixes: bcf23c79c4e46 ("PM / devfreq: Fix available_governor sysfs") Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Reviewed-by: Chanwoo Choi Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ffd2d6b44dfb..c64d20fdc187 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1196,7 +1196,7 @@ static ssize_t available_governors_show(struct device *d, * The devfreq with immutable governor (e.g., passive) shows * only own governor. */ - if (df->governor->immutable) { + if (df->governor && df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, "%s ", df->governor_name); /* From 9d4a0a31ce70b3d45e7605a7f3ef94c072eb43de Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 1 Oct 2019 11:03:59 +0300 Subject: [PATCH 134/191] sunrpc: fix crash when cache_head become valid before update commit 5fcaf6982d1167f1cd9b264704f6d1ef4c505d54 upstream. I was investigating a crash in our Virtuozzo7 kernel which happened in in svcauth_unix_set_client. I found out that we access m_client field in ip_map structure, which was received from sunrpc_cache_lookup (we have a bit older kernel, now the code is in sunrpc_cache_add_entry), and these field looks uninitialized (m_client == 0x74 don't look like a pointer) but in the cache_head in flags we see 0x1 which is CACHE_VALID. It looks like the problem appeared from our previous fix to sunrpc (1): commit 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") And we've also found a patch already fixing our patch (2): commit d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Though the crash is eliminated, I think the core of the problem is not completely fixed: Neil in the patch (2) makes cache_head CACHE_NEGATIVE, before cache_fresh_locked which was added in (1) to fix crash. These way cache_is_valid won't say the cache is valid anymore and in svcauth_unix_set_client the function cache_check will return error instead of 0, and we don't count entry as initialized. But it looks like we need to remove cache_fresh_locked completely in sunrpc_cache_lookup: In (1) we've only wanted to make cache_fresh_unlocked->cache_dequeue so that cache_requests with no readers also release corresponding cache_head, to fix their leak. We with Vasily were not sure if cache_fresh_locked and cache_fresh_unlocked should be used in pair or not, so we've guessed to use them in pair. Now we see that we don't want the CACHE_VALID bit set here by cache_fresh_locked, as "valid" means "initialized" and there is no initialization in sunrpc_cache_add_entry. Both expiry_time and last_refresh are not used in cache_fresh_unlocked code-path and also not required for the initial fix. So to conclude cache_fresh_locked was called by mistake, and we can just safely remove it instead of crutching it with CACHE_NEGATIVE. It looks ideologically better for me. Hope I don't miss something here. Here is our crash backtrace: [13108726.326291] BUG: unable to handle kernel NULL pointer dereference at 0000000000000074 [13108726.326365] IP: [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.326448] PGD 0 [13108726.326468] Oops: 0002 [#1] SMP [13108726.326497] Modules linked in: nbd isofs xfs loop kpatch_cumulative_81_0_r1(O) xt_physdev nfnetlink_queue bluetooth rfkill ip6table_nat nf_nat_ipv6 ip_vs_wrr ip_vs_wlc ip_vs_sh nf_conntrack_netlink ip_vs_sed ip_vs_pe_sip nf_conntrack_sip ip_vs_nq ip_vs_lc ip_vs_lblcr ip_vs_lblc ip_vs_ftp ip_vs_dh nf_nat_ftp nf_conntrack_ftp iptable_raw xt_recent nf_log_ipv6 xt_hl ip6t_rt nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_TCPMSS xt_tcpmss vxlan ip6_udp_tunnel udp_tunnel xt_statistic xt_NFLOG nfnetlink_log dummy xt_mark xt_REDIRECT nf_nat_redirect raw_diag udp_diag tcp_diag inet_diag netlink_diag af_packet_diag unix_diag rpcsec_gss_krb5 xt_addrtype ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 ebtable_nat ebtable_broute nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_raw nfsv4 [13108726.327173] dns_resolver cls_u32 binfmt_misc arptable_filter arp_tables ip6table_filter ip6_tables devlink fuse_kio_pcs ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_nat iptable_nat nf_nat_ipv4 xt_comment nf_conntrack_ipv4 nf_defrag_ipv4 xt_wdog_tmo xt_multiport bonding xt_set xt_conntrack iptable_filter iptable_mangle kpatch(O) ebtable_filter ebt_among ebtables ip_set_hash_ip ip_set nfnetlink vfat fat skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass fuse pcspkr ses enclosure joydev sg mei_me hpwdt hpilo lpc_ich mei ipmi_si shpchp ipmi_devintf ipmi_msghandler xt_ipvs acpi_power_meter ip_vs_rr nfsv3 nfsd auth_rpcgss nfs_acl nfs lockd grace fscache nf_nat cls_fw sch_htb sch_cbq sch_sfq ip_vs em_u32 nf_conntrack tun br_netfilter veth overlay ip6_vzprivnet ip6_vznetstat ip_vznetstat [13108726.327817] ip_vzprivnet vziolimit vzevent vzlist vzstat vznetstat vznetdev vzmon vzdev bridge pio_kaio pio_nfs pio_direct pfmt_raw pfmt_ploop1 ploop ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper scsi_transport_iscsi 8021q syscopyarea sysfillrect garp sysimgblt fb_sys_fops mrp stp ttm llc bnx2x crct10dif_pclmul crct10dif_common crc32_pclmul crc32c_intel drm dm_multipath ghash_clmulni_intel uas aesni_intel lrw gf128mul glue_helper ablk_helper cryptd tg3 smartpqi scsi_transport_sas mdio libcrc32c i2c_core usb_storage ptp pps_core wmi sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kpatch_cumulative_82_0_r1] [13108726.328403] CPU: 35 PID: 63742 Comm: nfsd ve: 51332 Kdump: loaded Tainted: G W O ------------ 3.10.0-862.20.2.vz7.73.29 #1 73.29 [13108726.328491] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 10/02/2018 [13108726.328554] task: ffffa0a6a41b1160 ti: ffffa0c2a74bc000 task.ti: ffffa0c2a74bc000 [13108726.328610] RIP: 0010:[] [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.328706] RSP: 0018:ffffa0c2a74bfd80 EFLAGS: 00010246 [13108726.328750] RAX: 0000000000000001 RBX: ffffa0a6183ae000 RCX: 0000000000000000 [13108726.328811] RDX: 0000000000000074 RSI: 0000000000000286 RDI: ffffa0c2a74bfcf0 [13108726.328864] RBP: ffffa0c2a74bfe00 R08: ffffa0bab8c22960 R09: 0000000000000001 [13108726.328916] R10: 0000000000000001 R11: 0000000000000001 R12: ffffa0a32aa7f000 [13108726.328969] R13: ffffa0a6183afac0 R14: ffffa0c233d88d00 R15: ffffa0c2a74bfdb4 [13108726.329022] FS: 0000000000000000(0000) GS:ffffa0e17f9c0000(0000) knlGS:0000000000000000 [13108726.329081] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13108726.332311] CR2: 0000000000000074 CR3: 00000026a1b28000 CR4: 00000000007607e0 [13108726.334606] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [13108726.336754] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [13108726.338908] PKRU: 00000000 [13108726.341047] Call Trace: [13108726.343074] [] ? groups_alloc+0x34/0x110 [13108726.344837] [] svc_set_client+0x24/0x30 [sunrpc] [13108726.346631] [] svc_process_common+0x241/0x710 [sunrpc] [13108726.348332] [] svc_process+0x103/0x190 [sunrpc] [13108726.350016] [] nfsd+0xdf/0x150 [nfsd] [13108726.351735] [] ? nfsd_destroy+0x80/0x80 [nfsd] [13108726.353459] [] kthread+0xd1/0xe0 [13108726.355195] [] ? create_kthread+0x60/0x60 [13108726.356896] [] ret_from_fork_nospec_begin+0x7/0x21 [13108726.358577] [] ? create_kthread+0x60/0x60 [13108726.360240] Code: 4c 8b 45 98 0f 8e 2e 01 00 00 83 f8 fe 0f 84 76 fe ff ff 85 c0 0f 85 2b 01 00 00 49 8b 50 40 b8 01 00 00 00 48 89 93 d0 1a 00 00 0f c1 02 83 c0 01 83 f8 01 0f 8e 53 02 00 00 49 8b 44 24 38 [13108726.363769] RIP [] svcauth_unix_set_client+0x2ab/0x520 [sunrpc] [13108726.365530] RSP [13108726.367179] CR2: 0000000000000074 Fixes: d58431eacb22 ("sunrpc: don't mark uninitialised items as VALID.") Signed-off-by: Pavel Tikhomirov Acked-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a349094f6fb7..f740cb51802a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -53,9 +53,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -static inline int cache_is_valid(struct cache_head *h); -static void cache_fresh_locked(struct cache_head *head, time_t expiry, - struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -105,9 +102,6 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; - if (cache_is_valid(tmp) == -EAGAIN) - set_bit(CACHE_NEGATIVE, &tmp->flags); - cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } From c87a84c5ebcc4d94146bba3360d3ae26ea93b25c Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Wed, 2 Oct 2019 12:07:56 -0700 Subject: [PATCH 135/191] arm64: dts: qcom: msm8998-clamshell: Remove retention idle state commit b40dd23f9a8987c8336df0a00e33f52b1f3f19ad upstream. The retention idle state does not appear to be supported by the firmware present on the msm8998 laptops since the state is advertised as disabled in ACPI, and attempting to enable the state in DT is observed to result in boot hangs. Therefore, remove the state from use to address the observed issues. Reviewed-by: Amit Kucheria Fixes: 2c6d2d3a580a (arm64: dts: qcom: Add Lenovo Miix 630) Signed-off-by: Jeffrey Hugo Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- .../boot/dts/qcom/msm8998-clamshell.dtsi | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi index 9682d4dd7496..1bae90705746 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi @@ -23,6 +23,43 @@ }; }; +/* + * The laptop FW does not appear to support the retention state as it is + * not advertised as enabled in ACPI, and enabling it in DT can cause boot + * hangs. + */ +&CPU0 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU1 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU2 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU3 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU4 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU5 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU6 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU7 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + &qusb2phy { status = "okay"; From af8547549041936848c96bc3d5fddcd33ba9c0d2 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 9 Oct 2019 15:11:37 -0400 Subject: [PATCH 136/191] nfsd4: fix up replay_matches_cache() commit 6e73e92b155c868ff7fce9d108839668caf1d9be upstream. When running an nfs stress test, I see quite a few cached replies that don't match up with the actual request. The first comment in replay_matches_cache() makes sense, but the code doesn't seem to match... fix it. This isn't exactly a bugfix, as the server isn't required to catch every case of a false retry. So, we may as well do this, but if this is fixing a problem then that suggests there's a client bug. Fixes: 53da6a53e1d4 ("nfsd4: catch some false session retries") Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c65aeaa812d4..08f6eb2b73f8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3548,12 +3548,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, (bool)seq->cachethis) return false; /* - * If there's an error than the reply can have fewer ops than - * the call. But if we cached a reply with *more* ops than the - * call you're sending us now, then this new call is clearly not - * really a replay of the old one: + * If there's an error then the reply can have fewer ops than + * the call. */ - if (slot->sl_opcnt < argp->opcnt) + if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) + return false; + /* + * But if we cached a reply with *more* ops than the call you're + * sending us now, then this new call is clearly not really a + * replay of the old one: + */ + if (slot->sl_opcnt > argp->opcnt) return false; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) From 0e9d2e5f7141cb48a52063a56f73aaa56652ec6b Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 4 Nov 2019 13:32:57 +1100 Subject: [PATCH 137/191] powerpc: Chunk calls to flush_dcache_range in arch_*_memory commit 076265907cf9633bbef861c7c2a1c26a8209f283 upstream. When presented with large amounts of memory being hotplugged (in my test case, ~890GB), the call to flush_dcache_range takes a while (~50 seconds), triggering RCU stalls. This patch breaks up the call into 1GB chunks, calling cond_resched() inbetween to allow the scheduler to run. Fixes: fb5924fddf9e ("powerpc/mm: Flush cache on memory hot(un)plug") Signed-off-by: Alastair D'Silva Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191104023305.9581-6-alastair@au1.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/mem.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 97e5922cb52e..460afa415434 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -104,6 +104,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, start + chunk)); + cond_resched(); + } +} + int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions) { @@ -120,7 +141,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, start, start + size, rc); return -EFAULT; } - flush_dcache_range(start, start + size); + + flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); return __add_pages(nid, start_pfn, nr_pages, restrictions); } @@ -136,7 +158,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range(start, start + size); + flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + ret = remove_section_mapping(start, start + size); WARN_ON_ONCE(ret); From 440d3e7beb60a19d82d75692a5f44c5d20a84923 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 7 Nov 2019 22:28:11 +0800 Subject: [PATCH 138/191] HID: i2c-hid: Reset ALPS touchpads on resume commit fd70466d37bf3fe0118d18c56ddde85b428f86cf upstream. Commit 52cf93e63ee6 ("HID: i2c-hid: Don't reset device upon system resume") fixes many touchpads and touchscreens, however ALPS touchpads start to trigger IRQ storm after system resume. Since it's total silence from ALPS, let's bring the old behavior back to ALPS touchpads. Fixes: 52cf93e63ee6 ("HID: i2c-hid: Don't reset device upon system resume") Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 7608ee053114..ac44bf752ff1 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -48,6 +48,7 @@ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) #define I2C_HID_QUIRK_BOGUS_IRQ BIT(4) +#define I2C_HID_QUIRK_RESET_ON_RESUME BIT(5) /* flags */ #define I2C_HID_STARTED 0 @@ -174,6 +175,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, I2C_HID_QUIRK_BOGUS_IRQ }, + { USB_VENDOR_ID_ALPS_JP, HID_ANY_ID, + I2C_HID_QUIRK_RESET_ON_RESUME }, { 0, 0 } }; @@ -1214,8 +1217,15 @@ static int i2c_hid_resume(struct device *dev) * solves "incomplete reports" on Raydium devices 2386:3118 and * 2386:4B33 and fixes various SIS touchscreens no longer sending * data after a suspend/resume. + * + * However some ALPS touchpads generate IRQ storm without reset, so + * let's still reset them here. */ - ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); + if (ihid->quirks & I2C_HID_QUIRK_RESET_ON_RESUME) + ret = i2c_hid_hwreset(client); + else + ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); + if (ret) return ret; From 3b91237c52549da9890b69b751d96042c7c2dcdf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Nov 2019 08:45:23 -0800 Subject: [PATCH 139/191] net/sched: annotate lockless accesses to qdisc->empty commit 90b2be27bb0e56483f335cc10fb59ec66882b949 upstream. KCSAN reported the following race [1] BUG: KCSAN: data-race in __dev_queue_xmit / net_tx_action read to 0xffff8880ba403508 of 1 bytes by task 21814 on cpu 1: __dev_xmit_skb net/core/dev.c:3389 [inline] __dev_queue_xmit+0x9db/0x1b40 net/core/dev.c:3761 dev_queue_xmit+0x21/0x30 net/core/dev.c:3825 neigh_hh_output include/net/neighbour.h:500 [inline] neigh_output include/net/neighbour.h:509 [inline] ip6_finish_output2+0x873/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311 __sys_sendmmsg+0x123/0x350 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 write to 0xffff8880ba403508 of 1 bytes by interrupt on cpu 0: qdisc_run_begin include/net/sch_generic.h:160 [inline] qdisc_run include/net/pkt_sched.h:120 [inline] net_tx_action+0x2b1/0x6c0 net/core/dev.c:4551 __do_softirq+0x115/0x33f kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337 do_softirq kernel/softirq.c:329 [inline] __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189 local_bh_enable include/linux/bottom_half.h:32 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:688 [inline] ip6_finish_output2+0x7bb/0xec0 net/ipv6/ip6_output.c:117 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311 __sys_sendmmsg+0x123/0x350 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 21817 Comm: syz-executor.2 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: d518d2ed8640 ("net/sched: fix race between deactivation and dequeue for NOLOCK qdisc") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Paolo Abeni Cc: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 6 +++--- net/core/dev.c | 2 +- net/sched/sch_generic.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 47e61956168d..32e418dba133 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -149,8 +149,8 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) - return qdisc->empty; - return !qdisc->q.qlen; + return READ_ONCE(qdisc->empty); + return !READ_ONCE(qdisc->q.qlen); } static inline bool qdisc_run_begin(struct Qdisc *qdisc) @@ -158,7 +158,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; - qdisc->empty = false; + WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; } diff --git a/net/core/dev.c b/net/core/dev.c index 046307445ece..3e11c6bb4dd6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3386,7 +3386,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && qdisc_run_begin(q)) { if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8769b4b8807d..7c3c5fdb82a9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -657,7 +657,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); } else { - qdisc->empty = true; + WRITE_ONCE(qdisc->empty, true); } return skb; From 1e102037f87b9ace4b8531bce1b97b7d0f83fcd7 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Wed, 13 Nov 2019 12:29:50 +0300 Subject: [PATCH 140/191] kernel/module.c: wakeup processes in module_wq on module unload commit 5d603311615f612320bb77bd2a82553ef1ced5b7 upstream. Fix the race between load and unload a kernel module. sys_delete_module() try_stop_module() mod->state = _GOING add_unformed_module() old = find_module_all() (old->state == _GOING => wait_event_interruptible()) During pre-condition finished_loading() rets 0 schedule() (never gets waken up later) free_module() mod->state = _UNFORMED list_del_rcu(&mod->list) (dels mod from "modules" list) return The race above leads to modprobe hanging forever on loading a module. Error paths on loading module call wake_up_all(&module_wq) after freeing module, so let's do the same on straight module unload. Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") Reviewed-by: Prarit Bhargava Signed-off-by: Konstantin Khorenko Signed-off-by: Jessica Yu Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index ff2d7359a418..cb09a5f37a5f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1033,6 +1033,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); + /* someone could wait for the module in add_unformed_module() */ + wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); From 58501792851d3c1ed64a8df98933ebf233b2e2bf Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 14 Nov 2019 15:16:24 +0800 Subject: [PATCH 141/191] ACPI: sysfs: Change ACPI_MASKABLE_GPE_MAX to 0x100 commit a7583e72a5f22470d3e6fd3b6ba912892242339f upstream. The commit 0f27cff8597d ("ACPI: sysfs: Make ACPI GPE mask kernel parameter cover all GPEs") says: "Use a bitmap of size 0xFF instead of a u64 for the GPE mask so 256 GPEs can be masked" But the masking of GPE 0xFF it not supported and the check condition "gpe > ACPI_MASKABLE_GPE_MAX" is not valid because the type of gpe is u8. So modify the macro ACPI_MASKABLE_GPE_MAX to 0x100, and drop the "gpe > ACPI_MASKABLE_GPE_MAX" check. In addition, update the docs "Format" for acpi_mask_gpe parameter. Fixes: 0f27cff8597d ("ACPI: sysfs: Make ACPI GPE mask kernel parameter cover all GPEs") Signed-off-by: Yunfeng Ye [ rjw: Use u16 as gpe data type in acpi_gpe_apply_masked_gpes() ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 2 +- drivers/acpi/sysfs.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f5a551e4332d..5594c8bf1dcd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -113,7 +113,7 @@ the GPE dispatcher. This facility can be used to prevent such uncontrolled GPE floodings. - Format: + Format: acpi_no_auto_serialize [HW,ACPI] Disable auto-serialization of AML methods diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 75948a3f1a20..c60d2c6d31d6 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -819,14 +819,14 @@ end: * interface: * echo unmask > /sys/firmware/acpi/interrupts/gpe00 */ -#define ACPI_MASKABLE_GPE_MAX 0xFF +#define ACPI_MASKABLE_GPE_MAX 0x100 static DECLARE_BITMAP(acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) __initdata; static int __init acpi_gpe_set_masked_gpes(char *val) { u8 gpe; - if (kstrtou8(val, 0, &gpe) || gpe > ACPI_MASKABLE_GPE_MAX) + if (kstrtou8(val, 0, &gpe)) return -EINVAL; set_bit(gpe, acpi_masked_gpes_map); @@ -838,7 +838,7 @@ void __init acpi_gpe_apply_masked_gpes(void) { acpi_handle handle; acpi_status status; - u8 gpe; + u16 gpe; for_each_set_bit(gpe, acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) { status = acpi_get_gpe_device(gpe, &handle); From 632a300260a852069784802b7a33e954ed1dc31f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 14 Nov 2019 16:25:38 +0200 Subject: [PATCH 142/191] perf callchain: Fix segfault in thread__resolve_callchain_sample() commit aceb98261ea7d9fe38f9c140c5531f0b13623832 upstream. Do not dereference 'chain' when it is NULL. $ perf record -e intel_pt//u -e branch-misses:u uname $ perf report --itrace=l --branch-history perf: Segmentation fault Fixes: e9024d519d89 ("perf callchain: Honour the ordering of PERF_CONTEXT_{USER,KERNEL,etc}") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191114142538.4097-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 70a9f8716a4b..888814df758d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2403,7 +2403,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (callchain_param.order != ORDER_CALLEE) { + if (chain && callchain_param.order != ORDER_CALLEE) { err = find_prev_cpumode(chain, thread, cursor, parent, root_al, &cpumode, chain->nr - first_call); if (err) From 6f2c72738dce49a62a69e81ec1ceeab16d23eec3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Nov 2019 14:10:16 +0800 Subject: [PATCH 143/191] iommu/vt-d: Remove incorrect PSI capability check commit f81b846dcd9a1e6d120f73970a9a98b7fcaaffba upstream. The PSI (Page Selective Invalidation) bit in the capability register is only valid for second-level translation. Intel IOMMU supporting scalable mode must support page/address selective IOTLB invalidation for first-level translation. Remove the PSI capability check in SVA cache invalidation code. Fixes: 8744daf4b0699 ("iommu/vt-d: Remove global page flush support") Cc: Jacob Pan Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-svm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 9b159132405d..dca88f9fdf29 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -104,11 +104,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d { struct qi_desc desc; - /* - * Do PASID granu IOTLB invalidation if page selective capability is - * not available. - */ - if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) { + if (pages == -1) { desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | From 6eaf5653ddab9ce8572f50d2e477e8cddba4994d Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 21 Nov 2019 13:16:56 -0600 Subject: [PATCH 144/191] of: overlay: add_changeset_property() memory leak commit 637392a8506a3a7dd24ab9094a14f7522adb73b4 upstream. No changeset entries are created for #address-cells and #size-cells properties, but the duplicated properties are never freed. This results in a memory leak which is detected by kmemleak: unreferenced object 0x85887180 (size 64): backtrace: kmem_cache_alloc_trace+0x1fb/0x1fc __of_prop_dup+0x25/0x7c add_changeset_property+0x17f/0x370 build_changeset_next_level+0x29/0x20c of_overlay_fdt_apply+0x32b/0x6b4 ... Fixes: 6f75118800ac ("of: overlay: validate overlay properties #address-cells and #size-cells") Reported-by: Vincent Whitchurch Signed-off-by: Frank Rowand Tested-by: Vincent Whitchurch Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/overlay.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index c423e94baf0f..9617b7df7c4d 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -305,7 +305,6 @@ static int add_changeset_property(struct overlay_changeset *ovcs, { struct property *new_prop = NULL, *prop; int ret = 0; - bool check_for_non_overlay_node = false; if (target->in_livetree) if (!of_prop_cmp(overlay_prop->name, "name") || @@ -318,6 +317,25 @@ static int add_changeset_property(struct overlay_changeset *ovcs, else prop = NULL; + if (prop) { + if (!of_prop_cmp(prop->name, "#address-cells")) { + if (!of_prop_val_eq(prop, overlay_prop)) { + pr_err("ERROR: changing value of #address-cells is not allowed in %pOF\n", + target->np); + ret = -EINVAL; + } + return ret; + + } else if (!of_prop_cmp(prop->name, "#size-cells")) { + if (!of_prop_val_eq(prop, overlay_prop)) { + pr_err("ERROR: changing value of #size-cells is not allowed in %pOF\n", + target->np); + ret = -EINVAL; + } + return ret; + } + } + if (is_symbols_prop) { if (prop) return -EINVAL; @@ -330,33 +348,18 @@ static int add_changeset_property(struct overlay_changeset *ovcs, return -ENOMEM; if (!prop) { - check_for_non_overlay_node = true; if (!target->in_livetree) { new_prop->next = target->np->deadprops; target->np->deadprops = new_prop; } ret = of_changeset_add_property(&ovcs->cset, target->np, new_prop); - } else if (!of_prop_cmp(prop->name, "#address-cells")) { - if (!of_prop_val_eq(prop, new_prop)) { - pr_err("ERROR: changing value of #address-cells is not allowed in %pOF\n", - target->np); - ret = -EINVAL; - } - } else if (!of_prop_cmp(prop->name, "#size-cells")) { - if (!of_prop_val_eq(prop, new_prop)) { - pr_err("ERROR: changing value of #size-cells is not allowed in %pOF\n", - target->np); - ret = -EINVAL; - } } else { - check_for_non_overlay_node = true; ret = of_changeset_update_property(&ovcs->cset, target->np, new_prop); } - if (check_for_non_overlay_node && - !of_node_check_flag(target->np, OF_OVERLAY)) + if (!of_node_check_flag(target->np, OF_OVERLAY)) pr_err("WARNING: memory leak will occur if overlay removed, property: %pOF/%s\n", target->np, new_prop->name); From 3d29dc60aac16831b1822c831a7f40b45a9686d9 Mon Sep 17 00:00:00 2001 From: "Paulo Alcantara (SUSE)" Date: Fri, 22 Nov 2019 12:30:53 -0300 Subject: [PATCH 145/191] cifs: Fix potential softlockups while refreshing DFS cache commit 84a1f5b1cc6fd7f6cd99fc5630c36f631b19fa60 upstream. We used to skip reconnects on all SMB2_IOCTL commands due to SMB3+ FSCTL_VALIDATE_NEGOTIATE_INFO - which made sense since we're still establishing a SMB session. However, when refresh_cache_worker() calls smb2_get_dfs_refer() and we're under reconnect, SMB2_ioctl() will not be able to get a proper status error (e.g. -EHOSTDOWN in case we failed to reconnect) but an -EAGAIN from cifs_send_recv() thus looping forever in refresh_cache_worker(). Fixes: e99c63e4d86d ("SMB3: Fix deadlock in validate negotiate hits reconnect") Signed-off-by: Paulo Alcantara (SUSE) Suggested-by: Aurelien Aptel Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c985caa2d955..e1d8cec6ba2e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -252,7 +252,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (tcon == NULL) return 0; - if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) + if (smb2_command == SMB2_TREE_CONNECT) return 0; if (tcon->tidStatus == CifsExiting) { @@ -426,16 +426,9 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf, * SMB information in the SMB header. If the return code is zero, this * function must have filled in request_buf pointer. */ -static int -smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, - void **request_buf, unsigned int *total_len) +static int __smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, + void **request_buf, unsigned int *total_len) { - int rc; - - rc = smb2_reconnect(smb2_command, tcon); - if (rc) - return rc; - /* BB eventually switch this to SMB2 specific small buf size */ if (smb2_command == SMB2_SET_INFO) *request_buf = cifs_buf_get(); @@ -456,7 +449,31 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, cifs_stats_inc(&tcon->num_smbs_sent); } - return rc; + return 0; +} + +static int smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, + void **request_buf, unsigned int *total_len) +{ + int rc; + + rc = smb2_reconnect(smb2_command, tcon); + if (rc) + return rc; + + return __smb2_plain_req_init(smb2_command, tcon, request_buf, + total_len); +} + +static int smb2_ioctl_req_init(u32 opcode, struct cifs_tcon *tcon, + void **request_buf, unsigned int *total_len) +{ + /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { + return __smb2_plain_req_init(SMB2_IOCTL, tcon, request_buf, + total_len); + } + return smb2_plain_req_init(SMB2_IOCTL, tcon, request_buf, total_len); } /* For explanation of negotiate contexts see MS-SMB2 section 2.2.3.1 */ @@ -2661,7 +2678,7 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, int rc; char *in_data_buf; - rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len); + rc = smb2_ioctl_req_init(opcode, tcon, (void **) &req, &total_len); if (rc) return rc; From 7da501c21a4aab22956c9323e04453a7ebe329d2 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 25 Nov 2019 23:54:09 +0800 Subject: [PATCH 146/191] firmware: arm_scmi: Avoid double free in error flow commit 8305e90a894f82c278c17e51a28459deee78b263 upstream. If device_register() fails, both put_device() and kfree() are called, ending with a double free of the scmi_dev. Calling kfree() is needed only when a failure happens between the allocation of the scmi_dev and its registration, so move it to there and remove it from the error flow. Fixes: 46edb8d1322c ("firmware: arm_scmi: provide the mandatory device release callback") Signed-off-by: Wen Yang Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/arm_scmi/bus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 92f843eaf1e0..7a30952b463d 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -135,8 +135,10 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol) return NULL; id = ida_simple_get(&scmi_bus_id, 1, 0, GFP_KERNEL); - if (id < 0) - goto free_mem; + if (id < 0) { + kfree(scmi_dev); + return NULL; + } scmi_dev->id = id; scmi_dev->protocol_id = protocol; @@ -154,8 +156,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol) put_dev: put_device(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, id); -free_mem: - kfree(scmi_dev); return NULL; } From 5c440a4d1f3a088ff0923e3853829e0a7a9b610f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 26 Nov 2019 16:58:08 -0800 Subject: [PATCH 147/191] xfs: don't check for AG deadlock for realtime files in bunmapi commit 69ffe5960df16938bccfe1b65382af0b3de51265 upstream. Commit 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi") added a check in __xfs_bunmapi() to stop early if we would touch multiple AGs in the wrong order. However, this check isn't applicable for realtime files. In most cases, it just makes us do unnecessary commits. However, without the fix from the previous commit ("xfs: fix realtime file data space leak"), if the last and second-to-last extents also happen to have different "AG numbers", then the break actually causes __xfs_bunmapi() to return without making any progress, which sends xfs_itruncate_extents_flags() into an infinite loop. Fixes: 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi") Signed-off-by: Omar Sandoval Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 02469d59c787..3f76da11197c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5300,7 +5300,7 @@ __xfs_bunmapi( * Make sure we don't touch multiple AGF headers out of order * in a single transaction, as that could cause AB-BA deadlocks. */ - if (!wasdel) { + if (!wasdel && !isrt) { agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); if (prev_agno != NULLAGNUMBER && prev_agno > agno) break; From f6cc75f1d705fe420bb95179afad6bfe4e141f21 Mon Sep 17 00:00:00 2001 From: Michael Haener Date: Fri, 29 Nov 2019 10:16:49 +0100 Subject: [PATCH 148/191] platform/x86: pmc_atom: Add Siemens CONNECT X300 to critclk_systems DMI table commit e8796c6c69d129420ee94a1906b18d86b84644d4 upstream. The CONNECT X300 uses the PMC clock for on-board components and gets stuck during boot if the clock is disabled. Therefore, add this device to the critical systems list. Tested on CONNECT X300. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: Michael Haener Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/pmc_atom.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 07d1b911e72f..52ef1419b671 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -429,6 +429,14 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "6AV7882-0"), }, }, + { + .ident = "CONNECT X300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_VERSION, "A5E45074588"), + }, + }, + { /*sentinel*/ } }; From 3d8ff70c73b14c12040e593f9aa60ea01238f686 Mon Sep 17 00:00:00 2001 From: Marco Oliverio Date: Mon, 2 Dec 2019 19:54:30 +0100 Subject: [PATCH 149/191] netfilter: nf_queue: enqueue skbs with NULL dst commit 0b9173f4688dfa7c5d723426be1d979c24ce3d51 upstream. Bridge packets that are forwarded have skb->dst == NULL and get dropped by the check introduced by b60a77386b1d4868f72f6353d35dabe5fbe981f2 (net: make skb_dst_force return true when dst is refcounted). To fix this we check skb_dst() before skb_dst_force(), so we don't drop skb packet with dst == NULL. This holds also for skb at the PRE_ROUTING hook so we remove the second check. Fixes: b60a77386b1d ("net: make skb_dst_force return true when dst is refcounted") Signed-off-by: Marco Oliverio Signed-off-by: Rocco Folino Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a2b58de82600..f8f52ff99cfb 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -189,7 +189,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } - if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { + if (skb_dst(skb) && !skb_dst_force(skb)) { status = -ENETDOWN; goto err; } From bac47f2ee76ad9097ffdf5f08e209ef3952f9771 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2019 12:18:21 +0300 Subject: [PATCH 150/191] net, sysctl: Fix compiler warning when only cBPF is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1148f9adbe71415836a18a36c1b4ece999ab0973 upstream. proc_dointvec_minmax_bpf_restricted() has been firstly introduced in commit 2e4a30983b0f ("bpf: restrict access to core bpf sysctls") under CONFIG_HAVE_EBPF_JIT. Then, this ifdef has been removed in ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations"), because a new sysctl, bpf_jit_limit, made use of it. Finally, this parameter has become long instead of integer with fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") and thus, a new proc_dolongvec_minmax_bpf_restricted() has been added. With this last change, we got back to that proc_dointvec_minmax_bpf_restricted() is used only under CONFIG_HAVE_EBPF_JIT, but the corresponding ifdef has not been brought back. So, in configurations like CONFIG_BPF_JIT=y && CONFIG_HAVE_EBPF_JIT=n since v4.20 we have: CC net/core/sysctl_net_core.o net/core/sysctl_net_core.c:292:1: warning: ‘proc_dointvec_minmax_bpf_restricted’ defined but not used [-Wunused-function] 292 | proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suppress this by guarding it with CONFIG_HAVE_EBPF_JIT again. Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191218091821.7080-1-alobakin@dlink.ru Signed-off-by: Greg Kroah-Hartman --- net/core/sysctl_net_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eb29e5adc84d..9f9e00ba3ad7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } +# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } +# endif /* CONFIG_HAVE_EBPF_JIT */ static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, From c5b89e112483b4b9b1fb1825f4c924afc79611bf Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Dec 2019 20:42:59 +0800 Subject: [PATCH 151/191] watchdog: tqmx86_wdt: Fix build error commit 9a6c274ac1c4346f5384f2290caeb42dc674c471 upstream. If TQMX86_WDT is y and WATCHDOG_CORE is m, building fails: drivers/watchdog/tqmx86_wdt.o: In function `tqmx86_wdt_probe': tqmx86_wdt.c:(.text+0x46e): undefined reference to `watchdog_init_timeout' tqmx86_wdt.c:(.text+0x4e0): undefined reference to `devm_watchdog_register_device' Select WATCHDOG_CORE to fix this. Reported-by: Hulk Robot Fixes: e3c21e088f89 ("watchdog: tqmx86: Add watchdog driver for the IO controller") Signed-off-by: YueHaibing Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191206124259.25880-1-yuehaibing@huawei.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 58e7c100b6ad..4c761abc5688 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1444,6 +1444,7 @@ config SMSC37B787_WDT config TQMX86_WDT tristate "TQ-Systems TQMX86 Watchdog Timer" depends on X86 + select WATCHDOG_CORE help This is the driver for the hardware watchdog timer in the TQMX86 IO controller found on some of their ComExpress Modules. From 25b48ee49164de288a4fcdd2031c155da966c39d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 21 Dec 2019 16:10:49 +0800 Subject: [PATCH 152/191] regulator: axp20x: Fix axp20x_set_ramp_delay commit 71dd2fe5dec171b34b71603a81bb46c24c498fde upstream. Current code set incorrect bits when set ramp_delay for AXP20X_DCDC2, fix it. Fixes: d29f54df8b16 ("regulator: axp20x: add support for set_ramp_delay for AXP209") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20191221081049.32490-1-axel.lin@ingics.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 989506bd90b1..501cccc806d5 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -413,10 +413,13 @@ static int axp20x_set_ramp_delay(struct regulator_dev *rdev, int ramp) int i; for (i = 0; i < rate_count; i++) { - if (ramp <= slew_rates[i]) - cfg = AXP20X_DCDC2_LDO3_V_RAMP_LDO3_RATE(i); - else + if (ramp > slew_rates[i]) break; + + if (id == AXP20X_DCDC2) + cfg = AXP20X_DCDC2_LDO3_V_RAMP_DCDC2_RATE(i); + else + cfg = AXP20X_DCDC2_LDO3_V_RAMP_LDO3_RATE(i); } if (cfg == 0xff) { From 39add6227821dd1c23b8d221660a8a25f5536501 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 1 Jan 2020 10:24:06 +0800 Subject: [PATCH 153/191] regulator: bd70528: Remove .set_ramp_delay for bd70528_ldo_ops commit 6f1ff76154b8b36033efcbf6453a71a3d28f52cd upstream. The .set_ramp_delay should be for bd70528_buck_ops only. Setting .set_ramp_delay for for bd70528_ldo_ops causes problem because BD70528_MASK_BUCK_RAMP (0x10) overlaps with BD70528_MASK_LDO_VOLT (0x1f). So setting ramp_delay for LDOs may change the voltage output, fix it. Fixes: 99ea37bd1e7d ("regulator: bd70528: Support ROHM BD70528 regulator block") Signed-off-by: Axel Lin Acked-by: Matti Vaittinen Link: https://lore.kernel.org/r/20200101022406.15176-1-axel.lin@ingics.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/bd70528-regulator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c index 0248a61f1006..6041839ec38c 100644 --- a/drivers/regulator/bd70528-regulator.c +++ b/drivers/regulator/bd70528-regulator.c @@ -101,7 +101,6 @@ static const struct regulator_ops bd70528_ldo_ops = { .set_voltage_sel = regulator_set_voltage_sel_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_time_sel = regulator_set_voltage_time_sel, - .set_ramp_delay = bd70528_set_ramp_delay, }; static const struct regulator_ops bd70528_led_ops = { From d6eeb06587f4053fd586969499af90274f16dabf Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 24 Dec 2019 09:58:23 +0900 Subject: [PATCH 154/191] spi: uniphier: Fix FIFO threshold commit 9cd34efbd3012171c102910ce17ee632a3cccb44 upstream. Rx threshold means the value to inform the receiver when the number of words in Rx FIFO is equal to or more than the value. Similarly, Tx threshold means the value to inform the sender when the number of words in Tx FIFO is equal to or less than the value. The controller triggers the driver to start the transfer. In case of Rx, the driver wants to detect that the specified number of words N are in Rx FIFO, so the value of Rx threshold should be N. In case of Tx, the driver wants to detect that the same number of spaces as Rx are in Tx FIFO, so the value of Tx threshold should be (FIFO size - N). For example, in order for the driver to receive at least 3 words from Rx FIFO, set 3 to Rx threshold. +-+-+-+-+-+-+-+-+ | | | | | |*|*|*| +-+-+-+-+-+-+-+-+ In order for the driver to send at least 3 words to Tx FIFO, because it needs at least 3 spaces, set 8(FIFO size) - 3 = 5 to Tx threshold. +-+-+-+-+-+-+-+-+ |*|*|*|*|*| | | | +-+-+-+-+-+-+-+-+ This adds new function uniphier_spi_set_fifo_threshold() to set threshold value to the register. And more, FIFO counts by 'words', so this renames 'fill_bytes' with 'fill_words', and fixes the calculation using bytes_per_words. Fixes: 37ffab817098 ("spi: uniphier: introduce polling mode") Cc: Keiji Hayashibara Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1577149107-30670-2-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-uniphier.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 47cde1864630..ce9b30112e26 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -290,25 +290,32 @@ static void uniphier_spi_recv(struct uniphier_spi_priv *priv) } } -static void uniphier_spi_fill_tx_fifo(struct uniphier_spi_priv *priv) +static void uniphier_spi_set_fifo_threshold(struct uniphier_spi_priv *priv, + unsigned int threshold) { - unsigned int fifo_threshold, fill_bytes; u32 val; - fifo_threshold = DIV_ROUND_UP(priv->rx_bytes, - bytes_per_word(priv->bits_per_word)); - fifo_threshold = min(fifo_threshold, SSI_FIFO_DEPTH); - - fill_bytes = fifo_threshold - (priv->rx_bytes - priv->tx_bytes); - - /* set fifo threshold */ val = readl(priv->base + SSI_FC); val &= ~(SSI_FC_TXFTH_MASK | SSI_FC_RXFTH_MASK); - val |= FIELD_PREP(SSI_FC_TXFTH_MASK, fifo_threshold); - val |= FIELD_PREP(SSI_FC_RXFTH_MASK, fifo_threshold); + val |= FIELD_PREP(SSI_FC_TXFTH_MASK, SSI_FIFO_DEPTH - threshold); + val |= FIELD_PREP(SSI_FC_RXFTH_MASK, threshold); writel(val, priv->base + SSI_FC); +} - while (fill_bytes--) +static void uniphier_spi_fill_tx_fifo(struct uniphier_spi_priv *priv) +{ + unsigned int fifo_threshold, fill_words; + unsigned int bpw = bytes_per_word(priv->bits_per_word); + + fifo_threshold = DIV_ROUND_UP(priv->rx_bytes, bpw); + fifo_threshold = min(fifo_threshold, SSI_FIFO_DEPTH); + + uniphier_spi_set_fifo_threshold(priv, fifo_threshold); + + fill_words = fifo_threshold - + DIV_ROUND_UP(priv->rx_bytes - priv->tx_bytes, bpw); + + while (fill_words--) uniphier_spi_send(priv); } From cc51beb1637c89b0c81c3548061a689e9d708f18 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 18 Dec 2019 12:47:20 +0800 Subject: [PATCH 155/191] regulator: axp20x: Fix AXP22x ELDO2 regulator enable bitmask commit f40ddaa059fdfb472e3aeb733c6220d8e0633a47 upstream. A copy-paste error was introduced when bitmasks were converted to macros, incorrectly setting the enable bitmask for ELDO2 to the one for ELDO1 for the AXP22x units. Fix it by using the correct macro. On affected boards, ELDO1 and/or ELDO2 are used to power the camera, which is currently unsupported. Fixes: db4a555f7c4c ("regulator: axp20x: use defines for masks") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20191218044720.21990-1-wens@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 501cccc806d5..16f0c8570036 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -608,7 +608,7 @@ static const struct regulator_desc axp22x_regulators[] = { AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO1_MASK), AXP_DESC(AXP22X, ELDO2, "eldo2", "eldoin", 700, 3300, 100, AXP22X_ELDO2_V_OUT, AXP22X_ELDO2_V_OUT_MASK, - AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO1_MASK), + AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO2_MASK), AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, AXP22X_ELDO3_V_OUT_MASK, AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO3_MASK), From 2fb97a8e25aa54fa436734aa86f41b2813ba33cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 21 Dec 2019 14:16:54 +1100 Subject: [PATCH 156/191] powerpc/mm: Mark get_slice_psize() & slice_addr_is_low() as notrace commit 91a063c956084fb21cf2523bce6892514e3f1799 upstream. These slice routines are called from the SLB miss handler, which can lead to warnings from the IRQ code, because we have not reconciled the IRQ state properly: WARNING: CPU: 72 PID: 30150 at arch/powerpc/kernel/irq.c:258 arch_local_irq_restore.part.0+0xcc/0x100 Modules linked in: CPU: 72 PID: 30150 Comm: ftracetest Not tainted 5.5.0-rc2-gcc9x-g7e0165b2f1a9 #1 NIP: c00000000001d83c LR: c00000000029ab90 CTR: c00000000026cf90 REGS: c0000007eee3b960 TRAP: 0700 Not tainted (5.5.0-rc2-gcc9x-g7e0165b2f1a9) MSR: 8000000000021033 CR: 22242844 XER: 20000000 CFAR: c00000000001d780 IRQMASK: 0 ... NIP arch_local_irq_restore.part.0+0xcc/0x100 LR trace_graph_entry+0x270/0x340 Call Trace: trace_graph_entry+0x254/0x340 (unreliable) function_graph_enter+0xe4/0x1a0 prepare_ftrace_return+0xa0/0x130 ftrace_graph_caller+0x44/0x94 # (get_slice_psize()) slb_allocate_user+0x7c/0x100 do_slb_fault+0xf8/0x300 instruction_access_slb_common+0x140/0x180 Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191221121337.4894-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/slice.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42bbcd47cc85..dffe1a45b6ed 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif -static inline bool slice_addr_is_low(unsigned long addr) +static inline notrace bool slice_addr_is_low(unsigned long addr) { u64 tmp = (u64)addr; @@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, mm_ctx_user_psize(¤t->mm->context), 1); } -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; int index, mask_index; From a1221702c41d0e760e4cc00ea12fbf45966b50d8 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 14 Nov 2019 16:01:18 +0100 Subject: [PATCH 157/191] Bluetooth: btusb: fix PM leak in error case of setup commit 3d44a6fd0775e6215e836423e27f8eedf8c871ea upstream. If setup() fails a reference for runtime PM has already been taken. Proper use of the error handling in btusb_open()is needed. You cannot just return. Fixes: ace31982585a3 ("Bluetooth: btusb: Add setup callback for chip init on USB") Signed-off-by: Oliver Neukum Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 23e606aaaea4..04cf767d0708 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1200,7 +1200,7 @@ static int btusb_open(struct hci_dev *hdev) if (data->setup_on_usb) { err = data->setup_on_usb(hdev); if (err < 0) - return err; + goto setup_fail; } data->intf->needs_remote_wakeup = 1; @@ -1239,6 +1239,7 @@ done: failed: clear_bit(BTUSB_INTR_RUNNING, &data->flags); +setup_fail: usb_autopm_put_interface(data->intf); return err; } From 081d57ac233a56fdb5655fb898b681088235fadc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Nov 2019 09:17:05 +0300 Subject: [PATCH 158/191] Bluetooth: delete a stray unlock commit df66499a1fab340c167250a5743931dc50d5f0fa upstream. We used to take a lock in amp_physical_cfm() but then we moved it to the caller function. Unfortunately the unlock on this error path was overlooked so it leads to a double unlock. Fixes: a514b17fab51 ("Bluetooth: Refactor locking in amp_physical_cfm") Signed-off-by: Dan Carpenter Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index da7fdbdf9c41..a845786258a0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4936,10 +4936,8 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result) BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d", chan, result, local_amp_id, remote_amp_id); - if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) { - l2cap_chan_unlock(chan); + if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) return; - } if (chan->state != BT_CONNECTED) { l2cap_do_create(chan, result, local_amp_id, remote_amp_id); From 36427907e08613382e469acf096d4efbdbf8bb1a Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 21 Nov 2019 14:20:36 -0600 Subject: [PATCH 159/191] Bluetooth: Fix memory leak in hci_connect_le_scan commit d088337c38a5cd8f0230fbf2d514ff7672f9d0d3 upstream. In the implementation of hci_connect_le_scan() when conn is added via hci_conn_add(), if hci_explicit_conn_params_set() fails the allocated memory for conn is leaked. Use hci_conn_del() to release it. Fixes: f75113a26008 ("Bluetooth: add hci_connect_le_scan") Signed-off-by: Navid Emamdoost Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_conn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7ff92dd4c53c..87691404d0c6 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1176,8 +1176,10 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { + hci_conn_del(conn); return ERR_PTR(-EBUSY); + } conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); From f05d1ede71b24405ca5c49382bf97382322a4b00 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 9 Sep 2019 19:01:23 +0400 Subject: [PATCH 160/191] arm64: dts: meson-gxl-s905x-khadas-vim: fix uart_A bluetooth node commit 1c6d575574ec87dbccf7af20ef9dc0df02614069 upstream. Fixes: dd5297cc8b8b ("arm64: dts: meson-gxl-s905x-khadas-vim enable Bluetooth") Signed-off-by: Christian Hewitt Reviewed-by: Kevin Hilman Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 2a5cd303123d..8d6f316a5c7b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -192,6 +192,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; + max-speed = <2000000>; + clocks = <&wifi32k>; + clock-names = "lpo"; }; }; From 0123e668ca943ac9d9ceb9579f007e4581d92b63 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 9 Sep 2019 19:01:24 +0400 Subject: [PATCH 161/191] arm64: dts: meson-gxm-khadas-vim2: fix uart_A bluetooth node commit 388a2772979b625042524d8b91280616ab4ff5ee upstream. Fixes: 33344e2111a3 ("arm64: dts: meson-gxm-khadas-vim2: fix Bluetooth support") Signed-off-by: Christian Hewitt Reviewed-by: Kevin Hilman Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index f25ddd18a607..4d67eb715b91 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -409,6 +409,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; + max-speed = <2000000>; + clocks = <&wifi32k>; + clock-names = "lpo"; }; }; From 6851d4004b6f770dd9e71b4da61a02d14cbacedb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 25 Oct 2019 15:33:39 +0200 Subject: [PATCH 162/191] media: flexcop-usb: ensure -EIO is returned on error condition commit 74a96b51a36de4d86660fbc56b05d86668162d6b upstream. An earlier commit hard coded a return 0 to function flexcop_usb_i2c_req even though the an -EIO was intended to be returned in the case where ret != buflen. Fix this by replacing the return 0 with the return of ret to return the error return code. Addresses-Coverity: ("Unused value") Fixes: b430eaba0be5 ("[media] flexcop-usb: don't use stack for DMA") Signed-off-by: Colin Ian King Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/b2c2/flexcop-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index d1331f828108..039963a7765b 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -295,7 +295,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, mutex_unlock(&fc_usb->data_mutex); - return 0; + return ret; } /* actual bus specific access functions, From 56eb000be1142c3fb3a60d08ffb1cfef49996bc5 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Nov 2019 18:31:24 +0100 Subject: [PATCH 163/191] regulator: ab8500: Remove AB8505 USB regulator commit 99c4f70df3a6446c56ca817c2d0f9c12d85d4e7c upstream. The USB regulator was removed for AB8500 in commit 41a06aa738ad ("regulator: ab8500: Remove USB regulator"). It was then added for AB8505 in commit 547f384f33db ("regulator: ab8500: add support for ab8505"). However, there was never an entry added for it in ab8505_regulator_match. This causes all regulators after it to be initialized with the wrong device tree data, eventually leading to an out-of-bounds array read. Given that it is not used anywhere in the kernel, it seems likely that similar arguments against supporting it exist for AB8505 (it is controlled by hardware). Therefore, simply remove it like for AB8500 instead of adding an entry in ab8505_regulator_match. Fixes: 547f384f33db ("regulator: ab8500: add support for ab8505") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20191106173125.14496-1-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/ab8500.c | 17 ----------------- include/linux/regulator/ab8500.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index efb2f01a9101..f60e1b26c2d2 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -953,23 +953,6 @@ static struct ab8500_regulator_info .update_val_idle = 0x82, .update_val_normal = 0x02, }, - [AB8505_LDO_USB] = { - .desc = { - .name = "LDO-USB", - .ops = &ab8500_regulator_mode_ops, - .type = REGULATOR_VOLTAGE, - .id = AB8505_LDO_USB, - .owner = THIS_MODULE, - .n_voltages = 1, - .volt_table = fixed_3300000_voltage, - }, - .update_bank = 0x03, - .update_reg = 0x82, - .update_mask = 0x03, - .update_val = 0x01, - .update_val_idle = 0x03, - .update_val_normal = 0x01, - }, [AB8505_LDO_AUDIO] = { .desc = { .name = "LDO-AUDIO", diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 7cf8f797e13a..505e94a6e3e8 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -37,7 +37,6 @@ enum ab8505_regulator_id { AB8505_LDO_AUX6, AB8505_LDO_INTCORE, AB8505_LDO_ADC, - AB8505_LDO_USB, AB8505_LDO_AUDIO, AB8505_LDO_ANAMIC1, AB8505_LDO_ANAMIC2, From 3dba6e50d09ee8c05d5ba68bd69624ac1ea0c814 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 9 Oct 2019 12:01:47 -0300 Subject: [PATCH 164/191] media: usb: fix memory leak in af9005_identify_state commit 2289adbfa559050d2a38bcd9caac1c18b800e928 upstream. In af9005_identify_state when returning -EIO the allocated buffer should be released. Replace the "return -EIO" with assignment into ret and move deb_info() under a check. Fixes: af4e067e1dcf ("V4L/DVB (5625): Add support for the AF9005 demodulator from Afatech") Signed-off-by: Navid Emamdoost Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/af9005.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index 02697d86e8c1..ac93e88d7038 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -976,8 +976,9 @@ static int af9005_identify_state(struct usb_device *udev, else if (reply == 0x02) *cold = 0; else - return -EIO; - deb_info("Identify state cold = %d\n", *cold); + ret = -EIO; + if (!ret) + deb_info("Identify state cold = %d\n", *cold); err: kfree(buf); From da9eb04eaa643d795bbf3dc642a8988d3f3c4b6c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Oct 2019 16:56:50 +0200 Subject: [PATCH 165/191] dt-bindings: clock: renesas: rcar-usb2-clock-sel: Fix typo in example commit 830dbce7c76ea529decac7d23b808c1e7da3d891 upstream. The documented compatible value for R-Car H3 is "renesas,r8a7795-rcar-usb2-clock-sel", not "renesas,r8a77950-rcar-usb2-clock-sel". Fixes: 311accb64570db45 ("clk: renesas: rcar-usb2-clock-sel: Add R-Car USB 2.0 clock selector PHY") Signed-off-by: Geert Uytterhoeven Reviewed-by: Yoshihiro Shimoda Acked-by: Rob Herring Link: https://lore.kernel.org/r/20191016145650.30003-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt index e96e085271c1..83f6c6a7c41c 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt +++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt @@ -46,7 +46,7 @@ Required properties: Example (R-Car H3): usb2_clksel: clock-controller@e6590630 { - compatible = "renesas,r8a77950-rcar-usb2-clock-sel", + compatible = "renesas,r8a7795-rcar-usb2-clock-sel", "renesas,rcar-gen3-usb2-clock-sel"; reg = <0 0xe6590630 0 0x02>; clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>; From b3476cfa23804c03feb8047132f3d42e9f3d4456 Mon Sep 17 00:00:00 2001 From: Anand Moon Date: Mon, 2 Sep 2019 05:49:35 +0000 Subject: [PATCH 166/191] arm64: dts: meson: odroid-c2: Disable usb_otg bus to avoid power failed warning commit 72c9b5f6f75fbc6c47e0a2d02bc3838a2a47c90a upstream. usb_otg bus needs to get initialize from the u-boot to be configured to used as power source to SBC or usb otg port will get configured as host device. Right now this support is missing in the u-boot and phy driver so to avoid power failed warning, we would disable this feature until proper fix is found. [ 2.716048] phy phy-c0000000.phy.0: USB ID detect failed! [ 2.720186] phy phy-c0000000.phy.0: phy poweron failed --> -22 [ 2.726001] ------------[ cut here ]------------ [ 2.730583] WARNING: CPU: 0 PID: 12 at drivers/regulator/core.c:2039 _regulator_put+0x3c/0xe8 [ 2.738983] Modules linked in: [ 2.742005] CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.2.9-1-ARCH #1 [ 2.748643] Hardware name: Hardkernel ODROID-C2 (DT) [ 2.753566] Workqueue: events deferred_probe_work_func [ 2.758649] pstate: 60000005 (nZCv daif -PAN -UAO) [ 2.763394] pc : _regulator_put+0x3c/0xe8 [ 2.767361] lr : _regulator_put+0x3c/0xe8 [ 2.771326] sp : ffff000011aa3a50 [ 2.774604] x29: ffff000011aa3a50 x28: ffff80007ed1b600 [ 2.779865] x27: ffff80007f7036a8 x26: ffff80007f7036a8 [ 2.785126] x25: 0000000000000000 x24: ffff000011a44458 [ 2.790387] x23: ffff000011344218 x22: 0000000000000009 [ 2.795649] x21: ffff000011aa3b68 x20: ffff80007ed1b500 [ 2.800910] x19: ffff80007ed1b500 x18: 0000000000000010 [ 2.806171] x17: 000000005be5943c x16: 00000000f1c73b29 [ 2.811432] x15: ffffffffffffffff x14: ffff0000117396c8 [ 2.816694] x13: ffff000091aa37a7 x12: ffff000011aa37af [ 2.821955] x11: ffff000011763000 x10: ffff000011aa3730 [ 2.827216] x9 : 00000000ffffffd0 x8 : ffff000010871760 [ 2.832477] x7 : 00000000000000d0 x6 : ffff0000119d151b [ 2.837739] x5 : 000000000000000f x4 : 0000000000000000 [ 2.843000] x3 : 0000000000000000 x2 : 38104b2678c20100 [ 2.848261] x1 : 0000000000000000 x0 : 0000000000000024 [ 2.853523] Call trace: [ 2.855940] _regulator_put+0x3c/0xe8 [ 2.859562] regulator_put+0x34/0x48 [ 2.863098] regulator_bulk_free+0x40/0x58 [ 2.867153] devm_regulator_bulk_release+0x24/0x30 [ 2.871896] release_nodes+0x1f0/0x2e0 [ 2.875604] devres_release_all+0x64/0xa4 [ 2.879571] really_probe+0x1c8/0x3e0 [ 2.883194] driver_probe_device+0xe4/0x138 [ 2.887334] __device_attach_driver+0x90/0x110 [ 2.891733] bus_for_each_drv+0x8c/0xd8 [ 2.895527] __device_attach+0xdc/0x160 [ 2.899322] device_initial_probe+0x24/0x30 [ 2.903463] bus_probe_device+0x9c/0xa8 [ 2.907258] deferred_probe_work_func+0xa0/0xf0 [ 2.911745] process_one_work+0x1b4/0x408 [ 2.915711] worker_thread+0x54/0x4b8 [ 2.919334] kthread+0x12c/0x130 [ 2.922526] ret_from_fork+0x10/0x1c [ 2.926060] ---[ end trace 51a68f4c0035d6c0 ]--- [ 2.930691] ------------[ cut here ]------------ [ 2.935242] WARNING: CPU: 0 PID: 12 at drivers/regulator/core.c:2039 _regulator_put+0x3c/0xe8 [ 2.943653] Modules linked in: [ 2.946675] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G W 5.2.9-1-ARCH #1 [ 2.954694] Hardware name: Hardkernel ODROID-C2 (DT) [ 2.959613] Workqueue: events deferred_probe_work_func [ 2.964700] pstate: 60000005 (nZCv daif -PAN -UAO) [ 2.969445] pc : _regulator_put+0x3c/0xe8 [ 2.973412] lr : _regulator_put+0x3c/0xe8 [ 2.977377] sp : ffff000011aa3a50 [ 2.980655] x29: ffff000011aa3a50 x28: ffff80007ed1b600 [ 2.985916] x27: ffff80007f7036a8 x26: ffff80007f7036a8 [ 2.991177] x25: 0000000000000000 x24: ffff000011a44458 [ 2.996439] x23: ffff000011344218 x22: 0000000000000009 [ 3.001700] x21: ffff000011aa3b68 x20: ffff80007ed1bd00 [ 3.006961] x19: ffff80007ed1bd00 x18: 0000000000000010 [ 3.012222] x17: 000000005be5943c x16: 00000000f1c73b29 [ 3.017484] x15: ffffffffffffffff x14: ffff0000117396c8 [ 3.022745] x13: ffff000091aa37a7 x12: ffff000011aa37af [ 3.028006] x11: ffff000011763000 x10: ffff000011aa3730 [ 3.033267] x9 : 00000000ffffffd0 x8 : ffff000010871760 [ 3.038528] x7 : 00000000000000fd x6 : ffff0000119d151b [ 3.043790] x5 : 000000000000000f x4 : 0000000000000000 [ 3.049051] x3 : 0000000000000000 x2 : 38104b2678c20100 [ 3.054312] x1 : 0000000000000000 x0 : 0000000000000024 [ 3.059574] Call trace: [ 3.061991] _regulator_put+0x3c/0xe8 [ 3.065613] regulator_put+0x34/0x48 [ 3.069149] regulator_bulk_free+0x40/0x58 [ 3.073203] devm_regulator_bulk_release+0x24/0x30 [ 3.077947] release_nodes+0x1f0/0x2e0 [ 3.081655] devres_release_all+0x64/0xa4 [ 3.085622] really_probe+0x1c8/0x3e0 [ 3.089245] driver_probe_device+0xe4/0x138 [ 3.093385] __device_attach_driver+0x90/0x110 [ 3.097784] bus_for_each_drv+0x8c/0xd8 [ 3.101578] __device_attach+0xdc/0x160 [ 3.105373] device_initial_probe+0x24/0x30 [ 3.109514] bus_probe_device+0x9c/0xa8 [ 3.113309] deferred_probe_work_func+0xa0/0xf0 [ 3.117796] process_one_work+0x1b4/0x408 [ 3.121762] worker_thread+0x54/0x4b8 [ 3.125384] kthread+0x12c/0x130 [ 3.128575] ret_from_fork+0x10/0x1c [ 3.132110] ---[ end trace 51a68f4c0035d6c1 ]--- [ 3.136753] dwc2: probe of c9000000.usb failed with error -22 Fixes: 5a0803bd5ae2 ("ARM64: dts: meson-gxbb-odroidc2: Enable USB Nodes") Cc: Martin Blumenstingl Cc: Jerome Brunet Cc: Neil Armstrong Acked-by: Martin Blumenstingl Signed-off-by: Anand Moon Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 6039adda12ee..b0b12e389835 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -296,7 +296,7 @@ }; &usb0_phy { - status = "okay"; + status = "disabled"; phy-supply = <&usb_otg_pwr>; }; @@ -306,7 +306,7 @@ }; &usb0 { - status = "okay"; + status = "disabled"; }; &usb1 { From 985b411769c1604fc127f7dd859f324c42acdcb8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Oct 2019 20:11:09 +0200 Subject: [PATCH 167/191] phy: renesas: rcar-gen3-usb2: Use platform_get_irq_optional() for optional irq commit b049e03ca57f238e74a79e44ffc85904db465e72 upstream. As platform_get_irq() now prints an error when the interrupt does not exist, a scary warning may be printed for an optional interrupt: phy_rcar_gen3_usb2 ee0a0200.usb-phy: IRQ index 0 not found Fix this by calling platform_get_irq_optional() instead. Fixes: 7723f4c5ecdb8d83 ("driver core: platform: Add an error message to platform_get_irq*()") Reviewed-by: Stephen Boyd Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 6fd1390fd06e..bfb22f868857 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -615,7 +615,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) return PTR_ERR(channel->base); /* call request_irq for OTG */ - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq >= 0) { INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); irq = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq, From f4bd6190498baeb50862d72c3d16f97edb9554e3 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 27 Nov 2019 22:15:43 +0800 Subject: [PATCH 168/191] tty: serial: msm_serial: Fix lockup for sysrq and oops commit 0e4f7f920a5c6bfe5e851e989f27b35a0cc7fb7e upstream. As the commit 677fe555cbfb ("serial: imx: Fix recursive locking bug") has mentioned the uart driver might cause recursive locking between normal printing and the kernel debugging facilities (e.g. sysrq and oops). In the commit it gave out suggestion for fixing recursive locking issue: "The solution is to avoid locking in the sysrq case and trylock in the oops_in_progress case." This patch follows the suggestion (also used the exactly same code with other serial drivers, e.g. amba-pl011.c) to fix the recursive locking issue, this can avoid stuck caused by deadlock and print out log for sysrq and oops. Fixes: 04896a77a97b ("msm_serial: serial driver for MSM7K onboard serial peripheral.") Signed-off-by: Leo Yan Reviewed-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20191127141544.4277-2-leo.yan@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 00964b6e4ac1..e0718ee5d42a 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1580,6 +1580,7 @@ static void __msm_console_write(struct uart_port *port, const char *s, int num_newlines = 0; bool replaced = false; void __iomem *tf; + int locked = 1; if (is_uartdm) tf = port->membase + UARTDM_TF; @@ -1592,7 +1593,13 @@ static void __msm_console_write(struct uart_port *port, const char *s, num_newlines++; count += num_newlines; - spin_lock(&port->lock); + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&port->lock); + else + spin_lock(&port->lock); + if (is_uartdm) msm_reset_dm_count(port, count); @@ -1628,7 +1635,9 @@ static void __msm_console_write(struct uart_port *port, const char *s, iowrite32_rep(tf, buf, 1); i += num_chars; } - spin_unlock(&port->lock); + + if (locked) + spin_unlock(&port->lock); } static void msm_console_write(struct console *co, const char *s, From 42692a61ab19151159b62887136039840b53f923 Mon Sep 17 00:00:00 2001 From: "Paulo Alcantara (SUSE)" Date: Fri, 22 Nov 2019 12:30:52 -0300 Subject: [PATCH 169/191] cifs: Fix lookup of root ses in DFS referral cache commit df3df923b31d298c3d3653a0380202b9f2df9864 upstream. We don't care about module aliasing validation in cifs_compose_mount_options(..., is_smb3) when finding the root SMB session of an DFS namespace in order to refresh DFS referral cache. The following issue has been observed when mounting with '-t smb3' and then specifying 'vers=2.0': ... Nov 08 15:27:08 tw kernel: address conversion returned 0 for FS0.WIN.LOCAL Nov 08 15:27:08 tw kernel: [kworke] ==> dns_query((null),FS0.WIN.LOCAL,13,(null)) Nov 08 15:27:08 tw kernel: [kworke] call request_key(,FS0.WIN.LOCAL,) Nov 08 15:27:08 tw kernel: [kworke] ==> dns_resolver_cmp(FS0.WIN.LOCAL,FS0.WIN.LOCAL) Nov 08 15:27:08 tw kernel: [kworke] <== dns_resolver_cmp() = 1 Nov 08 15:27:08 tw kernel: [kworke] <== dns_query() = 13 Nov 08 15:27:08 tw kernel: fs/cifs/dns_resolve.c: dns_resolve_server_name_to_ip: resolved: FS0.WIN.LOCAL to 192.168.30.26 ===> Nov 08 15:27:08 tw kernel: CIFS VFS: vers=2.0 not permitted when mounting with smb3 Nov 08 15:27:08 tw kernel: fs/cifs/dfs_cache.c: CIFS VFS: leaving refresh_tcon (xid = 26) rc = -22 ... Fixes: 5072010ccf05 ("cifs: Fix DFS cache refresher for DFS links") Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dfs_cache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 1692c0c6c23a..2faa05860a48 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1317,7 +1317,6 @@ static struct cifs_ses *find_root_ses(struct dfs_cache_vol_info *vi, int rc; struct dfs_info3_param ref = {0}; char *mdata = NULL, *devname = NULL; - bool is_smb3 = tcon->ses->server->vals->header_preamble_size == 0; struct TCP_Server_Info *server; struct cifs_ses *ses; struct smb_vol vol; @@ -1344,7 +1343,7 @@ static struct cifs_ses *find_root_ses(struct dfs_cache_vol_info *vi, goto out; } - rc = cifs_setup_volume_info(&vol, mdata, devname, is_smb3); + rc = cifs_setup_volume_info(&vol, mdata, devname, false); kfree(devname); if (rc) { From 52788b4af1b6379541b8d2b3a4e3d20c275cc42a Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Fri, 29 Nov 2019 21:30:25 -0800 Subject: [PATCH 170/191] fs: cifs: Fix atime update check vs mtime commit 69738cfdfa7032f45d9e7462d24490e61cf163dd upstream. According to the comment in the code and commit log, some apps expect atime >= mtime; but the introduced code results in atime==mtime. Fix the comparison to guard against atime Cc: stfrench@microsoft.com Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index df9377828e2f..ed59e4a8db59 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -163,7 +163,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) spin_lock(&inode->i_lock); /* we do not want atime to be less than mtime, it broke some apps */ - if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime)) + if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime) < 0) inode->i_atime = fattr->cf_mtime; else inode->i_atime = fattr->cf_atime; From d536e814a36cc81a53cba83350ffc536c788a8de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Apr 2019 18:53:50 -0400 Subject: [PATCH 171/191] fix compat handling of FICLONERANGE, FIDEDUPERANGE and FS_IOC_FIEMAP commit 6b2daec19094a90435abe67d16fb43b1a5527254 upstream. Unlike FICLONE, all of those take a pointer argument; they do need compat_ptr() applied to arg. Fixes: d79bdd52d8be ("vfs: wire up compat ioctl for CLONE/CLONE_RANGE") Fixes: 54dbc1517237 ("vfs: hoist the btrfs deduplication ioctl to the vfs") Fixes: ceac204e1da9 ("fs: make fiemap work from compat_ioctl") Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- fs/compat_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a7ec2d3dff92..e0226b2138d6 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1032,10 +1032,11 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, #endif case FICLONE: + goto do_ioctl; case FICLONERANGE: case FIDEDUPERANGE: case FS_IOC_FIEMAP: - goto do_ioctl; + goto found_handler; case FIBMAP: case FIGETBSZ: From ceea88623947dbd12c74a7063bbf75f92afb1529 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 27 Sep 2019 11:51:45 +0900 Subject: [PATCH 172/191] ath9k_htc: Modify byte order for an error message [ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ] rs_datalen is be16 so we need to convert it before printing. Signed-off-by: Masashi Honma Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 4e8e80ac8341..aba0d454c381 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) { ath_err(common, "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n", - rxstatus->rs_datalen, skb->len); + be16_to_cpu(rxstatus->rs_datalen), skb->len); goto rx_next; } From e307bfda74a9c66426313eca25ccfea877a50036 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Fri, 27 Sep 2019 11:51:46 +0900 Subject: [PATCH 173/191] ath9k_htc: Discard undersized packets [ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ] Sometimes the hardware will push small packets that trigger a WARN_ON in mac80211. Discard them early to avoid this issue. This patch ports 2 patches from ath9k to ath9k_htc. commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d "ath9k: discard undersized packets". commit df5c4150501ee7e86383be88f6490d970adcf157 "ath9k: correctly handle short radar pulses". [ 112.835889] ------------[ cut here ]------------ [ 112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211] [ 112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi [ 112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1 [ 112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015 [ 112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211] [ 112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60 [ 112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286 [ 112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000 [ 112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448 [ 112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004 [ 112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000 [ 112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004 [ 112.836070] FS: 0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000 [ 112.836071] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0 [ 112.836074] Call Trace: [ 112.836076] [ 112.836083] ? finish_td+0xb3/0xf0 [ 112.836092] ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc] [ 112.836099] ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc] [ 112.836105] tasklet_action_common.isra.22+0x63/0x110 [ 112.836108] tasklet_action+0x22/0x30 [ 112.836115] __do_softirq+0xe4/0x2da [ 112.836118] irq_exit+0xae/0xb0 [ 112.836121] do_IRQ+0x86/0xe0 [ 112.836125] common_interrupt+0xf/0xf [ 112.836126] [ 112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440 [ 112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18 [ 112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde [ 112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f [ 112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000 [ 112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0 [ 112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00 [ 112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d [ 112.836145] ? cpuidle_enter_state+0x98/0x440 [ 112.836149] ? menu_select+0x370/0x600 [ 112.836151] cpuidle_enter+0x2e/0x40 [ 112.836154] call_cpuidle+0x23/0x40 [ 112.836156] do_idle+0x204/0x280 [ 112.836159] cpu_startup_entry+0x1d/0x20 [ 112.836164] start_secondary+0x167/0x1c0 [ 112.836169] secondary_startup_64+0xa4/0xb0 [ 112.836173] ---[ end trace 9f4cd18479cc5ae5 ]--- Signed-off-by: Masashi Honma Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index aba0d454c381..9cec5c216e1f 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, struct ath_htc_rx_status *rxstatus; struct ath_rx_status rx_stats; bool decrypt_error = false; + __be16 rs_datalen; + bool is_phyerr; if (skb->len < HTC_RX_FRAME_HEADER_SIZE) { ath_err(common, "Corrupted RX frame, dropping (len: %d)\n", @@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, rxstatus = (struct ath_htc_rx_status *)skb->data; - if (be16_to_cpu(rxstatus->rs_datalen) - - (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) { + rs_datalen = be16_to_cpu(rxstatus->rs_datalen); + if (unlikely(rs_datalen - + (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) { ath_err(common, "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n", - be16_to_cpu(rxstatus->rs_datalen), skb->len); + rs_datalen, skb->len); + goto rx_next; + } + + is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY; + /* + * Discard zero-length packets and packets smaller than an ACK + * which are not PHY_ERROR (short radar pulses have a length of 3) + */ + if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) { + ath_warn(common, + "Short RX data len, dropping (dlen: %d)\n", + rs_datalen); goto rx_next; } @@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, * Process PHY errors and return so that the packet * can be dropped. */ - if (rx_stats.rs_status & ATH9K_RXERR_PHY) { + if (unlikely(is_phyerr)) { /* TODO: Not using DFS processing now. */ if (ath_cmn_process_fft(&priv->spec_priv, hdr, &rx_stats, rx_status->mactime)) { From c35de2e005b2bf1f59fa98bc69ea2dbb9a75380f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 20:47:58 +0100 Subject: [PATCH 174/191] drm/i915/execlists: Fix annotation for decoupling virtual request [ Upstream commit 08ad9a3846fc72b047b110b36d162ffbcf298fa2 ] As we may signal a request and take the engine->active.lock within the signaler, the engine submission paths have to use a nested annotation on their requests -- but we guarantee that we can never submit on the same engine as the signaling fence. <4>[ 723.763281] WARNING: possible circular locking dependency detected <4>[ 723.763285] 5.3.0-g80fa0e042cdb-drmtip_379+ #1 Tainted: G U <4>[ 723.763288] ------------------------------------------------------ <4>[ 723.763291] gem_exec_await/1388 is trying to acquire lock: <4>[ 723.763294] ffff93a7b53221d8 (&engine->active.lock){..-.}, at: execlists_submit_request+0x2b/0x1e0 [i915] <4>[ 723.763378] but task is already holding lock: <4>[ 723.763381] ffff93a7c25f6d20 (&i915_request_get(rq)->submit/1){-.-.}, at: __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 723.763420] which lock already depends on the new lock. <4>[ 723.763423] the existing dependency chain (in reverse order) is: <4>[ 723.763427] -> #2 (&i915_request_get(rq)->submit/1){-.-.}: <4>[ 723.763434] _raw_spin_lock_irqsave_nested+0x39/0x50 <4>[ 723.763478] __i915_sw_fence_complete+0x1b2/0x250 [i915] <4>[ 723.763513] intel_engine_breadcrumbs_irq+0x3aa/0x5e0 [i915] <4>[ 723.763600] cs_irq_handler+0x49/0x50 [i915] <4>[ 723.763659] gen11_gt_irq_handler+0x17b/0x280 [i915] <4>[ 723.763690] gen11_irq_handler+0x54/0xf0 [i915] <4>[ 723.763695] __handle_irq_event_percpu+0x41/0x2d0 <4>[ 723.763699] handle_irq_event_percpu+0x2b/0x70 <4>[ 723.763702] handle_irq_event+0x2f/0x50 <4>[ 723.763706] handle_edge_irq+0xee/0x1a0 <4>[ 723.763709] do_IRQ+0x7e/0x160 <4>[ 723.763712] ret_from_intr+0x0/0x1d <4>[ 723.763717] __slab_alloc.isra.28.constprop.33+0x4f/0x70 <4>[ 723.763720] kmem_cache_alloc+0x28d/0x2f0 <4>[ 723.763724] vm_area_dup+0x15/0x40 <4>[ 723.763727] dup_mm+0x2dd/0x550 <4>[ 723.763730] copy_process+0xf21/0x1ef0 <4>[ 723.763734] _do_fork+0x71/0x670 <4>[ 723.763737] __se_sys_clone+0x6e/0xa0 <4>[ 723.763741] do_syscall_64+0x4f/0x210 <4>[ 723.763744] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 723.763747] -> #1 (&(&rq->lock)->rlock#2){-.-.}: <4>[ 723.763752] _raw_spin_lock+0x2a/0x40 <4>[ 723.763789] __unwind_incomplete_requests+0x3eb/0x450 [i915] <4>[ 723.763825] __execlists_submission_tasklet+0x9ec/0x1d60 [i915] <4>[ 723.763864] execlists_submission_tasklet+0x34/0x50 [i915] <4>[ 723.763874] tasklet_action_common.isra.5+0x47/0xb0 <4>[ 723.763878] __do_softirq+0xd8/0x4ae <4>[ 723.763881] irq_exit+0xa9/0xc0 <4>[ 723.763883] smp_apic_timer_interrupt+0xb7/0x280 <4>[ 723.763887] apic_timer_interrupt+0xf/0x20 <4>[ 723.763892] cpuidle_enter_state+0xae/0x450 <4>[ 723.763895] cpuidle_enter+0x24/0x40 <4>[ 723.763899] do_idle+0x1e7/0x250 <4>[ 723.763902] cpu_startup_entry+0x14/0x20 <4>[ 723.763905] start_secondary+0x15f/0x1b0 <4>[ 723.763908] secondary_startup_64+0xa4/0xb0 <4>[ 723.763911] -> #0 (&engine->active.lock){..-.}: <4>[ 723.763916] __lock_acquire+0x15d8/0x1ea0 <4>[ 723.763919] lock_acquire+0xa6/0x1c0 <4>[ 723.763922] _raw_spin_lock_irqsave+0x33/0x50 <4>[ 723.763956] execlists_submit_request+0x2b/0x1e0 [i915] <4>[ 723.764002] submit_notify+0xa8/0x13c [i915] <4>[ 723.764035] __i915_sw_fence_complete+0x81/0x250 [i915] <4>[ 723.764054] i915_sw_fence_wake+0x51/0x64 [i915] <4>[ 723.764054] __i915_sw_fence_complete+0x1ee/0x250 [i915] <4>[ 723.764054] dma_i915_sw_fence_wake_timer+0x14/0x20 [i915] <4>[ 723.764054] dma_fence_signal_locked+0x9e/0x1c0 <4>[ 723.764054] dma_fence_signal+0x1f/0x40 <4>[ 723.764054] vgem_fence_signal_ioctl+0x67/0xc0 [vgem] <4>[ 723.764054] drm_ioctl_kernel+0x83/0xf0 <4>[ 723.764054] drm_ioctl+0x2f3/0x3b0 <4>[ 723.764054] do_vfs_ioctl+0xa0/0x6f0 <4>[ 723.764054] ksys_ioctl+0x35/0x60 <4>[ 723.764054] __x64_sys_ioctl+0x11/0x20 <4>[ 723.764054] do_syscall_64+0x4f/0x210 <4>[ 723.764054] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 723.764054] other info that might help us debug this: <4>[ 723.764054] Chain exists of: &engine->active.lock --> &(&rq->lock)->rlock#2 --> &i915_request_get(rq)->submit/1 <4>[ 723.764054] Possible unsafe locking scenario: <4>[ 723.764054] CPU0 CPU1 <4>[ 723.764054] ---- ---- <4>[ 723.764054] lock(&i915_request_get(rq)->submit/1); <4>[ 723.764054] lock(&(&rq->lock)->rlock#2); <4>[ 723.764054] lock(&i915_request_get(rq)->submit/1); <4>[ 723.764054] lock(&engine->active.lock); <4>[ 723.764054] *** DEADLOCK *** Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111862 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004194758.19679-1-chris@chris-wilson.co.uk Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 06a506c29463..d564bfcab6a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -525,7 +525,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } From b8233f7b2f77336fddb089bb8b8bc17fa0934373 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 5 Nov 2019 15:33:57 -0800 Subject: [PATCH 175/191] xfs: periodically yield scrub threads to the scheduler [ Upstream commit 5d1116d4c6af3e580f1ed0382ca5a94bd65a34cf ] Christoph Hellwig complained about the following soft lockup warning when running scrub after generic/175 when preemption is disabled and slub debugging is enabled: watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [xfs_scrub:161] Modules linked in: irq event stamp: 41692326 hardirqs last enabled at (41692325): [] _raw_0 hardirqs last disabled at (41692326): [] trace0 softirqs last enabled at (41684994): [] __do_e softirqs last disabled at (41684987): [] irq_e0 CPU: 3 PID: 16189 Comm: xfs_scrub Not tainted 5.4.0-rc3+ #30 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.124 RIP: 0010:_raw_spin_unlock_irqrestore+0x39/0x40 Code: 89 f3 be 01 00 00 00 e8 d5 3a e5 fe 48 89 ef e8 ed 87 e5 f2 RSP: 0018:ffffc9000233f970 EFLAGS: 00000286 ORIG_RAX: ffffffffff3 RAX: ffff88813b398040 RBX: 0000000000000286 RCX: 0000000000000006 RDX: 0000000000000006 RSI: ffff88813b3988c0 RDI: ffff88813b398040 RBP: ffff888137958640 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00042b0c00 R13: 0000000000000001 R14: ffff88810ac32308 R15: ffff8881376fc040 FS: 00007f6113dea700(0000) GS:ffff88813bb80000(0000) knlGS:00000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6113de8ff8 CR3: 000000012f290000 CR4: 00000000000006e0 Call Trace: free_debug_processing+0x1dd/0x240 __slab_free+0x231/0x410 kmem_cache_free+0x30e/0x360 xchk_ag_btcur_free+0x76/0xb0 xchk_ag_free+0x10/0x80 xchk_bmap_iextent_xref.isra.14+0xd9/0x120 xchk_bmap_iextent+0x187/0x210 xchk_bmap+0x2e0/0x3b0 xfs_scrub_metadata+0x2e7/0x500 xfs_ioc_scrub_metadata+0x4a/0xa0 xfs_file_ioctl+0x58a/0xcd0 do_vfs_ioctl+0xa0/0x6f0 ksys_ioctl+0x5b/0x90 __x64_sys_ioctl+0x11/0x20 do_syscall_64+0x4b/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe If preemption is disabled, all metadata buffers needed to perform the scrub are already in memory, and there are a lot of records to check, it's possible that the scrub thread will run for an extended period of time without sleeping for IO or any other reason. Then the watchdog timer or the RCU stall timeout can trigger, producing the backtrace above. To fix this problem, call cond_resched() from the scrub thread so that we back out to the scheduler whenever necessary. Reported-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/common.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 003a772cd26c..2e50d146105d 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -14,8 +14,15 @@ static inline bool xchk_should_terminate( struct xfs_scrub *sc, - int *error) + int *error) { + /* + * If preemption is disabled, we need to yield to the scheduler every + * few seconds so that we don't run afoul of the soft lockup watchdog + * or RCU stall detector. + */ + cond_resched(); + if (fatal_signal_pending(current)) { if (*error == 0) *error = -EAGAIN; From 3900f9268a3d07de08286f82ea328120801008f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 18:29:11 -0800 Subject: [PATCH 176/191] net: add annotations on hh->hh_len lockless accesses [ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ] KCSAN reported a data-race [1] While we can use READ_ONCE() on the read sides, we need to make sure hh->hh_len is written last. [1] BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0: eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247 neigh_hh_init net/core/neighbour.c:1463 [inline] neigh_resolve_output net/core/neighbour.c:1480 [inline] neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505 ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647 rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1: neigh_resolve_output net/core/neighbour.c:1479 [inline] neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470 neigh_output include/net/neighbour.h:511 [inline] ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline] __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175 dst_output include/net/dst.h:436 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505 ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647 rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events rt6_probe_deferred Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/firewire/net.c | 6 +++++- include/net/neighbour.h | 2 +- net/core/neighbour.c | 4 ++-- net/ethernet/eth.c | 7 ++++++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index b132ab9ad607..715e491dfbc3 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -250,7 +250,11 @@ static int fwnet_header_cache(const struct neighbour *neigh, h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h))); h->h_proto = type; memcpy(h->h_dest, neigh->ha, net->addr_len); - hh->hh_len = FWNET_HLEN; + + /* Pairs with the READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, FWNET_HLEN); return 0; } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 5e679c8dae0b..8ec77bfdc1a4 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -467,7 +467,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 08ebc3ac5343..f2452496ad9f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1194,7 +1194,7 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { hh = &neigh->hh; - if (hh->hh_len) { + if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1473,7 +1473,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && !neigh->hh.hh_len) + if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 17374afee28f..9040fe55e0f5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -244,7 +244,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); - hh->hh_len = ETH_HLEN; + + /* Pairs with READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, ETH_HLEN); + return 0; } EXPORT_SYMBOL(eth_header_cache); From 0c261ca281733557413a2d93c8fd9fd2eec6156e Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sat, 20 Jul 2019 14:05:20 +0800 Subject: [PATCH 177/191] ubifs: ubifs_tnc_start_commit: Fix OOB in layout_in_gaps [ Upstream commit 6abf57262166b4f4294667fb5206ae7ba1ba96f5 ] Running stress-test test_2 in mtd-utils on ubi device, sometimes we can get following oops message: BUG: unable to handle page fault for address: ffffffff00000140 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 280a067 P4D 280a067 PUD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 60 Comm: kworker/u16:1 Kdump: loaded Not tainted 5.2.0 #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0 -0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Workqueue: writeback wb_workfn (flush-ubifs_0_0) RIP: 0010:rb_next_postorder+0x2e/0xb0 Code: 80 db 03 01 48 85 ff 0f 84 97 00 00 00 48 8b 17 48 83 05 bc 80 db 03 01 48 83 e2 fc 0f 84 82 00 00 00 48 83 05 b2 80 db 03 01 <48> 3b 7a 10 48 89 d0 74 02 f3 c3 48 8b 52 08 48 83 05 a3 80 db 03 RSP: 0018:ffffc90000887758 EFLAGS: 00010202 RAX: ffff888129ae4700 RBX: ffff888138b08400 RCX: 0000000080800001 RDX: ffffffff00000130 RSI: 0000000080800024 RDI: ffff888138b08400 RBP: ffff888138b08400 R08: ffffea0004a6b920 R09: 0000000000000000 R10: ffffc90000887740 R11: 0000000000000001 R12: ffff888128d48000 R13: 0000000000000800 R14: 000000000000011e R15: 00000000000007c8 FS: 0000000000000000(0000) GS:ffff88813ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff00000140 CR3: 000000013789d000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: destroy_old_idx+0x5d/0xa0 [ubifs] ubifs_tnc_start_commit+0x4fe/0x1380 [ubifs] do_commit+0x3eb/0x830 [ubifs] ubifs_run_commit+0xdc/0x1c0 [ubifs] Above Oops are due to the slab-out-of-bounds happened in do-while of function layout_in_gaps indirectly called by ubifs_tnc_start_commit. In function layout_in_gaps, there is a do-while loop placing index nodes into the gaps created by obsolete index nodes in non-empty index LEBs until rest index nodes can totally be placed into pre-allocated empty LEBs. @c->gap_lebs points to a memory area(integer array) which records LEB numbers used by 'in-the-gaps' method. Whenever a fitable index LEB is found, corresponding lnum will be incrementally written into the memory area pointed by @c->gap_lebs. The size ((@c->lst.idx_lebs + 1) * sizeof(int)) of memory area is allocated before do-while loop and can not be changed in the loop. But @c->lst.idx_lebs could be increased by function ubifs_change_lp (called by layout_leb_in_gaps->ubifs_find_dirty_idx_leb->get_idx_gc_leb) during the loop. So, sometimes oob happens when number of cycles in do-while loop exceeds the original value of @c->lst.idx_lebs. See detail in https://bugzilla.kernel.org/show_bug.cgi?id=204229. This patch fixes oob in layout_in_gaps. Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- fs/ubifs/tnc_commit.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index a384a0f9ff32..234be1c4dc87 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -212,7 +212,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, /** * layout_leb_in_gaps - layout index nodes using in-the-gaps method. * @c: UBIFS file-system description object - * @p: return LEB number here + * @p: return LEB number in @c->gap_lebs[p] * * This function lays out new index nodes for dirty znodes using in-the-gaps * method of TNC commit. @@ -221,7 +221,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, * This function returns the number of index nodes written into the gaps, or a * negative error code on failure. */ -static int layout_leb_in_gaps(struct ubifs_info *c, int *p) +static int layout_leb_in_gaps(struct ubifs_info *c, int p) { struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; @@ -236,7 +236,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p) * filled, however we do not check there at present. */ return lnum; /* Error code */ - *p = lnum; + c->gap_lebs[p] = lnum; dbg_gc("LEB %d", lnum); /* * Scan the index LEB. We use the generic scan for this even though @@ -355,7 +355,7 @@ static int get_leb_cnt(struct ubifs_info *c, int cnt) */ static int layout_in_gaps(struct ubifs_info *c, int cnt) { - int err, leb_needed_cnt, written, *p; + int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs; dbg_gc("%d znodes to write", cnt); @@ -364,9 +364,9 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) if (!c->gap_lebs) return -ENOMEM; - p = c->gap_lebs; + old_idx_lebs = c->lst.idx_lebs; do { - ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs); + ubifs_assert(c, p < c->lst.idx_lebs); written = layout_leb_in_gaps(c, p); if (written < 0) { err = written; @@ -392,9 +392,29 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) leb_needed_cnt = get_leb_cnt(c, cnt); dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, leb_needed_cnt, c->ileb_cnt); + /* + * Dynamically change the size of @c->gap_lebs to prevent + * oob, because @c->lst.idx_lebs could be increased by + * function @get_idx_gc_leb (called by layout_leb_in_gaps-> + * ubifs_find_dirty_idx_leb) during loop. Only enlarge + * @c->gap_lebs when needed. + * + */ + if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs && + old_idx_lebs < c->lst.idx_lebs) { + old_idx_lebs = c->lst.idx_lebs; + gap_lebs = krealloc(c->gap_lebs, sizeof(int) * + (old_idx_lebs + 1), GFP_NOFS); + if (!gap_lebs) { + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return -ENOMEM; + } + c->gap_lebs = gap_lebs; + } } while (leb_needed_cnt > c->ileb_cnt); - *p = -1; + c->gap_lebs[p] = -1; return 0; } From 2bae3ee327c972896543c8f2f190568bbcf64b54 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Sep 2019 11:30:57 -0700 Subject: [PATCH 178/191] btrfs: get rid of unique workqueue helper functions [ Upstream commit a0cac0ec961f0d42828eeef196ac2246a2f07659 ] Commit 9e0af2376434 ("Btrfs: fix task hang under heavy compressed write") worked around the issue that a recycled work item could get a false dependency on the original work item due to how the workqueue code guarantees non-reentrancy. It did so by giving different work functions to different types of work. However, the fixes in the previous few patches are more complete, as they prevent a work item from being recycled at all (except for a tiny window that the kernel workqueue code handles for us). This obsoletes the previous fix, so we don't need the unique helpers for correctness. The only other reason to keep them would be so they show up in stack traces, but they always seem to be optimized to a tail call, so they don't show up anyways. So, let's just get rid of the extra indirection. While we're here, rename normal_work_helper() to the more informative btrfs_work_helper(). Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/async-thread.c | 58 +++++++++------------------------------- fs/btrfs/async-thread.h | 33 ++--------------------- fs/btrfs/block-group.c | 3 +-- fs/btrfs/delayed-inode.c | 4 +-- fs/btrfs/disk-io.c | 34 ++++++++--------------- fs/btrfs/inode.c | 36 ++++++++----------------- fs/btrfs/ordered-data.c | 1 - fs/btrfs/qgroup.c | 1 - fs/btrfs/raid56.c | 5 ++-- fs/btrfs/reada.c | 3 +-- fs/btrfs/scrub.c | 14 +++++----- fs/btrfs/volumes.c | 3 +-- 12 files changed, 50 insertions(+), 145 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 10a04b99798a..3f3110975f88 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -53,16 +53,6 @@ struct btrfs_workqueue { struct __btrfs_workqueue *high; }; -static void normal_work_helper(struct btrfs_work *work); - -#define BTRFS_WORK_HELPER(name) \ -noinline_for_stack void btrfs_##name(struct work_struct *arg) \ -{ \ - struct btrfs_work *work = container_of(arg, struct btrfs_work, \ - normal_work); \ - normal_work_helper(work); \ -} - struct btrfs_fs_info * btrfs_workqueue_owner(const struct __btrfs_workqueue *wq) { @@ -89,29 +79,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq) return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2; } -BTRFS_WORK_HELPER(worker_helper); -BTRFS_WORK_HELPER(delalloc_helper); -BTRFS_WORK_HELPER(flush_delalloc_helper); -BTRFS_WORK_HELPER(cache_helper); -BTRFS_WORK_HELPER(submit_helper); -BTRFS_WORK_HELPER(fixup_helper); -BTRFS_WORK_HELPER(endio_helper); -BTRFS_WORK_HELPER(endio_meta_helper); -BTRFS_WORK_HELPER(endio_meta_write_helper); -BTRFS_WORK_HELPER(endio_raid56_helper); -BTRFS_WORK_HELPER(endio_repair_helper); -BTRFS_WORK_HELPER(rmw_helper); -BTRFS_WORK_HELPER(endio_write_helper); -BTRFS_WORK_HELPER(freespace_write_helper); -BTRFS_WORK_HELPER(delayed_meta_helper); -BTRFS_WORK_HELPER(readahead_helper); -BTRFS_WORK_HELPER(qgroup_rescan_helper); -BTRFS_WORK_HELPER(extent_refs_helper); -BTRFS_WORK_HELPER(scrub_helper); -BTRFS_WORK_HELPER(scrubwrc_helper); -BTRFS_WORK_HELPER(scrubnc_helper); -BTRFS_WORK_HELPER(scrubparity_helper); - static struct __btrfs_workqueue * __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, unsigned int flags, int limit_active, int thresh) @@ -302,12 +269,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq, * original work item cannot depend on the recycled work * item in that case (see find_worker_executing_work()). * - * Note that the work of one Btrfs filesystem may depend - * on the work of another Btrfs filesystem via, e.g., a - * loop device. Therefore, we must not allow the current - * work item to be recycled until we are really done, - * otherwise we break the above assumption and can - * deadlock. + * Note that different types of Btrfs work can depend on + * each other, and one type of work on one Btrfs + * filesystem may even depend on the same type of work + * on another Btrfs filesystem via, e.g., a loop device. + * Therefore, we must not allow the current work item to + * be recycled until we are really done, otherwise we + * break the above assumption and can deadlock. */ free_self = true; } else { @@ -331,8 +299,10 @@ static void run_ordered_work(struct __btrfs_workqueue *wq, } } -static void normal_work_helper(struct btrfs_work *work) +static void btrfs_work_helper(struct work_struct *normal_work) { + struct btrfs_work *work = container_of(normal_work, struct btrfs_work, + normal_work); struct __btrfs_workqueue *wq; void *wtag; int need_order = 0; @@ -362,15 +332,13 @@ static void normal_work_helper(struct btrfs_work *work) trace_btrfs_all_work_done(wq->fs_info, wtag); } -void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, - btrfs_func_t func, - btrfs_func_t ordered_func, - btrfs_func_t ordered_free) +void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func, + btrfs_func_t ordered_func, btrfs_func_t ordered_free) { work->func = func; work->ordered_func = ordered_func; work->ordered_free = ordered_free; - INIT_WORK(&work->normal_work, uniq_func); + INIT_WORK(&work->normal_work, btrfs_work_helper); INIT_LIST_HEAD(&work->ordered_list); work->flags = 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 7861c9feba5f..c5bf2b117c05 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -29,42 +29,13 @@ struct btrfs_work { unsigned long flags; }; -#define BTRFS_WORK_HELPER_PROTO(name) \ -void btrfs_##name(struct work_struct *arg) - -BTRFS_WORK_HELPER_PROTO(worker_helper); -BTRFS_WORK_HELPER_PROTO(delalloc_helper); -BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper); -BTRFS_WORK_HELPER_PROTO(cache_helper); -BTRFS_WORK_HELPER_PROTO(submit_helper); -BTRFS_WORK_HELPER_PROTO(fixup_helper); -BTRFS_WORK_HELPER_PROTO(endio_helper); -BTRFS_WORK_HELPER_PROTO(endio_meta_helper); -BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); -BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); -BTRFS_WORK_HELPER_PROTO(endio_repair_helper); -BTRFS_WORK_HELPER_PROTO(rmw_helper); -BTRFS_WORK_HELPER_PROTO(endio_write_helper); -BTRFS_WORK_HELPER_PROTO(freespace_write_helper); -BTRFS_WORK_HELPER_PROTO(delayed_meta_helper); -BTRFS_WORK_HELPER_PROTO(readahead_helper); -BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper); -BTRFS_WORK_HELPER_PROTO(extent_refs_helper); -BTRFS_WORK_HELPER_PROTO(scrub_helper); -BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); -BTRFS_WORK_HELPER_PROTO(scrubnc_helper); -BTRFS_WORK_HELPER_PROTO(scrubparity_helper); - - struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, unsigned int flags, int limit_active, int thresh); -void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, - btrfs_func_t func, - btrfs_func_t ordered_func, - btrfs_func_t ordered_free); +void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func, + btrfs_func_t ordered_func, btrfs_func_t ordered_free); void btrfs_queue_work(struct btrfs_workqueue *wq, struct btrfs_work *work); void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0d2da2366869..7dcfa7d7632a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -695,8 +695,7 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; refcount_set(&caching_ctl->count, 1); - btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, - caching_thread, NULL, NULL); + btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); spin_lock(&cache->lock); /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 57a9ad3e8c29..c7a53e79c66d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1367,8 +1367,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, - btrfs_async_run_delayed_root, NULL, NULL); + btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL, + NULL); async_work->nr = nr; btrfs_queue_work(fs_info->delayed_workers, &async_work->work); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3895c21853cc..bae334212ee2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -706,43 +706,31 @@ static void end_workqueue_bio(struct bio *bio) struct btrfs_end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; struct btrfs_workqueue *wq; - btrfs_work_func_t func; fs_info = end_io_wq->info; end_io_wq->status = bio->bi_status; if (bio_op(bio) == REQ_OP_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) wq = fs_info->endio_meta_write_workers; - func = btrfs_endio_meta_write_helper; - } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) { + else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) wq = fs_info->endio_freespace_worker; - func = btrfs_freespace_write_helper; - } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) wq = fs_info->endio_raid56_workers; - func = btrfs_endio_raid56_helper; - } else { + else wq = fs_info->endio_write_workers; - func = btrfs_endio_write_helper; - } } else { - if (unlikely(end_io_wq->metadata == - BTRFS_WQ_ENDIO_DIO_REPAIR)) { + if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR)) wq = fs_info->endio_repair_workers; - func = btrfs_endio_repair_helper; - } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) wq = fs_info->endio_raid56_workers; - func = btrfs_endio_raid56_helper; - } else if (end_io_wq->metadata) { + else if (end_io_wq->metadata) wq = fs_info->endio_meta_workers; - func = btrfs_endio_meta_helper; - } else { + else wq = fs_info->endio_workers; - func = btrfs_endio_helper; - } } - btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); + btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); btrfs_queue_work(wq, &end_io_wq->work); } @@ -835,8 +823,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, async->mirror_num = mirror_num; async->submit_bio_start = submit_bio_start; - btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, - run_one_async_done, run_one_async_free); + btrfs_init_work(&async->work, run_one_async_start, run_one_async_done, + run_one_async_free); async->bio_offset = bio_offset; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bc6e7d15577a..dc14fc2e4206 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1268,10 +1268,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_chunk[i].write_flags = write_flags; INIT_LIST_HEAD(&async_chunk[i].extents); - btrfs_init_work(&async_chunk[i].work, - btrfs_delalloc_helper, - async_cow_start, async_cow_submit, - async_cow_free); + btrfs_init_work(&async_chunk[i].work, async_cow_start, + async_cow_submit, async_cow_free); nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); atomic_add(nr_pages, &fs_info->async_delalloc_pages); @@ -2264,8 +2262,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) SetPageChecked(page); get_page(page); - btrfs_init_work(&fixup->work, btrfs_fixup_helper, - btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -3258,7 +3255,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_extent *ordered_extent = NULL; struct btrfs_workqueue *wq; - btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -3267,16 +3263,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, end - start + 1, uptodate)) return; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) wq = fs_info->endio_freespace_worker; - func = btrfs_freespace_write_helper; - } else { + else wq = fs_info->endio_write_workers; - func = btrfs_endio_write_helper; - } - btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, - NULL); + btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered_extent->work); } @@ -8213,18 +8205,14 @@ static void __endio_write_update_ordered(struct inode *inode, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_extent *ordered = NULL; struct btrfs_workqueue *wq; - btrfs_work_func_t func; u64 ordered_offset = offset; u64 ordered_bytes = bytes; u64 last_offset; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) wq = fs_info->endio_freespace_worker; - func = btrfs_freespace_write_helper; - } else { + else wq = fs_info->endio_write_workers; - func = btrfs_endio_write_helper; - } while (ordered_offset < offset + bytes) { last_offset = ordered_offset; @@ -8232,9 +8220,8 @@ static void __endio_write_update_ordered(struct inode *inode, &ordered_offset, ordered_bytes, uptodate)) { - btrfs_init_work(&ordered->work, func, - finish_ordered_fn, - NULL, NULL); + btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, + NULL); btrfs_queue_work(wq, &ordered->work); } /* @@ -10119,8 +10106,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode init_completion(&work->completion); INIT_LIST_HEAD(&work->list); work->inode = inode; - btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, - btrfs_run_delalloc_work, NULL, NULL); + btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); return work; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 24b6c72b9a59..6240a5a1f2c0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -547,7 +547,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, - btrfs_flush_delalloc_helper, btrfs_run_ordered_extent_work, NULL, NULL); list_add_tail(&ordered->work_list, &works); btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3ad151655eb8..27a903aaf43b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3280,7 +3280,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, - btrfs_qgroup_rescan_helper, btrfs_qgroup_rescan_worker, NULL, NULL); return 0; } diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 57a2ac721985..8f47a85944eb 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -190,7 +190,7 @@ static void scrub_parity_work(struct btrfs_work *work); static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) { - btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL); + btrfs_init_work(&rbio->work, work_func, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); } @@ -1743,8 +1743,7 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) plug = container_of(cb, struct btrfs_plug_cb, cb); if (from_schedule) { - btrfs_init_work(&plug->work, btrfs_rmw_helper, - unplug_work, NULL, NULL); + btrfs_init_work(&plug->work, unplug_work, NULL, NULL); btrfs_queue_work(plug->info->rmw_workers, &plug->work); return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dd4f9c2b7107..1feaeadc8cf5 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -819,8 +819,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - btrfs_init_work(&rmw->work, btrfs_readahead_helper, - reada_start_machine_worker, NULL, NULL); + btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a0770a6aee00..a7b043fd7a57 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -598,8 +598,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx( sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - btrfs_init_work(&sbio->work, btrfs_scrub_helper, - scrub_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL, + NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -1720,8 +1720,7 @@ static void scrub_wr_bio_end_io(struct bio *bio) sbio->status = bio->bi_status; sbio->bio = bio; - btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, - scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } @@ -2203,8 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) raid56_add_scrub_pages(rbio, spage->page, spage->logical); } - btrfs_init_work(&sblock->work, btrfs_scrub_helper, - scrub_missing_raid56_worker, NULL, NULL); + btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL); scrub_block_get(sblock); scrub_pending_bio_inc(sctx); raid56_submit_missing_rbio(rbio); @@ -2742,8 +2740,8 @@ static void scrub_parity_bio_endio(struct bio *bio) bio_put(bio); - btrfs_init_work(&sparity->work, btrfs_scrubparity_helper, - scrub_parity_bio_endio_worker, NULL, NULL); + btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL, + NULL); btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e04409f85063..d8d7b1ee83ca 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6676,8 +6676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - btrfs_init_work(&dev->work, btrfs_submit_helper, - pending_bios_fn, NULL, NULL); + btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); return dev; } From 4e1269e147980f9f765c4d049058b2e8b000d196 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 10 Jul 2019 12:28:16 -0700 Subject: [PATCH 179/191] Btrfs: only associate the locked page with one async_chunk struct [ Upstream commit 1d53c9e6723022b12e4a5ed4b141f67c834b7f6f ] The btrfs writepages function collects a large range of pages flagged for delayed allocation, and then sends them down through the COW code for processing. When compression is on, we allocate one async_chunk structure for every 512K, and then run those pages through the compression code for IO submission. writepages starts all of this off with a single page, locked by the original call to extent_write_cache_pages(), and it's important to keep track of this page because it has already been through clear_page_dirty_for_io(). The btrfs async_chunk struct has a pointer to the locked_page, and when we're redirtying the page because compression had to fallback to uncompressed IO, we use page->index to decide if a given async_chunk struct really owns that page. But, this is racey. If a given delalloc range is broken up into two async_chunks (chunkA and chunkB), we can end up with something like this: compress_file_range(chunkA) submit_compress_extents(chunkA) submit compressed bios(chunkA) put_page(locked_page) compress_file_range(chunkB) ... Or: async_cow_submit submit_compressed_extents <--- falls back to buffered writeout cow_file_range extent_clear_unlock_delalloc __process_pages_contig put_page(locked_pages) async_cow_submit The end result is that chunkA is completed and cleaned up before chunkB even starts processing. This means we can free locked_page() and reuse it elsewhere. If we get really lucky, it'll have the same page->index in its new home as it did before. While we're processing chunkB, we might decide we need to fall back to uncompressed IO, and so compress_file_range() will call __set_page_dirty_nobufers() on chunkB->locked_page. Without cgroups in use, this creates as a phantom dirty page, which isn't great but isn't the end of the world. What can happen, it can go through the fixup worker and the whole COW machinery again: in submit_compressed_extents(): while (async extents) { ... cow_file_range if (!page_started ...) extent_write_locked_range else if (...) unlock_page continue; This hasn't been observed in practice but is still possible. With cgroups in use, we might crash in the accounting code because page->mapping->i_wb isn't set. BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 IP: percpu_counter_add_batch+0x11/0x70 PGD 66534e067 P4D 66534e067 PUD 66534f067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 16 PID: 2172 Comm: rm Not tainted RIP: 0010:percpu_counter_add_batch+0x11/0x70 RSP: 0018:ffffc9000a97bbe0 EFLAGS: 00010286 RAX: 0000000000000005 RBX: 0000000000000090 RCX: 0000000000026115 RDX: 0000000000000030 RSI: ffffffffffffffff RDI: 0000000000000090 RBP: 0000000000000000 R08: fffffffffffffff5 R09: 0000000000000000 R10: 00000000000260c0 R11: ffff881037fc26c0 R12: ffffffffffffffff R13: ffff880fe4111548 R14: ffffc9000a97bc90 R15: 0000000000000001 FS: 00007f5503ced480(0000) GS:ffff880ff7200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000d0 CR3: 00000001e0459005 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: account_page_cleaned+0x15b/0x1f0 __cancel_dirty_page+0x146/0x200 truncate_cleanup_page+0x92/0xb0 truncate_inode_pages_range+0x202/0x7d0 btrfs_evict_inode+0x92/0x5a0 evict+0xc1/0x190 do_unlinkat+0x176/0x280 do_syscall_64+0x63/0x1a0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 The fix here is to make asyc_chunk->locked_page NULL everywhere but the one async_chunk struct that's allowed to do things to the locked page. Link: https://lore.kernel.org/linux-btrfs/c2419d01-5c84-3fb4-189e-4db519d08796@suse.com/ Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads") Reviewed-by: Josef Bacik Signed-off-by: Chris Mason [ update changelog from mail thread discussion ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/inode.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be9dc78aa727..33c6b191ca59 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1899,7 +1899,7 @@ static int __process_pages_contig(struct address_space *mapping, if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); - if (pages[i] == locked_page) { + if (locked_page && pages[i] == locked_page) { put_page(pages[i]); pages_locked++; continue; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dc14fc2e4206..0b2758961b1c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -712,10 +712,12 @@ cleanup_and_bail_uncompressed: * to our extent and set things up for the async work queue to run * cow_file_range to do the normal delalloc dance. */ - if (page_offset(async_chunk->locked_page) >= start && - page_offset(async_chunk->locked_page) <= end) + if (async_chunk->locked_page && + (page_offset(async_chunk->locked_page) >= start && + page_offset(async_chunk->locked_page)) <= end) { __set_page_dirty_nobuffers(async_chunk->locked_page); /* unlocked later on in the async handlers */ + } if (redirty) extent_range_redirty_for_io(inode, start, end); @@ -795,7 +797,7 @@ retry: async_extent->start + async_extent->ram_size - 1, WB_SYNC_ALL); - else if (ret) + else if (ret && async_chunk->locked_page) unlock_page(async_chunk->locked_page); kfree(async_extent); cond_resched(); @@ -1264,10 +1266,25 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_chunk[i].inode = inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; - async_chunk[i].locked_page = locked_page; async_chunk[i].write_flags = write_flags; INIT_LIST_HEAD(&async_chunk[i].extents); + /* + * The locked_page comes all the way from writepage and its + * the original page we were actually given. As we spread + * this large delalloc region across multiple async_chunk + * structs, only the first struct needs a pointer to locked_page + * + * This way we don't need racey decisions about who is supposed + * to unlock it. + */ + if (locked_page) { + async_chunk[i].locked_page = locked_page; + locked_page = NULL; + } else { + async_chunk[i].locked_page = NULL; + } + btrfs_init_work(&async_chunk[i].work, async_cow_start, async_cow_submit, async_cow_free); From 536d7fa7fb10aa72b604dccfa0a0732a74e80d00 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 17 Nov 2019 14:55:38 +0100 Subject: [PATCH 180/191] s390/smp: fix physical to logical CPU map for SMT [ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ] If an SMT capable system is not IPL'ed from the first CPU the setup of the physical to logical CPU mapping is broken: the IPL core gets CPU number 0, but then the next core gets CPU number 1. Correct would be that all SMT threads of CPU 0 get the subsequent logical CPU numbers. This is important since a lot of code (like e.g. the CPU topology code) assumes that CPU maps are setup like this. If the mapping is broken the system will not IPL due to broken topology masks: [ 1.716341] BUG: arch topology broken [ 1.716342] the SMT domain not a subset of the MC domain [ 1.716343] BUG: arch topology broken [ 1.716344] the MC domain not a subset of the BOOK domain This scenario can usually not happen since LPARs are always IPL'ed from CPU 0 and also re-IPL is intiated from CPU 0. However older kernels did initiate re-IPL on an arbitrary CPU. If therefore a re-IPL from an old kernel into a new kernel is initiated this may lead to crash. Fix this by setting up the physical to logical CPU mapping correctly. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d95c85780e07..06dddd7c4290 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -727,39 +727,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -808,7 +836,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1153,7 +1181,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); From 5e71be1a60d881a4417b8f619d45e3ccfa7e548a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sat, 30 Nov 2019 17:54:24 -0800 Subject: [PATCH 181/191] mm/sparse.c: mark populate_section_memmap as __meminit [ Upstream commit 030eab4f9ffb469344c10a46bc02c5149db0a2a9 ] Building the kernel on s390 with -Og produces the following warning: WARNING: vmlinux.o(.text+0x28dabe): Section mismatch in reference from the function populate_section_memmap() to the function .meminit.text:__populate_section_memmap() The function populate_section_memmap() references the function __meminit __populate_section_memmap(). This is often because populate_section_memmap lacks a __meminit annotation or the annotation of __populate_section_memmap is wrong. While -Og is not supported, in theory this might still happen with another compiler or on another architecture. So fix this by using the correct section annotations. [iii@linux.ibm.com: v2] Link: http://lkml.kernel.org/r/20191030151639.41486-1-iii@linux.ibm.com Link: http://lkml.kernel.org/r/20191028165549.14478-1-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Acked-by: David Hildenbrand Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/sparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index f6891c1992b1..c2c01b6330af 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -647,7 +647,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static struct page *populate_section_memmap(unsigned long pfn, +static struct page * __meminit populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { return __populate_section_memmap(pfn, nr_pages, nid, altmap); @@ -669,7 +669,7 @@ static void free_map_bootmem(struct page *memmap) vmemmap_free(start, end, NULL); } #else -struct page *populate_section_memmap(unsigned long pfn, +struct page * __meminit populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { struct page *page, *ret; From 50de69fd6e25a27998915cfd89aa95b362d6d189 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 26 Nov 2019 16:36:05 +0100 Subject: [PATCH 182/191] xen/blkback: Avoid unmapping unmapped grant pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ] For each I/O request, blkback first maps the foreign pages for the request to its local pages. If an allocation of a local page for the mapping fails, it should unmap every mapping already made for the request. However, blkback's handling mechanism for the allocation failure does not mark the remaining foreign pages as unmapped. Therefore, the unmap function merely tries to unmap every valid grant page for the request, including the pages not mapped due to the allocation failure. On a system that fails the allocation frequently, this problem leads to following kernel crash. [ 372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [ 372.012546] IP: [] gnttab_unmap_refs.part.7+0x1c/0x40 [ 372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0 [ 372.012562] Oops: 0002 [#1] SMP [ 372.012566] Modules linked in: act_police sch_ingress cls_u32 ... [ 372.012746] Call Trace: [ 372.012752] [] gnttab_unmap_refs+0x34/0x40 [ 372.012759] [] xen_blkbk_unmap+0x83/0x150 [xen_blkback] ... [ 372.012802] [] dispatch_rw_block_io+0x970/0x980 [xen_blkback] ... Decompressing Linux... Parsing ELF... done. Booting the kernel. [ 0.000000] Initializing cgroup subsys cpuset This commit fixes this problem by marking the grant pages of the given request that didn't mapped due to the allocation failure as invalid. Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings") Reviewed-by: David Woodhouse Reviewed-by: Maximilian Heyne Reviewed-by: Paul Durrant Reviewed-by: Roger Pau Monné Signed-off-by: SeongJae Park Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/xen-blkback/blkback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index fd1e19f1a49f..3666afa639d1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -936,6 +936,8 @@ next: out_of_memory: pr_alert("%s: out of memory\n", __func__); put_free_pages(ring, pages_to_gnt, segs_to_map); + for (i = last_map; i < num; i++) + pages[i]->handle = BLKBACK_INVALID_HANDLE; return -ENOMEM; } From 735e7a12a639c6c196ac1acf41f024f84281e491 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 4 Dec 2019 16:52:53 -0800 Subject: [PATCH 183/191] lib/ubsan: don't serialize UBSAN report [ Upstream commit ce5c31db3645b649a31044a4d8b6057f6c723702 ] At the moment, UBSAN report will be serialized using a spin_lock(). On RT-systems, spinlocks are turned to rt_spin_lock and may sleep. This will result to the following splat if the undefined behavior is in a context that can sleep: BUG: sleeping function called from invalid context at /src/linux/kernel/locking/rtmutex.c:968 in_atomic(): 1, irqs_disabled(): 128, pid: 3447, name: make 1 lock held by make/3447: #0: 000000009a966332 (&mm->mmap_sem){++++}, at: do_page_fault+0x140/0x4f8 irq event stamp: 6284 hardirqs last enabled at (6283): [] _raw_spin_unlock_irqrestore+0x90/0xa0 hardirqs last disabled at (6284): [] _raw_spin_lock_irqsave+0x30/0x78 softirqs last enabled at (2430): [] fpsimd_restore_current_state+0x60/0xe8 softirqs last disabled at (2427): [] fpsimd_restore_current_state+0x28/0xe8 Preemption disabled at: [] rt_mutex_futex_unlock+0x4c/0xb0 CPU: 3 PID: 3447 Comm: make Tainted: G W 5.2.14-rt7-01890-ge6e057589653 #911 Call trace: dump_backtrace+0x0/0x148 show_stack+0x14/0x20 dump_stack+0xbc/0x104 ___might_sleep+0x154/0x210 rt_spin_lock+0x68/0xa0 ubsan_prologue+0x30/0x68 handle_overflow+0x64/0xe0 __ubsan_handle_add_overflow+0x10/0x18 __lock_acquire+0x1c28/0x2a28 lock_acquire+0xf0/0x370 _raw_spin_lock_irqsave+0x58/0x78 rt_mutex_futex_unlock+0x4c/0xb0 rt_spin_unlock+0x28/0x70 get_page_from_freelist+0x428/0x2b60 __alloc_pages_nodemask+0x174/0x1708 alloc_pages_vma+0x1ac/0x238 __handle_mm_fault+0x4ac/0x10b0 handle_mm_fault+0x1d8/0x3b0 do_page_fault+0x1c8/0x4f8 do_translation_fault+0xb8/0xe0 do_mem_abort+0x3c/0x98 el0_da+0x20/0x24 The spin_lock() will protect against multiple CPUs to output a report together, I guess to prevent them from being interleaved. However, they can still interleave with other messages (and even splat from __might_sleep). So the lock usefulness seems pretty limited. Rather than trying to accomodate RT-system by switching to a raw_spin_lock(), the lock is now completely dropped. Link: http://lkml.kernel.org/r/20190920100835.14999-1-julien.grall@arm.com Signed-off-by: Julien Grall Reported-by: Andre Przywara Acked-by: Andrey Ryabinin Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/ubsan.c | 64 +++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 0c4681118fcd..f007a406f89c 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static DEFINE_SPINLOCK(report_lock); - -static void ubsan_prologue(struct source_location *location, - unsigned long *flags) +static void ubsan_prologue(struct source_location *location) { current->in_ubsan++; - spin_lock_irqsave(&report_lock, *flags); pr_err("========================================" "========================================\n"); print_source_location("UBSAN: Undefined behaviour in", location); } -static void ubsan_epilogue(unsigned long *flags) +static void ubsan_epilogue(void) { dump_stack(); pr_err("========================================" "========================================\n"); - spin_unlock_irqrestore(&report_lock, *flags); + current->in_ubsan--; } @@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs, { struct type_descriptor *type = data->type; - unsigned long flags; char lhs_val_str[VALUE_LENGTH]; char rhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs); val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs); @@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs, rhs_val_str, type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } void __ubsan_handle_add_overflow(struct overflow_data *data, @@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow); void __ubsan_handle_negate_overflow(struct overflow_data *data, void *old_val) { - unsigned long flags; char old_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val); pr_err("negation of %s cannot be represented in type %s:\n", old_val_str, data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_negate_overflow); @@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow); void __ubsan_handle_divrem_overflow(struct overflow_data *data, void *lhs, void *rhs) { - unsigned long flags; char rhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs); @@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, else pr_err("division by zero\n"); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], (void *)ptr, data->type->type_name); pr_err("which requires %ld byte alignment\n", data->alignment); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { - unsigned long flags; - if (suppress_report(data->location)) return; - ubsan_prologue(data->location, &flags); + ubsan_prologue(data->location); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); pr_err("for an object of type %s\n", data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, @@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index) { - unsigned long flags; char index_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(index_str, sizeof(index_str), data->index_type, index); pr_err("index %s is out of range for type %s\n", index_str, data->array_type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_out_of_bounds); void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, void *lhs, void *rhs) { - unsigned long flags; struct type_descriptor *rhs_type = data->rhs_type; struct type_descriptor *lhs_type = data->lhs_type; char rhs_str[VALUE_LENGTH]; @@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, if (suppress_report(&data->location)) goto out; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs); val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs); @@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, lhs_str, rhs_str, lhs_type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); out: user_access_restore(ua_flags); } @@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { - unsigned long flags; - - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); pr_err("calling __builtin_unreachable()\n"); - ubsan_epilogue(&flags); + ubsan_epilogue(); panic("can't return from __builtin_unreachable()"); } EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); @@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); void __ubsan_handle_load_invalid_value(struct invalid_value_data *data, void *val) { - unsigned long flags; char val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(&data->location); val_to_string(val_str, sizeof(val_str), data->type, val); pr_err("load of value %s is not a valid value for type %s\n", val_str, data->type->type_name); - ubsan_epilogue(&flags); + ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); From 618ab2e3c1ee713517b881b5038a9a1cef51aa7f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 10 Dec 2019 10:09:45 +0100 Subject: [PATCH 184/191] efi: Don't attempt to map RCI2 config table if it doesn't exist [ Upstream commit a470552ee8965da0fe6fd4df0aa39c4cda652c7c ] Commit: 1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs") ... added support for a Dell specific UEFI configuration table, but failed to take into account that mapping the table should not be attempted unless the table actually exists. If it doesn't exist, the code usually fails silently unless pr_debug() prints are enabled. However, on 32-bit PAE x86, the splat below is produced due to the attempt to map the placeholder value EFI_INVALID_TABLE_ADDR which we use for non-existing UEFI configuration tables, and which equals ULONG_MAX. memremap attempted on mixed range 0x00000000ffffffff size: 0x1e WARNING: CPU: 1 PID: 1 at kernel/iomem.c:81 memremap+0x1a3/0x1c0 Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.4.2-smp-mine #1 Hardware name: Hewlett-Packard HP Z400 Workstation/0B4Ch, BIOS 786G3 v03.61 03/05/2018 EIP: memremap+0x1a3/0x1c0 ... Call Trace: ? map_properties+0x473/0x473 ? efi_rci2_sysfs_init+0x2c/0x154 ? map_properties+0x473/0x473 ? do_one_initcall+0x49/0x1d4 ? parse_args+0x1e8/0x2a0 ? do_early_param+0x7a/0x7a ? kernel_init_freeable+0x139/0x1c2 ? rest_init+0x8e/0x8e ? kernel_init+0xd/0xf2 ? ret_from_fork+0x2e/0x38 Fix this by checking whether the table exists before attempting to map it. Reported-by: Richard Narron Tested-by: Richard Narron Signed-off-by: Ard Biesheuvel Cc: linux-efi@vger.kernel.org Fixes: 1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs") Link: https://lkml.kernel.org/r/20191210090945.11501-2-ardb@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/rci2-table.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c index 76b0c354a027..de1a9a1f9f14 100644 --- a/drivers/firmware/efi/rci2-table.c +++ b/drivers/firmware/efi/rci2-table.c @@ -81,6 +81,9 @@ static int __init efi_rci2_sysfs_init(void) struct kobject *tables_kobj; int ret = -ENOMEM; + if (rci2_table_phys == EFI_INVALID_TABLE_ADDR) + return 0; + rci2_base = memremap(rci2_table_phys, sizeof(struct rci2_table_global_hdr), MEMREMAP_WB); From 2c446b34afb071add598c3a8acff53b13c9e5df9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 5 Dec 2019 17:28:52 +0300 Subject: [PATCH 185/191] perf/x86/intel/bts: Fix the use of page_private() [ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ] Commit 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver") brought in a warning with the BTS buffer initialization that is easily tripped with (assuming KPTI is disabled): instantly throwing: > ------------[ cut here ]------------ > WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0 > Modules linked in: > CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904 > RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0 > Call Trace: > rb_alloc_aux+0x339/0x550 > perf_mmap+0x607/0xc70 > mmap_region+0x76b/0xbd0 ... It appears to assume (for lost raisins) that PagePrivate() is set, while later it actually tests for PagePrivate() before using page_private(). Make it consistent and always check PagePrivate() before using page_private(). Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver") Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Jiri Olsa Cc: Vince Weaver Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/bts.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 5ee3fed881d3..741540d849f3 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,9 +63,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); From 8f8e806c51925abae56007ad2560a3356a6de52e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Dec 2019 18:51:03 -0800 Subject: [PATCH 186/191] net: annotate lockless accesses to sk->sk_pacing_shift [ Upstream commit 7c68fa2bddda6d942bd387c9ba5b4300737fd991 ] sk->sk_pacing_shift can be read and written without lock synchronization. This patch adds annotations to document this fact and avoid future syzbot complains. This might also avoid unexpected false sharing in sk_pacing_shift_update(), as the compiler could remove the conditional check and always write over sk->sk_pacing_shift : if (sk->sk_pacing_shift != val) sk->sk_pacing_shift = val; Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 4 ++-- net/core/sock.c | 2 +- net/ipv4/tcp_bbr.c | 3 ++- net/ipv4/tcp_output.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index e09e2886a836..6c5a3809483e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2589,9 +2589,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) */ static inline void sk_pacing_shift_update(struct sock *sk, int val) { - if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val) return; - sk->sk_pacing_shift = val; + WRITE_ONCE(sk->sk_pacing_shift, val); } /* if a socket is bound to a device, check that the given device diff --git a/net/core/sock.c b/net/core/sock.c index ac78a570e43a..b4d1112174c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2918,7 +2918,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL; - sk->sk_pacing_shift = 10; + WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; sk_rx_queue_clear(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 32772d6ded4e..a6545ef0d27b 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk) /* Sort of tcp_tso_autosize() but ignoring * driver provided sk_gso_max_size. */ - bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift, + bytes = min_t(unsigned long, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0269584e9cf7..e4ba915c4bb5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1728,7 +1728,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, u32 bytes, segs; bytes = min_t(unsigned long, - sk->sk_pacing_rate >> sk->sk_pacing_shift, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -2263,7 +2263,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> sk->sk_pacing_shift); + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); From 9fa51bbdf7c241f969d3895e07ab45657fb5cc10 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 22 Dec 2019 11:25:27 +0000 Subject: [PATCH 187/191] hsr: avoid debugfs warning message when module is remove [ Upstream commit 84bb59d773853bc2dda2ac1ef8474c40eb33a3c6 ] When hsr module is being removed, debugfs_remove() is called to remove both debugfs directory and file. When module is being removed, module state is changed to MODULE_STATE_GOING then exit() is called. At this moment, module couldn't be held so try_module_get() will be failed. debugfs's open() callback tries to hold the module if .owner is existing. If it fails, warning message is printed. CPU0 CPU1 delete_module() try_stop_module() hsr_exit() open() <-- WARNING debugfs_remove() In order to avoid the warning message, this patch makes hsr module does not set .owner. Unsetting .owner is safe because these are protected by inode_lock(). Test commands: #SHELL1 ip link add dummy0 type dummy ip link add dummy1 type dummy while : do ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 modprobe -rv hsr done #SHELL2 while : do cat /sys/kernel/debug/hsr0/node_table done Splat looks like: [ 101.223783][ T1271] ------------[ cut here ]------------ [ 101.230309][ T1271] debugfs file owner did not clean up at exit: node_table [ 101.230380][ T1271] WARNING: CPU: 3 PID: 1271 at fs/debugfs/file.c:309 full_proxy_open+0x10f/0x650 [ 101.233153][ T1271] Modules linked in: hsr(-) dummy veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_d] [ 101.237112][ T1271] CPU: 3 PID: 1271 Comm: cat Tainted: G W 5.5.0-rc1+ #204 [ 101.238270][ T1271] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 101.240379][ T1271] RIP: 0010:full_proxy_open+0x10f/0x650 [ 101.241166][ T1271] Code: 48 c1 ea 03 80 3c 02 00 0f 85 c1 04 00 00 49 8b 3c 24 e8 04 86 7e ff 84 c0 75 2d 4c 8 [ 101.251985][ T1271] RSP: 0018:ffff8880ca22fa38 EFLAGS: 00010286 [ 101.273355][ T1271] RAX: dffffc0000000008 RBX: ffff8880cc6e6200 RCX: 0000000000000000 [ 101.274466][ T1271] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8880c4dd5c14 [ 101.275581][ T1271] RBP: 0000000000000000 R08: fffffbfff2922f5d R09: 0000000000000000 [ 101.276733][ T1271] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffffc0551bc0 [ 101.277853][ T1271] R13: ffff8880c4059a48 R14: ffff8880be50a5e0 R15: ffffffff941adaa0 [ 101.278956][ T1271] FS: 00007f8871cda540(0000) GS:ffff8880da800000(0000) knlGS:0000000000000000 [ 101.280216][ T1271] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 101.282832][ T1271] CR2: 00007f88717cfd10 CR3: 00000000b9440005 CR4: 00000000000606e0 [ 101.283974][ T1271] Call Trace: [ 101.285328][ T1271] do_dentry_open+0x63c/0xf50 [ 101.286077][ T1271] ? open_proxy_open+0x270/0x270 [ 101.288271][ T1271] ? __x64_sys_fchdir+0x180/0x180 [ 101.288987][ T1271] ? inode_permission+0x65/0x390 [ 101.289682][ T1271] path_openat+0x701/0x2810 [ 101.290294][ T1271] ? path_lookupat+0x880/0x880 [ 101.290957][ T1271] ? check_chain_key+0x236/0x5d0 [ 101.291676][ T1271] ? __lock_acquire+0xdfe/0x3de0 [ 101.292358][ T1271] ? sched_clock+0x5/0x10 [ 101.292962][ T1271] ? sched_clock_cpu+0x18/0x170 [ 101.293644][ T1271] ? find_held_lock+0x39/0x1d0 [ 101.305616][ T1271] do_filp_open+0x17a/0x270 [ 101.306061][ T1271] ? may_open_dev+0xc0/0xc0 [ ... ] Fixes: fc4ecaeebd26 ("net: hsr: add debugfs support for display node list") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/hsr/hsr_debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 94447974a3c0..6135706f03d5 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -64,7 +64,6 @@ hsr_node_table_open(struct inode *inode, struct file *filp) } static const struct file_operations hsr_fops = { - .owner = THIS_MODULE, .open = hsr_node_table_open, .read = seq_read, .llseek = seq_lseek, From fe974fba4ee5da900e30d2d5b9e80b4a4e137186 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 22 Dec 2019 11:26:15 +0000 Subject: [PATCH 188/191] hsr: fix error handling routine in hsr_dev_finalize() [ Upstream commit 1d19e2d53e8ed9e4c98fc95e0067492cda7288b0 ] hsr_dev_finalize() is called to create new hsr interface. There are some wrong error handling codes. 1. wrong checking return value of debugfs_create_{dir/file}. These function doesn't return NULL. If error occurs in there, it returns error pointer. So, it should check error pointer instead of NULL. 2. It doesn't unregister interface if it fails to setup hsr interface. If it fails to initialize hsr interface after register_netdevice(), it should call unregister_netdevice(). 3. Ignore failure of creation of debugfs If creating of debugfs dir and file is failed, creating hsr interface will be failed. But debugfs doesn't affect actual logic of hsr module. So, ignoring this is more correct and this behavior is more general. Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/hsr/hsr_debugfs.c | 15 +++++++-------- net/hsr/hsr_device.c | 19 ++++++++++--------- net/hsr/hsr_main.h | 11 ++++------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 6135706f03d5..6618a9d8e58e 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -77,15 +77,14 @@ static const struct file_operations hsr_fops = { * When debugfs is configured this routine sets up the node_table file per * hsr device for dumping the node_table entries */ -int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) +void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) { - int rc = -1; struct dentry *de = NULL; de = debugfs_create_dir(hsr_dev->name, NULL); - if (!de) { + if (IS_ERR(de)) { pr_err("Cannot create hsr debugfs root\n"); - return rc; + return; } priv->node_tbl_root = de; @@ -93,13 +92,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) de = debugfs_create_file("node_table", S_IFREG | 0444, priv->node_tbl_root, priv, &hsr_fops); - if (!de) { + if (IS_ERR(de)) { pr_err("Cannot create hsr node_table directory\n"); - return rc; + debugfs_remove(priv->node_tbl_root); + priv->node_tbl_root = NULL; + return; } priv->node_tbl_file = de; - - return 0; } /* hsr_debugfs_term - Tear down debugfs intrastructure diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b01e1bae4ddc..e73549075a03 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -477,30 +477,31 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - goto err_add_port; + goto err_add_master; res = register_netdevice(hsr_dev); if (res) - goto fail; + goto err_unregister; res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A); if (res) - goto fail; + goto err_add_slaves; + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B); if (res) - goto fail; + goto err_add_slaves; + hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); - res = hsr_debugfs_init(hsr, hsr_dev); - if (res) - goto fail; return 0; -fail: +err_add_slaves: + unregister_netdevice(hsr_dev); +err_unregister: list_for_each_entry_safe(port, tmp, &hsr->ports, port_list) hsr_del_port(port); -err_add_port: +err_add_master: hsr_del_self_node(&hsr->self_node_db); return res; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 96fac696a1e1..acab9c353a49 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -184,15 +184,12 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) } #if IS_ENABLED(CONFIG_DEBUG_FS) -int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); +void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); void hsr_debugfs_term(struct hsr_priv *priv); #else -static inline int hsr_debugfs_init(struct hsr_priv *priv, - struct net_device *hsr_dev) -{ - return 0; -} - +static inline void hsr_debugfs_init(struct hsr_priv *priv, + struct net_device *hsr_dev) +{} static inline void hsr_debugfs_term(struct hsr_priv *priv) {} #endif From 0fc906e60301f79c5e49243e81754691d4170f02 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 22 Dec 2019 11:26:54 +0000 Subject: [PATCH 189/191] hsr: fix a race condition in node list insertion and deletion [ Upstream commit 92a35678ec075100ce666a2fb6969151affb0e5d ] hsr nodes are protected by RCU and there is no write side lock. But node insertions and deletions could be being operated concurrently. So write side locking is needed. Test commands: ip netns add nst ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link set veth1 netns nst ip link set veth3 netns nst ip link set veth0 up ip link set veth2 up ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip netns exec nst ip link set veth1 up ip netns exec nst ip link set veth3 up ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3 ip netns exec nst ip a a 192.168.100.2/24 dev hsr1 ip netns exec nst ip link set hsr1 up for i in {0..9} do for j in {0..9} do for k in {0..9} do for l in {0..9} do arping 192.168.100.2 -I hsr0 -s 00:01:3$i:4$j:5$k:6$l -c1 & done done done done Splat looks like: [ 236.066091][ T3286] list_add corruption. next->prev should be prev (ffff8880a5940300), but was ffff8880a5940d0. [ 236.069617][ T3286] ------------[ cut here ]------------ [ 236.070545][ T3286] kernel BUG at lib/list_debug.c:25! [ 236.071391][ T3286] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 236.072343][ T3286] CPU: 0 PID: 3286 Comm: arping Tainted: G W 5.5.0-rc1+ #209 [ 236.073463][ T3286] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 236.074695][ T3286] RIP: 0010:__list_add_valid+0x74/0xd0 [ 236.075499][ T3286] Code: 48 39 da 75 27 48 39 f5 74 36 48 39 dd 74 31 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 b [ 236.078277][ T3286] RSP: 0018:ffff8880aaa97648 EFLAGS: 00010286 [ 236.086991][ T3286] RAX: 0000000000000075 RBX: ffff8880d4624c20 RCX: 0000000000000000 [ 236.088000][ T3286] RDX: 0000000000000075 RSI: 0000000000000008 RDI: ffffed1015552ebf [ 236.098897][ T3286] RBP: ffff88809b53d200 R08: ffffed101b3c04f9 R09: ffffed101b3c04f9 [ 236.099960][ T3286] R10: 00000000308769a1 R11: ffffed101b3c04f8 R12: ffff8880d4624c28 [ 236.100974][ T3286] R13: ffff8880d4624c20 R14: 0000000040310100 R15: ffff8880ce17ee02 [ 236.138967][ T3286] FS: 00007f23479fa680(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 236.144852][ T3286] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 236.145720][ T3286] CR2: 00007f4a14bab210 CR3: 00000000a61c6001 CR4: 00000000000606f0 [ 236.146776][ T3286] Call Trace: [ 236.147222][ T3286] hsr_add_node+0x314/0x490 [hsr] [ 236.153633][ T3286] hsr_forward_skb+0x2b6/0x1bc0 [hsr] [ 236.154362][ T3286] ? rcu_read_lock_sched_held+0x90/0xc0 [ 236.155091][ T3286] ? rcu_read_lock_bh_held+0xa0/0xa0 [ 236.156607][ T3286] hsr_dev_xmit+0x70/0xd0 [hsr] [ 236.157254][ T3286] dev_hard_start_xmit+0x160/0x740 [ 236.157941][ T3286] __dev_queue_xmit+0x1961/0x2e10 [ 236.158565][ T3286] ? netdev_core_pick_tx+0x2e0/0x2e0 [ ... ] Reported-by: syzbot+3924327f9ad5f4d2b343@syzkaller.appspotmail.com Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/hsr/hsr_device.c | 7 ++-- net/hsr/hsr_framereg.c | 73 ++++++++++++++++++++++++++---------------- net/hsr/hsr_framereg.h | 6 ++-- net/hsr/hsr_main.c | 2 +- net/hsr/hsr_main.h | 5 +-- 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e73549075a03..62c03f0d0079 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -368,7 +368,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); - hsr_del_self_node(&hsr->self_node_db); + hsr_del_self_node(hsr); hsr_del_nodes(&hsr->node_db); } @@ -440,11 +440,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); INIT_LIST_HEAD(&hsr->self_node_db); + spin_lock_init(&hsr->list_lock); ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr, + res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) return res; @@ -502,7 +503,7 @@ err_unregister: list_for_each_entry_safe(port, tmp, &hsr->ports, port_list) hsr_del_port(port); err_add_master: - hsr_del_self_node(&hsr->self_node_db); + hsr_del_self_node(hsr); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 292be446007b..27dc65d7de67 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ -int hsr_create_self_node(struct list_head *self_node_db, +int hsr_create_self_node(struct hsr_priv *hsr, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]) { + struct list_head *self_node_db = &hsr->self_node_db; struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db, ether_addr_copy(node->macaddress_A, addr_a); ether_addr_copy(node->macaddress_B, addr_b); - rcu_read_lock(); + spin_lock_bh(&hsr->list_lock); oldnode = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); if (oldnode) { list_replace_rcu(&oldnode->mac_list, &node->mac_list); - rcu_read_unlock(); - synchronize_rcu(); - kfree(oldnode); + spin_unlock_bh(&hsr->list_lock); + kfree_rcu(oldnode, rcu_head); } else { - rcu_read_unlock(); list_add_tail_rcu(&node->mac_list, self_node_db); + spin_unlock_bh(&hsr->list_lock); } return 0; } -void hsr_del_self_node(struct list_head *self_node_db) +void hsr_del_self_node(struct hsr_priv *hsr) { + struct list_head *self_node_db = &hsr->self_node_db; struct hsr_node *node; - rcu_read_lock(); + spin_lock_bh(&hsr->list_lock); node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); - rcu_read_unlock(); if (node) { list_del_rcu(&node->mac_list); - kfree(node); + kfree_rcu(node, rcu_head); } + spin_unlock_bh(&hsr->list_lock); } void hsr_del_nodes(struct list_head *node_db) @@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db) * seq_out is used to initialize filtering of outgoing duplicate frames * originating from the newly added node. */ -struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], - u16 seq_out) +static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, + struct list_head *node_db, + unsigned char addr[], + u16 seq_out) { - struct hsr_node *node; + struct hsr_node *new_node, *node; unsigned long now; int i; - node = kzalloc(sizeof(*node), GFP_ATOMIC); - if (!node) + new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC); + if (!new_node) return NULL; - ether_addr_copy(node->macaddress_A, addr); + ether_addr_copy(new_node->macaddress_A, addr); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; for (i = 0; i < HSR_PT_PORTS; i++) - node->time_in[i] = now; + new_node->time_in[i] = now; for (i = 0; i < HSR_PT_PORTS; i++) - node->seq_out[i] = seq_out; - - list_add_tail_rcu(&node->mac_list, node_db); + new_node->seq_out[i] = seq_out; + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->macaddress_A, addr)) + goto out; + if (ether_addr_equal(node->macaddress_B, addr)) + goto out; + } + list_add_tail_rcu(&new_node->mac_list, node_db); + spin_unlock_bh(&hsr->list_lock); + return new_node; +out: + spin_unlock_bh(&hsr->list_lock); + kfree(new_node); return node; } @@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup) { struct list_head *node_db = &port->hsr->node_db; + struct hsr_priv *hsr = port->hsr; struct hsr_node *node; struct ethhdr *ethhdr; u16 seq_out; @@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, seq_out = HSR_SEQNR_START; } - return hsr_add_node(node_db, ethhdr->h_source, seq_out); + return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out); } /* Use the Supervision frame's info about an eventual macaddress_B for merging @@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port_rcv) { - struct ethhdr *ethhdr; - struct hsr_node *node_real; + struct hsr_priv *hsr = port_rcv->hsr; struct hsr_sup_payload *hsr_sp; + struct hsr_node *node_real; struct list_head *node_db; + struct ethhdr *ethhdr; int i; ethhdr = (struct ethhdr *)skb_mac_header(skb); @@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(node_db, hsr_sp->macaddress_A, + node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, HSR_SEQNR_START - 1); if (!node_real) goto done; /* No mem */ @@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, } node_real->addr_B_port = port_rcv->type; + spin_lock_bh(&hsr->list_lock); list_del_rcu(&node_curr->mac_list); + spin_unlock_bh(&hsr->list_lock); kfree_rcu(node_curr, rcu_head); done: @@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); struct hsr_node *node; + struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; - rcu_read_lock(); - list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] * nor time_in[HSR_PT_SLAVE_B], will ever be updated for * the master port. Thus the master node will be repeatedly @@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t) kfree_rcu(node, rcu_head); } } - rcu_read_unlock(); + spin_unlock_bh(&hsr->list_lock); /* Restart timer */ mod_timer(&hsr->prune_timer, diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 89a3ce38151d..0f0fa12b4329 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -12,10 +12,8 @@ struct hsr_node; -void hsr_del_self_node(struct list_head *self_node_db); +void hsr_del_self_node(struct hsr_priv *hsr); void hsr_del_nodes(struct list_head *node_db); -struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], - u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup); void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, @@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, void hsr_prune_nodes(struct timer_list *t); -int hsr_create_self_node(struct list_head *self_node_db, +int hsr_create_self_node(struct hsr_priv *hsr, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index b9988a662ee1..6deb8fa8d5c8 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -64,7 +64,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, /* Make sure we recognize frames from ourselves in hsr_rcv() */ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); - res = hsr_create_self_node(&hsr->self_node_db, + res = hsr_create_self_node(hsr, master->dev->dev_addr, port ? port->dev->dev_addr : diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index acab9c353a49..9ec38e33b8b1 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -160,8 +160,9 @@ struct hsr_priv { int announce_count; u16 sequence_nr; u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ - u8 prot_version; /* Indicate if HSRv0 or HSRv1. */ - spinlock_t seqnr_lock; /* locking for sequence_nr */ + u8 prot_version; /* Indicate if HSRv0 or HSRv1. */ + spinlock_t seqnr_lock; /* locking for sequence_nr */ + spinlock_t list_lock; /* locking for node list */ unsigned char sup_multicast_addr[ETH_ALEN]; #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; From 3a43ea27478705a5b877b5069be95ce2cd644a27 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 4 Jan 2020 13:00:15 -0800 Subject: [PATCH 190/191] mm/hugetlb: defer freeing of huge pages if in non-task context [ Upstream commit c77c0a8ac4c522638a8242fcb9de9496e3cdbb2d ] The following lockdep splat was observed when a certain hugetlbfs test was run: ================================ WARNING: inconsistent lock state 4.18.0-159.el8.x86_64+debug #1 Tainted: G W --------- - - -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/30/0 [HC0[0]:SC1[1]:HE1:SE0] takes: ffffffff9acdc038 (hugetlb_lock){+.?.}, at: free_huge_page+0x36f/0xaa0 {SOFTIRQ-ON-W} state was registered at: lock_acquire+0x14f/0x3b0 _raw_spin_lock+0x30/0x70 __nr_hugepages_store_common+0x11b/0xb30 hugetlb_sysctl_handler_common+0x209/0x2d0 proc_sys_call_handler+0x37f/0x450 vfs_write+0x157/0x460 ksys_write+0xb8/0x170 do_syscall_64+0xa5/0x4d0 entry_SYSCALL_64_after_hwframe+0x6a/0xdf irq event stamp: 691296 hardirqs last enabled at (691296): [] _raw_spin_unlock_irqrestore+0x4b/0x60 hardirqs last disabled at (691295): [] _raw_spin_lock_irqsave+0x22/0x81 softirqs last enabled at (691284): [] irq_enter+0xc3/0xe0 softirqs last disabled at (691285): [] irq_exit+0x23e/0x2b0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(hugetlb_lock); lock(hugetlb_lock); *** DEADLOCK *** : Call Trace: __lock_acquire+0x146b/0x48c0 lock_acquire+0x14f/0x3b0 _raw_spin_lock+0x30/0x70 free_huge_page+0x36f/0xaa0 bio_check_pages_dirty+0x2fc/0x5c0 clone_endio+0x17f/0x670 [dm_mod] blk_update_request+0x276/0xe50 scsi_end_request+0x7b/0x6a0 scsi_io_completion+0x1c6/0x1570 blk_done_softirq+0x22e/0x350 __do_softirq+0x23d/0xad8 irq_exit+0x23e/0x2b0 do_IRQ+0x11a/0x200 common_interrupt+0xf/0xf Both the hugetbl_lock and the subpool lock can be acquired in free_huge_page(). One way to solve the problem is to make both locks irq-safe. However, Mike Kravetz had learned that the hugetlb_lock is held for a linear scan of ALL hugetlb pages during a cgroup reparentling operation. So it is just too long to have irq disabled unless we can break hugetbl_lock down into finer-grained locks with shorter lock hold times. Another alternative is to defer the freeing to a workqueue job. This patch implements the deferred freeing by adding a free_hpage_workfn() work function to do the actual freeing. The free_huge_page() call in a non-task context saves the page to be freed in the hpage_freelist linked list in a lockless manner using the llist APIs. The generic workqueue is used to process the work, but a dedicated workqueue can be used instead if it is desirable to have the huge page freed ASAP. Thanks to Kirill Tkhai for suggesting the use of llist APIs which simplfy the code. Link: http://lkml.kernel.org/r/20191217170331.30893-1-longman@redhat.com Signed-off-by: Waiman Long Reviewed-by: Mike Kravetz Acked-by: Davidlohr Bueso Acked-by: Michal Hocko Reviewed-by: Kirill Tkhai Cc: Aneesh Kumar K.V Cc: Matthew Wilcox Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b45a95363a84..e0afd582ca01 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1255,7 +1256,7 @@ static inline void ClearPageHugeTemporary(struct page *page) page[2].mapping = NULL; } -void free_huge_page(struct page *page) +static void __free_huge_page(struct page *page) { /* * Can't pass hstate in here because it is called from the @@ -1318,6 +1319,54 @@ void free_huge_page(struct page *page) spin_unlock(&hugetlb_lock); } +/* + * As free_huge_page() can be called from a non-task context, we have + * to defer the actual freeing in a workqueue to prevent potential + * hugetlb_lock deadlock. + * + * free_hpage_workfn() locklessly retrieves the linked list of pages to + * be freed and frees them one-by-one. As the page->mapping pointer is + * going to be cleared in __free_huge_page() anyway, it is reused as the + * llist_node structure of a lockless linked list of huge pages to be freed. + */ +static LLIST_HEAD(hpage_freelist); + +static void free_hpage_workfn(struct work_struct *work) +{ + struct llist_node *node; + struct page *page; + + node = llist_del_all(&hpage_freelist); + + while (node) { + page = container_of((struct address_space **)node, + struct page, mapping); + node = node->next; + __free_huge_page(page); + } +} +static DECLARE_WORK(free_hpage_work, free_hpage_workfn); + +void free_huge_page(struct page *page) +{ + /* + * Defer freeing if in non-task context to avoid hugetlb_lock deadlock. + */ + if (!in_task()) { + /* + * Only call schedule_work() if hpage_freelist is previously + * empty. Otherwise, schedule_work() had been called but the + * workfn hasn't retrieved the list yet. + */ + if (llist_add((struct llist_node *)&page->mapping, + &hpage_freelist)) + schedule_work(&free_hpage_work); + return; + } + + __free_huge_page(page); +} + static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { INIT_LIST_HEAD(&page->lru); From 506355630487f6d21c79aa1636d0f4feb1c65b6f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Jan 2020 10:20:08 +0100 Subject: [PATCH 191/191] Linux 5.4.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1adee1b06f3d..3ba15c3528c8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Kleptomaniac Octopus