msm: kgsl: Remove nonsense around the a5xx and a6xx SMMU table update

All a5xx and a6xx targets use the CP_SMMU_TABLE_UPDATE opcode to switch the
pagetable, which handles all the needed locking, but we've seen fit to add a
bunch more around it, including keeping around a dummy NOP IB that is only
needed for synchronization on a3xx targets.

Stop the madness: remove all the extraneous locking and other baloney and
just do a straight-up SMMU_TABLE_UPDATE without fanfare.  And now that the
setstate buffer is only valid for a3xx, only create it for a3xx, thereby
removing another global buffer from the mix.

Change-Id: Ic0dedbad7694cad6d40fcee66b7864a454df1653
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
commit ef5440e7b8 (parent 0735007430)
Jordan Crouse, 2019-10-14 10:41:12 -06:00
9 changed files with 54 additions and 151 deletions
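
For orientation before the per-file hunks: after this patch, the a5xx/a6xx
pagetable switch boils down to a single CP_SMMU_TABLE_UPDATE packet with no
bracketing waits. A minimal sketch of the a5xx variant, reconstructed from
the adreno_iommu hunk below (cp_packet, lower_32_bits/upper_32_bits and the
kgsl types are the driver's own helpers; the function name here is
illustrative, not the verbatim driver code):

static unsigned int smmu_table_update_a5xx_sketch(
		struct adreno_device *adreno_dev, unsigned int *cmds_orig,
		u64 ttbr0, u32 contextidr)
{
	unsigned int *cmds = cmds_orig;

	/* The CP firmware switches the pagetable and flushes the caches
	 * on our behalf, so no wait_for_me/wait_for_idle bracketing */
	*cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 3);
	*cmds++ = lower_32_bits(ttbr0);		/* new TTBR0, low word */
	*cmds++ = upper_32_bits(ttbr0);		/* new TTBR0, high word */
	*cmds++ = contextidr;			/* new CONTEXTIDR */

	return cmds - cmds_orig;		/* dwords emitted */
}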


@@ -605,7 +605,6 @@ enum adreno_regs {
ADRENO_REG_CP_RB_RPTR_ADDR_HI,
ADRENO_REG_CP_RB_RPTR,
ADRENO_REG_CP_RB_WPTR,
ADRENO_REG_CP_CNTL,
ADRENO_REG_CP_ME_CNTL,
ADRENO_REG_CP_RB_CNTL,
ADRENO_REG_CP_IB1_BASE,


@@ -1198,7 +1198,6 @@ static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR),
ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A3XX_CP_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE),


@@ -2393,7 +2393,6 @@ static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
A5XX_CP_RB_RPTR_ADDR_HI),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR),
ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A5XX_CP_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE),


@@ -2264,7 +2264,6 @@ static unsigned int a6xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A6XX_CP_RB_WPTR),
ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A6XX_CP_RB_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A6XX_CP_SQE_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A6XX_CP_MISC_CNTL),
ADRENO_REG_DEFINE(ADRENO_REG_CP_HW_FAULT, A6XX_CP_HW_FAULT),
ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A6XX_CP_IB1_BASE),
ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A6XX_CP_IB1_BASE_HI),


@@ -116,66 +116,36 @@ static unsigned int a3xx_tlbiall(struct adreno_device *adreno_dev,
return cmds - start;
}
/**
* _adreno_iommu_add_idle_cmds - Add pm4 packets for GPU idle
* @adreno_dev - Pointer to device structure
* @cmds - Pointer to memory where idle commands need to be added
*/
static inline int _adreno_iommu_add_idle_cmds(struct adreno_device *adreno_dev,
unsigned int *cmds)
{
unsigned int *start = cmds;
cmds += cp_wait_for_idle(adreno_dev, cmds);
if (adreno_is_a3xx(adreno_dev))
cmds += cp_wait_for_me(adreno_dev, cmds);
return cmds - start;
}
/**
* adreno_iommu_set_apriv() - Generate commands to set/reset the APRIV
* @adreno_dev: Device on which the commands will execute
* @cmds: The memory pointer where commands are generated
* @set: If set then APRIV is set else reset
*
* Returns the number of commands generated
*/
static unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev,
unsigned int *cmds, int set)
{
unsigned int *cmds_orig = cmds;
/* adreno 3xx doesn't have the CP_CNTL.APRIV field */
if (adreno_is_a3xx(adreno_dev))
return 0;
/* Targets with apriv control do not need to explicitly set the bit */
if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
return 0;
cmds += cp_wait_for_idle(adreno_dev, cmds);
cmds += cp_wait_for_me(adreno_dev, cmds);
*cmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev,
ADRENO_REG_CP_CNTL), 1);
if (set)
*cmds++ = 1;
else
*cmds++ = 0;
return cmds - cmds_orig;
}
/* offset at which a nop command is placed in setstate */
#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024
static unsigned int _adreno_iommu_set_pt_v2_a3xx(struct kgsl_device *device,
unsigned int *cmds_orig,
u64 ttbr0, u32 contextidr)
{
struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned int *cmds = cmds_orig;
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
/*
* Adding an indirect buffer ensures that the prefetch stalls until
* the commands in indirect buffer have completed. We need to stall
* prefetch with a nop indirect buffer when updating pagetables
* because it provides stabler synchronization.
*/
cmds += cp_wait_for_me(adreno_dev, cmds);
if (!IS_ERR_OR_NULL(iommu->setstate)) {
*cmds++ = cp_mem_packet(adreno_dev,
CP_INDIRECT_BUFFER_PFE, 2, 1);
cmds += cp_gpuaddr(adreno_dev, cmds, iommu->setstate->gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
*cmds++ = 2;
}
cmds += cp_wait_for_idle(adreno_dev, cmds);
cmds += cp_wait_for_me(adreno_dev, cmds);
cmds += a3xx_vbif_lock(adreno_dev, cmds);
@@ -192,8 +162,11 @@ static unsigned int _adreno_iommu_set_pt_v2_a3xx(struct kgsl_device *device,
/* wait for me to finish the TLBI */
cmds += cp_wait_for_me(adreno_dev, cmds);
cmds += cp_wait_for_idle(adreno_dev, cmds);
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
/* Invalidate the state */
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
*cmds++ = 0x7fff;
return cmds - cmds_orig;
}
@@ -206,9 +179,6 @@ static unsigned int _adreno_iommu_set_pt_v2_a5xx(struct kgsl_device *device,
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned int *cmds = cmds_orig;
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
cmds += cp_wait_for_me(adreno_dev, cmds);
/* CP switches the pagetable and flushes the Caches */
*cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 3);
*cmds++ = lower_32_bits(ttbr0);
@@ -222,11 +192,6 @@
*cmds++ = upper_32_bits(ttbr0);
*cmds++ = contextidr;
/* release all commands with wait_for_me */
cmds += cp_wait_for_me(adreno_dev, cmds);
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
return cmds - cmds_orig;
}
@@ -239,9 +204,6 @@ static unsigned int _adreno_iommu_set_pt_v2_a6xx(struct kgsl_device *device,
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned int *cmds = cmds_orig;
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
cmds += cp_wait_for_me(adreno_dev, cmds);
/* CP switches the pagetable and flushes the Caches */
*cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 4);
*cmds++ = lower_32_bits(ttbr0);
@@ -256,11 +218,6 @@
*cmds++ = upper_32_bits(ttbr0);
*cmds++ = contextidr;
/* release all commands with wait_for_me */
cmds += cp_wait_for_me(adreno_dev, cmds);
cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
return cmds - cmds_orig;
}
@@ -281,42 +238,22 @@ unsigned int adreno_iommu_set_pt_generate_cmds(
struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
u64 ttbr0;
u32 contextidr;
unsigned int *cmds_orig = cmds;
ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt);
contextidr = kgsl_mmu_pagetable_get_contextidr(pt);
cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 1);
/*
* Adding an indirect buffer ensures that the prefetch stalls until
* the commands in indirect buffer have completed. We need to stall
* prefetch with a nop indirect buffer when updating pagetables
* because it provides stabler synchronization.
*/
cmds += cp_wait_for_me(adreno_dev, cmds);
*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
cmds += cp_gpuaddr(adreno_dev, cmds,
iommu->setstate->gpuaddr + KGSL_IOMMU_SETSTATE_NOP_OFFSET);
*cmds++ = 2;
cmds += cp_wait_for_idle(adreno_dev, cmds);
if (adreno_is_a6xx(adreno_dev))
cmds += _adreno_iommu_set_pt_v2_a6xx(device, cmds,
return _adreno_iommu_set_pt_v2_a6xx(device, cmds,
ttbr0, contextidr, rb,
ctx->cb_num);
else if (adreno_is_a5xx(adreno_dev))
cmds += _adreno_iommu_set_pt_v2_a5xx(device, cmds,
return _adreno_iommu_set_pt_v2_a5xx(device, cmds,
ttbr0, contextidr, rb);
else if (adreno_is_a3xx(adreno_dev))
cmds += _adreno_iommu_set_pt_v2_a3xx(device, cmds,
return _adreno_iommu_set_pt_v2_a3xx(device, cmds,
ttbr0, contextidr);
/* invalidate all base pointers */
cmds += cp_invalidate_state(adreno_dev, cmds);
cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 0);
return cmds - cmds_orig;
return 0;
}
/**
@@ -440,18 +377,23 @@ void adreno_iommu_init(struct adreno_device *adreno_dev)
if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE)
return;
if (!adreno_is_a3xx(adreno_dev))
return;
/*
* A nop is required in an indirect buffer when switching
* 3xx requires a nop in an indirect buffer when switching
* pagetables in-stream
*/
if (IS_ERR_OR_NULL(iommu->setstate)) {
iommu->setstate = kgsl_allocate_global(device, PAGE_SIZE,
KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate");
kgsl_sharedmem_writel(device, iommu->setstate,
KGSL_IOMMU_SETSTATE_NOP_OFFSET,
cp_packet(adreno_dev, CP_NOP, 1));
kgsl_sharedmem_writel(device, iommu->setstate,
KGSL_IOMMU_SETSTATE_NOP_OFFSET,
cp_type3_packet(CP_NOP, 1));
}
/* Enable guard page MMU feature for A3xx and A4xx targets only */
if (adreno_is_a3xx(adreno_dev))
device->mmu.features |= KGSL_MMU_NEED_GUARD_PAGE;
device->mmu.features |= KGSL_MMU_NEED_GUARD_PAGE;
}
/**

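Taken together, the hunks above make the NOP indirect buffer an a3xx-only
mechanism. A sketch of how the retained pieces fit, reconstructed from those
hunks (kgsl_allocate_global, kgsl_sharedmem_writel, cp_type3_packet,
cp_mem_packet and cp_gpuaddr are the driver's helpers; the two wrapper
functions are illustrative, not verbatim driver code):

/* a GPU-readonly page holding a single CP_NOP at a fixed offset */
#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024

static void a3xx_setstate_init_sketch(struct kgsl_device *device,
		struct kgsl_iommu *iommu)
{
	/* allocate once; kgsl_allocate_global() returns an ERR_PTR on failure */
	if (IS_ERR_OR_NULL(iommu->setstate))
		iommu->setstate = kgsl_allocate_global(device, PAGE_SIZE,
				KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate");

	/* plant the NOP packet that the stall IB will execute */
	kgsl_sharedmem_writel(device, iommu->setstate,
			KGSL_IOMMU_SETSTATE_NOP_OFFSET,
			cp_type3_packet(CP_NOP, 1));
}

static unsigned int a3xx_prefetch_stall_sketch(
		struct adreno_device *adreno_dev, struct kgsl_iommu *iommu,
		unsigned int *cmds)
{
	unsigned int *start = cmds;

	/* executing the NOP as an indirect buffer stalls CP prefetch
	 * until the IB retires, fencing the in-stream pagetable write */
	*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
	cmds += cp_gpuaddr(adreno_dev, cmds, iommu->setstate->gpuaddr +
			KGSL_IOMMU_SETSTATE_NOP_OFFSET);
	*cmds++ = 2;	/* IB size in dwords */

	return cmds - start;
}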

@@ -360,30 +360,6 @@ static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev,
return cmds - start;
}
/**
* cp_invalidate_state - common function for invalidating cp
* state
* @adreno_dev: The adreno device
* @cmds: command pointer to add gpuaddr
*/
static inline uint cp_invalidate_state(struct adreno_device *adreno_dev,
uint *cmds)
{
uint *start = cmds;
if (ADRENO_GPUREV(adreno_dev) < 500) {
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
*cmds++ = 0x7fff;
} else {
*cmds++ = cp_type7_packet(CP_SET_DRAW_STATE, 3);
*cmds++ = 0x40000;
*cmds++ = 0;
*cmds++ = 0;
}
return cmds - start;
}
static inline u32 cp_protected_mode(struct adreno_device *adreno_dev,
u32 *cmds, int on)
{


@@ -208,18 +208,6 @@ static inline void parse_ib(struct kgsl_device *device,
}
static inline bool iommu_is_setstate_addr(struct kgsl_device *device,
uint64_t gpuaddr, uint64_t size)
{
struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
return false;
return kgsl_gpuaddr_in_memdesc(iommu->setstate, gpuaddr,
size);
}
static void dump_all_ibs(struct kgsl_device *device,
struct adreno_ringbuffer *rb,
struct kgsl_snapshot *snapshot)
@@ -227,6 +215,7 @@ static void dump_all_ibs(struct kgsl_device *device,
int index = 0;
unsigned int *rbptr;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
rbptr = rb->buffer_desc->hostptr;
@@ -248,7 +237,8 @@ static void dump_all_ibs(struct kgsl_device *device,
}
/* Don't parse known global IBs */
if (iommu_is_setstate_addr(device, ibaddr, ibsize))
if (kgsl_gpuaddr_in_memdesc(iommu->setstate,
ibaddr, ibsize))
continue;
if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup,
@@ -384,6 +374,7 @@ static void snapshot_rb_ibs(struct kgsl_device *device,
parse_ibs = 0;
if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) {
struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
uint64_t ibaddr;
uint64_t ibsize;
@@ -399,7 +390,8 @@ static void snapshot_rb_ibs(struct kgsl_device *device,
index = (index + 1) % KGSL_RB_DWORDS;
/* Don't parse known global IBs */
if (iommu_is_setstate_addr(device, ibaddr, ibsize))
if (kgsl_gpuaddr_in_memdesc(iommu->setstate,
ibaddr, ibsize))
continue;
if (kgsl_gpuaddr_in_memdesc(adreno_dev->pwron_fixup,


@@ -1209,7 +1209,7 @@ static int kgsl_iommu_init(struct kgsl_mmu *mmu)
struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
int status;
int status = 0;
mmu->features |= KGSL_MMU_PAGED;
@@ -1236,13 +1236,6 @@ static int kgsl_iommu_init(struct kgsl_mmu *mmu)
}
}
iommu->setstate = kgsl_allocate_global(device, PAGE_SIZE,
KGSL_MEMFLAGS_GPUREADONLY, 0, "setstate");
status = PTR_ERR_OR_ZERO(iommu->setstate);
if (status)
goto done;
device->qdss_desc = kgsl_allocate_global_fixed(device,
"qcom,gpu-qdss-stm", "gpu-qdss");


@@ -812,7 +812,11 @@ kgsl_sharedmem_writel(struct kgsl_device *device,
{
uint32_t *dst;
if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL))
/* Quietly return if the memdesc isn't valid */
if (IS_ERR_OR_NULL(memdesc))
return -EINVAL;
if (WARN_ON(memdesc->hostptr == NULL))
return -EINVAL;
WARN_ON(offsetbytes % sizeof(uint32_t) != 0);
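
One design note on the guard above: since setstate is now allocated only for
a3xx, iommu->setstate stays NULL on a5xx/a6xx, and a writel against it is an
expected event rather than a bug. A hypothetical caller (illustrative, not
from the patch) showing why the quiet -EINVAL beats the old WARN_ON:

static int setstate_refresh_sketch(struct kgsl_device *device,
		struct kgsl_iommu *iommu)
{
	/* On non-a3xx targets iommu->setstate was never allocated, so
	 * kgsl_sharedmem_writel() quietly returns -EINVAL here instead
	 * of tripping a WARN_ON backtrace on every init. */
	return kgsl_sharedmem_writel(device, iommu->setstate,
			KGSL_IOMMU_SETSTATE_NOP_OFFSET,
			cp_type3_packet(CP_NOP, 1));
}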