Merge "msm: vidc: fix some BW and freq calculation issues"

This commit is contained in:
qctecmdr 2019-07-19 08:43:36 -07:00 committed by Gerrit - the friendly Code Review server
commit 5c854678be
4 changed files with 65 additions and 53 deletions

View File

@ -230,7 +230,7 @@ static unsigned long __calculate_decoder(struct vidc_bus_vote_data *d)
if (llc_ref_read_l2_cache_enabled) {
ddr.dpb_read = fp_div(ddr.dpb_read, is_h264_category ?
FP(1, 15, 100) : FP(1, 30, 100));
FP(1, 30, 100) : FP(1, 15, 100));
llc.dpb_read = dpb_total - ddr.dpb_write - ddr.dpb_read;
}

View File

@ -149,19 +149,17 @@ static unsigned long __calculate_decoder(struct vidc_bus_vote_data *d)
ddr.dpb_read = fp_div(fp_mult(ddr.dpb_read,
fp_mult(dpb_factor, motion_vector_complexity)),
dpb_read_compression_factor);
ddr.dpb_read += fp_div(ddr.dpb_read, FP_INT(2));
ddr.dpb_write = dpb_bpp == 8 ? y_bw_no_ubwc_8bpp : y_bw_no_ubwc_10bpp;
ddr.dpb_write = fp_div(fp_mult(ddr.dpb_write,
fp_mult(dpb_factor, dpb_write_factor)),
dpb_write_compression_factor);
ddr.dpb_write += fp_div(ddr.dpb_write, FP_INT(2));
dpb_total = ddr.dpb_read + ddr.dpb_write;
if (llc_ref_read_l2_cache_enabled) {
ddr.dpb_read = fp_div(ddr.dpb_read, is_h264_category ?
FP(1, 15, 100) : FP(1, 30, 100));
FP(1, 30, 100) : FP(1, 14, 100));
llc.dpb_read = dpb_total - ddr.dpb_write - ddr.dpb_read;
}
@ -310,6 +308,13 @@ static unsigned long __calculate_encoder(struct vidc_bus_vote_data *d)
rotation = d->rotation;
cropping_or_scaling = false;
vertical_tile_width = 960;
/*
* recon_write_bw_factor varies with resolution and bit depth; use
* 1.08 (1.075) here to cover the worst case.
* ref_y_read_bw_factor can similarly reach 1.375 in the worst case; use
* 1.3 here as an average-case value, which also partly offsets the
* worst-case assumption made for the UBWC CR factors.
*/
recon_write_bw_factor = FP(1, 8, 100);
ref_y_read_bw_factor = FP(1, 30, 100);
ref_cbcr_read_bw_factor = FP(1, 50, 100);
@ -393,20 +398,18 @@ static unsigned long __calculate_encoder(struct vidc_bus_vote_data *d)
ddr.ref_read_y = dpb_bpp == 8 ?
y_bw_no_ubwc_8bpp : y_bw_no_ubwc_10bpp;
if (b_frames_enabled)
ddr.ref_read_y = ddr.ref_read_y * 2;
ddr.ref_read_y = fp_div(ddr.ref_read_y, dpb_compression_factor);
ddr.ref_read_crcb = ddr.ref_read_y;
ddr.ref_read_crcb = fp_mult((ddr.ref_read_y / 2),
ref_cbcr_read_bw_factor);
if (width != vertical_tile_width) {
if (width > vertical_tile_width) {
ddr.ref_read_y = fp_mult(ddr.ref_read_y,
ref_y_read_bw_factor);
}
ddr.ref_read_crcb = fp_mult(ddr.ref_read_crcb, FP(0, 50, 100));
if (llc_ref_chroma_cache_enabled) {
total_ref_read_crcb = ddr.ref_read_crcb;
ddr.ref_read_crcb = fp_div(ddr.ref_read_crcb,
@ -415,12 +418,14 @@ static unsigned long __calculate_encoder(struct vidc_bus_vote_data *d)
}
ddr.ref_write = dpb_bpp == 8 ? y_bw_no_ubwc_8bpp : y_bw_no_ubwc_10bpp;
ddr.ref_write = fp_mult(ddr.ref_write,
(fp_div(FP(1, 50, 100), dpb_compression_factor)));
ddr.ref_write = fp_div(fp_mult(ddr.ref_write, FP(1, 50, 100)),
dpb_compression_factor);
ddr.ref_write_overlap = fp_div(fp_mult(ddr.ref_write,
(recon_write_bw_factor - FP_ONE)),
recon_write_bw_factor);
if (width > vertical_tile_width) {
ddr.ref_write_overlap = fp_mult(ddr.ref_write,
(recon_write_bw_factor - FP_ONE));
ddr.ref_write = fp_mult(ddr.ref_write, recon_write_bw_factor);
}
ddr.orig_read = dpb_bpp == 8 ? y_bw_no_ubwc_8bpp :
(original_compression_enabled ? y_bw_no_ubwc_10bpp :

View File

@ -14,7 +14,7 @@
/*
 * Limits for the UBWC complexity factor and the UBWC compression ratio.
 * NOTE(review): the "<< 16" shifts suggest fixed-point values with 16
 * fractional bits (i.e. 4.0, 1.0, 3.0 and 5.0) -- confirm against the
 * code that consumes these macros.
 * NOTE(review): MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO is defined twice below
 * with different values (3 << 16, then 5 << 16). This looks like the old
 * and new lines of a diff shown without +/- markers -- confirm which
 * definition is meant to remain.
 */
#define MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR (4 << 16)
#define MSM_VIDC_MIN_UBWC_COMPRESSION_RATIO (1 << 16)
#define MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO (3 << 16)
#define MSM_VIDC_MAX_UBWC_COMPRESSION_RATIO (5 << 16)
static int msm_vidc_decide_work_mode_ar50(struct msm_vidc_inst *inst);
static unsigned long msm_vidc_calc_freq_ar50(struct msm_vidc_inst *inst,
@ -783,6 +783,7 @@ static unsigned long msm_vidc_calc_freq_iris2(struct msm_vidc_inst *inst,
u32 filled_len)
{
u64 vsp_cycles = 0, vpp_cycles = 0, fw_cycles = 0, freq = 0;
u64 fw_vpp_cycles = 0;
u32 vpp_cycles_per_mb;
u32 mbs_per_second;
struct msm_vidc_core *core = NULL;
@ -806,15 +807,24 @@ static unsigned long msm_vidc_calc_freq_iris2(struct msm_vidc_inst *inst,
* between them.
*/
fw_cycles = fps * inst->core->resources.fw_cycles;
fw_vpp_cycles = fps * inst->core->resources.fw_vpp_cycles;
if (inst->session_type == MSM_VIDC_ENCODER) {
vpp_cycles_per_mb = inst->flags & VIDC_LOW_POWER ?
inst->clk_data.entry->low_power_cycles :
inst->clk_data.entry->vpp_cycles;
vpp_cycles = mbs_per_second * vpp_cycles_per_mb;
/* 21 / 20 is overhead factor */
vpp_cycles = (vpp_cycles * 21)/
(inst->clk_data.work_route * 20);
vpp_cycles = mbs_per_second * vpp_cycles_per_mb /
inst->clk_data.work_route;
/* 1.25 factor for IbP GOP structure */
if (msm_comm_g_ctrl_for_id(inst, V4L2_CID_MPEG_VIDEO_B_FRAMES))
vpp_cycles += vpp_cycles / 4;
/* 21 / 20 is minimum overhead factor */
vpp_cycles += max(vpp_cycles / 20, fw_vpp_cycles);
/* 1.01 is multi-pipe overhead */
if (inst->clk_data.work_route > 1)
vpp_cycles += vpp_cycles / 100;
vsp_cycles = mbs_per_second * inst->clk_data.entry->vsp_cycles;
@ -827,22 +837,19 @@ static unsigned long msm_vidc_calc_freq_iris2(struct msm_vidc_inst *inst,
}
vsp_cycles += ((u64)inst->clk_data.bitrate * vsp_factor_num) /
vsp_factor_den;
fw_cycles = fps * inst->core->resources.fw_cycles;
} else if (inst->session_type == MSM_VIDC_DECODER) {
vpp_cycles = mbs_per_second * inst->clk_data.entry->vpp_cycles;
/* 21 / 20 is overhead factor */
vpp_cycles = (vpp_cycles * 21)/
(inst->clk_data.work_route * 20);
vpp_cycles = mbs_per_second * inst->clk_data.entry->vpp_cycles /
inst->clk_data.work_route;
/* 21 / 20 is minimum overhead factor */
vpp_cycles += max(vpp_cycles / 20, fw_vpp_cycles);
/* 1.059 is multi-pipe overhead */
if (inst->clk_data.work_route > 1)
vpp_cycles += vpp_cycles * 59 / 1000;
vsp_cycles = mbs_per_second * inst->clk_data.entry->vsp_cycles;
/* vsp perf is about 0.5 bits/cycle */
vsp_cycles += ((fps * filled_len * 8) * 10) / 5;
fw_cycles = fps * inst->core->resources.fw_cycles;
} else {
dprintk(VIDC_ERR, "Unknown session type = %s\n", __func__);
return msm_vidc_max_freq(inst->core);

View File

@ -65,28 +65,28 @@ static struct msm_vidc_codec_data default_codec_data[] = {
/*
 * Per-codec data table for lito (original note: update with lito data).
 * Each CODEC_ENTRY names a V4L2 pixel format, a session type
 * (MSM_VIDC_ENCODER or MSM_VIDC_DECODER) and three numeric values.
 * NOTE(review): the three numbers are presumably per-codec cycle counts
 * used by the clock/frequency calculation -- confirm against the
 * CODEC_ENTRY macro definition, which is not visible here.
 * NOTE(review): every format/session pair except the TME encoder entry is
 * listed twice, once with a first numeric value of 10 and once with 0.
 * This looks like the old and new lines of a diff shown without +/-
 * markers -- confirm which set is meant to remain.
 */
static struct msm_vidc_codec_data lito_codec_data[] = {
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_TME, MSM_VIDC_ENCODER, 0, 540, 540),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 0, 200, 200),
};
/*
 * Per-codec data table for Kona (original note: update with Kona data).
 * Each CODEC_ENTRY names a V4L2 pixel format, a session type
 * (MSM_VIDC_ENCODER or MSM_VIDC_DECODER) and three numeric values.
 * NOTE(review): the three numbers are presumably per-codec cycle counts
 * used by the clock/frequency calculation -- confirm against the
 * CODEC_ENTRY macro definition, which is not visible here.
 * NOTE(review): every format/session pair except the TME encoder entry is
 * listed twice, once with a first numeric value of 10 and once with 0.
 * This looks like the old and new lines of a diff shown without +/-
 * markers -- confirm which set is meant to remain.
 */
static struct msm_vidc_codec_data kona_codec_data[] = {
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_TME, MSM_VIDC_ENCODER, 0, 540, 540),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 0, 200, 200),
};
/* Update with SM6150 data */
@ -104,15 +104,15 @@ static struct msm_vidc_codec_data sm6150_codec_data[] = {
/*
 * Per-codec data table for sm8150 (original note: update with 855 data).
 * Each CODEC_ENTRY names a V4L2 pixel format, a session type
 * (MSM_VIDC_ENCODER or MSM_VIDC_DECODER) and three numeric values.
 * NOTE(review): the three numbers are presumably per-codec cycle counts
 * used by the clock/frequency calculation -- confirm against the
 * CODEC_ENTRY macro definition, which is not visible here.
 * NOTE(review): every format/session pair except the TME encoder entry is
 * listed twice, once with a first numeric value of 10 and once with 0.
 * This looks like the old and new lines of a diff shown without +/-
 * markers -- confirm which set is meant to remain.
 */
static struct msm_vidc_codec_data sm8150_codec_data[] = {
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 10, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_ENCODER, 0, 675, 320),
CODEC_ENTRY(V4L2_PIX_FMT_TME, MSM_VIDC_ENCODER, 0, 540, 540),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 10, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_MPEG2, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_H264, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_HEVC, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP8, MSM_VIDC_DECODER, 0, 200, 200),
CODEC_ENTRY(V4L2_PIX_FMT_VP9, MSM_VIDC_DECODER, 0, 200, 200),
};
static struct msm_vidc_codec_data sdm845_codec_data[] = {