diff options
Diffstat (limited to 'drivers/gpu/drm/radeon')
40 files changed, 2823 insertions, 893 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index aebe008..6cae4f2 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -65,7 +65,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ - evergreen.o evergreen_cs.o + evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index cd0290f..df2b6f2 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -398,65 +398,76 @@ static void atombios_disable_ss(struct drm_crtc *crtc) union atom_enable_ss { - ENABLE_LVDS_SS_PARAMETERS legacy; + ENABLE_LVDS_SS_PARAMETERS lvds_ss; + ENABLE_LVDS_SS_PARAMETERS_V2 lvds_ss_2; ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION v1; + ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2 v2; }; -static void atombios_enable_ss(struct drm_crtc *crtc) +static void atombios_crtc_program_ss(struct drm_crtc *crtc, + int enable, + int pll_id, + struct radeon_atom_ss *ss) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - struct drm_encoder *encoder = NULL; - struct radeon_encoder *radeon_encoder = NULL; - struct radeon_encoder_atom_dig *dig = NULL; int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL); union atom_enable_ss args; - uint16_t percentage = 0; - uint8_t type = 0, step = 0, delay = 0, range = 0; - /* XXX add ss support for DCE4 */ - if (ASIC_IS_DCE4(rdev)) - return; + memset(&args, 0, sizeof(args)); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - radeon_encoder = to_radeon_encoder(encoder); - /* only enable spread spectrum on LVDS */ - if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - dig = radeon_encoder->enc_priv; - if (dig && dig->ss) { - percentage = dig->ss->percentage; - type = dig->ss->type; - step = dig->ss->step; - delay = dig->ss->delay; - range = dig->ss->range; - } else - return; - } else - return; + if (ASIC_IS_DCE4(rdev)) { + args.v2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); + args.v2.ucSpreadSpectrumType = ss->type; + switch (pll_id) { + case ATOM_PPLL1: + args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P1PLL; + args.v2.usSpreadSpectrumAmount = ss->amount; + args.v2.usSpreadSpectrumStep = ss->step; + break; + case ATOM_PPLL2: + args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P2PLL; + args.v2.usSpreadSpectrumAmount = ss->amount; + args.v2.usSpreadSpectrumStep = ss->step; break; + case ATOM_DCPLL: + args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_DCPLL; + args.v2.usSpreadSpectrumAmount = 0; + args.v2.usSpreadSpectrumStep = 0; + break; + case ATOM_PPLL_INVALID: + return; } - } - - if (!radeon_encoder) - return; - - memset(&args, 0, sizeof(args)); - if (ASIC_IS_AVIVO(rdev)) { - args.v1.usSpreadSpectrumPercentage = cpu_to_le16(percentage); - args.v1.ucSpreadSpectrumType = type; - args.v1.ucSpreadSpectrumStep = step; - args.v1.ucSpreadSpectrumDelay = delay; - args.v1.ucSpreadSpectrumRange = range; - args.v1.ucPpll = radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; - args.v1.ucEnable = ATOM_ENABLE; + args.v2.ucEnable = enable; + } else if (ASIC_IS_DCE3(rdev)) { + args.v1.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); + args.v1.ucSpreadSpectrumType = ss->type; + args.v1.ucSpreadSpectrumStep = ss->step; + args.v1.ucSpreadSpectrumDelay = ss->delay; + args.v1.ucSpreadSpectrumRange = ss->range; + args.v1.ucPpll = pll_id; + args.v1.ucEnable = enable; + } else if (ASIC_IS_AVIVO(rdev)) { + if (enable == ATOM_DISABLE) { + atombios_disable_ss(crtc); + return; + } + args.lvds_ss_2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); + args.lvds_ss_2.ucSpreadSpectrumType = ss->type; + args.lvds_ss_2.ucSpreadSpectrumStep = ss->step; + args.lvds_ss_2.ucSpreadSpectrumDelay = ss->delay; + args.lvds_ss_2.ucSpreadSpectrumRange = ss->range; + args.lvds_ss_2.ucEnable = enable; } else { - args.legacy.usSpreadSpectrumPercentage = cpu_to_le16(percentage); - args.legacy.ucSpreadSpectrumType = type; - args.legacy.ucSpreadSpectrumStepSize_Delay = (step & 3) << 2; - args.legacy.ucSpreadSpectrumStepSize_Delay |= (delay & 7) << 4; - args.legacy.ucEnable = ATOM_ENABLE; + if (enable == ATOM_DISABLE) { + atombios_disable_ss(crtc); + return; + } + args.lvds_ss.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage); + args.lvds_ss.ucSpreadSpectrumType = ss->type; + args.lvds_ss.ucSpreadSpectrumStepSize_Delay = (ss->step & 3) << 2; + args.lvds_ss.ucSpreadSpectrumStepSize_Delay |= (ss->delay & 7) << 4; + args.lvds_ss.ucEnable = enable; } atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } @@ -468,7 +479,9 @@ union adjust_pixel_clock { static u32 atombios_adjust_pll(struct drm_crtc *crtc, struct drm_display_mode *mode, - struct radeon_pll *pll) + struct radeon_pll *pll, + bool ss_enabled, + struct radeon_atom_ss *ss) { struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; @@ -482,19 +495,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, /* reset the pll flags */ pll->flags = 0; - /* select the PLL algo */ - if (ASIC_IS_AVIVO(rdev)) { - if (radeon_new_pll == 0) - pll->algo = PLL_ALGO_LEGACY; - else - pll->algo = PLL_ALGO_NEW; - } else { - if (radeon_new_pll == 1) - pll->algo = PLL_ALGO_NEW; - else - pll->algo = PLL_ALGO_LEGACY; - } - if (ASIC_IS_AVIVO(rdev)) { if ((rdev->family == CHIP_RS600) || (rdev->family == CHIP_RS690) || @@ -531,29 +531,22 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, } } + /* use recommended ref_div for ss */ + if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { + if (ss_enabled) { + if (ss->refdiv) { + pll->flags |= RADEON_PLL_USE_REF_DIV; + pll->reference_div = ss->refdiv; + } + } + } + if (ASIC_IS_AVIVO(rdev)) { /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */ if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1) adjusted_clock = mode->clock * 2; - if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT)) { - pll->algo = PLL_ALGO_LEGACY; + if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT)) pll->flags |= RADEON_PLL_PREFER_CLOSEST_LOWER; - } - /* There is some evidence (often anecdotal) that RV515/RV620 LVDS - * (on some boards at least) prefers the legacy algo. I'm not - * sure whether this should handled generically or on a - * case-by-case quirk basis. Both algos should work fine in the - * majority of cases. - */ - if ((radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) && - ((rdev->family == CHIP_RV515) || - (rdev->family == CHIP_RV620))) { - /* allow the user to overrride just in case */ - if (radeon_new_pll == 1) - pll->algo = PLL_ALGO_NEW; - else - pll->algo = PLL_ALGO_LEGACY; - } } else { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC) pll->flags |= RADEON_PLL_NO_ODD_POST_DIV; @@ -589,9 +582,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, args.v1.ucTransmitterID = radeon_encoder->encoder_id; args.v1.ucEncodeMode = encoder_mode; if (encoder_mode == ATOM_ENCODER_MODE_DP) { - /* may want to enable SS on DP eventually */ - /* args.v1.ucConfig |= - ADJUST_DISPLAY_CONFIG_SS_ENABLE;*/ + if (ss_enabled) + args.v1.ucConfig |= + ADJUST_DISPLAY_CONFIG_SS_ENABLE; } else if (encoder_mode == ATOM_ENCODER_MODE_LVDS) { args.v1.ucConfig |= ADJUST_DISPLAY_CONFIG_SS_ENABLE; @@ -608,11 +601,10 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, args.v3.sInput.ucDispPllConfig = 0; if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - if (encoder_mode == ATOM_ENCODER_MODE_DP) { - /* may want to enable SS on DP/eDP eventually */ - /*args.v3.sInput.ucDispPllConfig |= - DISPPLL_CONFIG_SS_ENABLE;*/ + if (ss_enabled) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_SS_ENABLE; args.v3.sInput.ucDispPllConfig |= DISPPLL_CONFIG_COHERENT_MODE; /* 16200 or 27000 */ @@ -632,17 +624,17 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, } } else if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { if (encoder_mode == ATOM_ENCODER_MODE_DP) { - /* may want to enable SS on DP/eDP eventually */ - /*args.v3.sInput.ucDispPllConfig |= - DISPPLL_CONFIG_SS_ENABLE;*/ + if (ss_enabled) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_SS_ENABLE; args.v3.sInput.ucDispPllConfig |= DISPPLL_CONFIG_COHERENT_MODE; /* 16200 or 27000 */ args.v3.sInput.usPixelClock = cpu_to_le16(dp_clock / 10); } else if (encoder_mode == ATOM_ENCODER_MODE_LVDS) { - /* want to enable SS on LVDS eventually */ - /*args.v3.sInput.ucDispPllConfig |= - DISPPLL_CONFIG_SS_ENABLE;*/ + if (ss_enabled) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_SS_ENABLE; } else { if (mode->clock > 165000) args.v3.sInput.ucDispPllConfig |= @@ -816,6 +808,8 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode struct radeon_pll *pll; u32 adjusted_clock; int encoder_mode = 0; + struct radeon_atom_ss ss; + bool ss_enabled = false; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { if (encoder->crtc == crtc) { @@ -842,25 +836,123 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode break; } + if (radeon_encoder->active_device & + (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) { + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = + radeon_get_connector_for_encoder(encoder); + struct radeon_connector *radeon_connector = + to_radeon_connector(connector); + struct radeon_connector_atom_dig *dig_connector = + radeon_connector->con_priv; + int dp_clock; + + switch (encoder_mode) { + case ATOM_ENCODER_MODE_DP: + /* DP/eDP */ + dp_clock = dig_connector->dp_clock / 10; + if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) { + if (ASIC_IS_DCE4(rdev)) + ss_enabled = + radeon_atombios_get_asic_ss_info(rdev, &ss, + dig->lcd_ss_id, + dp_clock); + else + ss_enabled = + radeon_atombios_get_ppll_ss_info(rdev, &ss, + dig->lcd_ss_id); + } else { + if (ASIC_IS_DCE4(rdev)) + ss_enabled = + radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_SS_ON_DP, + dp_clock); + else { + if (dp_clock == 16200) { + ss_enabled = + radeon_atombios_get_ppll_ss_info(rdev, &ss, + ATOM_DP_SS_ID2); + if (!ss_enabled) + ss_enabled = + radeon_atombios_get_ppll_ss_info(rdev, &ss, + ATOM_DP_SS_ID1); + } else + ss_enabled = + radeon_atombios_get_ppll_ss_info(rdev, &ss, + ATOM_DP_SS_ID1); + } + } + break; + case ATOM_ENCODER_MODE_LVDS: + if (ASIC_IS_DCE4(rdev)) + ss_enabled = radeon_atombios_get_asic_ss_info(rdev, &ss, + dig->lcd_ss_id, + mode->clock / 10); + else + ss_enabled = radeon_atombios_get_ppll_ss_info(rdev, &ss, + dig->lcd_ss_id); + break; + case ATOM_ENCODER_MODE_DVI: + if (ASIC_IS_DCE4(rdev)) + ss_enabled = + radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_SS_ON_TMDS, + mode->clock / 10); + break; + case ATOM_ENCODER_MODE_HDMI: + if (ASIC_IS_DCE4(rdev)) + ss_enabled = + radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_SS_ON_HDMI, + mode->clock / 10); + break; + default: + break; + } + } + /* adjust pixel clock as needed */ - adjusted_clock = atombios_adjust_pll(crtc, mode, pll); + adjusted_clock = atombios_adjust_pll(crtc, mode, pll, ss_enabled, &ss); radeon_compute_pll(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); + atombios_crtc_program_ss(crtc, ATOM_DISABLE, radeon_crtc->pll_id, &ss); + atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, encoder_mode, radeon_encoder->encoder_id, mode->clock, ref_div, fb_div, frac_fb_div, post_div); + if (ss_enabled) { + /* calculate ss amount and step size */ + if (ASIC_IS_DCE4(rdev)) { + u32 step_size; + u32 amount = (((fb_div * 10) + frac_fb_div) * ss.percentage) / 10000; + ss.amount = (amount / 10) & ATOM_PPLL_SS_AMOUNT_V2_FBDIV_MASK; + ss.amount |= ((amount - (ss.amount * 10)) << ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT) & + ATOM_PPLL_SS_AMOUNT_V2_NFRAC_MASK; + if (ss.type & ATOM_PPLL_SS_TYPE_V2_CENTRE_SPREAD) + step_size = (4 * amount * ref_div * (ss.rate * 2048)) / + (125 * 25 * pll->reference_freq / 100); + else + step_size = (2 * amount * ref_div * (ss.rate * 2048)) / + (125 * 25 * pll->reference_freq / 100); + ss.step = step_size; + } + + atombios_crtc_program_ss(crtc, ATOM_ENABLE, radeon_crtc->pll_id, &ss); + } } -static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) +static int evergreen_crtc_do_set_base(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, int atomic) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_framebuffer *radeon_fb; + struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; uint64_t fb_location; @@ -868,28 +960,43 @@ static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, int r; /* no fb bound */ - if (!crtc->fb) { + if (!atomic && !crtc->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - radeon_fb = to_radeon_framebuffer(crtc->fb); + if (atomic) { + radeon_fb = to_radeon_framebuffer(fb); + target_fb = fb; + } + else { + radeon_fb = to_radeon_framebuffer(crtc->fb); + target_fb = crtc->fb; + } - /* Pin framebuffer & get tilling informations */ + /* If atomic, assume fb object is pinned & idle & fenced and + * just update base pointers + */ obj = radeon_fb->obj; rbo = obj->driver_private; r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; - r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); - if (unlikely(r != 0)) { - radeon_bo_unreserve(rbo); - return -EINVAL; + + if (atomic) + fb_location = radeon_bo_gpu_offset(rbo); + else { + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + return -EINVAL; + } } + radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); radeon_bo_unreserve(rbo); - switch (crtc->fb->bits_per_pixel) { + switch (target_fb->bits_per_pixel) { case 8: fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_8BPP) | EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_INDEXED)); @@ -909,7 +1016,7 @@ static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, break; default: DRM_ERROR("Unsupported screen depth %d\n", - crtc->fb->bits_per_pixel); + target_fb->bits_per_pixel); return -EINVAL; } @@ -955,10 +1062,10 @@ static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0); WREG32(EVERGREEN_GRPH_X_START + radeon_crtc->crtc_offset, 0); WREG32(EVERGREEN_GRPH_Y_START + radeon_crtc->crtc_offset, 0); - WREG32(EVERGREEN_GRPH_X_END + radeon_crtc->crtc_offset, crtc->fb->width); - WREG32(EVERGREEN_GRPH_Y_END + radeon_crtc->crtc_offset, crtc->fb->height); + WREG32(EVERGREEN_GRPH_X_END + radeon_crtc->crtc_offset, target_fb->width); + WREG32(EVERGREEN_GRPH_Y_END + radeon_crtc->crtc_offset, target_fb->height); - fb_pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8); + fb_pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); WREG32(EVERGREEN_GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels); WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 1); @@ -977,8 +1084,8 @@ static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, else WREG32(EVERGREEN_DATA_FORMAT + radeon_crtc->crtc_offset, 0); - if (old_fb && old_fb != crtc->fb) { - radeon_fb = to_radeon_framebuffer(old_fb); + if (!atomic && fb && fb != crtc->fb) { + radeon_fb = to_radeon_framebuffer(fb); rbo = radeon_fb->obj->driver_private; r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -993,8 +1100,9 @@ static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) +static int avivo_crtc_do_set_base(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, int atomic) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1002,33 +1110,48 @@ static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct radeon_framebuffer *radeon_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; + struct drm_framebuffer *target_fb; uint64_t fb_location; uint32_t fb_format, fb_pitch_pixels, tiling_flags; int r; /* no fb bound */ - if (!crtc->fb) { + if (!atomic && !crtc->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - radeon_fb = to_radeon_framebuffer(crtc->fb); + if (atomic) { + radeon_fb = to_radeon_framebuffer(fb); + target_fb = fb; + } + else { + radeon_fb = to_radeon_framebuffer(crtc->fb); + target_fb = crtc->fb; + } - /* Pin framebuffer & get tilling informations */ obj = radeon_fb->obj; rbo = obj->driver_private; r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; - r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); - if (unlikely(r != 0)) { - radeon_bo_unreserve(rbo); - return -EINVAL; + + /* If atomic, assume fb object is pinned & idle & fenced and + * just update base pointers + */ + if (atomic) + fb_location = radeon_bo_gpu_offset(rbo); + else { + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + return -EINVAL; + } } radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); radeon_bo_unreserve(rbo); - switch (crtc->fb->bits_per_pixel) { + switch (target_fb->bits_per_pixel) { case 8: fb_format = AVIVO_D1GRPH_CONTROL_DEPTH_8BPP | @@ -1052,7 +1175,7 @@ static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, break; default: DRM_ERROR("Unsupported screen depth %d\n", - crtc->fb->bits_per_pixel); + target_fb->bits_per_pixel); return -EINVAL; } @@ -1093,10 +1216,10 @@ static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, WREG32(AVIVO_D1GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0); WREG32(AVIVO_D1GRPH_X_START + radeon_crtc->crtc_offset, 0); WREG32(AVIVO_D1GRPH_Y_START + radeon_crtc->crtc_offset, 0); - WREG32(AVIVO_D1GRPH_X_END + radeon_crtc->crtc_offset, crtc->fb->width); - WREG32(AVIVO_D1GRPH_Y_END + radeon_crtc->crtc_offset, crtc->fb->height); + WREG32(AVIVO_D1GRPH_X_END + radeon_crtc->crtc_offset, target_fb->width); + WREG32(AVIVO_D1GRPH_Y_END + radeon_crtc->crtc_offset, target_fb->height); - fb_pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8); + fb_pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); WREG32(AVIVO_D1GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels); WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 1); @@ -1115,8 +1238,8 @@ static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, else WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, 0); - if (old_fb && old_fb != crtc->fb) { - radeon_fb = to_radeon_framebuffer(old_fb); + if (!atomic && fb && fb != crtc->fb) { + radeon_fb = to_radeon_framebuffer(fb); rbo = radeon_fb->obj->driver_private; r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1138,11 +1261,26 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct radeon_device *rdev = dev->dev_private; if (ASIC_IS_DCE4(rdev)) - return evergreen_crtc_set_base(crtc, x, y, old_fb); + return evergreen_crtc_do_set_base(crtc, old_fb, x, y, 0); else if (ASIC_IS_AVIVO(rdev)) - return avivo_crtc_set_base(crtc, x, y, old_fb); + return avivo_crtc_do_set_base(crtc, old_fb, x, y, 0); else - return radeon_crtc_set_base(crtc, x, y, old_fb); + return radeon_crtc_do_set_base(crtc, old_fb, x, y, 0); +} + +int atombios_crtc_set_base_atomic(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, enum mode_set_atomic state) +{ + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + + if (ASIC_IS_DCE4(rdev)) + return evergreen_crtc_do_set_base(crtc, fb, x, y, 1); + else if (ASIC_IS_AVIVO(rdev)) + return avivo_crtc_do_set_base(crtc, fb, x, y, 1); + else + return radeon_crtc_do_set_base(crtc, fb, x, y, 1); } /* properly set additional regs when using atombios */ @@ -1230,12 +1368,19 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, } } - atombios_disable_ss(crtc); /* always set DCPLL */ - if (ASIC_IS_DCE4(rdev)) + if (ASIC_IS_DCE4(rdev)) { + struct radeon_atom_ss ss; + bool ss_enabled = radeon_atombios_get_asic_ss_info(rdev, &ss, + ASIC_INTERNAL_SS_ON_DCPLL, + rdev->clock.default_dispclk); + if (ss_enabled) + atombios_crtc_program_ss(crtc, ATOM_DISABLE, ATOM_DCPLL, &ss); atombios_crtc_set_dcpll(crtc); + if (ss_enabled) + atombios_crtc_program_ss(crtc, ATOM_ENABLE, ATOM_DCPLL, &ss); + } atombios_crtc_set_pll(crtc, adjusted_mode); - atombios_enable_ss(crtc); if (ASIC_IS_DCE4(rdev)) atombios_set_crtc_dtd_timing(crtc, adjusted_mode); @@ -1311,6 +1456,7 @@ static const struct drm_crtc_helper_funcs atombios_helper_funcs = { .mode_fixup = atombios_crtc_mode_fixup, .mode_set = atombios_crtc_mode_set, .mode_set_base = atombios_crtc_set_base, + .mode_set_base_atomic = atombios_crtc_set_base_atomic, .prepare = atombios_crtc_prepare, .commit = atombios_crtc_commit, .load_lut = radeon_crtc_load_lut, diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2f93d46..f12a5b3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -32,6 +32,7 @@ #include "atom.h" #include "avivod.h" #include "evergreen_reg.h" +#include "evergreen_blit_shaders.h" #define EVERGREEN_PFP_UCODE_SIZE 1120 #define EVERGREEN_PM4_UCODE_SIZE 1376 @@ -284,9 +285,444 @@ void evergreen_hpd_fini(struct radeon_device *rdev) } } +/* watermark setup */ + +static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev, + struct radeon_crtc *radeon_crtc, + struct drm_display_mode *mode, + struct drm_display_mode *other_mode) +{ + u32 tmp = 0; + /* + * Line Buffer Setup + * There are 3 line buffers, each one shared by 2 display controllers. + * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between + * the display controllers. The paritioning is done via one of four + * preset allocations specified in bits 2:0: + * first display controller + * 0 - first half of lb (3840 * 2) + * 1 - first 3/4 of lb (5760 * 2) + * 2 - whole lb (7680 * 2) + * 3 - first 1/4 of lb (1920 * 2) + * second display controller + * 4 - second half of lb (3840 * 2) + * 5 - second 3/4 of lb (5760 * 2) + * 6 - whole lb (7680 * 2) + * 7 - last 1/4 of lb (1920 * 2) + */ + if (mode && other_mode) { + if (mode->hdisplay > other_mode->hdisplay) { + if (mode->hdisplay > 2560) + tmp = 1; /* 3/4 */ + else + tmp = 0; /* 1/2 */ + } else if (other_mode->hdisplay > mode->hdisplay) { + if (other_mode->hdisplay > 2560) + tmp = 3; /* 1/4 */ + else + tmp = 0; /* 1/2 */ + } else + tmp = 0; /* 1/2 */ + } else if (mode) + tmp = 2; /* whole */ + else if (other_mode) + tmp = 3; /* 1/4 */ + + /* second controller of the pair uses second half of the lb */ + if (radeon_crtc->crtc_id % 2) + tmp += 4; + WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, tmp); + + switch (tmp) { + case 0: + case 4: + default: + return 3840 * 2; + case 1: + case 5: + return 5760 * 2; + case 2: + case 6: + return 7680 * 2; + case 3: + case 7: + return 1920 * 2; + } +} + +static u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev) +{ + u32 tmp = RREG32(MC_SHARED_CHMAP); + + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + default: + return 1; + case 1: + return 2; + case 2: + return 4; + case 3: + return 8; + } +} + +struct evergreen_wm_params { + u32 dram_channels; /* number of dram channels */ + u32 yclk; /* bandwidth per dram data pin in kHz */ + u32 sclk; /* engine clock in kHz */ + u32 disp_clk; /* display clock in kHz */ + u32 src_width; /* viewport width */ + u32 active_time; /* active display time in ns */ + u32 blank_time; /* blank time in ns */ + bool interlaced; /* mode is interlaced */ + fixed20_12 vsc; /* vertical scale ratio */ + u32 num_heads; /* number of active crtcs */ + u32 bytes_per_pixel; /* bytes per pixel display + overlay */ + u32 lb_size; /* line buffer allocated to pipe */ + u32 vtaps; /* vertical scaler taps */ +}; + +static u32 evergreen_dram_bandwidth(struct evergreen_wm_params *wm) +{ + /* Calculate DRAM Bandwidth and the part allocated to display. */ + fixed20_12 dram_efficiency; /* 0.7 */ + fixed20_12 yclk, dram_channels, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + yclk.full = dfixed_const(wm->yclk); + yclk.full = dfixed_div(yclk, a); + dram_channels.full = dfixed_const(wm->dram_channels * 4); + a.full = dfixed_const(10); + dram_efficiency.full = dfixed_const(7); + dram_efficiency.full = dfixed_div(dram_efficiency, a); + bandwidth.full = dfixed_mul(dram_channels, yclk); + bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 evergreen_dram_bandwidth_for_display(struct evergreen_wm_params *wm) +{ + /* Calculate DRAM Bandwidth and the part allocated to display. */ + fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ + fixed20_12 yclk, dram_channels, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + yclk.full = dfixed_const(wm->yclk); + yclk.full = dfixed_div(yclk, a); + dram_channels.full = dfixed_const(wm->dram_channels * 4); + a.full = dfixed_const(10); + disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */ + disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); + bandwidth.full = dfixed_mul(dram_channels, yclk); + bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); + + return dfixed_trunc(bandwidth); +} + +static u32 evergreen_data_return_bandwidth(struct evergreen_wm_params *wm) +{ + /* Calculate the display Data return Bandwidth */ + fixed20_12 return_efficiency; /* 0.8 */ + fixed20_12 sclk, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + sclk.full = dfixed_const(wm->sclk); + sclk.full = dfixed_div(sclk, a); + a.full = dfixed_const(10); + return_efficiency.full = dfixed_const(8); + return_efficiency.full = dfixed_div(return_efficiency, a); + a.full = dfixed_const(32); + bandwidth.full = dfixed_mul(a, sclk); + bandwidth.full = dfixed_mul(bandwidth, return_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 evergreen_dmif_request_bandwidth(struct evergreen_wm_params *wm) +{ + /* Calculate the DMIF Request Bandwidth */ + fixed20_12 disp_clk_request_efficiency; /* 0.8 */ + fixed20_12 disp_clk, bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + disp_clk.full = dfixed_const(wm->disp_clk); + disp_clk.full = dfixed_div(disp_clk, a); + a.full = dfixed_const(10); + disp_clk_request_efficiency.full = dfixed_const(8); + disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); + a.full = dfixed_const(32); + bandwidth.full = dfixed_mul(a, disp_clk); + bandwidth.full = dfixed_mul(bandwidth, disp_clk_request_efficiency); + + return dfixed_trunc(bandwidth); +} + +static u32 evergreen_available_bandwidth(struct evergreen_wm_params *wm) +{ + /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ + u32 dram_bandwidth = evergreen_dram_bandwidth(wm); + u32 data_return_bandwidth = evergreen_data_return_bandwidth(wm); + u32 dmif_req_bandwidth = evergreen_dmif_request_bandwidth(wm); + + return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); +} + +static u32 evergreen_average_bandwidth(struct evergreen_wm_params *wm) +{ + /* Calculate the display mode Average Bandwidth + * DisplayMode should contain the source and destination dimensions, + * timing, etc. + */ + fixed20_12 bpp; + fixed20_12 line_time; + fixed20_12 src_width; + fixed20_12 bandwidth; + fixed20_12 a; + + a.full = dfixed_const(1000); + line_time.full = dfixed_const(wm->active_time + wm->blank_time); + line_time.full = dfixed_div(line_time, a); + bpp.full = dfixed_const(wm->bytes_per_pixel); + src_width.full = dfixed_const(wm->src_width); + bandwidth.full = dfixed_mul(src_width, bpp); + bandwidth.full = dfixed_mul(bandwidth, wm->vsc); + bandwidth.full = dfixed_div(bandwidth, line_time); + + return dfixed_trunc(bandwidth); +} + +static u32 evergreen_latency_watermark(struct evergreen_wm_params *wm) +{ + /* First calcualte the latency in ns */ + u32 mc_latency = 2000; /* 2000 ns. */ + u32 available_bandwidth = evergreen_available_bandwidth(wm); + u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth; + u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth; + u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */ + u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) + + (wm->num_heads * cursor_line_pair_return_time); + u32 latency = mc_latency + other_heads_data_return_time + dc_latency; + u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time; + fixed20_12 a, b, c; + + if (wm->num_heads == 0) + return 0; + + a.full = dfixed_const(2); + b.full = dfixed_const(1); + if ((wm->vsc.full > a.full) || + ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) || + (wm->vtaps >= 5) || + ((wm->vsc.full >= a.full) && wm->interlaced)) + max_src_lines_per_dst_line = 4; + else + max_src_lines_per_dst_line = 2; + + a.full = dfixed_const(available_bandwidth); + b.full = dfixed_const(wm->num_heads); + a.full = dfixed_div(a, b); + + b.full = dfixed_const(1000); + c.full = dfixed_const(wm->disp_clk); + b.full = dfixed_div(c, b); + c.full = dfixed_const(wm->bytes_per_pixel); + b.full = dfixed_mul(b, c); + + lb_fill_bw = min(dfixed_trunc(a), dfixed_trunc(b)); + + a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); + b.full = dfixed_const(1000); + c.full = dfixed_const(lb_fill_bw); + b.full = dfixed_div(c, b); + a.full = dfixed_div(a, b); + line_fill_time = dfixed_trunc(a); + + if (line_fill_time < wm->active_time) + return latency; + else + return latency + (line_fill_time - wm->active_time); + +} + +static bool evergreen_average_bandwidth_vs_dram_bandwidth_for_display(struct evergreen_wm_params *wm) +{ + if (evergreen_average_bandwidth(wm) <= + (evergreen_dram_bandwidth_for_display(wm) / wm->num_heads)) + return true; + else + return false; +}; + +static bool evergreen_average_bandwidth_vs_available_bandwidth(struct evergreen_wm_params *wm) +{ + if (evergreen_average_bandwidth(wm) <= + (evergreen_available_bandwidth(wm) / wm->num_heads)) + return true; + else + return false; +}; + +static bool evergreen_check_latency_hiding(struct evergreen_wm_params *wm) +{ + u32 lb_partitions = wm->lb_size / wm->src_width; + u32 line_time = wm->active_time + wm->blank_time; + u32 latency_tolerant_lines; + u32 latency_hiding; + fixed20_12 a; + + a.full = dfixed_const(1); + if (wm->vsc.full > a.full) + latency_tolerant_lines = 1; + else { + if (lb_partitions <= (wm->vtaps + 1)) + latency_tolerant_lines = 1; + else + latency_tolerant_lines = 2; + } + + latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time); + + if (evergreen_latency_watermark(wm) <= latency_hiding) + return true; + else + return false; +} + +static void evergreen_program_watermarks(struct radeon_device *rdev, + struct radeon_crtc *radeon_crtc, + u32 lb_size, u32 num_heads) +{ + struct drm_display_mode *mode = &radeon_crtc->base.mode; + struct evergreen_wm_params wm; + u32 pixel_period; + u32 line_time = 0; + u32 latency_watermark_a = 0, latency_watermark_b = 0; + u32 priority_a_mark = 0, priority_b_mark = 0; + u32 priority_a_cnt = PRIORITY_OFF; + u32 priority_b_cnt = PRIORITY_OFF; + u32 pipe_offset = radeon_crtc->crtc_id * 16; + u32 tmp, arb_control3; + fixed20_12 a, b, c; + + if (radeon_crtc->base.enabled && num_heads && mode) { + pixel_period = 1000000 / (u32)mode->clock; + line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + priority_a_cnt = 0; + priority_b_cnt = 0; + + wm.yclk = rdev->pm.current_mclk * 10; + wm.sclk = rdev->pm.current_sclk * 10; + wm.disp_clk = mode->clock; + wm.src_width = mode->crtc_hdisplay; + wm.active_time = mode->crtc_hdisplay * pixel_period; + wm.blank_time = line_time - wm.active_time; + wm.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm.interlaced = true; + wm.vsc = radeon_crtc->vsc; + wm.vtaps = 1; + if (radeon_crtc->rmx_type != RMX_OFF) + wm.vtaps = 2; + wm.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm.lb_size = lb_size; + wm.dram_channels = evergreen_get_number_of_dram_channels(rdev); + wm.num_heads = num_heads; + + /* set for high clocks */ + latency_watermark_a = min(evergreen_latency_watermark(&wm), (u32)65535); + /* set for low clocks */ + /* wm.yclk = low clk; wm.sclk = low clk */ + latency_watermark_b = min(evergreen_latency_watermark(&wm), (u32)65535); + + /* possibly force display priority to high */ + /* should really do this at mode validation time... */ + if (!evergreen_average_bandwidth_vs_dram_bandwidth_for_display(&wm) || + !evergreen_average_bandwidth_vs_available_bandwidth(&wm) || + !evergreen_check_latency_hiding(&wm) || + (rdev->disp_priority == 2)) { + DRM_INFO("force priority to high\n"); + priority_a_cnt |= PRIORITY_ALWAYS_ON; + priority_b_cnt |= PRIORITY_ALWAYS_ON; + } + + a.full = dfixed_const(1000); + b.full = dfixed_const(mode->clock); + b.full = dfixed_div(b, a); + c.full = dfixed_const(latency_watermark_a); + c.full = dfixed_mul(c, b); + c.full = dfixed_mul(c, radeon_crtc->hsc); + c.full = dfixed_div(c, a); + a.full = dfixed_const(16); + c.full = dfixed_div(c, a); + priority_a_mark = dfixed_trunc(c); + priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK; + + a.full = dfixed_const(1000); + b.full = dfixed_const(mode->clock); + b.full = dfixed_div(b, a); + c.full = dfixed_const(latency_watermark_b); + c.full = dfixed_mul(c, b); + c.full = dfixed_mul(c, radeon_crtc->hsc); + c.full = dfixed_div(c, a); + a.full = dfixed_const(16); + c.full = dfixed_div(c, a); + priority_b_mark = dfixed_trunc(c); + priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK; + } + + /* select wm A */ + arb_control3 = RREG32(PIPE0_ARBITRATION_CONTROL3 + pipe_offset); + tmp = arb_control3; + tmp &= ~LATENCY_WATERMARK_MASK(3); + tmp |= LATENCY_WATERMARK_MASK(1); + WREG32(PIPE0_ARBITRATION_CONTROL3 + pipe_offset, tmp); + WREG32(PIPE0_LATENCY_CONTROL + pipe_offset, + (LATENCY_LOW_WATERMARK(latency_watermark_a) | + LATENCY_HIGH_WATERMARK(line_time))); + /* select wm B */ + tmp = RREG32(PIPE0_ARBITRATION_CONTROL3 + pipe_offset); + tmp &= ~LATENCY_WATERMARK_MASK(3); + tmp |= LATENCY_WATERMARK_MASK(2); + WREG32(PIPE0_ARBITRATION_CONTROL3 + pipe_offset, tmp); + WREG32(PIPE0_LATENCY_CONTROL + pipe_offset, + (LATENCY_LOW_WATERMARK(latency_watermark_b) | + LATENCY_HIGH_WATERMARK(line_time))); + /* restore original selection */ + WREG32(PIPE0_ARBITRATION_CONTROL3 + pipe_offset, arb_control3); + + /* write the priority marks */ + WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt); + WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt); + +} + void evergreen_bandwidth_update(struct radeon_device *rdev) { - /* XXX */ + struct drm_display_mode *mode0 = NULL; + struct drm_display_mode *mode1 = NULL; + u32 num_heads = 0, lb_size; + int i; + + radeon_update_display_priority(rdev); + + for (i = 0; i < rdev->num_crtc; i++) { + if (rdev->mode_info.crtcs[i]->base.enabled) + num_heads++; + } + for (i = 0; i < rdev->num_crtc; i += 2) { + mode0 = &rdev->mode_info.crtcs[i]->base.mode; + mode1 = &rdev->mode_info.crtcs[i+1]->base.mode; + lb_size = evergreen_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1); + evergreen_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads); + lb_size = evergreen_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0); + evergreen_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads); + } } static int evergreen_mc_wait_for_idle(struct radeon_device *rdev) @@ -677,7 +1113,7 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev) static int evergreen_cp_start(struct radeon_device *rdev) { - int r; + int r, i; uint32_t cp_me; r = radeon_ring_lock(rdev, 7); @@ -697,16 +1133,39 @@ static int evergreen_cp_start(struct radeon_device *rdev) cp_me = 0xff; WREG32(CP_ME_CNTL, cp_me); - r = radeon_ring_lock(rdev, 4); + r = radeon_ring_lock(rdev, evergreen_default_size + 15); if (r) { DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); return r; } - /* init some VGT regs */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - radeon_ring_write(rdev, (VGT_VERTEX_REUSE_BLOCK_CNTL - PACKET3_SET_CONTEXT_REG_START) >> 2); - radeon_ring_write(rdev, 0xe); - radeon_ring_write(rdev, 0x10); + + /* setup clear context state */ + radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + for (i = 0; i < evergreen_default_size; i++) + radeon_ring_write(rdev, evergreen_default_state[i]); + + radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE); + + /* set clear context state */ + radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(rdev, 0); + + /* SQ_VTX_BASE_VTX_LOC */ + radeon_ring_write(rdev, 0xc0026f00); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000000); + + /* Clear consts */ + radeon_ring_write(rdev, 0xc0036f00); + radeon_ring_write(rdev, 0x00000bc4); + radeon_ring_write(rdev, 0xffffffff); + radeon_ring_write(rdev, 0xffffffff); + radeon_ring_write(rdev, 0xffffffff); + radeon_ring_unlock_commit(rdev); return 0; @@ -731,7 +1190,7 @@ int evergreen_cp_resume(struct radeon_device *rdev) /* Set ring buffer size */ rb_bufsz = drm_order(rdev->cp.ring_size / 8); - tmp = RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -745,8 +1204,19 @@ int evergreen_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); WREG32(CP_RB_WPTR, 0); - WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); - WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); + + /* set the wb address wether it's enabled or not */ + WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); + WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + + if (rdev->wb.enabled) + WREG32(SCRATCH_UMSK, 0xff); + else { + tmp |= RB_NO_UPDATE; + WREG32(SCRATCH_UMSK, 0); + } + mdelay(1); WREG32(CP_RB_CNTL, tmp); @@ -1584,6 +2054,7 @@ int evergreen_irq_set(struct radeon_device *rdev) if (rdev->irq.sw_int) { DRM_DEBUG("evergreen_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; + cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0]) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); @@ -1760,8 +2231,10 @@ static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; - /* XXX use writeback */ - wptr = RREG32(IH_RB_WPTR); + if (rdev->wb.enabled) + wptr = rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]; + else + wptr = RREG32(IH_RB_WPTR); if (wptr & RB_OVERFLOW) { /* When a ring buffer overflow happen start parsing interrupt @@ -2000,6 +2473,7 @@ restart_ih: break; case 181: /* CP EOP event */ DRM_DEBUG("IH: CP EOP\n"); + radeon_fence_process(rdev); break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: CP EOP\n"); @@ -2048,26 +2522,18 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } evergreen_gpu_init(rdev); -#if 0 - if (!rdev->r600_blit.shader_obj) { - r = r600_blit_init(rdev); - if (r) { - DRM_ERROR("radeon: failed blitter (%d).\n", r); - return r; - } - } - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); + r = evergreen_blit_init(rdev); if (r) { - DRM_ERROR("failed to pin blit object %d\n", r); - return r; + evergreen_blit_fini(rdev); + rdev->asic->copy = NULL; + dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } -#endif + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; /* Enable IRQ */ r = r600_irq_init(rdev); @@ -2087,8 +2553,6 @@ static int evergreen_startup(struct radeon_device *rdev) r = evergreen_cp_resume(rdev); if (r) return r; - /* write back buffer are not vital so don't worry about failure */ - r600_wb_enable(rdev); return 0; } @@ -2122,23 +2586,43 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { -#if 0 int r; -#endif + /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); rdev->cp.ready = false; evergreen_irq_suspend(rdev); - r600_wb_disable(rdev); + radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); -#if 0 + /* unpin shaders bo */ r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); if (likely(r == 0)) { radeon_bo_unpin(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); } -#endif + + return 0; +} + +int evergreen_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence) +{ + int r; + + mutex_lock(&rdev->r600_blit.mutex); + rdev->r600_blit.vb_ib = NULL; + r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); + if (r) { + if (rdev->r600_blit.vb_ib) + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); + mutex_unlock(&rdev->r600_blit.mutex); + return r; + } + evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); + evergreen_blit_done_copy(rdev, fence); + mutex_unlock(&rdev->r600_blit.mutex); return 0; } @@ -2246,8 +2730,8 @@ int evergreen_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -2269,10 +2753,10 @@ int evergreen_init(struct radeon_device *rdev) void evergreen_fini(struct radeon_device *rdev) { - /*r600_blit_fini(rdev);*/ + evergreen_blit_fini(rdev); r700_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c new file mode 100644 index 0000000..086b9b0 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -0,0 +1,772 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "evergreend.h" +#include "evergreen_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +/* emits 17 */ +static void +set_render_target(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + + h = ALIGN(h, 8); + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 24)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, pitch); + radeon_ring_write(rdev, slice); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, cb_color_info); + radeon_ring_write(rdev, (1 << 4)); + radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); +} + +/* emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* emits 11dw + 1 surface sync = 16dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + + /* VS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, 2); + radeon_ring_write(rdev, 0); + + /* PS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, 1); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 2); + + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + +/* emits 10 + 1 sync (5) = 15 */ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2, sq_vtx_constant_word3; + + /* high addr, stride */ + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + /* xyzw swizzles */ + sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(rdev, 0x580); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); /* size */ + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, sq_vtx_constant_word3); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + + if (rdev->family == CHIP_CEDAR) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); + +} + +/* emits 10 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + u32 sq_tex_resource_word0, sq_tex_resource_word1; + u32 sq_tex_resource_word4, sq_tex_resource_word7; + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); /* 2D */ + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | + ((w - 1) << 18)); + sq_tex_resource_word1 = ((h - 1) << 0); + /* xyzw swizzles */ + sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); + + sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word7); +} + +/* emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + +/* emits 30 */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; + u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; + u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; + int num_ps_gprs, num_vs_gprs, num_temp_gprs; + int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_hs_threads, num_ls_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + int num_hs_stack_entries, num_ls_stack_entries; + + switch (rdev->family) { + case CHIP_CEDAR: + default: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_REDWOOD: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_JUNIPER: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + } + + if (rdev->family == CHIP_CEDAR) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (EXPORT_SRC_C | + CS_PRIO(0) | + LS_PRIO(0) | + HS_PRIO(0) | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | + NUM_LS_GPRS(num_ls_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | + NUM_LS_THREADS(num_ls_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | + NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); + + /* set clear context state */ + radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); + radeon_ring_write(rdev, 0); + + /* disable dyn gprs */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, 0); + + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_thread_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_3); + + /* CONTEXT_CONTROL */ + radeon_ring_write(rdev, 0xc0012800); + radeon_ring_write(rdev, 0x80000000); + radeon_ring_write(rdev, 0x80000000); + + /* SQ_VTX_BASE_VTX_LOC */ + radeon_ring_write(rdev, 0xc0026f00); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000000); + + /* SET_SAMPLER */ + radeon_ring_write(rdev, 0xc0036e00); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000012); + radeon_ring_write(rdev, 0x00000000); + radeon_ring_write(rdev, 0x00000000); + +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + +int evergreen_blit_init(struct radeon_device *rdev) +{ + u32 obj_size; + int r; + void *ptr; + + /* pin copy shader into vram if already initialized */ + if (rdev->r600_blit.shader_obj) + goto done; + + mutex_init(&rdev->r600_blit.mutex); + rdev->r600_blit.state_offset = 0; + rdev->r600_blit.state_len = 0; + obj_size = 0; + + rdev->r600_blit.vs_offset = obj_size; + obj_size += evergreen_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.ps_offset = obj_size; + obj_size += evergreen_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_bo_create(rdev, NULL, obj_size, true, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("evergreen failed to allocate shader\n"); + return r; + } + + DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", + obj_size, + rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + + memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4); + memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4); + radeon_bo_kunmap(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + +done: + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } + return 0; +} + +void evergreen_blit_fini(struct radeon_device *rdev) +{ + int r; + + if (rdev->r600_blit.shader_obj == NULL) + return; + /* If we can't reserve the bo, unref should be enough to destroy + * it when it becomes idle. + */ + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (!r) { + radeon_bo_unpin(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + } + radeon_bo_unref(&rdev->r600_blit.shader_obj); +} + +static int evergreen_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_used = 0; + return 0; +} + +static void evergreen_vb_ib_put(struct radeon_device *rdev) +{ + radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); +} + +int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size, line_size; + int max_size; + /* loops of emits + fence emit possible */ + int dwords_per_loop = 74, num_loops; + + r = evergreen_vb_ib_get(rdev); + if (r) + return r; + + /* 8 bpp vs 32 bpp for xfer unit */ + if (size_bytes & 3) + line_size = 8192; + else + line_size = 8192 * 4; + + max_size = 8192 * line_size; + + /* major loops cover the max size transfer */ + num_loops = ((size_bytes + max_size) / max_size); + /* minor loops cover the extra non aligned bits */ + num_loops += ((size_bytes % line_size) ? 1 : 0); + /* calculate number of loops correctly */ + ring_size = num_loops * dwords_per_loop; + /* set default + shaders */ + ring_size += 46; /* shaders + def state */ + ring_size += 10; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 10; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + if (r) + return r; + + set_default_state(rdev); /* 30 */ + set_shaders(rdev); /* 16 */ + return 0; +} + +void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; + + if (rdev->r600_blit.vb_ib) + evergreen_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + +void evergreen_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes) +{ + int max_bytes; + u64 vb_gpu_addr; + u32 *vb; + + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, + size_bytes, rdev->r600_blit.vb_used); + vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255ULL; + dst_gpu_addr = dst_gpu_addr & ~255ULL; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src 10 */ + set_tex_resource(rdev, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + + /* dst 17 */ + set_render_target(rdev, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + + /* 15 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255ULL; + dst_gpu_addr = dst_gpu_addr & ~255ULL; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src 10 */ + set_tex_resource(rdev, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 17 */ + set_render_target(rdev, COLOR_8_8_8_8, + (dst_x + cur_size) / 4, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup 15 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + /* 74 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c new file mode 100644 index 0000000..ef1d28c --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c @@ -0,0 +1,348 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include <linux/types.h> +#include <linux/kernel.h> + +/* + * evergreen cards need to use the 3D engine to blit data which requires + * quite a bit of hw state setup. Rather than pull the whole 3D driver + * (which normally generates the 3D state) into the DRM, we opt to use + * statically generated state tables. The regsiter state and shaders + * were hand generated to support blitting functionality. See the 3D + * driver or documentation for descriptions of the registers and + * shader instructions. + */ + +const u32 evergreen_default_state[] = +{ + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x00000010, + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0026900, + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0106900, + 0x00000300, + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_0 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_7 */ + 0xffffffff, /* PA_SC_AA_MASK */ + + 0xc00d6900, + 0x00000202, + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SQ_DYN_GPR_RESOURCE_LIMIT_1 */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0016900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + + 0xc0016900, + 0x0000022a, + 0x00000000, /* SQ_PGM_RESOURCES_FS */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, /* */ + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* VGT_GS_MODE */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, /* */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0016900, + 0x00000187, + 0x00000000, /* SPI_VS_OUT_ID_0 */ + + 0xc0016900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + + 0xc00b6900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; + +const u32 evergreen_vs[] = +{ + 0x00000004, + 0x80800400, + 0x0000a03c, + 0x95000688, + 0x00004000, + 0x15200688, + 0x00000000, + 0x00000000, + 0x3c000000, + 0x67961001, + 0x00080000, + 0x00000000, + 0x1c000000, + 0x67961000, + 0x00000008, + 0x00000000, +}; + +const u32 evergreen_ps[] = +{ + 0x00000003, + 0xa00c0000, + 0x00000008, + 0x80400000, + 0x00000000, + 0x95200688, + 0x00380400, + 0x00146b10, + 0x00380000, + 0x20146b10, + 0x00380400, + 0x40146b00, + 0x80380000, + 0x60146b00, + 0x00000000, + 0x00000000, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps); +const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs); +const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.h b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h new file mode 100644 index 0000000..bb8d6c7 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h @@ -0,0 +1,35 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef EVERGREEN_BLIT_SHADERS_H +#define EVERGREEN_BLIT_SHADERS_H + +extern const u32 evergreen_ps[]; +extern const u32 evergreen_vs[]; +extern const u32 evergreen_default_state[]; + +extern const u32 evergreen_ps_size, evergreen_vs_size; +extern const u32 evergreen_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 9b7532d..113c70c 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -412,6 +412,19 @@ #define SOFT_RESET_REGBB (1 << 22) #define SOFT_RESET_ORB (1 << 23) +/* display watermarks */ +#define DC_LB_MEMORY_SPLIT 0x6b0c +#define PRIORITY_A_CNT 0x6b18 +#define PRIORITY_MARK_MASK 0x7fff +#define PRIORITY_OFF (1 << 16) +#define PRIORITY_ALWAYS_ON (1 << 20) +#define PRIORITY_B_CNT 0x6b1c +#define PIPE0_ARBITRATION_CONTROL3 0x0bf0 +# define LATENCY_WATERMARK_MASK(x) ((x) << 16) +#define PIPE0_LATENCY_CONTROL 0x0bf4 +# define LATENCY_LOW_WATERMARK(x) ((x) << 0) +# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) + #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) # define IH_IB_SIZE(x) ((x) << 1) /* log2 */ @@ -645,6 +658,8 @@ #define PACKET3_EVENT_WRITE_EOP 0x47 #define PACKET3_EVENT_WRITE_EOS 0x48 #define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) #define PACKET3_RB_OFFSET 0x4B #define PACKET3_ALU_PS_CONST_BUFFER_COPY 0x4C #define PACKET3_ALU_VS_CONST_BUFFER_COPY 0x4D @@ -802,6 +817,11 @@ #define SQ_ALU_CONST_CACHE_LS_14 0x28f78 #define SQ_ALU_CONST_CACHE_LS_15 0x28f7c +#define PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define PA_SC_WINDOW_SCISSOR_TL 0x28204 +#define VGT_PRIMITIVE_TYPE 0x8958 + #define DB_DEPTH_CONTROL 0x28800 #define DB_DEPTH_VIEW 0x28008 #define DB_HTILE_DATA_BASE 0x28014 diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index e594223..6d1540c 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -675,67 +675,6 @@ void r100_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(rdev, RADEON_SW_INT_FIRE); } -int r100_wb_init(struct radeon_device *rdev) -{ - int r; - - if (rdev->wb.wb_obj == NULL) { - r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, - &rdev->wb.wb_obj); - if (r) { - dev_err(rdev->dev, "(%d) create WB buffer failed\n", r); - return r; - } - r = radeon_bo_reserve(rdev->wb.wb_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, - &rdev->wb.gpu_addr); - if (r) { - dev_err(rdev->dev, "(%d) pin WB buffer failed\n", r); - radeon_bo_unreserve(rdev->wb.wb_obj); - return r; - } - r = radeon_bo_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); - radeon_bo_unreserve(rdev->wb.wb_obj); - if (r) { - dev_err(rdev->dev, "(%d) map WB buffer failed\n", r); - return r; - } - } - WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr); - WREG32(R_00070C_CP_RB_RPTR_ADDR, - S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + 1024) >> 2)); - WREG32(R_000770_SCRATCH_UMSK, 0xff); - return 0; -} - -void r100_wb_disable(struct radeon_device *rdev) -{ - WREG32(R_000770_SCRATCH_UMSK, 0); -} - -void r100_wb_fini(struct radeon_device *rdev) -{ - int r; - - r100_wb_disable(rdev); - if (rdev->wb.wb_obj) { - r = radeon_bo_reserve(rdev->wb.wb_obj, false); - if (unlikely(r != 0)) { - dev_err(rdev->dev, "(%d) can't finish WB\n", r); - return; - } - radeon_bo_kunmap(rdev->wb.wb_obj); - radeon_bo_unpin(rdev->wb.wb_obj); - radeon_bo_unreserve(rdev->wb.wb_obj); - radeon_bo_unref(&rdev->wb.wb_obj); - rdev->wb.wb = NULL; - rdev->wb.wb_obj = NULL; - } -} - int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -996,20 +935,32 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | REG_SET(RADEON_RB_BLKSZ, rb_blksz) | - REG_SET(RADEON_MAX_FETCH, max_fetch) | - RADEON_RB_NO_UPDATE); + REG_SET(RADEON_MAX_FETCH, max_fetch)); #ifdef __BIG_ENDIAN tmp |= RADEON_BUF_SWAP_32BIT; #endif - WREG32(RADEON_CP_RB_CNTL, tmp); + WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE); /* Set ring address */ DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr); WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr); /* Force read & write ptr to 0 */ - WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); + WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE); WREG32(RADEON_CP_RB_RPTR_WR, 0); WREG32(RADEON_CP_RB_WPTR, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(R_00070C_CP_RB_RPTR_ADDR, + S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2)); + WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET); + + if (rdev->wb.enabled) + WREG32(R_000770_SCRATCH_UMSK, 0xff); + else { + tmp |= RADEON_RB_NO_UPDATE; + WREG32(R_000770_SCRATCH_UMSK, 0); + } + WREG32(RADEON_CP_RB_CNTL, tmp); udelay(10); rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); @@ -1052,6 +1003,7 @@ void r100_cp_disable(struct radeon_device *rdev) rdev->cp.ready = false; WREG32(RADEON_CP_CSQ_MODE, 0); WREG32(RADEON_CP_CSQ_CNTL, 0); + WREG32(R_000770_SCRATCH_UMSK, 0); if (r100_gui_wait_for_idle(rdev)) { printk(KERN_WARNING "Failed to wait GUI idle while " "programming pipes. Bad things might happen.\n"); @@ -2318,6 +2270,9 @@ void r100_vram_init_sizes(struct radeon_device *rdev) /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - * Novell bug 204882 + along with lots of ubuntu ones */ + if (rdev->mc.aper_size > config_aper_size) + config_aper_size = rdev->mc.aper_size; + if (config_aper_size > rdev->mc.real_vram_size) rdev->mc.mc_vram_size = config_aper_size; else @@ -3737,6 +3692,12 @@ static int r100_startup(struct radeon_device *rdev) if (r) return r; } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -3746,9 +3707,6 @@ static int r100_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -3782,7 +3740,7 @@ int r100_resume(struct radeon_device *rdev) int r100_suspend(struct radeon_device *rdev) { r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); r100_irq_disable(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_disable(rdev); @@ -3792,7 +3750,7 @@ int r100_suspend(struct radeon_device *rdev) void r100_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCI) @@ -3905,7 +3863,7 @@ int r100_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCI) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index c827738..34527e6 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1332,6 +1332,12 @@ static int r300_startup(struct radeon_device *rdev) if (r) return r; } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -1341,9 +1347,6 @@ static int r300_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -1379,7 +1382,7 @@ int r300_resume(struct radeon_device *rdev) int r300_suspend(struct radeon_device *rdev) { r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); r100_irq_disable(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_disable(rdev); @@ -1391,7 +1394,7 @@ int r300_suspend(struct radeon_device *rdev) void r300_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) @@ -1484,7 +1487,7 @@ int r300_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 59f7bcc..c387346 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -248,6 +248,12 @@ static int r420_startup(struct radeon_device *rdev) return r; } r420_pipes_init(rdev); + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -258,10 +264,6 @@ static int r420_startup(struct radeon_device *rdev) return r; } r420_cp_errata_init(rdev); - r = r100_wb_init(rdev); - if (r) { - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); - } r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -302,7 +304,7 @@ int r420_suspend(struct radeon_device *rdev) { r420_cp_errata_fini(rdev); r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); r100_irq_disable(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_disable(rdev); @@ -314,7 +316,7 @@ int r420_suspend(struct radeon_device *rdev) void r420_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) @@ -418,7 +420,7 @@ int r420_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 1458dee..3c8677f 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -181,6 +181,12 @@ static int r520_startup(struct radeon_device *rdev) if (r) return r; } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -190,9 +196,6 @@ static int r520_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -295,7 +298,7 @@ int r520_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 7b65e4ef..33952a1 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1608,8 +1608,11 @@ void r600_gpu_init(struct radeon_device *rdev) rdev->config.r600.tiling_npipes = rdev->config.r600.max_tile_pipes; rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); - tiling_config |= GROUP_SIZE(0); - rdev->config.r600.tiling_group_size = 256; + tiling_config |= GROUP_SIZE((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); + if ((ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) + rdev->config.r600.tiling_group_size = 512; + else + rdev->config.r600.tiling_group_size = 256; tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT; if (tmp > 3) { tiling_config |= ROW_TILING(3); @@ -1920,6 +1923,7 @@ void r600_cp_stop(struct radeon_device *rdev) { rdev->mc.active_vram_size = rdev->mc.visible_vram_size; WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); + WREG32(SCRATCH_UMSK, 0); } int r600_init_microcode(struct radeon_device *rdev) @@ -2152,7 +2156,7 @@ int r600_cp_resume(struct radeon_device *rdev) /* Set ring buffer size */ rb_bufsz = drm_order(rdev->cp.ring_size / 8); - tmp = RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN tmp |= BUF_SWAP_32BIT; #endif @@ -2166,8 +2170,19 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); WREG32(CP_RB_WPTR, 0); - WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); - WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); + WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); + WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + + if (rdev->wb.enabled) + WREG32(SCRATCH_UMSK, 0xff); + else { + tmp |= RB_NO_UPDATE; + WREG32(SCRATCH_UMSK, 0); + } + mdelay(1); WREG32(CP_RB_CNTL, tmp); @@ -2219,9 +2234,10 @@ void r600_scratch_init(struct radeon_device *rdev) int i; rdev->scratch.num_reg = 7; + rdev->scratch.reg_base = SCRATCH_REG0; for (i = 0; i < rdev->scratch.num_reg; i++) { rdev->scratch.free[i] = true; - rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4); + rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); } } @@ -2265,88 +2281,34 @@ int r600_ring_test(struct radeon_device *rdev) return r; } -void r600_wb_disable(struct radeon_device *rdev) -{ - int r; - - WREG32(SCRATCH_UMSK, 0); - if (rdev->wb.wb_obj) { - r = radeon_bo_reserve(rdev->wb.wb_obj, false); - if (unlikely(r != 0)) - return; - radeon_bo_kunmap(rdev->wb.wb_obj); - radeon_bo_unpin(rdev->wb.wb_obj); - radeon_bo_unreserve(rdev->wb.wb_obj); - } -} - -void r600_wb_fini(struct radeon_device *rdev) -{ - r600_wb_disable(rdev); - if (rdev->wb.wb_obj) { - radeon_bo_unref(&rdev->wb.wb_obj); - rdev->wb.wb = NULL; - rdev->wb.wb_obj = NULL; - } -} - -int r600_wb_enable(struct radeon_device *rdev) -{ - int r; - - if (rdev->wb.wb_obj == NULL) { - r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, &rdev->wb.wb_obj); - if (r) { - dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); - return r; - } - r = radeon_bo_reserve(rdev->wb.wb_obj, false); - if (unlikely(r != 0)) { - r600_wb_fini(rdev); - return r; - } - r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, - &rdev->wb.gpu_addr); - if (r) { - radeon_bo_unreserve(rdev->wb.wb_obj); - dev_warn(rdev->dev, "(%d) pin WB bo failed\n", r); - r600_wb_fini(rdev); - return r; - } - r = radeon_bo_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); - radeon_bo_unreserve(rdev->wb.wb_obj); - if (r) { - dev_warn(rdev->dev, "(%d) map WB bo failed\n", r); - r600_wb_fini(rdev); - return r; - } - } - WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF); - WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC); - WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF); - WREG32(SCRATCH_UMSK, 0xff); - return 0; -} - void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { - /* Also consider EVENT_WRITE_EOP. it handles the interrupts + timestamps + events */ - - radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); - radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT); - /* wait for 3D idle clean */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); - radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); - /* Emit fence sequence & fire IRQ */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); - radeon_ring_write(rdev, fence->seq); - /* CP_INTERRUPT packet 3 no longer exists, use packet 0 */ - radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0)); - radeon_ring_write(rdev, RB_INT_STAT); + if (rdev->wb.use_event) { + u64 addr = rdev->wb.gpu_addr + R600_WB_EVENT_OFFSET + + (u64)(rdev->fence_drv.scratch_reg - rdev->scratch.reg_base); + /* EVENT_WRITE_EOP - flush caches, send int */ + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5)); + radeon_ring_write(rdev, addr & 0xffffffff); + radeon_ring_write(rdev, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); + radeon_ring_write(rdev, fence->seq); + radeon_ring_write(rdev, 0); + } else { + radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0)); + radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0)); + /* wait for 3D idle clean */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit); + /* Emit fence sequence & fire IRQ */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(rdev, fence->seq); + /* CP_INTERRUPT packet 3 no longer exists, use packet 0 */ + radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0)); + radeon_ring_write(rdev, RB_INT_STAT); + } } int r600_copy_blit(struct radeon_device *rdev, @@ -2428,19 +2390,12 @@ int r600_startup(struct radeon_device *rdev) rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } - /* pin copy shader into vram */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } - } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -2459,8 +2414,7 @@ int r600_startup(struct radeon_device *rdev) r = r600_cp_resume(rdev); if (r) return r; - /* write back buffer are not vital so don't worry about failure */ - r600_wb_enable(rdev); + return 0; } @@ -2519,7 +2473,7 @@ int r600_suspend(struct radeon_device *rdev) r600_cp_stop(rdev); rdev->cp.ready = false; r600_irq_suspend(rdev); - r600_wb_disable(rdev); + radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); /* unpin shaders bo */ if (rdev->r600_blit.shader_obj) { @@ -2616,8 +2570,8 @@ int r600_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r600_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -2647,8 +2601,8 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_blit_fini(rdev); r600_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); radeon_agp_fini(rdev); @@ -2983,10 +2937,13 @@ int r600_irq_init(struct radeon_device *rdev) ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | IH_WPTR_OVERFLOW_CLEAR | (rb_bufsz << 1)); - /* WPTR writeback, not yet */ - /*ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;*/ - WREG32(IH_RB_WPTR_ADDR_LO, 0); - WREG32(IH_RB_WPTR_ADDR_HI, 0); + + if (rdev->wb.enabled) + ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; + + /* set the writeback address whether it's enabled or not */ + WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); + WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); WREG32(IH_RB_CNTL, ih_rb_cntl); @@ -3070,6 +3027,7 @@ int r600_irq_set(struct radeon_device *rdev) if (rdev->irq.sw_int) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; + cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0]) { DRM_DEBUG("r600_irq_set: vblank 0\n"); @@ -3244,8 +3202,10 @@ static inline u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; - /* XXX use writeback */ - wptr = RREG32(IH_RB_WPTR); + if (rdev->wb.enabled) + wptr = rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]; + else + wptr = RREG32(IH_RB_WPTR); if (wptr & RB_OVERFLOW) { /* When a ring buffer overflow happen start parsing interrupt @@ -3433,6 +3393,7 @@ restart_ih: break; case 181: /* CP EOP event */ DRM_DEBUG("IH: CP EOP\n"); + radeon_fence_process(rdev); break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: CP EOP\n"); diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 3473c00..8362974 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -472,9 +472,10 @@ int r600_blit_init(struct radeon_device *rdev) u32 packet2s[16]; int num_packet2s = 0; - /* don't reinitialize blit */ + /* pin copy shader into vram if already initialized */ if (rdev->r600_blit.shader_obj) - return 0; + goto done; + mutex_init(&rdev->r600_blit.mutex); rdev->r600_blit.state_offset = 0; @@ -532,6 +533,18 @@ int r600_blit_init(struct radeon_device *rdev) memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4); radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); + +done: + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } rdev->mc.active_vram_size = rdev->mc.real_vram_size; return 0; } @@ -554,7 +567,7 @@ void r600_blit_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->r600_blit.shader_obj); } -int r600_vb_ib_get(struct radeon_device *rdev) +static int r600_vb_ib_get(struct radeon_device *rdev) { int r; r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); @@ -568,7 +581,7 @@ int r600_vb_ib_get(struct radeon_device *rdev) return 0; } -void r600_vb_ib_put(struct radeon_device *rdev) +static void r600_vb_ib_put(struct radeon_device *rdev) { radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); @@ -650,8 +663,8 @@ void r600_kms_blit_copy(struct radeon_device *rdev, int src_x = src_gpu_addr & 255; int dst_x = dst_gpu_addr & 255; int h = 1; - src_gpu_addr = src_gpu_addr & ~255; - dst_gpu_addr = dst_gpu_addr & ~255; + src_gpu_addr = src_gpu_addr & ~255ULL; + dst_gpu_addr = dst_gpu_addr & ~255ULL; if (!src_x && !dst_x) { h = (cur_size / max_bytes); @@ -672,17 +685,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev, if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { WARN_ON(1); - -#if 0 - r600_vb_ib_put(rdev); - - r600_nomm_put_vb(dev); - r600_nomm_get_vb(dev); - if (!dev_priv->blit_vb) - return; - set_shaders(dev); - vb = r600_nomm_get_vb_ptr(dev); -#endif } vb[0] = i2f(dst_x); @@ -744,8 +746,8 @@ void r600_kms_blit_copy(struct radeon_device *rdev, int src_x = (src_gpu_addr & 255); int dst_x = (dst_gpu_addr & 255); int h = 1; - src_gpu_addr = src_gpu_addr & ~255; - dst_gpu_addr = dst_gpu_addr & ~255; + src_gpu_addr = src_gpu_addr & ~255ULL; + dst_gpu_addr = dst_gpu_addr & ~255ULL; if (!src_x && !dst_x) { h = (cur_size / max_bytes); @@ -767,17 +769,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev, if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { WARN_ON(1); } -#if 0 - if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) { - r600_nomm_put_vb(dev); - r600_nomm_get_vb(dev); - if (!rdev->blit_vb) - return; - - set_shaders(dev); - vb = r600_nomm_get_vb_ptr(dev); - } -#endif vb[0] = i2f(dst_x / 4); vb[1] = 0; diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 250a3a9..7b294c1 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -170,6 +170,7 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) struct r600_cs_track *track = p->track; u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align; volatile u32 *ib = p->ib->ptr; + unsigned array_mode; if (G_0280A0_TILE_MODE(track->cb_color_info[i])) { dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n"); @@ -185,12 +186,12 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) /* pitch is the number of 8x8 tiles per row */ pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1; slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1; - height = size / (pitch * 8 * bpe); + slice_tile_max *= 64; + height = slice_tile_max / (pitch * 8); if (height > 8192) height = 8192; - if (height > 7) - height &= ~0x7; - switch (G_0280A0_ARRAY_MODE(track->cb_color_info[i])) { + array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]); + switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: /* technically height & 0x7 */ break; @@ -214,6 +215,9 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) __func__, __LINE__, pitch); return -EINVAL; } + /* avoid breaking userspace */ + if (height > 7) + height &= ~0x7; if (!IS_ALIGNED(height, 8)) { dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", __func__, __LINE__, height); @@ -222,13 +226,13 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) break; case V_0280A0_ARRAY_2D_TILED_THIN1: pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks)); + (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks)) / 8; if (!IS_ALIGNED(pitch, pitch_align)) { dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", __func__, __LINE__, pitch); return -EINVAL; } - if (!IS_ALIGNED((height / 8), track->nbanks)) { + if (!IS_ALIGNED((height / 8), track->npipes)) { dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", __func__, __LINE__, height); return -EINVAL; @@ -243,8 +247,18 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) /* check offset */ tmp = height * pitch * 8 * bpe; if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { - dev_warn(p->dev, "%s offset[%d] %d too big\n", __func__, i, track->cb_color_bo_offset[i]); - return -EINVAL; + if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) { + /* the initial DDX does bad things with the CB size occasionally */ + /* it rounds up height too far for slice tile max but the BO is smaller */ + tmp = (height - 7) * 8 * bpe; + if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { + dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i])); + return -EINVAL; + } + } else { + dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i])); + return -EINVAL; + } } if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) { dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]); @@ -361,13 +375,13 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) break; case V_028010_ARRAY_2D_TILED_THIN1: pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / bpe) * track->nbanks)); + (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8; if (!IS_ALIGNED(pitch, pitch_align)) { dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", __func__, __LINE__, pitch); return -EINVAL; } - if ((height / 8) & (track->nbanks - 1)) { + if (!IS_ALIGNED((height / 8), track->npipes)) { dev_warn(p->dev, "%s:%d db height (%d) invalid\n", __func__, __LINE__, height); return -EINVAL; @@ -1138,7 +1152,7 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 i break; case V_038000_ARRAY_2D_TILED_THIN1: pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / bpe) * track->nbanks)); + (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8; if (!IS_ALIGNED(pitch, pitch_align)) { dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", __func__, __LINE__, pitch); diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 858a192..966a793 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -474,6 +474,7 @@ #define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 #define VTX_REUSE_DEPTH_MASK 0x000000FF #define VGT_EVENT_INITIATOR 0x28a90 +# define CACHE_FLUSH_AND_INV_EVENT_TS (0x14 << 0) # define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) #define VM_CONTEXT0_CNTL 0x1410 @@ -775,7 +776,27 @@ #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) #define PACKET3_COND_WRITE 0x45 #define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ #define PACKET3_EVENT_WRITE_EOP 0x47 +#define DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit counter value + */ +#define INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ #define PACKET3_ONE_REG_WRITE 0x57 #define PACKET3_SET_CONFIG_REG 0x68 #define PACKET3_SET_CONFIG_REG_OFFSET 0x00008000 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 9ff38c9..73f600d3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -88,7 +88,6 @@ extern int radeon_benchmarking; extern int radeon_testing; extern int radeon_connector_table; extern int radeon_tv; -extern int radeon_new_pll; extern int radeon_audio; extern int radeon_disp_priority; extern int radeon_hw_i2c; @@ -366,6 +365,7 @@ bool radeon_atombios_sideport_present(struct radeon_device *rdev); */ struct radeon_scratch { unsigned num_reg; + uint32_t reg_base; bool free[32]; uint32_t reg[32]; }; @@ -594,8 +594,15 @@ struct radeon_wb { struct radeon_bo *wb_obj; volatile uint32_t *wb; uint64_t gpu_addr; + bool enabled; + bool use_event; }; +#define RADEON_WB_SCRATCH_OFFSET 0 +#define RADEON_WB_CP_RPTR_OFFSET 1024 +#define R600_WB_IH_WPTR_OFFSET 2048 +#define R600_WB_EVENT_OFFSET 3072 + /** * struct radeon_pm - power management datas * @max_bandwidth: maximum bandwidth the gpu has (MByte/s) @@ -1124,6 +1131,12 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, int size_bytes); +/* evergreen blit */ +int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); +void evergreen_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes); static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { @@ -1341,6 +1354,9 @@ extern void radeon_update_bandwidth_info(struct radeon_device *rdev); extern void radeon_update_display_priority(struct radeon_device *rdev); extern bool radeon_boot_test_post_card(struct radeon_device *rdev); extern void radeon_scratch_init(struct radeon_device *rdev); +extern void radeon_wb_fini(struct radeon_device *rdev); +extern int radeon_wb_init(struct radeon_device *rdev); +extern void radeon_wb_disable(struct radeon_device *rdev); extern void radeon_surface_init(struct radeon_device *rdev); extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable); @@ -1425,9 +1441,6 @@ extern int r600_pcie_gart_init(struct radeon_device *rdev); extern void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); extern int r600_ib_test(struct radeon_device *rdev); extern int r600_ring_test(struct radeon_device *rdev); -extern void r600_wb_fini(struct radeon_device *rdev); -extern int r600_wb_enable(struct radeon_device *rdev); -extern void r600_wb_disable(struct radeon_device *rdev); extern void r600_scratch_init(struct radeon_device *rdev); extern int r600_blit_init(struct radeon_device *rdev); extern void r600_blit_fini(struct radeon_device *rdev); @@ -1465,6 +1478,8 @@ extern void r700_cp_stop(struct radeon_device *rdev); extern void r700_cp_fini(struct radeon_device *rdev); extern void evergreen_disable_interrupt_state(struct radeon_device *rdev); extern int evergreen_irq_set(struct radeon_device *rdev); +extern int evergreen_blit_init(struct radeon_device *rdev); +extern void evergreen_blit_fini(struct radeon_device *rdev); /* radeon_acpi.c */ #if defined(CONFIG_ACPI) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 25e1dd1..64fb89e 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -726,9 +726,9 @@ static struct radeon_asic evergreen_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = NULL, - .copy_dma = NULL, - .copy = NULL, + .copy_blit = &evergreen_copy_blit, + .copy_dma = &evergreen_copy_blit, + .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index a5aff75..7409882 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -108,9 +108,6 @@ void r100_irq_disable(struct radeon_device *rdev); void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save); void r100_vram_init_sizes(struct radeon_device *rdev); -void r100_wb_disable(struct radeon_device *rdev); -void r100_wb_fini(struct radeon_device *rdev); -int r100_wb_init(struct radeon_device *rdev); int r100_cp_reset(struct radeon_device *rdev); void r100_vga_render_disable(struct radeon_device *rdev); void r100_restore_sanity(struct radeon_device *rdev); @@ -257,11 +254,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int r600_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -int r600_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, - uint64_t dst_offset, - unsigned num_pages, - struct radeon_fence *fence); int r600_irq_process(struct radeon_device *rdev); int r600_irq_set(struct radeon_device *rdev); bool r600_gpu_is_lockup(struct radeon_device *rdev); @@ -307,6 +299,9 @@ int evergreen_resume(struct radeon_device *rdev); bool evergreen_gpu_is_lockup(struct radeon_device *rdev); int evergreen_asic_reset(struct radeon_device *rdev); void evergreen_bandwidth_update(struct radeon_device *rdev); +int evergreen_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence); void evergreen_hpd_init(struct radeon_device *rdev); void evergreen_hpd_fini(struct radeon_device *rdev); bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 8e43dda..04cac7e 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1112,8 +1112,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) * pre-DCE 3.0 r6xx hardware. This might need to be adjusted per * family. */ - if (!radeon_new_pll) - p1pll->pll_out_min = 64800; + p1pll->pll_out_min = 64800; } p1pll->pll_in_min = @@ -1277,36 +1276,27 @@ bool radeon_atombios_get_tmds_info(struct radeon_encoder *encoder, return false; } -static struct radeon_atom_ss *radeon_atombios_get_ss_info(struct - radeon_encoder - *encoder, - int id) +bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, + struct radeon_atom_ss *ss, + int id) { - struct drm_device *dev = encoder->base.dev; - struct radeon_device *rdev = dev->dev_private; struct radeon_mode_info *mode_info = &rdev->mode_info; int index = GetIndexIntoMasterTable(DATA, PPLL_SS_Info); - uint16_t data_offset; + uint16_t data_offset, size; struct _ATOM_SPREAD_SPECTRUM_INFO *ss_info; uint8_t frev, crev; - struct radeon_atom_ss *ss = NULL; - int i; - - if (id > ATOM_MAX_SS_ENTRY) - return NULL; + int i, num_indices; - if (atom_parse_data_header(mode_info->atom_context, index, NULL, + memset(ss, 0, sizeof(struct radeon_atom_ss)); + if (atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { ss_info = (struct _ATOM_SPREAD_SPECTRUM_INFO *)(mode_info->atom_context->bios + data_offset); - ss = - kzalloc(sizeof(struct radeon_atom_ss), GFP_KERNEL); - - if (!ss) - return NULL; + num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / + sizeof(ATOM_SPREAD_SPECTRUM_ASSIGNMENT); - for (i = 0; i < ATOM_MAX_SS_ENTRY; i++) { + for (i = 0; i < num_indices; i++) { if (ss_info->asSS_Info[i].ucSS_Id == id) { ss->percentage = le16_to_cpu(ss_info->asSS_Info[i].usSpreadSpectrumPercentage); @@ -1315,11 +1305,88 @@ static struct radeon_atom_ss *radeon_atombios_get_ss_info(struct ss->delay = ss_info->asSS_Info[i].ucSS_Delay; ss->range = ss_info->asSS_Info[i].ucSS_Range; ss->refdiv = ss_info->asSS_Info[i].ucRecommendedRef_Div; - break; + return true; + } + } + } + return false; +} + +union asic_ss_info { + struct _ATOM_ASIC_INTERNAL_SS_INFO info; + struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 info_2; + struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3; +}; + +bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, + struct radeon_atom_ss *ss, + int id, u32 clock) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info); + uint16_t data_offset, size; + union asic_ss_info *ss_info; + uint8_t frev, crev; + int i, num_indices; + + memset(ss, 0, sizeof(struct radeon_atom_ss)); + if (atom_parse_data_header(mode_info->atom_context, index, &size, + &frev, &crev, &data_offset)) { + + ss_info = + (union asic_ss_info *)(mode_info->atom_context->bios + data_offset); + + switch (frev) { + case 1: + num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / + sizeof(ATOM_ASIC_SS_ASSIGNMENT); + + for (i = 0; i < num_indices; i++) { + if ((ss_info->info.asSpreadSpectrum[i].ucClockIndication == id) && + (clock <= ss_info->info.asSpreadSpectrum[i].ulTargetClockRange)) { + ss->percentage = + le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadSpectrumPercentage); + ss->type = ss_info->info.asSpreadSpectrum[i].ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadRateInKhz); + return true; + } + } + break; + case 2: + num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2); + for (i = 0; i < num_indices; i++) { + if ((ss_info->info_2.asSpreadSpectrum[i].ucClockIndication == id) && + (clock <= ss_info->info_2.asSpreadSpectrum[i].ulTargetClockRange)) { + ss->percentage = + le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadSpectrumPercentage); + ss->type = ss_info->info_2.asSpreadSpectrum[i].ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadRateIn10Hz); + return true; + } } + break; + case 3: + num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3); + for (i = 0; i < num_indices; i++) { + if ((ss_info->info_3.asSpreadSpectrum[i].ucClockIndication == id) && + (clock <= ss_info->info_3.asSpreadSpectrum[i].ulTargetClockRange)) { + ss->percentage = + le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadSpectrumPercentage); + ss->type = ss_info->info_3.asSpreadSpectrum[i].ucSpreadSpectrumMode; + ss->rate = le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadRateIn10Hz); + return true; + } + } + break; + default: + DRM_ERROR("Unsupported ASIC_InternalSS_Info table: %d %d\n", frev, crev); + break; } + } - return ss; + return false; } union lvds_info { @@ -1371,7 +1438,7 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncWidth); lvds->panel_pwr_delay = le16_to_cpu(lvds_info->info.usOffDelayInMs); - lvds->lvds_misc = lvds_info->info.ucLVDS_Misc; + lvds->lcd_misc = lvds_info->info.ucLVDS_Misc; misc = le16_to_cpu(lvds_info->info.sLCDTiming.susModeMiscInfo.usAccess); if (misc & ATOM_VSYNC_POLARITY) @@ -1388,19 +1455,7 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct /* set crtc values */ drm_mode_set_crtcinfo(&lvds->native_mode, CRTC_INTERLACE_HALVE_V); - lvds->ss = radeon_atombios_get_ss_info(encoder, lvds_info->info.ucSS_Id); - - if (ASIC_IS_AVIVO(rdev)) { - if (radeon_new_pll == 0) - lvds->pll_algo = PLL_ALGO_LEGACY; - else - lvds->pll_algo = PLL_ALGO_NEW; - } else { - if (radeon_new_pll == 1) - lvds->pll_algo = PLL_ALGO_NEW; - else - lvds->pll_algo = PLL_ALGO_LEGACY; - } + lvds->lcd_ss_id = lvds_info->info.ucSS_Id; encoder->native_mode = lvds->native_mode; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index ecc1a8f..4dac4b0 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -326,6 +326,34 @@ int radeon_connector_set_property(struct drm_connector *connector, struct drm_pr } } + if (property == rdev->mode_info.underscan_hborder_property) { + /* need to find digital encoder on connector */ + encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); + if (!encoder) + return 0; + + radeon_encoder = to_radeon_encoder(encoder); + + if (radeon_encoder->underscan_hborder != val) { + radeon_encoder->underscan_hborder = val; + radeon_property_change_mode(&radeon_encoder->base); + } + } + + if (property == rdev->mode_info.underscan_vborder_property) { + /* need to find digital encoder on connector */ + encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); + if (!encoder) + return 0; + + radeon_encoder = to_radeon_encoder(encoder); + + if (radeon_encoder->underscan_vborder != val) { + radeon_encoder->underscan_vborder = val; + radeon_property_change_mode(&radeon_encoder->base); + } + } + if (property == rdev->mode_info.tv_std_property) { encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TVDAC); if (!encoder) { @@ -635,6 +663,11 @@ radeon_vga_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; } } else { + + /* if we aren't forcing don't do destructive polling */ + if (!force) + return connector->status; + if (radeon_connector->dac_load_detect && encoder) { encoder_funcs = encoder->helper_private; ret = encoder_funcs->detect(encoder, connector); @@ -822,6 +855,11 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) if ((ret == connector_status_connected) && (radeon_connector->use_digital == true)) goto out; + if (!force) { + ret = connector->status; + goto out; + } + /* find analog encoder */ if (radeon_connector->dac_load_detect) { for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { @@ -1153,10 +1191,17 @@ radeon_add_atom_connector(struct drm_device *dev, drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.coherent_mode_property, 1); - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_AVIVO(rdev)) { drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.underscan_property, UNDERSCAN_AUTO); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_hborder_property, + 0); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_vborder_property, + 0); + } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; drm_connector_attach_property(&radeon_connector->base, @@ -1181,10 +1226,17 @@ radeon_add_atom_connector(struct drm_device *dev, drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.coherent_mode_property, 1); - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_AVIVO(rdev)) { drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.underscan_property, UNDERSCAN_AUTO); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_hborder_property, + 0); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_vborder_property, + 0); + } subpixel_order = SubPixelHorizontalRGB; break; case DRM_MODE_CONNECTOR_DisplayPort: @@ -1212,10 +1264,17 @@ radeon_add_atom_connector(struct drm_device *dev, drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.coherent_mode_property, 1); - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_AVIVO(rdev)) { drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.underscan_property, UNDERSCAN_AUTO); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_hborder_property, + 0); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.underscan_vborder_property, + 0); + } break; case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Composite: diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index fcc79b5..6d64a27 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -268,7 +268,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } r = radeon_ib_schedule(rdev, parser.ib); if (r) { - DRM_ERROR("Faild to schedule IB !\n"); + DRM_ERROR("Failed to schedule IB !\n"); } radeon_cs_parser_fini(&parser, r); mutex_unlock(&rdev->cs_mutex); diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 5731fc9..017ac54 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -118,22 +118,25 @@ static void radeon_show_cursor(struct drm_crtc *crtc) } static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj, - uint32_t gpu_addr) + uint64_t gpu_addr) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, 0); - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(gpu_addr)); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + gpu_addr & 0xffffffff); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, 0); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, 0); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); } - WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr); + WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + gpu_addr & 0xffffffff); } else { radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ @@ -203,6 +206,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + int w = radeon_crtc->cursor_width; if (x < 0) xorigin = -x + 1; @@ -213,22 +217,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (yorigin >= CURSOR_HEIGHT) yorigin = CURSOR_HEIGHT - 1; - radeon_lock_cursor(crtc, true); - if (ASIC_IS_DCE4(rdev)) { - /* cursors are offset into the total surface */ - x += crtc->x; - y += crtc->y; - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - - /* XXX: check if evergreen has the same issues as avivo chips */ - WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, - ((xorigin ? 0 : x) << 16) | - (yorigin ? 0 : y)); - WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); - WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, - ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1)); - } else if (ASIC_IS_AVIVO(rdev)) { - int w = radeon_crtc->cursor_width; + if (ASIC_IS_AVIVO(rdev)) { int i = 0; struct drm_crtc *crtc_p; @@ -260,7 +249,17 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (w <= 0) w = 1; } + } + radeon_lock_cursor(crtc, true); + if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, + ((xorigin ? 0 : x) << 16) | + (yorigin ? 0 : y)); + WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, + ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); + } else if (ASIC_IS_AVIVO(rdev)) { WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, ((xorigin ? 0 : x) << 16) | (yorigin ? 0 : y)); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 256d204..8adfedf 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -117,9 +117,10 @@ void radeon_scratch_init(struct radeon_device *rdev) } else { rdev->scratch.num_reg = 7; } + rdev->scratch.reg_base = RADEON_SCRATCH_REG0; for (i = 0; i < rdev->scratch.num_reg; i++) { rdev->scratch.free[i] = true; - rdev->scratch.reg[i] = RADEON_SCRATCH_REG0 + (i * 4); + rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); } } @@ -149,6 +150,86 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) } } +void radeon_wb_disable(struct radeon_device *rdev) +{ + int r; + + if (rdev->wb.wb_obj) { + r = radeon_bo_reserve(rdev->wb.wb_obj, false); + if (unlikely(r != 0)) + return; + radeon_bo_kunmap(rdev->wb.wb_obj); + radeon_bo_unpin(rdev->wb.wb_obj); + radeon_bo_unreserve(rdev->wb.wb_obj); + } + rdev->wb.enabled = false; +} + +void radeon_wb_fini(struct radeon_device *rdev) +{ + radeon_wb_disable(rdev); + if (rdev->wb.wb_obj) { + radeon_bo_unref(&rdev->wb.wb_obj); + rdev->wb.wb = NULL; + rdev->wb.wb_obj = NULL; + } +} + +int radeon_wb_init(struct radeon_device *rdev) +{ + int r; + + if (rdev->wb.wb_obj == NULL) { + r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, true, + RADEON_GEM_DOMAIN_GTT, &rdev->wb.wb_obj); + if (r) { + dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); + return r; + } + } + r = radeon_bo_reserve(rdev->wb.wb_obj, false); + if (unlikely(r != 0)) { + radeon_wb_fini(rdev); + return r; + } + r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, + &rdev->wb.gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->wb.wb_obj); + dev_warn(rdev->dev, "(%d) pin WB bo failed\n", r); + radeon_wb_fini(rdev); + return r; + } + r = radeon_bo_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb); + radeon_bo_unreserve(rdev->wb.wb_obj); + if (r) { + dev_warn(rdev->dev, "(%d) map WB bo failed\n", r); + radeon_wb_fini(rdev); + return r; + } + + /* disable event_write fences */ + rdev->wb.use_event = false; + /* disabled via module param */ + if (radeon_no_wb == 1) + rdev->wb.enabled = false; + else { + /* often unreliable on AGP */ + if (rdev->flags & RADEON_IS_AGP) { + rdev->wb.enabled = false; + } else { + rdev->wb.enabled = true; + /* event_write fences are only available on r600+ */ + if (rdev->family >= CHIP_R600) + rdev->wb.use_event = true; + } + } + + dev_info(rdev->dev, "WB %sabled\n", rdev->wb.enabled ? "en" : "dis"); + + return 0; +} + /** * radeon_vram_location - try to find VRAM location * @rdev: radeon device structure holding all necessary informations diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index b92d2f2..0383631 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -454,13 +454,13 @@ static inline uint32_t radeon_div(uint64_t n, uint32_t d) return n; } -static void radeon_compute_pll_legacy(struct radeon_pll *pll, - uint64_t freq, - uint32_t *dot_clock_p, - uint32_t *fb_div_p, - uint32_t *frac_fb_div_p, - uint32_t *ref_div_p, - uint32_t *post_div_p) +void radeon_compute_pll(struct radeon_pll *pll, + uint64_t freq, + uint32_t *dot_clock_p, + uint32_t *fb_div_p, + uint32_t *frac_fb_div_p, + uint32_t *ref_div_p, + uint32_t *post_div_p) { uint32_t min_ref_div = pll->min_ref_div; uint32_t max_ref_div = pll->max_ref_div; @@ -513,7 +513,7 @@ static void radeon_compute_pll_legacy(struct radeon_pll *pll, max_fractional_feed_div = pll->max_frac_feedback_div; } - for (post_div = min_post_div; post_div <= max_post_div; ++post_div) { + for (post_div = max_post_div; post_div >= min_post_div; --post_div) { uint32_t ref_div; if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1)) @@ -631,214 +631,6 @@ static void radeon_compute_pll_legacy(struct radeon_pll *pll, *post_div_p = best_post_div; } -static bool -calc_fb_div(struct radeon_pll *pll, - uint32_t freq, - uint32_t post_div, - uint32_t ref_div, - uint32_t *fb_div, - uint32_t *fb_div_frac) -{ - fixed20_12 feedback_divider, a, b; - u32 vco_freq; - - vco_freq = freq * post_div; - /* feedback_divider = vco_freq * ref_div / pll->reference_freq; */ - a.full = dfixed_const(pll->reference_freq); - feedback_divider.full = dfixed_const(vco_freq); - feedback_divider.full = dfixed_div(feedback_divider, a); - a.full = dfixed_const(ref_div); - feedback_divider.full = dfixed_mul(feedback_divider, a); - - if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) { - /* feedback_divider = floor((feedback_divider * 10.0) + 0.5) * 0.1; */ - a.full = dfixed_const(10); - feedback_divider.full = dfixed_mul(feedback_divider, a); - feedback_divider.full += dfixed_const_half(0); - feedback_divider.full = dfixed_floor(feedback_divider); - feedback_divider.full = dfixed_div(feedback_divider, a); - - /* *fb_div = floor(feedback_divider); */ - a.full = dfixed_floor(feedback_divider); - *fb_div = dfixed_trunc(a); - /* *fb_div_frac = fmod(feedback_divider, 1.0) * 10.0; */ - a.full = dfixed_const(10); - b.full = dfixed_mul(feedback_divider, a); - - feedback_divider.full = dfixed_floor(feedback_divider); - feedback_divider.full = dfixed_mul(feedback_divider, a); - feedback_divider.full = b.full - feedback_divider.full; - *fb_div_frac = dfixed_trunc(feedback_divider); - } else { - /* *fb_div = floor(feedback_divider + 0.5); */ - feedback_divider.full += dfixed_const_half(0); - feedback_divider.full = dfixed_floor(feedback_divider); - - *fb_div = dfixed_trunc(feedback_divider); - *fb_div_frac = 0; - } - - if (((*fb_div) < pll->min_feedback_div) || ((*fb_div) > pll->max_feedback_div)) - return false; - else - return true; -} - -static bool -calc_fb_ref_div(struct radeon_pll *pll, - uint32_t freq, - uint32_t post_div, - uint32_t *fb_div, - uint32_t *fb_div_frac, - uint32_t *ref_div) -{ - fixed20_12 ffreq, max_error, error, pll_out, a; - u32 vco; - u32 pll_out_min, pll_out_max; - - if (pll->flags & RADEON_PLL_IS_LCD) { - pll_out_min = pll->lcd_pll_out_min; - pll_out_max = pll->lcd_pll_out_max; - } else { - pll_out_min = pll->pll_out_min; - pll_out_max = pll->pll_out_max; - } - - ffreq.full = dfixed_const(freq); - /* max_error = ffreq * 0.0025; */ - a.full = dfixed_const(400); - max_error.full = dfixed_div(ffreq, a); - - for ((*ref_div) = pll->min_ref_div; (*ref_div) < pll->max_ref_div; ++(*ref_div)) { - if (calc_fb_div(pll, freq, post_div, (*ref_div), fb_div, fb_div_frac)) { - vco = pll->reference_freq * (((*fb_div) * 10) + (*fb_div_frac)); - vco = vco / ((*ref_div) * 10); - - if ((vco < pll_out_min) || (vco > pll_out_max)) - continue; - - /* pll_out = vco / post_div; */ - a.full = dfixed_const(post_div); - pll_out.full = dfixed_const(vco); - pll_out.full = dfixed_div(pll_out, a); - - if (pll_out.full >= ffreq.full) { - error.full = pll_out.full - ffreq.full; - if (error.full <= max_error.full) - return true; - } - } - } - return false; -} - -static void radeon_compute_pll_new(struct radeon_pll *pll, - uint64_t freq, - uint32_t *dot_clock_p, - uint32_t *fb_div_p, - uint32_t *frac_fb_div_p, - uint32_t *ref_div_p, - uint32_t *post_div_p) -{ - u32 fb_div = 0, fb_div_frac = 0, post_div = 0, ref_div = 0; - u32 best_freq = 0, vco_frequency; - u32 pll_out_min, pll_out_max; - - if (pll->flags & RADEON_PLL_IS_LCD) { - pll_out_min = pll->lcd_pll_out_min; - pll_out_max = pll->lcd_pll_out_max; - } else { - pll_out_min = pll->pll_out_min; - pll_out_max = pll->pll_out_max; - } - - /* freq = freq / 10; */ - do_div(freq, 10); - - if (pll->flags & RADEON_PLL_USE_POST_DIV) { - post_div = pll->post_div; - if ((post_div < pll->min_post_div) || (post_div > pll->max_post_div)) - goto done; - - vco_frequency = freq * post_div; - if ((vco_frequency < pll_out_min) || (vco_frequency > pll_out_max)) - goto done; - - if (pll->flags & RADEON_PLL_USE_REF_DIV) { - ref_div = pll->reference_div; - if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div)) - goto done; - if (!calc_fb_div(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac)) - goto done; - } - } else { - for (post_div = pll->max_post_div; post_div >= pll->min_post_div; --post_div) { - if (pll->flags & RADEON_PLL_LEGACY) { - if ((post_div == 5) || - (post_div == 7) || - (post_div == 9) || - (post_div == 10) || - (post_div == 11)) - continue; - } - - if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1)) - continue; - - vco_frequency = freq * post_div; - if ((vco_frequency < pll_out_min) || (vco_frequency > pll_out_max)) - continue; - if (pll->flags & RADEON_PLL_USE_REF_DIV) { - ref_div = pll->reference_div; - if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div)) - goto done; - if (calc_fb_div(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac)) - break; - } else { - if (calc_fb_ref_div(pll, freq, post_div, &fb_div, &fb_div_frac, &ref_div)) - break; - } - } - } - - best_freq = pll->reference_freq * 10 * fb_div; - best_freq += pll->reference_freq * fb_div_frac; - best_freq = best_freq / (ref_div * post_div); - -done: - if (best_freq == 0) - DRM_ERROR("Couldn't find valid PLL dividers\n"); - - *dot_clock_p = best_freq / 10; - *fb_div_p = fb_div; - *frac_fb_div_p = fb_div_frac; - *ref_div_p = ref_div; - *post_div_p = post_div; - - DRM_DEBUG_KMS("%u %d.%d, %d, %d\n", *dot_clock_p, *fb_div_p, *frac_fb_div_p, *ref_div_p, *post_div_p); -} - -void radeon_compute_pll(struct radeon_pll *pll, - uint64_t freq, - uint32_t *dot_clock_p, - uint32_t *fb_div_p, - uint32_t *frac_fb_div_p, - uint32_t *ref_div_p, - uint32_t *post_div_p) -{ - switch (pll->algo) { - case PLL_ALGO_NEW: - radeon_compute_pll_new(pll, freq, dot_clock_p, fb_div_p, - frac_fb_div_p, ref_div_p, post_div_p); - break; - case PLL_ALGO_LEGACY: - default: - radeon_compute_pll_legacy(pll, freq, dot_clock_p, fb_div_p, - frac_fb_div_p, ref_div_p, post_div_p); - break; - } -} - static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); @@ -1002,6 +794,24 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) radeon_underscan_enum_list[i].name); } + rdev->mode_info.underscan_hborder_property = + drm_property_create(rdev->ddev, + DRM_MODE_PROP_RANGE, + "underscan hborder", 2); + if (!rdev->mode_info.underscan_hborder_property) + return -ENOMEM; + rdev->mode_info.underscan_hborder_property->values[0] = 0; + rdev->mode_info.underscan_hborder_property->values[1] = 128; + + rdev->mode_info.underscan_vborder_property = + drm_property_create(rdev->ddev, + DRM_MODE_PROP_RANGE, + "underscan vborder", 2); + if (!rdev->mode_info.underscan_vborder_property) + return -ENOMEM; + rdev->mode_info.underscan_vborder_property->values[0] = 0; + rdev->mode_info.underscan_vborder_property->values[1] = 128; + return 0; } @@ -1159,8 +969,14 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, ((radeon_encoder->underscan_type == UNDERSCAN_AUTO) && drm_detect_hdmi_monitor(radeon_connector->edid) && is_hdtv_mode(mode)))) { - radeon_crtc->h_border = (mode->hdisplay >> 5) + 16; - radeon_crtc->v_border = (mode->vdisplay >> 5) + 16; + if (radeon_encoder->underscan_hborder != 0) + radeon_crtc->h_border = radeon_encoder->underscan_hborder; + else + radeon_crtc->h_border = (mode->hdisplay >> 5) + 16; + if (radeon_encoder->underscan_vborder != 0) + radeon_crtc->v_border = radeon_encoder->underscan_vborder; + else + radeon_crtc->v_border = (mode->vdisplay >> 5) + 16; radeon_crtc->rmx_type = RMX_FULL; src_v = crtc->mode.vdisplay; dst_v = crtc->mode.vdisplay - (radeon_crtc->v_border * 2); @@ -1195,3 +1011,156 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, } return true; } + +/* + * Retrieve current video scanout position of crtc on a given gpu. + * + * \param rdev Device to query. + * \param crtc Crtc to query. + * \param *vpos Location where vertical scanout position should be stored. + * \param *hpos Location where horizontal scanout position should go. + * + * Returns vpos as a positive number while in active scanout area. + * Returns vpos as a negative number inside vblank, counting the number + * of scanlines to go until end of vblank, e.g., -1 means "one scanline + * until start of active scanout / end of vblank." + * + * \return Flags, or'ed together as follows: + * + * RADEON_SCANOUTPOS_VALID = Query successfull. + * RADEON_SCANOUTPOS_INVBL = Inside vblank. + * RADEON_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of + * this flag means that returned position may be offset by a constant but + * unknown small number of scanlines wrt. real scanout position. + * + */ +int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, int *hpos) +{ + u32 stat_crtc = 0, vbl = 0, position = 0; + int vbl_start, vbl_end, vtotal, ret = 0; + bool in_vbl = true; + + if (ASIC_IS_DCE4(rdev)) { + if (crtc == 0) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC0_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC0_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 1) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC1_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC1_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 2) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC2_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC2_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 3) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC3_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC3_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 4) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC4_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC4_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 5) { + vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + + EVERGREEN_CRTC5_REGISTER_OFFSET); + position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + + EVERGREEN_CRTC5_REGISTER_OFFSET); + ret |= RADEON_SCANOUTPOS_VALID; + } + } else if (ASIC_IS_AVIVO(rdev)) { + if (crtc == 0) { + vbl = RREG32(AVIVO_D1CRTC_V_BLANK_START_END); + position = RREG32(AVIVO_D1CRTC_STATUS_POSITION); + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 1) { + vbl = RREG32(AVIVO_D2CRTC_V_BLANK_START_END); + position = RREG32(AVIVO_D2CRTC_STATUS_POSITION); + ret |= RADEON_SCANOUTPOS_VALID; + } + } else { + /* Pre-AVIVO: Different encoding of scanout pos and vblank interval. */ + if (crtc == 0) { + /* Assume vbl_end == 0, get vbl_start from + * upper 16 bits. + */ + vbl = (RREG32(RADEON_CRTC_V_TOTAL_DISP) & + RADEON_CRTC_V_DISP) >> RADEON_CRTC_V_DISP_SHIFT; + /* Only retrieve vpos from upper 16 bits, set hpos == 0. */ + position = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL; + stat_crtc = RREG32(RADEON_CRTC_STATUS); + if (!(stat_crtc & 1)) + in_vbl = false; + + ret |= RADEON_SCANOUTPOS_VALID; + } + if (crtc == 1) { + vbl = (RREG32(RADEON_CRTC2_V_TOTAL_DISP) & + RADEON_CRTC_V_DISP) >> RADEON_CRTC_V_DISP_SHIFT; + position = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL; + stat_crtc = RREG32(RADEON_CRTC2_STATUS); + if (!(stat_crtc & 1)) + in_vbl = false; + + ret |= RADEON_SCANOUTPOS_VALID; + } + } + + /* Decode into vertical and horizontal scanout position. */ + *vpos = position & 0x1fff; + *hpos = (position >> 16) & 0x1fff; + + /* Valid vblank area boundaries from gpu retrieved? */ + if (vbl > 0) { + /* Yes: Decode. */ + ret |= RADEON_SCANOUTPOS_ACCURATE; + vbl_start = vbl & 0x1fff; + vbl_end = (vbl >> 16) & 0x1fff; + } + else { + /* No: Fake something reasonable which gives at least ok results. */ + vbl_start = rdev->mode_info.crtcs[crtc]->base.mode.crtc_vdisplay; + vbl_end = 0; + } + + /* Test scanout position against vblank region. */ + if ((*vpos < vbl_start) && (*vpos >= vbl_end)) + in_vbl = false; + + /* Check if inside vblank area and apply corrective offsets: + * vpos will then be >=0 in video scanout area, but negative + * within vblank area, counting down the number of lines until + * start of scanout. + */ + + /* Inside "upper part" of vblank area? Apply corrective offset if so: */ + if (in_vbl && (*vpos >= vbl_start)) { + vtotal = rdev->mode_info.crtcs[crtc]->base.mode.crtc_vtotal; + *vpos = *vpos - vtotal; + } + + /* Correct for shifted end of vbl at vbl_end. */ + *vpos = *vpos - vbl_end; + + /* In vblank? */ + if (in_vbl) + ret |= RADEON_SCANOUTPOS_INVBL; + + return ret; +} diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 795403b..88e4ea9 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -47,9 +47,10 @@ * - 2.4.0 - add crtc id query * - 2.5.0 - add get accel 2 to work around ddx breakage for evergreen * - 2.6.0 - add tiling config query (r6xx+), add initial HiZ support (r300->r500) + * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 6 +#define KMS_DRIVER_MINOR 7 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); @@ -93,7 +94,6 @@ int radeon_benchmarking = 0; int radeon_testing = 0; int radeon_connector_table = 0; int radeon_tv = 1; -int radeon_new_pll = -1; int radeon_audio = 1; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; @@ -131,9 +131,6 @@ module_param_named(connector_table, radeon_connector_table, int, 0444); MODULE_PARM_DESC(tv, "TV enable (0 = disable)"); module_param_named(tv, radeon_tv, int, 0444); -MODULE_PARM_DESC(new_pll, "Select new PLL code"); -module_param_named(new_pll, radeon_new_pll, int, 0444); - MODULE_PARM_DESC(audio, "Audio enable (0 = disable)"); module_param_named(audio, radeon_audio, int, 0444); @@ -203,8 +200,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = { @@ -219,6 +214,7 @@ static struct drm_driver driver_old = { #ifdef CONFIG_COMPAT .compat_ioctl = radeon_compat_ioctl, #endif + .llseek = noop_llseek, }, .pci_driver = { @@ -290,8 +286,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 2c293e8..ae58b68 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -529,9 +529,9 @@ atombios_digital_setup(struct drm_encoder *encoder, int action) args.v1.ucMisc |= PANEL_ENCODER_MISC_HDMI_TYPE; args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - if (dig->lvds_misc & ATOM_PANEL_MISC_DUAL) + if (dig->lcd_misc & ATOM_PANEL_MISC_DUAL) args.v1.ucMisc |= PANEL_ENCODER_MISC_DUAL; - if (dig->lvds_misc & ATOM_PANEL_MISC_888RGB) + if (dig->lcd_misc & ATOM_PANEL_MISC_888RGB) args.v1.ucMisc |= (1 << 1); } else { if (dig->linkb) @@ -558,18 +558,18 @@ atombios_digital_setup(struct drm_encoder *encoder, int action) args.v2.ucTemporal = 0; args.v2.ucFRC = 0; if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - if (dig->lvds_misc & ATOM_PANEL_MISC_DUAL) + if (dig->lcd_misc & ATOM_PANEL_MISC_DUAL) args.v2.ucMisc |= PANEL_ENCODER_MISC_DUAL; - if (dig->lvds_misc & ATOM_PANEL_MISC_SPATIAL) { + if (dig->lcd_misc & ATOM_PANEL_MISC_SPATIAL) { args.v2.ucSpatial = PANEL_ENCODER_SPATIAL_DITHER_EN; - if (dig->lvds_misc & ATOM_PANEL_MISC_888RGB) + if (dig->lcd_misc & ATOM_PANEL_MISC_888RGB) args.v2.ucSpatial |= PANEL_ENCODER_SPATIAL_DITHER_DEPTH; } - if (dig->lvds_misc & ATOM_PANEL_MISC_TEMPORAL) { + if (dig->lcd_misc & ATOM_PANEL_MISC_TEMPORAL) { args.v2.ucTemporal = PANEL_ENCODER_TEMPORAL_DITHER_EN; - if (dig->lvds_misc & ATOM_PANEL_MISC_888RGB) + if (dig->lcd_misc & ATOM_PANEL_MISC_888RGB) args.v2.ucTemporal |= PANEL_ENCODER_TEMPORAL_DITHER_DEPTH; - if (((dig->lvds_misc >> ATOM_PANEL_MISC_GREY_LEVEL_SHIFT) & 0x3) == 2) + if (((dig->lcd_misc >> ATOM_PANEL_MISC_GREY_LEVEL_SHIFT) & 0x3) == 2) args.v2.ucTemporal |= PANEL_ENCODER_TEMPORAL_LEVEL_4; } } else { diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 40b0c08..efa2118 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -59,6 +59,8 @@ static struct fb_ops radeonfb_ops = { .fb_pan_display = drm_fb_helper_pan_display, .fb_blank = drm_fb_helper_blank, .fb_setcmap = drm_fb_helper_setcmap, + .fb_debug_enter = drm_fb_helper_debug_enter, + .fb_debug_leave = drm_fb_helper_debug_leave, }; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index b1f9a81..216392d 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -72,7 +72,15 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev) bool wake = false; unsigned long cjiffies; - seq = RREG32(rdev->fence_drv.scratch_reg); + if (rdev->wb.enabled) { + u32 scratch_index; + if (rdev->wb.use_event) + scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + else + scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + seq = rdev->wb.wb[scratch_index/4]; + } else + seq = RREG32(rdev->fence_drv.scratch_reg); if (seq != rdev->fence_drv.last_seq) { rdev->fence_drv.last_seq = seq; rdev->fence_drv.last_jiffies = jiffies; diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 305049a..ace2e63 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -348,10 +348,25 @@ void radeon_crtc_dpms(struct drm_crtc *crtc, int mode) int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { + return radeon_crtc_do_set_base(crtc, old_fb, x, y, 0); +} + +int radeon_crtc_set_base_atomic(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, enum mode_set_atomic state) +{ + return radeon_crtc_do_set_base(crtc, fb, x, y, 1); +} + +int radeon_crtc_do_set_base(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, int atomic) +{ struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *radeon_fb; + struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; uint64_t base; @@ -364,14 +379,21 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, DRM_DEBUG_KMS("\n"); /* no fb bound */ - if (!crtc->fb) { + if (!atomic && !crtc->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - radeon_fb = to_radeon_framebuffer(crtc->fb); + if (atomic) { + radeon_fb = to_radeon_framebuffer(fb); + target_fb = fb; + } + else { + radeon_fb = to_radeon_framebuffer(crtc->fb); + target_fb = crtc->fb; + } - switch (crtc->fb->bits_per_pixel) { + switch (target_fb->bits_per_pixel) { case 8: format = 2; break; @@ -415,10 +437,10 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, crtc_offset_cntl = 0; - pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8); - crtc_pitch = (((pitch_pixels * crtc->fb->bits_per_pixel) + - ((crtc->fb->bits_per_pixel * 8) - 1)) / - (crtc->fb->bits_per_pixel * 8)); + pitch_pixels = target_fb->pitch / (target_fb->bits_per_pixel / 8); + crtc_pitch = (((pitch_pixels * target_fb->bits_per_pixel) + + ((target_fb->bits_per_pixel * 8) - 1)) / + (target_fb->bits_per_pixel * 8)); crtc_pitch |= crtc_pitch << 16; @@ -443,14 +465,14 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, crtc_tile_x0_y0 = x | (y << 16); base &= ~0x7ff; } else { - int byteshift = crtc->fb->bits_per_pixel >> 4; + int byteshift = target_fb->bits_per_pixel >> 4; int tile_addr = (((y >> 3) * pitch_pixels + x) >> (8 - byteshift)) << 11; base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8); crtc_offset_cntl |= (y % 16); } } else { int offset = y * pitch_pixels + x; - switch (crtc->fb->bits_per_pixel) { + switch (target_fb->bits_per_pixel) { case 8: offset *= 1; break; @@ -496,8 +518,8 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, crtc_offset); WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); - if (old_fb && old_fb != crtc->fb) { - radeon_fb = to_radeon_framebuffer(old_fb); + if (!atomic && fb && fb != crtc->fb) { + radeon_fb = to_radeon_framebuffer(fb); rbo = radeon_fb->obj->driver_private; r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -717,10 +739,6 @@ static void radeon_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) pll = &rdev->clock.p1pll; pll->flags = RADEON_PLL_LEGACY; - if (radeon_new_pll == 1) - pll->algo = PLL_ALGO_NEW; - else - pll->algo = PLL_ALGO_LEGACY; if (mode->clock > 200000) /* range limits??? */ pll->flags |= RADEON_PLL_PREFER_HIGH_FB_DIV; @@ -1040,6 +1058,7 @@ static const struct drm_crtc_helper_funcs legacy_helper_funcs = { .mode_fixup = radeon_crtc_mode_fixup, .mode_set = radeon_crtc_mode_set, .mode_set_base = radeon_crtc_set_base, + .mode_set_base_atomic = radeon_crtc_set_base_atomic, .prepare = radeon_crtc_prepare, .commit = radeon_crtc_commit, .load_lut = radeon_crtc_load_lut, diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 17a6602..9245716 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -35,8 +35,8 @@ #include <drm_edid.h> #include <drm_dp_helper.h> #include <drm_fixed.h> +#include <drm_crtc_helper.h> #include <linux/i2c.h> -#include <linux/i2c-id.h> #include <linux/i2c-algo-bit.h> struct radeon_bo; @@ -150,12 +150,6 @@ struct radeon_tmds_pll { #define RADEON_PLL_USE_POST_DIV (1 << 12) #define RADEON_PLL_IS_LCD (1 << 13) -/* pll algo */ -enum radeon_pll_algo { - PLL_ALGO_LEGACY, - PLL_ALGO_NEW -}; - struct radeon_pll { /* reference frequency */ uint32_t reference_freq; @@ -188,8 +182,6 @@ struct radeon_pll { /* pll id */ uint32_t id; - /* pll algo */ - enum radeon_pll_algo algo; }; struct radeon_i2c_chan { @@ -241,6 +233,8 @@ struct radeon_mode_info { struct drm_property *tmds_pll_property; /* underscan */ struct drm_property *underscan_property; + struct drm_property *underscan_hborder_property; + struct drm_property *underscan_vborder_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; @@ -336,22 +330,24 @@ struct radeon_encoder_ext_tmds { struct radeon_atom_ss { uint16_t percentage; uint8_t type; - uint8_t step; + uint16_t step; uint8_t delay; uint8_t range; uint8_t refdiv; + /* asic_ss */ + uint16_t rate; + uint16_t amount; }; struct radeon_encoder_atom_dig { bool linkb; /* atom dig */ bool coherent_mode; - int dig_encoder; /* -1 disabled, 0 DIGA, 1 DIGB */ - /* atom lvds */ - uint32_t lvds_misc; + int dig_encoder; /* -1 disabled, 0 DIGA, 1 DIGB, etc. */ + /* atom lvds/edp */ + uint32_t lcd_misc; uint16_t panel_pwr_delay; - enum radeon_pll_algo pll_algo; - struct radeon_atom_ss *ss; + uint32_t lcd_ss_id; /* panel mode */ struct drm_display_mode native_mode; }; @@ -370,6 +366,8 @@ struct radeon_encoder { uint32_t pixel_clock; enum radeon_rmx_type rmx_type; enum radeon_underscan_type underscan_type; + uint32_t underscan_hborder; + uint32_t underscan_vborder; struct drm_display_mode native_mode; void *enc_priv; int audio_polling_active; @@ -436,6 +434,11 @@ struct radeon_framebuffer { struct drm_gem_object *obj; }; +/* radeon_get_crtc_scanoutpos() return flags */ +#define RADEON_SCANOUTPOS_VALID (1 << 0) +#define RADEON_SCANOUTPOS_INVBL (1 << 1) +#define RADEON_SCANOUTPOS_ACCURATE (1 << 2) + extern enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev); extern enum radeon_tv_std @@ -491,6 +494,13 @@ extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector); extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector); +extern bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev, + struct radeon_atom_ss *ss, + int id); +extern bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev, + struct radeon_atom_ss *ss, + int id, u32 clock); + extern void radeon_compute_pll(struct radeon_pll *pll, uint64_t freq, uint32_t *dot_clock_p, @@ -514,6 +524,10 @@ extern void radeon_encoder_set_active_device(struct drm_encoder *encoder); extern void radeon_crtc_load_lut(struct drm_crtc *crtc); extern int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); +extern int atombios_crtc_set_base_atomic(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, + enum mode_set_atomic state); extern int atombios_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -523,7 +537,13 @@ extern void atombios_crtc_dpms(struct drm_crtc *crtc, int mode); extern int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); - +extern int radeon_crtc_set_base_atomic(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, + enum mode_set_atomic state); +extern int radeon_crtc_do_set_base(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, int atomic); extern int radeon_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, @@ -532,6 +552,8 @@ extern int radeon_crtc_cursor_set(struct drm_crtc *crtc, extern int radeon_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); +extern int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, int *hpos); + extern bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev); extern struct edid * radeon_combios_get_hardcoded_edid(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index b3b5306..d7ab914 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -435,7 +435,7 @@ int radeon_bo_get_surface_reg(struct radeon_bo *bo) out: radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch, - bo->tbo.mem.mm_node->start << PAGE_SHIFT, + bo->tbo.mem.start << PAGE_SHIFT, bo->tbo.num_pages << PAGE_SHIFT); return 0; } @@ -532,7 +532,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) rdev = rbo->rdev; if (bo->mem.mem_type == TTM_PL_VRAM) { size = bo->mem.num_pages << PAGE_SHIFT; - offset = bo->mem.mm_node->start << PAGE_SHIFT; + offset = bo->mem.start << PAGE_SHIFT; if ((offset + size) > rdev->mc.visible_vram_size) { /* hurrah the memory is not visible ! */ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); @@ -540,7 +540,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) r = ttm_bo_validate(bo, &rbo->placement, false, true, false); if (unlikely(r != 0)) return r; - offset = bo->mem.mm_node->start << PAGE_SHIFT; + offset = bo->mem.start << PAGE_SHIFT; /* this should not happen */ if ((offset + size) > rdev->mc.visible_vram_size) return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index f87efec..8c9b2ef 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -712,73 +712,21 @@ void radeon_pm_compute_clocks(struct radeon_device *rdev) static bool radeon_pm_in_vbl(struct radeon_device *rdev) { - u32 stat_crtc = 0, vbl = 0, position = 0; + int crtc, vpos, hpos, vbl_status; bool in_vbl = true; - if (ASIC_IS_DCE4(rdev)) { - if (rdev->pm.active_crtcs & (1 << 0)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC0_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC0_REGISTER_OFFSET) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 1)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC1_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC1_REGISTER_OFFSET) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 2)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC2_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC2_REGISTER_OFFSET) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 3)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC3_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC3_REGISTER_OFFSET) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 4)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC4_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC4_REGISTER_OFFSET) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 5)) { - vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + - EVERGREEN_CRTC5_REGISTER_OFFSET) & 0xfff; - position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + - EVERGREEN_CRTC5_REGISTER_OFFSET) & 0xfff; - } - } else if (ASIC_IS_AVIVO(rdev)) { - if (rdev->pm.active_crtcs & (1 << 0)) { - vbl = RREG32(AVIVO_D1CRTC_V_BLANK_START_END) & 0xfff; - position = RREG32(AVIVO_D1CRTC_STATUS_POSITION) & 0xfff; - } - if (rdev->pm.active_crtcs & (1 << 1)) { - vbl = RREG32(AVIVO_D2CRTC_V_BLANK_START_END) & 0xfff; - position = RREG32(AVIVO_D2CRTC_STATUS_POSITION) & 0xfff; - } - if (position < vbl && position > 1) - in_vbl = false; - } else { - if (rdev->pm.active_crtcs & (1 << 0)) { - stat_crtc = RREG32(RADEON_CRTC_STATUS); - if (!(stat_crtc & 1)) - in_vbl = false; - } - if (rdev->pm.active_crtcs & (1 << 1)) { - stat_crtc = RREG32(RADEON_CRTC2_STATUS); - if (!(stat_crtc & 1)) + /* Iterate over all active crtc's. All crtc's must be in vblank, + * otherwise return in_vbl == false. + */ + for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { + if (rdev->pm.active_crtcs & (1 << crtc)) { + vbl_status = radeon_get_crtc_scanoutpos(rdev, crtc, &vpos, &hpos); + if ((vbl_status & RADEON_SCANOUTPOS_VALID) && + !(vbl_status & RADEON_SCANOUTPOS_INVBL)) in_vbl = false; } } - if (position < vbl && position > 1) - in_vbl = false; - return in_vbl; } diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 261e98a..6ea798c 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -247,10 +247,14 @@ void radeon_ib_pool_fini(struct radeon_device *rdev) */ void radeon_ring_free_size(struct radeon_device *rdev) { - if (rdev->family >= CHIP_R600) - rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); - else - rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + if (rdev->wb.enabled) + rdev->cp.rptr = rdev->wb.wb[RADEON_WB_CP_RPTR_OFFSET/4]; + else { + if (rdev->family >= CHIP_R600) + rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); + else + rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR); + } /* This works because ring_size is a power of 2 */ rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4)); rdev->cp.ring_free_dw -= rdev->cp.wptr; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 84c53e4..fe95bb3 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -152,6 +152,7 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->default_caching = TTM_PL_FLAG_CACHED; break; case TTM_PL_TT: + man->func = &ttm_bo_manager_func; man->gpu_offset = rdev->mc.gtt_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; @@ -173,6 +174,7 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_VRAM: /* "On-card" video ram */ + man->func = &ttm_bo_manager_func; man->gpu_offset = rdev->mc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; @@ -246,8 +248,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - old_start = old_mem->mm_node->start << PAGE_SHIFT; - new_start = new_mem->mm_node->start << PAGE_SHIFT; + old_start = old_mem->start << PAGE_SHIFT; + new_start = new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { case TTM_PL_VRAM: @@ -326,14 +328,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, } r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem); out_cleanup: - if (tmp_mem.mm_node) { - struct ttm_bo_global *glob = rdev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&glob->lru_lock); - return r; - } + ttm_bo_mem_put(bo, &tmp_mem); return r; } @@ -372,14 +367,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, goto out_cleanup; } out_cleanup: - if (tmp_mem.mm_node) { - struct ttm_bo_global *glob = rdev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&glob->lru_lock); - return r; - } + ttm_bo_mem_put(bo, &tmp_mem); return r; } @@ -449,14 +437,14 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ #if __OS_HAS_AGP if (rdev->flags & RADEON_IS_AGP) { /* RADEON_IS_AGP is set only if AGP is active */ - mem->bus.offset = mem->mm_node->start << PAGE_SHIFT; + mem->bus.offset = mem->start << PAGE_SHIFT; mem->bus.base = rdev->mc.agp_base; mem->bus.is_iomem = !rdev->ddev->agp->cant_use_aperture; } #endif break; case TTM_PL_VRAM: - mem->bus.offset = mem->mm_node->start << PAGE_SHIFT; + mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ if ((mem->bus.offset + mem->bus.size) > rdev->mc.visible_vram_size) return -EINVAL; @@ -631,7 +619,7 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) return drm_mmap(filp, vma); } - file_priv = (struct drm_file *)filp->private_data; + file_priv = filp->private_data; rdev = file_priv->minor->dev->dev_private; if (rdev == NULL) { return -EINVAL; @@ -699,7 +687,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend, int r; gtt = container_of(backend, struct radeon_ttm_backend, backend); - gtt->offset = bo_mem->mm_node->start << PAGE_SHIFT; + gtt->offset = bo_mem->start << PAGE_SHIFT; if (!gtt->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", gtt->num_pages, bo_mem, backend); } @@ -798,9 +786,9 @@ static int radeon_ttm_debugfs_init(struct radeon_device *rdev) radeon_mem_types_list[i].show = &radeon_mm_dump_table; radeon_mem_types_list[i].driver_features = 0; if (i == 0) - radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_VRAM].manager; + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_VRAM].priv; else - radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_TT].manager; + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_TT].priv; } /* Add ttm page pool to debugfs */ diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen index f78fd59..ac40fd3 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/evergreen +++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen @@ -22,6 +22,10 @@ evergreen 0x9400 0x00008B10 PA_SC_LINE_STIPPLE_STATE 0x00008BF0 PA_SC_ENHANCE 0x00008D8C SQ_DYN_GPR_CNTL_PS_FLUSH_REQ +0x00008D90 SQ_DYN_GPR_OPTIMIZATION +0x00008D94 SQ_DYN_GPR_SIMD_LOCK_EN +0x00008D98 SQ_DYN_GPR_THREAD_LIMIT +0x00008D9C SQ_DYN_GPR_LDS_LIMIT 0x00008C00 SQ_CONFIG 0x00008C04 SQ_GPR_RESOURCE_MGMT_1 0x00008C08 SQ_GPR_RESOURCE_MGMT_2 @@ -34,6 +38,10 @@ evergreen 0x9400 0x00008C24 SQ_STACK_RESOURCE_MGMT_2 0x00008C28 SQ_STACK_RESOURCE_MGMT_3 0x00008DF8 SQ_CONST_MEM_BASE +0x00008E20 SQ_STATIC_THREAD_MGMT_1 +0x00008E24 SQ_STATIC_THREAD_MGMT_2 +0x00008E28 SQ_STATIC_THREAD_MGMT_3 +0x00008E2C SQ_LDS_RESOURCE_MGMT 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS 0x00009100 SPI_CONFIG_CNTL 0x0000913C SPI_CONFIG_CNTL_1 diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index ae2b76b..f683e51 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -397,6 +397,12 @@ static int rs400_startup(struct radeon_device *rdev) r = rs400_gart_enable(rdev); if (r) return r; + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r100_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -406,9 +412,6 @@ static int rs400_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -443,7 +446,7 @@ int rs400_resume(struct radeon_device *rdev) int rs400_suspend(struct radeon_device *rdev) { r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); r100_irq_disable(rdev); rs400_gart_disable(rdev); return 0; @@ -452,7 +455,7 @@ int rs400_suspend(struct radeon_device *rdev) void rs400_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); rs400_gart_fini(rdev); @@ -526,7 +529,7 @@ int rs400_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); rs400_gart_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 51d5f7b..b091a1f 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -796,6 +796,12 @@ static int rs600_startup(struct radeon_device *rdev) r = rs600_gart_enable(rdev); if (r) return r; + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -805,9 +811,6 @@ static int rs600_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -848,7 +851,7 @@ int rs600_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); rs600_irq_disable(rdev); rs600_gart_disable(rdev); return 0; @@ -858,7 +861,7 @@ void rs600_fini(struct radeon_device *rdev) { r600_audio_fini(rdev); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); rs600_gart_fini(rdev); @@ -932,7 +935,7 @@ int rs600_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); rs600_gart_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 4dc2a87..0137d3e 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -616,6 +616,12 @@ static int rs690_startup(struct radeon_device *rdev) r = rs400_gart_enable(rdev); if (r) return r; + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -625,9 +631,6 @@ static int rs690_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -668,7 +671,7 @@ int rs690_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); rs600_irq_disable(rdev); rs400_gart_disable(rdev); return 0; @@ -678,7 +681,7 @@ void rs690_fini(struct radeon_device *rdev) { r600_audio_fini(rdev); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); rs400_gart_fini(rdev); @@ -753,7 +756,7 @@ int rs690_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); rs400_gart_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 4d6e860..5d569f4 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -386,6 +386,12 @@ static int rv515_startup(struct radeon_device *rdev) if (r) return r; } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ rs600_irq_set(rdev); rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL); @@ -395,9 +401,6 @@ static int rv515_startup(struct radeon_device *rdev) dev_err(rdev->dev, "failled initializing CP (%d).\n", r); return r; } - r = r100_wb_init(rdev); - if (r) - dev_err(rdev->dev, "failled initializing WB (%d).\n", r); r = r100_ib_init(rdev); if (r) { dev_err(rdev->dev, "failled initializing IB (%d).\n", r); @@ -431,7 +434,7 @@ int rv515_resume(struct radeon_device *rdev) int rv515_suspend(struct radeon_device *rdev) { r100_cp_disable(rdev); - r100_wb_disable(rdev); + radeon_wb_disable(rdev); rs600_irq_disable(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_disable(rdev); @@ -447,7 +450,7 @@ void rv515_set_safe_registers(struct radeon_device *rdev) void rv515_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_gem_fini(rdev); rv370_pcie_gart_fini(rdev); @@ -527,7 +530,7 @@ int rv515_init(struct radeon_device *rdev) /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); - r100_wb_fini(rdev); + radeon_wb_fini(rdev); r100_ib_fini(rdev); radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 9490da7..245374e 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -269,6 +269,7 @@ void r700_cp_stop(struct radeon_device *rdev) { rdev->mc.active_vram_size = rdev->mc.visible_vram_size; WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); + WREG32(SCRATCH_UMSK, 0); } static int rv770_cp_load_microcode(struct radeon_device *rdev) @@ -643,10 +644,11 @@ static void rv770_gpu_init(struct radeon_device *rdev) else gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); - - gb_tiling_config |= GROUP_SIZE(0); - rdev->config.rv770.tiling_group_size = 256; - + gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); + if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) + rdev->config.rv770.tiling_group_size = 512; + else + rdev->config.rv770.tiling_group_size = 256; if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { gb_tiling_config |= ROW_TILING(3); gb_tiling_config |= SAMPLE_SPLIT(3); @@ -1030,19 +1032,12 @@ static int rv770_startup(struct radeon_device *rdev) rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } - /* pin copy shader into vram */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("failed to pin blit object %d\n", r); - return r; - } - } + + /* allocate wb buffer */ + r = radeon_wb_init(rdev); + if (r) + return r; + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -1061,8 +1056,7 @@ static int rv770_startup(struct radeon_device *rdev) r = r600_cp_resume(rdev); if (r) return r; - /* write back buffer are not vital so don't worry about failure */ - r600_wb_enable(rdev); + return 0; } @@ -1108,7 +1102,7 @@ int rv770_suspend(struct radeon_device *rdev) r700_cp_stop(rdev); rdev->cp.ready = false; r600_irq_suspend(rdev); - r600_wb_disable(rdev); + radeon_wb_disable(rdev); rv770_pcie_gart_disable(rdev); /* unpin shaders bo */ if (rdev->r600_blit.shader_obj) { @@ -1203,8 +1197,8 @@ int rv770_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -1236,8 +1230,8 @@ void rv770_fini(struct radeon_device *rdev) { r600_blit_fini(rdev); r700_cp_fini(rdev); - r600_wb_fini(rdev); r600_irq_fini(rdev); + radeon_wb_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); rv770_vram_scratch_fini(rdev); |