From 1ffdff134eb2d943bde3e4901ac48a9656a7e7a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Oct 2012 10:15:24 +0200 Subject: drm: dp helper: extract drm_dp_channel_eq_ok radeon and intel use the exact same definition. Reviewed-by: Alex Deucher Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index d5699fe..3f46bb1 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -34,7 +34,6 @@ /* move these to drm_dp_helper.c/h */ #define DP_LINK_CONFIGURATION_SIZE 9 -#define DP_LINK_STATUS_SIZE 6 #define DP_DPCD_SIZE 8 static char *voltage_names[] = { @@ -318,25 +317,6 @@ static bool dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE], return true; } -static bool dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE], - int lane_count) -{ - u8 lane_align; - u8 lane_status; - int lane; - - lane_align = dp_link_status(link_status, - DP_LANE_ALIGN_STATUS_UPDATED); - if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0) - return false; - for (lane = 0; lane < lane_count; lane++) { - lane_status = dp_get_lane_status(link_status, lane); - if ((lane_status & DP_CHANNEL_EQ_BITS) != DP_CHANNEL_EQ_BITS) - return false; - } - return true; -} - static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE], int lane) @@ -664,7 +644,7 @@ bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector) if (!radeon_dp_get_link_status(radeon_connector, link_status)) return false; - if (dp_channel_eq_ok(link_status, dig->dp_lane_count)) + if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count)) return false; return true; } @@ -896,7 +876,7 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) break; } - if (dp_channel_eq_ok(dp_info->link_status, 
dp_info->dp_lane_count)) { + if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) { channel_eq = true; break; } -- cgit v1.1 From 01916270b840f7f37b7daab936add1747d6afbbf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Oct 2012 10:15:25 +0200 Subject: drm: dp helper: extract drm_dp_clock_recovery_ok radeon and intel use the exact same definition. Reviewed-by: Alex Deucher Acked-by: Dave Airlie v2: Kill 2 more helpers in intel_dp.c that I've missed. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 3f46bb1..65f0c60 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -294,29 +294,6 @@ static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r) return link_status[r - DP_LANE0_1_STATUS]; } -static u8 dp_get_lane_status(u8 link_status[DP_LINK_STATUS_SIZE], - int lane) -{ - int i = DP_LANE0_1_STATUS + (lane >> 1); - int s = (lane & 1) * 4; - u8 l = dp_link_status(link_status, i); - return (l >> s) & 0xf; -} - -static bool dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE], - int lane_count) -{ - int lane; - u8 lane_status; - - for (lane = 0; lane < lane_count; lane++) { - lane_status = dp_get_lane_status(link_status, lane); - if ((lane_status & DP_LANE_CR_DONE) == 0) - return false; - } - return true; -} - static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE], int lane) @@ -811,7 +788,7 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) break; } - if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { + if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { clock_recovery = true; break; } -- cgit v1.1 From 0f037bdee1a12947a0c55b21a05f57793332bc07 Mon Sep 17 
00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Oct 2012 10:15:27 +0200 Subject: drm: extract helpers to compute new training values from sink request Save for the minor difference that the intel versions get an offset into the link_status as an argument, both are the same again. Reviewed-by: Alex Deucher Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 65f0c60..5ad8bfa 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -289,36 +289,6 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, /***** general DP utility functions *****/ -static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r) -{ - return link_status[r - DP_LANE0_1_STATUS]; -} - -static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE], - int lane) - -{ - int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1); - int s = ((lane & 1) ? - DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT : - DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT); - u8 l = dp_link_status(link_status, i); - - return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT; -} - -static u8 dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE], - int lane) -{ - int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1); - int s = ((lane & 1) ? 
- DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT : - DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT); - u8 l = dp_link_status(link_status, i); - - return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT; -} - #define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_1200 #define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPHASIS_9_5 @@ -331,8 +301,8 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE], int lane; for (lane = 0; lane < lane_count; lane++) { - u8 this_v = dp_get_adjust_request_voltage(link_status, lane); - u8 this_p = dp_get_adjust_request_pre_emphasis(link_status, lane); + u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane); + u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane); DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n", lane, -- cgit v1.1 From 1a644cd47ca0c40a9210db170bd0630031c3a60b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Oct 2012 15:32:40 +0200 Subject: drm: extract dp link train delay functions from radeon This requires a few changes since that dpcd value is above the range currently cached by radeon. I've checked the dp specs, and above 0xf there's a big gap and nothing that looks like we should cache it while a given device is plugged in. It's also the same value that i915.ko uses. Hence extend the various dpcd arrays in the radeon driver, use proper symbolic constants where applicable (one place overallocated the dpcd array to 25 bytes). Then also drop the rd_interval cache - radeon_dp_link_train_init re-reads the dpcd block, so the values we'll consume in train_cr and train_ce will always be fresh. To avoid needless diff-churn, #define the old size of dpcd as the new one and keep it around. v2: Alex Deucher noticed one place where I've forgotten to replace 8 with DP_RECEIVER_CAP_SIZE. 
Reviewed-by: Alex Deucher Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 27 ++++++++++----------------- drivers/gpu/drm/radeon/radeon_mode.h | 2 +- 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 5ad8bfa..5e23ab2 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -34,7 +34,7 @@ /* move these to drm_dp_helper.c/h */ #define DP_LINK_CONFIGURATION_SIZE 9 -#define DP_DPCD_SIZE 8 +#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE static char *voltage_names[] = { "0.4V", "0.6V", "0.8V", "1.2V" @@ -478,14 +478,15 @@ static void radeon_dp_probe_oui(struct radeon_connector *radeon_connector) bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector) { struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; - u8 msg[25]; + u8 msg[DP_DPCD_SIZE]; int ret, i; - ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, 8, 0); + ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, + DP_DPCD_SIZE, 0); if (ret > 0) { - memcpy(dig_connector->dpcd, msg, 8); + memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); DRM_DEBUG_KMS("DPCD: "); - for (i = 0; i < 8; i++) + for (i = 0; i < DP_DPCD_SIZE; i++) DRM_DEBUG_KMS("%02x ", msg[i]); DRM_DEBUG_KMS("\n"); @@ -604,9 +605,8 @@ struct radeon_dp_link_train_info { int enc_id; int dp_clock; int dp_lane_count; - int rd_interval; bool tp3_supported; - u8 dpcd[8]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 train_set[4]; u8 link_status[DP_LINK_STATUS_SIZE]; u8 tries; @@ -748,10 +748,7 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) dp_info->tries = 0; voltage = 0xff; while (1) { - if (dp_info->rd_interval == 0) - udelay(100); - else - mdelay(dp_info->rd_interval * 4); + drm_dp_link_train_clock_recovery_delay(dp_info->dpcd); if 
(!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { DRM_ERROR("displayport link status failed\n"); @@ -813,10 +810,7 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) dp_info->tries = 0; channel_eq = false; while (1) { - if (dp_info->rd_interval == 0) - udelay(400); - else - mdelay(dp_info->rd_interval * 4); + drm_dp_link_train_channel_eq_delay(dp_info->dpcd); if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { DRM_ERROR("displayport link status failed\n"); @@ -901,14 +895,13 @@ void radeon_dp_link_train(struct drm_encoder *encoder, else dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A; - dp_info.rd_interval = radeon_read_dpcd_reg(radeon_connector, DP_TRAINING_AUX_RD_INTERVAL); tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT); if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED)) dp_info.tp3_supported = true; else dp_info.tp3_supported = false; - memcpy(dp_info.dpcd, dig_connector->dpcd, 8); + memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE); dp_info.rdev = rdev; dp_info.encoder = encoder; dp_info.connector = connector; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 92c5f47..d818b50 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -427,7 +427,7 @@ struct radeon_connector_atom_dig { uint32_t igp_lane_info; /* displayport */ struct radeon_i2c_chan *dp_i2c_bus; - u8 dpcd[8]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 dp_sink_type; int dp_clock; int dp_lane_count; -- cgit v1.1 From 3b5c662e8f536ca47396116de82f08d771727076 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Oct 2012 10:15:31 +0200 Subject: drm: extract dp link bw helpers Reviewed-by: Alex Deucher Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) (limited to 
'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 5e23ab2..093e17d 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -347,37 +347,11 @@ static int dp_get_max_dp_pix_clock(int link_rate, return (link_rate * lane_num * 8) / bpp; } -static int dp_get_max_link_rate(u8 dpcd[DP_DPCD_SIZE]) -{ - switch (dpcd[DP_MAX_LINK_RATE]) { - case DP_LINK_BW_1_62: - default: - return 162000; - case DP_LINK_BW_2_7: - return 270000; - case DP_LINK_BW_5_4: - return 540000; - } -} - static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE]) { return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; } -static u8 dp_get_dp_link_rate_coded(int link_rate) -{ - switch (link_rate) { - case 162000: - default: - return DP_LINK_BW_1_62; - case 270000: - return DP_LINK_BW_2_7; - case 540000: - return DP_LINK_BW_5_4; - } -} - /***** radeon specific DP functions *****/ /* First get the min lane# when low rate is used according to pixel clock @@ -389,7 +363,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector, int pix_clock) { int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); - int max_link_rate = dp_get_max_link_rate(dpcd); + int max_link_rate = drm_dp_max_link_rate(dpcd); int max_lane_num = dp_get_max_lane_number(dpcd); int lane_num; int max_dp_pix_clock; @@ -427,7 +401,7 @@ static int radeon_dp_get_dp_link_clock(struct drm_connector *connector, return 540000; } - return dp_get_max_link_rate(dpcd); + return drm_dp_max_link_rate(dpcd); } static u8 radeon_dp_encoder_service(struct radeon_device *rdev, @@ -692,7 +666,7 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp); /* set the link rate on the sink */ - tmp = dp_get_dp_link_rate_coded(dp_info->dp_clock); + tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock); 
radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp); /* start training on the source */ -- cgit v1.1 From 397fe15715ef1457d89f52666d0e249eb5eae64c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 22 Oct 2012 22:56:43 +0200 Subject: drm: extract drm_dp_max_lane_count helper Reviewed-by: Alex Deucher Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/radeon/atombios_dp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 093e17d..064023b 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -347,11 +347,6 @@ static int dp_get_max_dp_pix_clock(int link_rate, return (link_rate * lane_num * 8) / bpp; } -static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE]) -{ - return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; -} - /***** radeon specific DP functions *****/ /* First get the min lane# when low rate is used according to pixel clock @@ -364,7 +359,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector, { int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); int max_link_rate = drm_dp_max_link_rate(dpcd); - int max_lane_num = dp_get_max_lane_number(dpcd); + int max_lane_num = drm_dp_max_lane_count(dpcd); int lane_num; int max_dp_pix_clock; -- cgit v1.1 From 0a46fb5f41dbc7fae20764556ec7bf742cc0763a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 12 Oct 2012 14:59:17 +0000 Subject: drm/radeon: Use ttm_bo_is_reserved Signed-off-by: Maarten Lankhorst Reviewed-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_gart.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 6 +++--- drivers/gpu/drm/radeon/radeon_object.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c index 4debd60..8690be7 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -1237,7 +1237,7 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, { struct radeon_bo_va *bo_va; - BUG_ON(!atomic_read(&bo->tbo.reserved)); + BUG_ON(!radeon_bo_is_reserved(bo)); list_for_each_entry(bo_va, &bo->va, bo_list) { bo_va->valid = false; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index b91118c..65c5555 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -384,7 +384,7 @@ int radeon_bo_get_surface_reg(struct radeon_bo *bo) int steal; int i; - BUG_ON(!atomic_read(&bo->tbo.reserved)); + BUG_ON(!radeon_bo_is_reserved(bo)); if (!bo->tiling_flags) return 0; @@ -510,7 +510,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch) { - BUG_ON(!atomic_read(&bo->tbo.reserved)); + BUG_ON(!radeon_bo_is_reserved(bo)); if (tiling_flags) *tiling_flags = bo->tiling_flags; if (pitch) @@ -520,7 +520,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo, int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop) { - BUG_ON(!atomic_read(&bo->tbo.reserved)); + BUG_ON(!radeon_bo_is_reserved(bo)); if (!(bo->tiling_flags & RADEON_TILING_SURFACE)) return 0; diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 93cd491..5fc86b0 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -80,7 +80,7 @@ static inline unsigned long radeon_bo_size(struct radeon_bo *bo) static inline bool radeon_bo_is_reserved(struct radeon_bo *bo) { - return !!atomic_read(&bo->tbo.reserved); + return ttm_bo_is_reserved(&bo->tbo); } static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo) -- cgit v1.1 From ef8cf3a1c523afa499d15856e7db3844ad59d1fb Mon Sep 17 00:00:00 2001 From: 
Akinobu Mita Date: Fri, 9 Nov 2012 12:10:41 +0000 Subject: drm/radeon: Use hweight32 Use hweight32 instead of counting for each bit Signed-off-by: Akinobu Mita Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 8 +------- drivers/gpu/drm/radeon/r600_cp.c | 7 +------ 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cda280d..169ecc9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1424,13 +1424,7 @@ u32 r6xx_remap_render_backend(struct radeon_device *rdev, int r600_count_pipe_bits(uint32_t val) { - int i, ret = 0; - - for (i = 0; i < 32; i++) { - ret += val & 1; - val >>= 1; - } - return ret; + return hweight32(val); } static void r600_gpu_init(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 2514123..be85f75 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -721,12 +721,7 @@ static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, static int r600_count_pipe_bits(uint32_t val) { - int i, ret = 0; - for (i = 0; i < 32; i++) { - ret += val & 1; - val >>= 1; - } - return ret; + return hweight32(val); } static void r600_gfx_init(struct drm_device *dev, -- cgit v1.1 From 0b91c4a1cd7cc368763de2fe25b8ea64ea803c08 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 6 Nov 2012 21:49:51 +0000 Subject: drm/ttm: remove ttm_buffer_object->buffer_start All drivers set it to 0 and nothing uses it. 
Signed-off-by: Marcin Slusarz Reviewed-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 65c5555..7c4b4bb 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -140,7 +140,7 @@ int radeon_bo_create(struct radeon_device *rdev, /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, 0, !kernel, NULL, + &bo->placement, page_align, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { -- cgit v1.1 From 302381511f4b1bc98c6f61ccaec5d8e98ccd0365 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 9 Nov 2012 09:19:39 +0000 Subject: drm/radeon: check alloc_apertures() success in radeon_kick_out_firmware_fb() Check for alloc_apertures() memory allocation failure, and propagate an error code in case the allocation failed. 
Signed-off-by: Tommi Rantala Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 07eb84e..8c1a83c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -281,12 +281,15 @@ static struct drm_driver driver_old = { static struct drm_driver kms_driver; -static void radeon_kick_out_firmware_fb(struct pci_dev *pdev) +static int radeon_kick_out_firmware_fb(struct pci_dev *pdev) { struct apertures_struct *ap; bool primary = false; ap = alloc_apertures(1); + if (!ap) + return -ENOMEM; + ap->ranges[0].base = pci_resource_start(pdev, 0); ap->ranges[0].size = pci_resource_len(pdev, 0); @@ -295,13 +298,19 @@ static void radeon_kick_out_firmware_fb(struct pci_dev *pdev) #endif remove_conflicting_framebuffers(ap, "radeondrmfb", primary); kfree(ap); + + return 0; } static int __devinit radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + int ret; + /* Get rid of things like offb */ - radeon_kick_out_firmware_fb(pdev); + ret = radeon_kick_out_firmware_fb(pdev); + if (ret) + return ret; return drm_get_pci_dev(pdev, ent, &kms_driver); } -- cgit v1.1 From b03640b1de2eb349c2453d060d0bd0b0486e29b8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 12 Oct 2012 15:03:11 +0000 Subject: drm/ttm: remove sync_obj_arg from ttm_bo_move_accel_cleanup Signed-off-by: Maarten Lankhorst Reviewed-By: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5ebe1b3..929be87 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -265,7 +265,7 @@ static int 
radeon_move_blit(struct ttm_buffer_object *bo, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ &fence); /* FIXME: handle copy error */ - r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, + r = ttm_bo_move_accel_cleanup(bo, (void *)fence, evict, no_wait_reserve, no_wait_gpu, new_mem); radeon_fence_unref(&fence); return r; -- cgit v1.1 From dedfdffd448aea2543b59fd504b92b8212ab3b7d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 12 Oct 2012 15:04:00 +0000 Subject: drm/ttm: remove sync_arg from driver functions Signed-off-by: Maarten Lankhorst Reviewed-By: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ttm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 929be87..563c8ed 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -471,13 +471,12 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re { } -static int radeon_sync_obj_wait(void *sync_obj, void *sync_arg, - bool lazy, bool interruptible) +static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible) { return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible); } -static int radeon_sync_obj_flush(void *sync_obj, void *sync_arg) +static int radeon_sync_obj_flush(void *sync_obj) { return 0; } @@ -492,7 +491,7 @@ static void *radeon_sync_obj_ref(void *sync_obj) return radeon_fence_ref((struct radeon_fence *)sync_obj); } -static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg) +static bool radeon_sync_obj_signaled(void *sync_obj) { return radeon_fence_signaled((struct radeon_fence *)sync_obj); } -- cgit v1.1 From e35755fa343995d85b1069513300bd7bd4eca117 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 11 Oct 2012 20:44:52 -0500 Subject: drm/radeon: drm_connector_property -> 
drm_object_property Signed-off-by: Rob Clark --- drivers/gpu/drm/radeon/radeon_connectors.c | 62 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index b884c36..47bf162 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1599,7 +1599,7 @@ radeon_add_atom_connector(struct drm_device *dev, connector->interlace_allowed = true; connector->doublescan_allowed = true; radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); break; @@ -1608,13 +1608,13 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_hborder_property, 0); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); subpixel_order = SubPixelHorizontalRGB; @@ -1625,14 +1625,14 @@ radeon_add_atom_connector(struct drm_device *dev, connector->doublescan_allowed = false; if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); } break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: - drm_connector_attach_property(&radeon_connector->base, + 
drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); subpixel_order = SubPixelHorizontalRGB; @@ -1651,7 +1651,7 @@ radeon_add_atom_connector(struct drm_device *dev, DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); /* no HPD on analog connectors */ @@ -1669,7 +1669,7 @@ radeon_add_atom_connector(struct drm_device *dev, DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); /* no HPD on analog connectors */ @@ -1692,23 +1692,23 @@ radeon_add_atom_connector(struct drm_device *dev, DRM_ERROR("DVI: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); } subpixel_order = SubPixelHorizontalRGB; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.coherent_mode_property, 1); if (ASIC_IS_AVIVO(rdev)) { - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_hborder_property, 0); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); } @@ -1732,17 +1732,17 @@ radeon_add_atom_connector(struct drm_device *dev, if (!radeon_connector->ddc_bus) DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.coherent_mode_property, 1); if (ASIC_IS_AVIVO(rdev)) { - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_hborder_property, 0); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); } @@ -1771,17 +1771,17 @@ radeon_add_atom_connector(struct drm_device *dev, DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); } subpixel_order = SubPixelHorizontalRGB; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.coherent_mode_property, 1); if (ASIC_IS_AVIVO(rdev)) { - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_hborder_property, 0); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); } @@ -1806,7 +1806,7 @@ radeon_add_atom_connector(struct drm_device *dev, if (!radeon_connector->ddc_bus) DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); subpixel_order = SubPixelHorizontalRGB; @@ -1819,10 +1819,10 @@ radeon_add_atom_connector(struct drm_device *dev, drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.tv_std_property, radeon_atombios_get_tv_info(rdev)); /* no HPD on analog connectors */ @@ -1843,7 +1843,7 @@ radeon_add_atom_connector(struct drm_device *dev, if (!radeon_connector->ddc_bus) DRM_ERROR("LVDS: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); } - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); subpixel_order = SubPixelHorizontalRGB; @@ -1922,7 +1922,7 @@ radeon_add_legacy_connector(struct drm_device *dev, DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); /* no HPD on analog connectors */ @@ -1940,7 +1940,7 @@ radeon_add_legacy_connector(struct drm_device *dev, DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); /* no HPD on analog connectors */ @@ -1959,7 +1959,7 @@ radeon_add_legacy_connector(struct drm_device *dev, } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, 1); } @@ -1983,10 +1983,10 @@ radeon_add_legacy_connector(struct drm_device *dev, */ if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) radeon_connector->dac_load_detect = false; - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.load_detect_property, radeon_connector->dac_load_detect); - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.tv_std_property, radeon_combios_get_tv_info(rdev)); /* no HPD on analog connectors */ @@ -2002,7 +2002,7 @@ 
radeon_add_legacy_connector(struct drm_device *dev, if (!radeon_connector->ddc_bus) DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } - drm_connector_attach_property(&radeon_connector->base, + drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); subpixel_order = SubPixelHorizontalRGB; -- cgit v1.1 From fbed600af159b9dce78dd74c4bff56b40bb19d47 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 11:52:49 -0500 Subject: drm/radeon/dce3.2: add registers for ELD handling Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/rv770d.h | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index b0adfc5..e2d9dc8 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -551,6 +551,54 @@ #define HDMI_OFFSET0 (0x7400 - 0x7400) #define HDMI_OFFSET1 (0x7800 - 0x7400) +/* DCE3.2 ELD audio interface */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x71c8 /* LPCM */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x71cc /* AC3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x71d0 /* MPEG1 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x71d4 /* MP3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x71d8 /* MPEG2 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x71dc /* AAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x71e0 /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x71e4 /* ATRAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x71e8 /* one bit audio - leave at 0 (default) */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x71ec /* Dolby Digital */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x71f0 /* DTS-HD */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x71f4 /* MAT-MLP */ +#define 
AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x71f8 /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x71fc /* WMA Pro */ +# define MAX_CHANNELS(x) (((x) & 0x7) << 0) +/* max channels minus one. 7 = 8 channels */ +# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) +# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) +# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ +/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO + * bit0 = 32 kHz + * bit1 = 44.1 kHz + * bit2 = 48 kHz + * bit3 = 88.2 kHz + * bit4 = 96 kHz + * bit5 = 176.4 kHz + * bit6 = 192 kHz + */ + +#define AZ_HOT_PLUG_CONTROL 0x7300 +# define AZ_FORCE_CODEC_WAKE (1 << 0) +# define PIN0_JACK_DETECTION_ENABLE (1 << 4) +# define PIN1_JACK_DETECTION_ENABLE (1 << 5) +# define PIN2_JACK_DETECTION_ENABLE (1 << 6) +# define PIN3_JACK_DETECTION_ENABLE (1 << 7) +# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8) +# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9) +# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10) +# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11) +# define CODEC_HOT_PLUG_ENABLE (1 << 12) +# define PIN0_AUDIO_ENABLED (1 << 24) +# define PIN1_AUDIO_ENABLED (1 << 25) +# define PIN2_AUDIO_ENABLED (1 << 26) +# define PIN3_AUDIO_ENABLED (1 << 27) +# define AUDIO_ENABLED (1 << 31) + + #define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 #define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914 #define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114 -- cgit v1.1 From 1c4c3a99435c8891469fe6fca5ccd5fbe16f295a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 11:59:21 -0500 Subject: drm/radeon/dce4/5: add registers for ELD handling Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreend.h | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 2bc0f6a..00efb2c 100644 --- 
a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -355,6 +355,54 @@ # define AFMT_MPEG_INFO_UPDATE (1 << 10) #define AFMT_GENERIC0_7 0x7138 +/* DCE4/5 ELD audio interface */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x5f84 /* LPCM */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x5f88 /* AC3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x5f8c /* MPEG1 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x5f90 /* MP3 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x5f94 /* MPEG2 */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x5f98 /* AAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x5f9c /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x5fa0 /* ATRAC */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x5fa4 /* one bit audio - leave at 0 (default) */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x5fa8 /* Dolby Digital */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x5fac /* DTS-HD */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x5fb0 /* MAT-MLP */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x5fb4 /* DTS */ +#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x5fb8 /* WMA Pro */ +# define MAX_CHANNELS(x) (((x) & 0x7) << 0) +/* max channels minus one. 
7 = 8 channels */ +# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) +# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) +# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ +/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO + * bit0 = 32 kHz + * bit1 = 44.1 kHz + * bit2 = 48 kHz + * bit3 = 88.2 kHz + * bit4 = 96 kHz + * bit5 = 176.4 kHz + * bit6 = 192 kHz + */ + +#define AZ_HOT_PLUG_CONTROL 0x5e78 +# define AZ_FORCE_CODEC_WAKE (1 << 0) +# define PIN0_JACK_DETECTION_ENABLE (1 << 4) +# define PIN1_JACK_DETECTION_ENABLE (1 << 5) +# define PIN2_JACK_DETECTION_ENABLE (1 << 6) +# define PIN3_JACK_DETECTION_ENABLE (1 << 7) +# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8) +# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9) +# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10) +# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11) +# define CODEC_HOT_PLUG_ENABLE (1 << 12) +# define PIN0_AUDIO_ENABLED (1 << 24) +# define PIN1_AUDIO_ENABLED (1 << 25) +# define PIN2_AUDIO_ENABLED (1 << 26) +# define PIN3_AUDIO_ENABLED (1 << 27) +# define AUDIO_ENABLED (1 << 31) + + #define GC_USER_SHADER_PIPE_CONFIG 0x8954 #define INACTIVE_QD_PIPES(x) ((x) << 8) #define INACTIVE_QD_PIPES_MASK 0x0000FF00 -- cgit v1.1 From ae133a1129790ec288b429b5f08ab4701633844a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 Sep 2012 15:30:44 -0400 Subject: drm/radeon: stop page faults from hanging the system (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redirect invalid memory accesses to the default page instead of locking up the memory controller. Also enable the invalid memory access interrupts and start spamming system log with it. 
v2 (agd5f): fix up against 2 level PT changes Signed-off-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/evergreen.c | 10 ++++++++++ drivers/gpu/drm/radeon/evergreend.h | 3 +++ drivers/gpu/drm/radeon/ni.c | 16 +++++++++++++--- drivers/gpu/drm/radeon/nid.h | 11 +++++++++++ drivers/gpu/drm/radeon/si.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/radeon/sid.h | 14 ++++++++++++++ 6 files changed, 74 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 219942c..78de2e4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3093,6 +3093,16 @@ restart_ih: break; } break; + case 146: + case 147: + dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); + dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); + dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + /* reset addr and status */ + WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); + break; case 176: /* CP_INT in ring buffer */ case 177: /* CP_INT in IB1 */ case 178: /* CP_INT in IB2 */ diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 00efb2c..cae7ab4 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -699,6 +699,7 @@ #define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) #define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) #define VM_CONTEXT1_CNTL 0x1414 +#define VM_CONTEXT1_CNTL2 0x1434 #define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C #define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C #define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C @@ -720,6 +721,8 @@ #define CACHE_UPDATE_MODE(x) ((x) << 6) #define VM_L2_STATUS 0x140C #define L2_BUSY (1 << 0) +#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC +#define 
VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC #define WAIT_UNTIL 0x8040 diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 81e6a56..30c18a6 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -784,10 +784,20 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) /* enable context1-7 */ WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); - WREG32(VM_CONTEXT1_CNTL2, 0); - WREG32(VM_CONTEXT1_CNTL, 0); + WREG32(VM_CONTEXT1_CNTL2, 4); WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | + PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | + VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | + VALID_PROTECTION_FAULT_ENABLE_DEFAULT | + READ_PROTECTION_FAULT_ENABLE_INTERRUPT | + READ_PROTECTION_FAULT_ENABLE_DEFAULT | + WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); cayman_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index cbef681..f5e54a7 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -80,7 +80,18 @@ #define VM_CONTEXT0_CNTL 0x1410 #define ENABLE_CONTEXT (1 << 0) #define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) #define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) +#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) +#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) +#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) +#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) +#define 
VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) +#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) +#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) +#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) +#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) #define VM_CONTEXT1_CNTL 0x1414 #define VM_CONTEXT0_CNTL2 0x1430 #define VM_CONTEXT1_CNTL2 0x1434 diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 4422d63..c4d9eb6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2426,9 +2426,20 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) /* enable context1-15 */ WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(rdev->dummy_page.addr >> 12)); - WREG32(VM_CONTEXT1_CNTL2, 0); + WREG32(VM_CONTEXT1_CNTL2, 4); WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | + PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | + VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | + VALID_PROTECTION_FAULT_ENABLE_DEFAULT | + READ_PROTECTION_FAULT_ENABLE_INTERRUPT | + READ_PROTECTION_FAULT_ENABLE_DEFAULT | + WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); si_pcie_gart_tlb_flush(rdev); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", @@ -3684,6 +3695,16 @@ restart_ih: break; } break; + case 146: + case 147: + dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); + dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); + dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", + RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); + /* reset addr and status */ + WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); + break; case 176: /* 
RINGID0 CP_INT */ radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index a8871af..53b4d45 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -91,7 +91,18 @@ #define VM_CONTEXT0_CNTL 0x1410 #define ENABLE_CONTEXT (1 << 0) #define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) +#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) #define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) +#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) +#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) +#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) +#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) +#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) +#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) +#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) +#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) +#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) +#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) #define VM_CONTEXT1_CNTL 0x1414 #define VM_CONTEXT0_CNTL2 0x1430 #define VM_CONTEXT1_CNTL2 0x1434 @@ -104,6 +115,9 @@ #define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450 #define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454 +#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC +#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC + #define VM_INVALIDATE_REQUEST 0x1478 #define VM_INVALIDATE_RESPONSE 0x147c -- cgit v1.1 From 0d0b3e7443bed6b49cb90fe7ddc4b5578a83a88d Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 28 Nov 2012 13:47:55 -0500 Subject: drm/radeon: use cached memory when evicting for vram on non agp Force the use of cached memory when evicting from vram on non agp hardware. Also force write combine on agp hw. This is to ensure the minimum cache type change when allocating memory and improving memory eviction especially on pci/pcie hw.
Signed-off-by: Jerome Glisse --- drivers/gpu/drm/radeon/radeon_object.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index b91118c..3f9f3bb 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -88,10 +88,20 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) if (domain & RADEON_GEM_DOMAIN_VRAM) rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; - if (domain & RADEON_GEM_DOMAIN_GTT) - rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - if (domain & RADEON_GEM_DOMAIN_CPU) - rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + if (domain & RADEON_GEM_DOMAIN_GTT) { + if (rbo->rdev->flags & RADEON_IS_AGP) { + rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; + } else { + rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + } + } + if (domain & RADEON_GEM_DOMAIN_CPU) { + if (rbo->rdev->flags & RADEON_IS_AGP) { + rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; + } else { + rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + } + } if (!c) rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; rbo->placement.num_placement = c; -- cgit v1.1 From a02dc74b317d78298cb0587b9b1f6f741fd5c139 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 Nov 2012 18:03:41 -0500 Subject: drm/radeon/dce32+: use fractional fb dividers for high clocks Fixes flickering with some high res monitors.
Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 24d932f..9175615 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -561,6 +561,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, /* use frac fb div on APUs */ if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; + if (ASIC_IS_DCE32(rdev) && mode->clock > 165000) + radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; } else { radeon_crtc->pll_flags |= RADEON_PLL_LEGACY; -- cgit v1.1 From 2e1a7674f65eb2c9118ab59d9c8aa9c731da6b85 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Dec 2012 12:55:37 -0500 Subject: drm/radeon: add new INFO ioctl requests Add requests to get the number of shader engines (SE) and the number of SH per SE. These are needed for geometry and tessellation shaders in the 3D driver as well as setting up PA_SC_RASTER_CONFIG on SI asics.
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kms.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index dc781c4..9c312f9 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -361,6 +361,22 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } break; + case RADEON_INFO_MAX_SE: + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.max_shader_engines; + else if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.max_shader_engines; + else if (rdev->family >= CHIP_CEDAR) + value = rdev->config.evergreen.num_ses; + else + value = 1; + break; + case RADEON_INFO_MAX_SH_PER_SE: + if (rdev->family >= CHIP_TAHITI) + value = rdev->config.si.max_sh_per_se; + else + return -EINVAL; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; -- cgit v1.1 From 93927f9c1db5f55085457e820f0631064c7bfa34 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Dec 2012 16:50:28 -0500 Subject: drm/radeon: fix eDP clk and lane setup for scaled modes Need to use the adjusted mode since we are sending native timing and using the scaler for non-native modes. 
Signed-off-by: Alex Deucher Reviewed-by: Jerome Glisse cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_encoders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 010bae1..4552d4a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -340,7 +340,7 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, ((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) || (radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - radeon_dp_set_link_config(connector, mode); + radeon_dp_set_link_config(connector, adjusted_mode); } return true; -- cgit v1.1 From 71bfe916ebe6d026cd3d0e41c398574fc1228e03 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Dec 2012 20:00:30 -0500 Subject: drm/radeon: bump driver version for new info ioctl requests Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 07eb84e..d5ab55d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -65,9 +65,10 @@ * 2.22.0 - r600 only: RESOLVE_BOX allowed * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures + * 2.25.0 - eg+: new info request for num SE and num SH */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 24 +#define KMS_DRIVER_MINOR 25 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); -- cgit v1.1 From 
6ed9ccb41209b93409c92eb8c130eada4e0832ef Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 28 Nov 2012 11:25:40 +0000 Subject: drm/radeon: allow move_notify to be called without reservation The few places that care should have those checks instead. This allows destruction of bo backed memory without a reservation. It's required for being able to rework the delayed destroy path, as it is no longer guaranteed to hold a reservation before unlocking. However any previous wait is still guaranteed to complete, and it's one of the last things to be done before the buffer object is freed. Signed-off-by: Maarten Lankhorst Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_gart.c | 1 - drivers/gpu/drm/radeon/radeon_object.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 8690be7..6e24f84 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -1237,7 +1237,6 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, { struct radeon_bo_va *bo_va; - BUG_ON(!radeon_bo_is_reserved(bo)); list_for_each_entry(bo_va, &bo->va, bo_list) { bo_va->valid = false; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index fe6fe25..e6ee65c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -530,7 +530,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo, int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop) { - BUG_ON(!radeon_bo_is_reserved(bo)); + BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop); if (!(bo->tiling_flags & RADEON_TILING_SURFACE)) return 0; -- cgit v1.1 From 97a875cbdf89a4638eea57c2b456c7cc4e3e8b21 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 28 Nov 2012 11:25:44 +0000 Subject: drm/ttm: remove no_wait_reserve, v3 All items on the lru list 
are always reservable, so this is a stupid thing to keep. Not only that, it is used in a way which would guarantee deadlocks if it were ever to be set to block on reserve. This is a lot of churn, but mostly because of the removal of the argument which can be nested arbitrarily deeply in many places. No change of code in this patch except removal of the no_wait_reserve argument, the previous patch removed the use of no_wait_reserve. v2: - Warn if -EBUSY is returned on reservation, all objects on the list should be reservable. Adjusted patch slightly due to conflicts. v3: - Focus on no_wait_reserve removal only. Signed-off-by: Maarten Lankhorst Reviewed-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.c | 8 ++++---- drivers/gpu/drm/radeon/radeon_ttm.c | 31 ++++++++++++++++--------------- 2 files changed, 20 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index e6ee65c..bfb332e 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -250,7 +250,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, } for (i = 0; i < bo->placement.num_placement; i++) bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; if (gpu_addr != NULL) @@ -279,7 +279,7 @@ int radeon_bo_unpin(struct radeon_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement; i++) bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (unlikely(r != 0)) dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo); return r; @@ -365,7 +365,7 @@ int radeon_bo_list_validate(struct list_head *head) 
retry: radeon_ttm_placement_from_domain(bo, domain); r = ttm_bo_validate(&bo->tbo, &bo->placement, - true, false, false); + true, false); if (unlikely(r)) { if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) { domain |= RADEON_GEM_DOMAIN_GTT; @@ -585,7 +585,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; - r = ttm_bo_validate(bo, &rbo->placement, false, true, false); + r = ttm_bo_validate(bo, &rbo->placement, false, false); if (unlikely(r != 0)) return r; offset = bo->mem.start << PAGE_SHIFT; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 563c8ed..1d8ff2f 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -216,7 +216,7 @@ static void radeon_move_null(struct ttm_buffer_object *bo, } static int radeon_move_blit(struct ttm_buffer_object *bo, - bool evict, int no_wait_reserve, bool no_wait_gpu, + bool evict, bool no_wait_gpu, struct ttm_mem_reg *new_mem, struct ttm_mem_reg *old_mem) { @@ -266,14 +266,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, - evict, no_wait_reserve, no_wait_gpu, new_mem); + evict, no_wait_gpu, new_mem); radeon_fence_unref(&fence); return r; } static int radeon_move_vram_ram(struct ttm_buffer_object *bo, bool evict, bool interruptible, - bool no_wait_reserve, bool no_wait_gpu, + bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct radeon_device *rdev; @@ -294,7 +294,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, placement.busy_placement = &placements; placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, - interruptible, no_wait_reserve, no_wait_gpu); + interruptible, no_wait_gpu); if 
(unlikely(r)) { return r; } @@ -308,11 +308,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem); + r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -320,7 +320,7 @@ out_cleanup: static int radeon_move_ram_vram(struct ttm_buffer_object *bo, bool evict, bool interruptible, - bool no_wait_reserve, bool no_wait_gpu, + bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct radeon_device *rdev; @@ -340,15 +340,16 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, placement.num_busy_placement = 1; placement.busy_placement = &placements; placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu); + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, + interruptible, no_wait_gpu); if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } - r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem); + r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } @@ -359,7 +360,7 @@ out_cleanup: static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, bool interruptible, - bool no_wait_reserve, bool no_wait_gpu, + bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct radeon_device *rdev; @@ -388,18 +389,18 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { r = radeon_move_vram_ram(bo, evict, interruptible, - 
no_wait_reserve, no_wait_gpu, new_mem); + no_wait_gpu, new_mem); } else if (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) { r = radeon_move_ram_vram(bo, evict, interruptible, - no_wait_reserve, no_wait_gpu, new_mem); + no_wait_gpu, new_mem); } else { - r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem); + r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem); } if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); } return r; } -- cgit v1.1 From 4d75658bffea78f0c6f82fd46df1ec983ccacdf0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Sep 2012 15:08:35 -0400 Subject: drm/radeon/kms: Add initial support for async DMA on r6xx/r7xx Uses the new multi-ring infrastructure. 6xx/7xx has a single async DMA ring. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 471 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/r600d.h | 54 +++- drivers/gpu/drm/radeon/radeon.h | 11 +- drivers/gpu/drm/radeon/radeon_asic.c | 39 ++- drivers/gpu/drm/radeon/radeon_asic.h | 13 + drivers/gpu/drm/radeon/rv770.c | 31 ++- drivers/gpu/drm/radeon/rv770d.h | 23 ++ 7 files changed, 630 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cda280d..ee06c87 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } +/** + * r600_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up (r6xx-evergreen). + * Returns true if the engine appears to be locked up, false if not.
+ */ +bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 dma_status_reg; + + dma_status_reg = RREG32(DMA_STATUS_REG); + if (dma_status_reg & DMA_IDLE) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + int r600_asic_reset(struct radeon_device *rdev) { return r600_gpu_soft_reset(rdev); @@ -1594,6 +1617,7 @@ static void r600_gpu_init(struct radeon_device *rdev) WREG32(GB_TILING_CONFIG, tiling_config); WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); + WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff); tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); @@ -1871,6 +1895,7 @@ void r600_cp_stop(struct radeon_device *rdev) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); WREG32(SCRATCH_UMSK, 0); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; } int r600_init_microcode(struct radeon_device *rdev) @@ -2196,6 +2221,128 @@ void r600_cp_fini(struct radeon_device *rdev) radeon_scratch_free(rdev, ring->rptr_save_reg); } +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has its own packet format that is + * different from the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + */ +/** + * r600_dma_stop - stop the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine (r6xx-evergreen).
+ */ +void r600_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl = RREG32(DMA_RB_CNTL); + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; +} + +/** + * r600_dma_resume - setup and start the async dma engine + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffer and enable it. (r6xx-evergreen). + * Returns 0 for success, error for failure. + */ +int r600_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + u32 rb_cntl, dma_cntl; + u32 rb_bufsz; + int r; + + /* Reset dma */ + if (rdev->family >= CHIP_RV770) + WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); + else + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = drm_order(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR, 0); + WREG32(DMA_RB_WPTR, 0); + + /* set the wb address whether it's enabled or not */ + WREG32(DMA_RB_RPTR_ADDR_HI, + upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO, + ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + WREG32(DMA_IB_CNTL, DMA_IB_ENABLE); + + dma_cntl = RREG32(DMA_CNTL); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL, dma_cntl); + + if (rdev->family >= CHIP_RV770) + WREG32(DMA_MODE, 1); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR, ring->wptr << 2); + + 
ring->rptr = RREG32(DMA_RB_RPTR) >> 2; + + WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * r600_dma_fini - tear down the async dma engine + * + * @rdev: radeon_device pointer + * + * Stop the async dma engine and free the ring (r6xx-evergreen). + */ +void r600_dma_fini(struct radeon_device *rdev) +{ + r600_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); +} /* * GPU scratch registers helpers function. @@ -2252,6 +2399,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } +/** + * r600_dma_ring_test - simple async dma engine test + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test the DMA engine by using it to write a + * value to memory. (r6xx-SI). + * Returns 0 for success, error for failure.
+ */ +int r600_dma_ring_test(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ring_lock(rdev, ring, 4); + if (r) { + DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); + return r; + } + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/* + * CP fences/semaphores + */ + void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { @@ -2315,6 +2520,58 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); } +/* + * DMA fences/semaphores + */ + +/** + * r600_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (r6xx-r7xx). 
+ */ +void r600_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); +} + +/** + * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @semaphore: radeon semaphore object + * @emit_wait: wait or signal semaphore + * + * Add a DMA semaphore packet to the ring to wait on or signal + * other rings (r6xx-SI). + */ +void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + u64 addr = semaphore->gpu_addr; + u32 s = emit_wait ? 0 : 1; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); +} + int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -2334,6 +2591,80 @@ int r600_copy_blit(struct radeon_device *rdev, return 0; } +/** + * r600_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (r6xx-r7xx). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback.
+ */ +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFF) + cur_size_in_dw = 0xFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ -2349,7 +2680,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg) static int 
r600_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r; /* enable pcie gen2 link */ @@ -2394,6 +2725,12 @@ static int r600_startup(struct radeon_device *rdev) return r; } + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -2403,12 +2740,20 @@ static int r600_startup(struct radeon_device *rdev) } r600_irq_set(rdev); + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); + if (r) + return r; + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, + DMA_RB_RPTR, DMA_RB_WPTR, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); if (r) return r; + r = r600_cp_load_microcode(rdev); if (r) return r; @@ -2416,6 +2761,10 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; + r = r600_dma_resume(rdev); + if (r) + return r; + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -2471,7 +2820,7 @@ int r600_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); r600_cp_stop(rdev); - rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + r600_dma_stop(rdev); r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -2544,6 +2893,9 @@ int r600_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -2556,6 +2908,7 @@ int 
r600_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r600_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -2572,6 +2925,7 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_blit_fini(rdev); r600_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -2674,6 +3028,104 @@ free_scratch: return r; } +/** + * r600_dma_ib_test - test an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Test a simple IB in the DMA ring (r6xx-SI). + * Returns 0 on success, error on failure. + */ +int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_ib ib; + unsigned i; + int r; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; + u32 tmp = 0; + + if (!ptr) { + DRM_ERROR("invalid vram scratch pointer\n"); + return -EINVAL; + } + + tmp = 0xCAFEDEAD; + writel(tmp, ptr); + + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); + ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; + ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; + ib.ptr[3] = 0xDEADBEEF; + ib.length_dw = 4; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + return r; + } + r = radeon_fence_wait(ib.fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = readl(ptr); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < rdev->usec_timeout) { + DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); + } else { + DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); + r = 
-EINVAL; + } + radeon_ib_free(rdev, &ib); + return r; +} + +/** + * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (r6xx-r7xx). + */ +void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + /* * Interrupts * @@ -2865,6 +3317,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev) u32 tmp; WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; + WREG32(DMA_CNTL, tmp); WREG32(GRBM_INT_CNTL, 0); WREG32(DxMODE_INT_MASK, 0); WREG32(D1GRPH_INTERRUPT_CONTROL, 0); @@ -3006,6 +3460,7 @@ int r600_irq_set(struct radeon_device *rdev) u32 grbm_int_cntl = 0; u32 hdmi0, hdmi1; u32 d1grph = 0, d2grph = 0; + u32 dma_cntl; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -3040,12 +3495,19 @@ int r600_irq_set(struct radeon_device *rdev) hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & 
~HDMI0_AZ_FORMAT_WTRIG_MASK; } + dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE; if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } + + if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { + DRM_DEBUG("r600_irq_set: sw int dma\n"); + dma_cntl |= TRAP_ENABLE; + } + if (rdev->irq.crtc_vblank_int[0] || atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("r600_irq_set: vblank 0\n"); @@ -3090,6 +3552,7 @@ int r600_irq_set(struct radeon_device *rdev) } WREG32(CP_INT_CNTL, cp_int_cntl); + WREG32(DMA_CNTL, dma_cntl); WREG32(DxMODE_INT_MASK, mode_int); WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); @@ -3469,6 +3932,10 @@ restart_ih: DRM_DEBUG("IH: CP EOP\n"); radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; + case 224: /* DMA trap event */ + DRM_DEBUG("IH: DMA trap\n"); + radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); + break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); break; diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index fa6f370..a596c55 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -590,9 +590,59 @@ #define WAIT_2D_IDLECLEAN_bit (1 << 16) #define WAIT_3D_IDLECLEAN_bit (1 << 17) +/* async DMA */ +#define DMA_TILING_CONFIG 0x3ec4 +#define DMA_CONFIG 0x3e4c + +#define DMA_RB_CNTL 0xd000 +# define DMA_RB_ENABLE (1 << 0) +# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ +# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) +# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ +#define DMA_RB_BASE 0xd004 +#define DMA_RB_RPTR 0xd008 +#define DMA_RB_WPTR 0xd00c + +#define DMA_RB_RPTR_ADDR_HI 0xd01c +#define DMA_RB_RPTR_ADDR_LO 0xd020 + +#define DMA_IB_CNTL 0xd024 +# define DMA_IB_ENABLE (1 << 0) +# define 
DMA_IB_SWAP_ENABLE (1 << 4) +#define DMA_IB_RPTR 0xd028 +#define DMA_CNTL 0xd02c +# define TRAP_ENABLE (1 << 0) +# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) +# define SEM_WAIT_INT_ENABLE (1 << 2) +# define DATA_SWAP_ENABLE (1 << 3) +# define FENCE_SWAP_ENABLE (1 << 4) +# define CTXEMPTY_INT_ENABLE (1 << 28) +#define DMA_STATUS_REG 0xd034 +# define DMA_IDLE (1 << 0) +#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044 +#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048 +#define DMA_MODE 0xd0bc + +/* async DMA packets */ +#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ + (((t) & 0x1) << 23) | \ + (((s) & 0x1) << 22) | \ + (((n) & 0xFFFF) << 0)) +/* async DMA Packet types */ +#define DMA_PACKET_WRITE 0x2 +#define DMA_PACKET_COPY 0x3 +#define DMA_PACKET_INDIRECT_BUFFER 0x4 +#define DMA_PACKET_SEMAPHORE 0x5 +#define DMA_PACKET_FENCE 0x6 +#define DMA_PACKET_TRAP 0x7 +#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */ +#define DMA_PACKET_NOP 0xf + #define IH_RB_CNTL 0x3e00 # define IH_RB_ENABLE (1 << 0) -# define IH_IB_SIZE(x) ((x) << 1) /* log2 */ +# define IH_RB_SIZE(x) ((x) << 1) /* log2 */ # define IH_RB_FULL_DRAIN_ENABLE (1 << 6) # define IH_WPTR_WRITEBACK_ENABLE (1 << 8) # define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */ @@ -637,7 +687,9 @@ #define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20 #define SRBM_SOFT_RESET 0xe60 +# define SOFT_RESET_DMA (1 << 12) # define SOFT_RESET_RLC (1 << 13) +# define RV770_SOFT_RESET_DMA (1 << 20) #define CP_INT_CNTL 0xc124 # define CNTX_BUSY_INT_ENABLE (1 << 19) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8c42d54..461bf53 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; #define RADEON_BIOS_NUM_SCRATCH 8 /* max number of rings */ -#define RADEON_NUM_RINGS 3 +#define RADEON_NUM_RINGS 4 /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL @@ -122,6 +122,9 @@ extern int 
radeon_lockup_timeout; #define CAYMAN_RING_TYPE_CP1_INDEX 1 #define CAYMAN_RING_TYPE_CP2_INDEX 2 +/* R600+ has an async dma ring */ +#define R600_RING_TYPE_DMA_INDEX 3 + /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -787,6 +790,11 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp); +/* r600 async dma */ +void r600_dma_stop(struct radeon_device *rdev); +int r600_dma_resume(struct radeon_device *rdev); +void r600_dma_fini(struct radeon_device *rdev); + /* * CS. */ @@ -883,6 +891,7 @@ struct radeon_wb { #define RADEON_WB_CP_RPTR_OFFSET 1024 #define RADEON_WB_CP1_RPTR_OFFSET 1280 #define RADEON_WB_CP2_RPTR_OFFSET 1536 +#define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_IH_WPTR_OFFSET 2048 #define R600_WB_EVENT_OFFSET 3072 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 654520b..3cf9b29 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &r600_gpu_is_lockup, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &r600_dma_ring_ib_execute, + .emit_fence = &r600_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, } }, .irq = { @@ -963,8 +972,8 @@ static struct radeon_asic r600_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &r600_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, @@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = { 
.ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &r600_gpu_is_lockup, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &r600_dma_ring_ib_execute, + .emit_fence = &r600_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, } }, .irq = { @@ -1038,8 +1056,8 @@ static struct radeon_asic rs780_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &r600_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, @@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &r600_gpu_is_lockup, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &r600_dma_ring_ib_execute, + .emit_fence = &r600_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, } }, .irq = { @@ -1113,8 +1140,8 @@ static struct radeon_asic rv770_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &r600_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 5e3a0e5..70a5b1f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -309,6 +309,14 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); +void r600_dma_fence_ring_emit(struct radeon_device *rdev, + 
struct radeon_fence *fence); +void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int r600_asic_reset(struct radeon_device *rdev); int r600_set_surface_reg(struct radeon_device *rdev, int reg, @@ -316,11 +324,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t offset, uint32_t obj_size); void r600_clear_surface_reg(struct radeon_device *rdev, int reg); int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); +int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); +int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence); +int r600_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, struct radeon_fence **fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 79814a0..87c979c 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); WREG32(SCRATCH_UMSK, 0); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; } static 
int rv770_cp_load_microcode(struct radeon_device *rdev) @@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev) WREG32(GB_TILING_CONFIG, gb_tiling_config); WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); + WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff)); + WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff)); WREG32(CGTS_SYS_TCC_DISABLE, 0); WREG32(CGTS_TCC_DISABLE, 0); @@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev) static int rv770_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r; /* enable pcie gen2 link */ @@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev) return r; } + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev) } r600_irq_set(rdev); + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, + DMA_RB_RPTR, DMA_RB_WPTR, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; + r = rv770_cp_load_microcode(rdev); if (r) return r; @@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; + r = r600_dma_resume(rdev); + if (r) + return r; + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); r700_cp_stop(rdev); - 
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + r600_dma_stop(rdev); r600_irq_suspend(rdev); radeon_wb_disable(rdev); rv770_pcie_gart_disable(rdev); @@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev) { r600_blit_fini(rdev); r700_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index e2d9dc8..20e29d2 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -109,6 +109,9 @@ #define PIPE_TILING__SHIFT 1 #define PIPE_TILING__MASK 0x0000000e +#define DMA_TILING_CONFIG 0x3ec8 +#define DMA_TILING_CONFIG2 0xd0b8 + #define GC_USER_SHADER_PIPE_CONFIG 0x8954 #define INACTIVE_QD_PIPES(x) ((x) << 8) #define INACTIVE_QD_PIPES_MASK 0x0000FF00 @@ -358,6 +361,26 @@ #define WAIT_UNTIL 0x8040 +/* async DMA */ +#define DMA_RB_RPTR 0xd008 +#define DMA_RB_WPTR 0xd00c + +/* async DMA packets */ +#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ + (((t) & 0x1) << 23) | \ + (((s) & 0x1) << 22) | \ + (((n) & 0xFFFF) << 0)) +/* async DMA Packet types */ +#define DMA_PACKET_WRITE 0x2 +#define DMA_PACKET_COPY 0x3 +#define DMA_PACKET_INDIRECT_BUFFER 0x4 +#define DMA_PACKET_SEMAPHORE 0x5 +#define DMA_PACKET_FENCE 0x6 +#define DMA_PACKET_TRAP 0x7 +#define DMA_PACKET_CONSTANT_FILL 
0xd +#define DMA_PACKET_NOP 0xf + + #define SRBM_STATUS 0x0E50 /* DCE 3.2 HDMI */ -- cgit v1.1 From 233d1ad59a2895e348259bb6f9f4528a75ea7752 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Dec 2012 15:25:59 -0500 Subject: drm/radeon/kms: Add initial support for async DMA on evergreen Pretty similar to 6xx/7xx except the count field increased in the packet header and the max IB size increased. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen.c | 181 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/evergreend.h | 29 ++++++ drivers/gpu/drm/radeon/radeon_asic.c | 39 ++++++-- drivers/gpu/drm/radeon/radeon_asic.h | 8 ++ 4 files changed, 248 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 78de2e4..68206df 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2034,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); + WREG32(DMA_TILING_CONFIG, gb_addr_config); tmp = gb_addr_config & NUM_PIPES_MASK; tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends, @@ -2405,6 +2406,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) cayman_cp_int_cntl_setup(rdev, 2, 0); } else WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; + WREG32(DMA_CNTL, tmp); WREG32(GRBM_INT_CNTL, 0); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); @@ -2457,6 +2460,7 @@ int evergreen_irq_set(struct radeon_device *rdev) u32 grbm_int_cntl = 0; u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; + u32 dma_cntl; if (!rdev->irq.installed) { 
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -2484,6 +2488,8 @@ int evergreen_irq_set(struct radeon_device *rdev) afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; + dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE; + if (rdev->family >= CHIP_CAYMAN) { /* enable CP interrupts on all rings */ if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { @@ -2506,6 +2512,11 @@ int evergreen_irq_set(struct radeon_device *rdev) } } + if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { + DRM_DEBUG("r600_irq_set: sw int dma\n"); + dma_cntl |= TRAP_ENABLE; + } + if (rdev->irq.crtc_vblank_int[0] || atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); @@ -2591,6 +2602,9 @@ int evergreen_irq_set(struct radeon_device *rdev) cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); } else WREG32(CP_INT_CNTL, cp_int_cntl); + + WREG32(DMA_CNTL, dma_cntl); + WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); @@ -3126,6 +3140,10 @@ restart_ih: } else radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); break; + case 224: /* DMA trap event */ + DRM_DEBUG("IH: DMA trap\n"); + radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); + break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); break; @@ -3154,6 +3172,143 @@ restart_ih: return IRQ_HANDLED; } +/** + * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (evergreen-SI). 
+ */ +void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + /* write the fence */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); + radeon_ring_write(ring, addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); + radeon_ring_write(ring, fence->seq); + /* generate an interrupt */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); + /* flush HDP (register passed >> 2, matching the SRBM_WRITE encoding used by cayman_dma_vm_flush) */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); +} + +/** + * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (evergreen). + */ +void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. 
+ */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * evergreen_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (evergreen-cayman). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int evergreen_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_dw, cur_size_in_dw; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; + num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_dw = size_in_dw; + if (cur_size_in_dw > 0xFFFFF) + cur_size_in_dw = 0xFFFFF; + size_in_dw -= cur_size_in_dw; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); + 
radeon_ring_write(ring, dst_offset & 0xfffffffc); + radeon_ring_write(ring, src_offset & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_dw * 4; + dst_offset += cur_size_in_dw * 4; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + + return r; +} + static int evergreen_startup(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -3217,6 +3372,12 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -3231,12 +3392,23 @@ static int evergreen_startup(struct radeon_device *rdev) 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, + DMA_RB_RPTR, DMA_RB_WPTR, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; + r = evergreen_cp_load_microcode(rdev); if (r) return r; r = evergreen_cp_resume(rdev); if (r) return r; + r = r600_dma_resume(rdev); + if (r) + return r; r = radeon_ib_pool_init(rdev); if (r) { @@ -3283,11 +3455,9 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - r600_audio_fini(rdev); r700_cp_stop(rdev); - ring->ready = false; + r600_dma_stop(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); @@ -3364,6 +3534,9 @@ int evergreen_init(struct radeon_device *rdev) 
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3376,6 +3549,7 @@ int evergreen_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); @@ -3403,6 +3577,7 @@ void evergreen_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_blit_fini(rdev); r700_cp_fini(rdev); + r600_dma_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index cae7ab4..92d1f45 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -905,6 +905,35 @@ # define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) # define DC_HPDx_EN (1 << 28) +/* ASYNC DMA */ +#define DMA_RB_RPTR 0xd008 +#define DMA_RB_WPTR 0xd00c + +#define DMA_CNTL 0xd02c +# define TRAP_ENABLE (1 << 0) +# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) +# define SEM_WAIT_INT_ENABLE (1 << 2) +# define DATA_SWAP_ENABLE (1 << 3) +# define FENCE_SWAP_ENABLE (1 << 4) +# define CTXEMPTY_INT_ENABLE (1 << 28) +#define DMA_TILING_CONFIG 0xD0B8 + +/* async DMA packets */ +#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ + (((t) & 0x1) << 23) | \ + (((s) & 0x1) << 22) | \ + (((n) & 0xFFFFF) << 0)) +/* async DMA Packet types */ +#define DMA_PACKET_WRITE 0x2 +#define DMA_PACKET_COPY 0x3 +#define DMA_PACKET_INDIRECT_BUFFER 0x4 +#define DMA_PACKET_SEMAPHORE 0x5 +#define DMA_PACKET_FENCE 0x6 +#define DMA_PACKET_TRAP 0x7 +#define DMA_PACKET_SRBM_WRITE 0x9 +#define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_NOP 0xf + /* PCIE link stuff */ #define PCIE_LC_TRAINING_CNTL 
0xa1 /* PCIE_P */ #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 3cf9b29..1dd8d92 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1199,6 +1199,15 @@ static struct radeon_asic evergreen_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &evergreen_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, } }, .irq = { @@ -1215,8 +1224,8 @@ static struct radeon_asic evergreen_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &evergreen_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, @@ -1275,6 +1284,15 @@ static struct radeon_asic sumo_asic = { .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &evergreen_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, + } }, .irq = { .set = &evergreen_irq_set, @@ -1290,8 +1308,8 @@ static struct radeon_asic sumo_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &evergreen_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, @@ -1349,6 +1367,15 @@ static struct 
radeon_asic btc_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &evergreen_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &r600_dma_is_lockup, } }, .irq = { @@ -1365,8 +1392,8 @@ static struct radeon_asic btc_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &evergreen_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 70a5b1f..7a2705d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -441,6 +441,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc); void evergreen_disable_interrupt_state(struct radeon_device *rdev); int evergreen_blit_init(struct radeon_device *rdev); int evergreen_mc_wait_for_idle(struct radeon_device *rdev); +void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +void evergreen_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib); +int evergreen_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence); /* * cayman -- cgit v1.1 From f60cbd117a416830d5a7effc208eab8470a19167 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Dec 2012 15:27:33 -0500 Subject: drm/radeon/kms: Add initial support for async DMA on cayman/TN There are 2 async DMA engines on cayman, one at 0xd000 and one at 0xd800. 
The programming interface is the same as evergreen however there are some changes to the commands for using vmids. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen.c | 21 ++- drivers/gpu/drm/radeon/evergreend.h | 2 + drivers/gpu/drm/radeon/ni.c | 272 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/nid.h | 75 ++++++++++ drivers/gpu/drm/radeon/radeon.h | 5 +- drivers/gpu/drm/radeon/radeon_asic.c | 48 ++++++- drivers/gpu/drm/radeon/radeon_asic.h | 4 + 7 files changed, 420 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 68206df..c66251e4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); cayman_cp_int_cntl_setup(rdev, 1, 0); cayman_cp_int_cntl_setup(rdev, 2, 0); + tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE; + WREG32(CAYMAN_DMA1_CNTL, tmp); } else WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; @@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev) u32 grbm_int_cntl = 0; u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; - u32 dma_cntl; + u32 dma_cntl, dma_cntl1 = 0; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev) dma_cntl |= TRAP_ENABLE; } + if (rdev->family >= CHIP_CAYMAN) { + dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE; + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { + DRM_DEBUG("r600_irq_set: sw int dma1\n"); + dma_cntl1 |= TRAP_ENABLE; + } + } + if (rdev->irq.crtc_vblank_int[0] || atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: 
vblank 0\n"); @@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev) WREG32(DMA_CNTL, dma_cntl); + if (rdev->family >= CHIP_CAYMAN) + WREG32(CAYMAN_DMA1_CNTL, dma_cntl1); + WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); @@ -3147,6 +3160,12 @@ restart_ih: case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); break; + case 244: /* DMA trap event */ + if (rdev->family >= CHIP_CAYMAN) { + DRM_DEBUG("IH: DMA1 trap\n"); + radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); + } + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); break; diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 92d1f45..7b4a650 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -918,6 +918,8 @@ # define CTXEMPTY_INT_ENABLE (1 << 28) #define DMA_TILING_CONFIG 0xD0B8 +#define CAYMAN_DMA1_CNTL 0xd82c + /* async DMA packets */ #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ (((t) & 0x1) << 23) | \ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 30c18a6..b81aca4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev) WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); + WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); + WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); tmp = gb_addr_config & NUM_PIPES_MASK; tmp = r6xx_remap_render_backend(rdev, tmp, @@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); WREG32(SCRATCH_UMSK, 0); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; } } @@ -1128,6 +1131,181 @@ static int 
cayman_cp_resume(struct radeon_device *rdev) return 0; } +/* + * DMA + * Starting with R600, the GPU has an asynchronous + * DMA engine. The programming model is very similar + * to the 3D engine (ring buffer, IBs, etc.), but the + * DMA controller has its own packet format that is + * different from the PM4 format used by the 3D engine. + * It supports copying data, writing embedded data, + * solid fills, and a number of other things. It also + * has support for tiling/detiling of buffers. + * Cayman and newer support two asynchronous DMA engines. + */ +/** + * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (cayman-SI). + */ +void cayman_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (rdev->wb.enabled) { + u32 next_rptr = ring->wptr + 4; + while ((next_rptr & 7) != 5) + next_rptr++; + next_rptr += 3; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); + radeon_ring_write(ring, next_rptr); + } + + /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. + * Pad as necessary with NOPs. + */ + while ((ring->wptr & 7) != 5) + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); + radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + +} + +/** + * cayman_dma_stop - stop the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines (cayman-SI). 
+ */ +void cayman_dma_stop(struct radeon_device *rdev) +{ + u32 rb_cntl; + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + + /* dma0 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); + + /* dma1 */ + rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); + rb_cntl &= ~DMA_RB_ENABLE; + WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); + + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; +} + +/** + * cayman_dma_resume - setup and start the async dma engines + * + * @rdev: radeon_device pointer + * + * Set up the DMA ring buffers and enable them. (cayman-SI). + * Returns 0 for success, error for failure. + */ +int cayman_dma_resume(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + u32 rb_cntl, dma_cntl; + u32 rb_bufsz; + u32 reg_offset, wb_offset; + int i, r; + + /* Reset dma */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); + RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + + for (i = 0; i < 2; i++) { + if (i == 0) { + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + reg_offset = DMA0_REGISTER_OFFSET; + wb_offset = R600_WB_DMA_RPTR_OFFSET; + } else { + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + reg_offset = DMA1_REGISTER_OFFSET; + wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; + } + + WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); + WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = drm_order(ring->ring_size / 4); + rb_cntl = rb_bufsz << 1; +#ifdef __BIG_ENDIAN + rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; +#endif + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(DMA_RB_RPTR + reg_offset, 0); + WREG32(DMA_RB_WPTR + reg_offset, 0); + + /* set the wb address whether it's enabled or not */ + 
WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, + upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); + WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, + ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); + + if (rdev->wb.enabled) + rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + + WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); + + /* enable DMA IBs */ + WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE); + + dma_cntl = RREG32(DMA_CNTL + reg_offset); + dma_cntl &= ~CTXEMPTY_INT_ENABLE; + WREG32(DMA_CNTL + reg_offset, dma_cntl); + + ring->wptr = 0; + WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); + + ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; + + WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); + + ring->ready = true; + + r = radeon_ring_test(rdev, ring->idx, ring); + if (r) { + ring->ready = false; + return r; + } + } + + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + return 0; +} + +/** + * cayman_dma_fini - tear down the async dma engines + * + * @rdev: radeon_device pointer + * + * Stop the async dma engines and free the rings (cayman-SI). + */ +void cayman_dma_fini(struct radeon_device *rdev) +{ + cayman_dma_stop(rdev); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); + radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); +} + static int cayman_gpu_soft_reset(struct radeon_device *rdev) { struct evergreen_mc_save save; @@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev) return cayman_gpu_soft_reset(rdev); } +/** + * cayman_dma_is_lockup - Check if the DMA engine is locked up + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if the async DMA engine is locked up (cayman-SI). + * Returns true if the engine appears to be locked up, false if not. 
+ */ +bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 dma_status_reg; + + if (ring->idx == R600_RING_TYPE_DMA_INDEX) + dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET); + else + dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET); + if (dma_status_reg & DMA_IDLE) { + radeon_ring_lockup_update(ring); + return false; + } + /* force ring activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} + static int cayman_startup(struct radeon_device *rdev) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev) return r; } + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev) 0, 0xfffff, RADEON_CP_PACKET2); if (r) return r; + + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, + DMA_RB_RPTR + DMA0_REGISTER_OFFSET, + DMA_RB_WPTR + DMA0_REGISTER_OFFSET, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, + DMA_RB_RPTR + DMA1_REGISTER_OFFSET, + DMA_RB_WPTR + DMA1_REGISTER_OFFSET, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); + if (r) + return r; + r = cayman_cp_load_microcode(rdev); if (r) return r; @@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; + r = cayman_dma_resume(rdev); + if (r) + return r; + r = 
radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); cayman_cp_enable(rdev, false); - rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + cayman_dma_stop(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); cayman_pcie_gart_disable(rdev); @@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev) ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024); + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 64 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 64 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); cayman_cp_fini(rdev); + cayman_dma_fini(rdev); r600_irq_fini(rdev); if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); @@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev) { r600_blit_fini(rdev); cayman_cp_fini(rdev); + cayman_dma_fini(rdev); r600_irq_fini(rdev); if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); @@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); } + +void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | 
(HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} + diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index f5e54a7..b93186b 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -50,6 +50,24 @@ #define VMID(x) (((x) & 0x7) << 0) #define SRBM_STATUS 0x0E50 +#define SRBM_SOFT_RESET 0x0E60 +#define SOFT_RESET_BIF (1 << 1) +#define SOFT_RESET_CG (1 << 2) +#define SOFT_RESET_DC (1 << 5) +#define SOFT_RESET_DMA1 (1 << 6) +#define SOFT_RESET_GRBM (1 << 8) +#define SOFT_RESET_HDP (1 << 9) +#define SOFT_RESET_IH (1 << 10) +#define SOFT_RESET_MC (1 << 11) +#define SOFT_RESET_RLC (1 << 13) +#define SOFT_RESET_ROM (1 << 14) +#define SOFT_RESET_SEM (1 << 15) +#define SOFT_RESET_VMC (1 << 17) +#define SOFT_RESET_DMA (1 << 20) +#define SOFT_RESET_TST (1 << 21) +#define SOFT_RESET_REGBB (1 << 22) +#define SOFT_RESET_ORB (1 << 23) + #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 #define REQUEST_TYPE(x) (((x) & 0xf) << 0) #define RESPONSE_TYPE_MASK 0x000000F0 @@ -599,5 +617,62 @@ #define PACKET3_SET_APPEND_CNT 0x75 #define PACKET3_ME_WRITE 0x7A +/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ +#define DMA0_REGISTER_OFFSET 0x0 /* not a register */ +#define DMA1_REGISTER_OFFSET 0x800 /* not a register */ + +#define DMA_RB_CNTL 0xd000 +# define DMA_RB_ENABLE (1 << 0) +# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ +# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) +# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ +#define DMA_RB_BASE 0xd004 +#define DMA_RB_RPTR 0xd008 +#define DMA_RB_WPTR 0xd00c + +#define DMA_RB_RPTR_ADDR_HI 0xd01c +#define DMA_RB_RPTR_ADDR_LO 0xd020 + 
+#define DMA_IB_CNTL 0xd024 +# define DMA_IB_ENABLE (1 << 0) +# define DMA_IB_SWAP_ENABLE (1 << 4) +# define CMD_VMID_FORCE (1 << 31) +#define DMA_IB_RPTR 0xd028 +#define DMA_CNTL 0xd02c +# define TRAP_ENABLE (1 << 0) +# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) +# define SEM_WAIT_INT_ENABLE (1 << 2) +# define DATA_SWAP_ENABLE (1 << 3) +# define FENCE_SWAP_ENABLE (1 << 4) +# define CTXEMPTY_INT_ENABLE (1 << 28) +#define DMA_STATUS_REG 0xd034 +# define DMA_IDLE (1 << 0) +#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044 +#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048 +#define DMA_TILING_CONFIG 0xd0b8 +#define DMA_MODE 0xd0bc + +#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ + (((t) & 0x1) << 23) | \ + (((s) & 0x1) << 22) | \ + (((n) & 0xFFFFF) << 0)) + +#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \ + (((vmid) & 0xF) << 20) | \ + (((n) & 0xFFFFF) << 0)) + +/* async DMA Packet types */ +#define DMA_PACKET_WRITE 0x2 +#define DMA_PACKET_COPY 0x3 +#define DMA_PACKET_INDIRECT_BUFFER 0x4 +#define DMA_PACKET_SEMAPHORE 0x5 +#define DMA_PACKET_FENCE 0x6 +#define DMA_PACKET_TRAP 0x7 +#define DMA_PACKET_SRBM_WRITE 0x9 +#define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_NOP 0xf + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 461bf53..38b6fa3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; #define RADEON_BIOS_NUM_SCRATCH 8 /* max number of rings */ -#define RADEON_NUM_RINGS 4 +#define RADEON_NUM_RINGS 5 /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL @@ -124,6 +124,8 @@ extern int radeon_lockup_timeout; /* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 +/* cayman add a second async dma ring */ +#define CAYMAN_RING_TYPE_DMA1_INDEX 4 /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) @@ -893,6 +895,7 @@ struct radeon_wb { #define 
RADEON_WB_CP2_RPTR_OFFSET 1536 #define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_IH_WPTR_OFFSET 2048 +#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 #define R600_WB_EVENT_OFFSET 3072 /** diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 1dd8d92..8cf8ae8 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = { .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, .vm_flush = &cayman_vm_flush, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, + }, + [CAYMAN_RING_TYPE_DMA1_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, } }, .irq = { @@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &evergreen_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, @@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = { .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, .vm_flush = &cayman_vm_flush, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = 
&r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, + }, + [CAYMAN_RING_TYPE_DMA1_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &cayman_dma_vm_flush, } }, .irq = { @@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = { .copy = { .blit = &r600_copy_blit, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &evergreen_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = &r600_copy_blit, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 7a2705d..c2988f7 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +void cayman_dma_ring_ib_execute(struct radeon_device *rdev, + struct radeon_ib *ib); +bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); +void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); /* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev); -- cgit v1.1 From 8c5fd7efcc5c037bdfbf6d90639fcdc499824bbd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Dec 2012 15:28:18 -0500 Subject: drm/radeon/kms: Add initial support for async DMA on SI Pretty much the same as cayman. Some changes to the copy packets. 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 4 + drivers/gpu/drm/radeon/radeon_asic.c | 24 ++++- drivers/gpu/drm/radeon/radeon_asic.h | 5 + drivers/gpu/drm/radeon/si.c | 179 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/sid.h | 47 +++++++++ 5 files changed, 254 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 38b6fa3..5d68346 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -797,6 +797,10 @@ void r600_dma_stop(struct radeon_device *rdev); int r600_dma_resume(struct radeon_device *rdev); void r600_dma_fini(struct radeon_device *rdev); +void cayman_dma_stop(struct radeon_device *rdev); +int cayman_dma_resume(struct radeon_device *rdev); +void cayman_dma_fini(struct radeon_device *rdev); + /* * CS. */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 8cf8ae8..d455bcb 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1731,6 +1731,26 @@ static struct radeon_asic si_asic = { .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, .vm_flush = &si_vm_flush, + }, + [R600_RING_TYPE_DMA_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &si_dma_vm_flush, + }, + [CAYMAN_RING_TYPE_DMA1_INDEX] = { + .ib_execute = &cayman_dma_ring_ib_execute, + .emit_fence = &evergreen_dma_fence_ring_emit, + .emit_semaphore = &r600_dma_semaphore_ring_emit, + .cs_parse = NULL, + .ring_test = &r600_dma_ring_test, + .ib_test = &r600_dma_ib_test, + .is_lockup = &cayman_dma_is_lockup, + .vm_flush = &si_dma_vm_flush, } }, .irq = { @@ -1747,8 +1767,8 @@ static struct radeon_asic si_asic = { .copy = { 
.blit = NULL, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, - .dma = NULL, - .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &si_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, .copy = NULL, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, }, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index c2988f7..ae56673 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -501,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); uint64_t si_get_gpu_clock(struct radeon_device *rdev); +int si_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence); +void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); #endif diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c4d9eb6..93f7171 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev) WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); + WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); + WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); si_tiling_mode_table_init(rdev); @@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); WREG32(SCRATCH_UMSK, 0); + rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; } udelay(50); } @@ -2891,6 +2896,32 @@ void si_vm_flush(struct 
radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0x0); } +void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +{ + struct radeon_ring *ring = &rdev->ring[ridx]; + + if (vm == NULL) + return; + + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + if (vm->id < 8) { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + } else { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); + } + radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); + radeon_ring_write(ring, 1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); + radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 1 << vm->id); +} + /* * RLC */ @@ -3059,6 +3090,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev) WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(CP_INT_CNTL_RING1, 0); WREG32(CP_INT_CNTL_RING2, 0); + tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; + WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp); + tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; + WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp); WREG32(GRBM_INT_CNTL, 0); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); @@ -3178,6 +3213,7 @@ int si_irq_set(struct radeon_device *rdev) u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; + u32 dma_cntl, dma_cntl1; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -3198,6 +3234,9 @@ int 
si_irq_set(struct radeon_device *rdev) hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; + dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; + dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; + /* enable CP interrupts on all rings */ if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("si_irq_set: sw int gfx\n"); @@ -3211,6 +3250,15 @@ int si_irq_set(struct radeon_device *rdev) DRM_DEBUG("si_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } + if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { + DRM_DEBUG("si_irq_set: sw int dma\n"); + dma_cntl |= TRAP_ENABLE; + } + + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { + DRM_DEBUG("si_irq_set: sw int dma1\n"); + dma_cntl1 |= TRAP_ENABLE; + } if (rdev->irq.crtc_vblank_int[0] || atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("si_irq_set: vblank 0\n"); @@ -3270,6 +3318,9 @@ int si_irq_set(struct radeon_device *rdev) WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); + WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl); + WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1); + WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); @@ -3728,9 +3779,17 @@ restart_ih: break; } break; + case 224: /* DMA trap event */ + DRM_DEBUG("IH: DMA trap\n"); + radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); + break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); break; + case 244: /* DMA trap event */ + DRM_DEBUG("IH: DMA1 trap\n"); + radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); break; @@ -3754,6 +3813,80 @@ restart_ih: return IRQ_HANDLED; } +/** + * si_copy_dma - copy pages using the DMA engine + * + * @rdev: radeon_device pointer + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * 
@num_gpu_pages: number of GPU pages to xfer + * @fence: radeon fence object + * + * Copy GPU paging using the DMA engine (SI). + * Used by the radeon ttm implementation to move pages if + * registered as the asic copy callback. + */ +int si_copy_dma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct radeon_fence **fence) +{ + struct radeon_semaphore *sem = NULL; + int ring_index = rdev->asic->copy.dma_ring_index; + struct radeon_ring *ring = &rdev->ring[ring_index]; + u32 size_in_bytes, cur_size_in_bytes; + int i, num_loops; + int r = 0; + + r = radeon_semaphore_create(rdev, &sem); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + return r; + } + + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); + num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); + r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); + if (r) { + DRM_ERROR("radeon: moving bo (%d).\n", r); + radeon_semaphore_free(rdev, &sem, NULL); + return r; + } + + if (radeon_fence_need_sync(*fence, ring->idx)) { + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, + ring->idx); + radeon_fence_note_sync(*fence, ring->idx); + } else { + radeon_semaphore_free(rdev, &sem, NULL); + } + + for (i = 0; i < num_loops; i++) { + cur_size_in_bytes = size_in_bytes; + if (cur_size_in_bytes > 0xFFFFF) + cur_size_in_bytes = 0xFFFFF; + size_in_bytes -= cur_size_in_bytes; + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes)); + radeon_ring_write(ring, dst_offset & 0xffffffff); + radeon_ring_write(ring, src_offset & 0xffffffff); + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); + radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff); + src_offset += cur_size_in_bytes; + dst_offset += cur_size_in_bytes; + } + + r = radeon_fence_emit(rdev, fence, ring->idx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, *fence); + 
+ return r; +} + /* * startup/shutdown callbacks */ @@ -3825,6 +3958,18 @@ static int si_startup(struct radeon_device *rdev) return r; } + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); + return r; + } + /* Enable IRQ */ r = si_irq_init(rdev); if (r) { @@ -3855,6 +4000,22 @@ static int si_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, + DMA_RB_RPTR + DMA0_REGISTER_OFFSET, + DMA_RB_WPTR + DMA0_REGISTER_OFFSET, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + if (r) + return r; + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, + DMA_RB_RPTR + DMA1_REGISTER_OFFSET, + DMA_RB_WPTR + DMA1_REGISTER_OFFSET, + 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); + if (r) + return r; + r = si_cp_load_microcode(rdev); if (r) return r; @@ -3862,6 +4023,10 @@ static int si_startup(struct radeon_device *rdev) if (r) return r; + r = cayman_dma_resume(rdev); + if (r) + return r; + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -3903,9 +4068,7 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { si_cp_enable(rdev, false); - rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; - rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + cayman_dma_stop(rdev); si_irq_suspend(rdev); radeon_wb_disable(rdev); si_pcie_gart_disable(rdev); @@ -3983,6 +4146,14 @@ int si_init(struct radeon_device *rdev) ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024); + 
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 64 * 1024); + + ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 64 * 1024); + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3995,6 +4166,7 @@ int si_init(struct radeon_device *rdev) if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); si_cp_fini(rdev); + cayman_dma_fini(rdev); si_irq_fini(rdev); si_rlc_fini(rdev); radeon_wb_fini(rdev); @@ -4023,6 +4195,7 @@ void si_fini(struct radeon_device *rdev) r600_blit_fini(rdev); #endif si_cp_fini(rdev); + cayman_dma_fini(rdev); si_irq_fini(rdev); si_rlc_fini(rdev); radeon_wb_fini(rdev); diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 53b4d45..0acd327 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -936,4 +936,51 @@ #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A #define PACKET3_SWITCH_BUFFER 0x8B +/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ +#define DMA0_REGISTER_OFFSET 0x0 /* not a register */ +#define DMA1_REGISTER_OFFSET 0x800 /* not a register */ + +#define DMA_RB_CNTL 0xd000 +# define DMA_RB_ENABLE (1 << 0) +# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ +# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) +# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ +# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ +#define DMA_RB_BASE 0xd004 +#define DMA_RB_RPTR 0xd008 +#define DMA_RB_WPTR 0xd00c + +#define DMA_RB_RPTR_ADDR_HI 0xd01c +#define DMA_RB_RPTR_ADDR_LO 0xd020 + +#define DMA_IB_CNTL 0xd024 +# define DMA_IB_ENABLE (1 << 0) +# define DMA_IB_SWAP_ENABLE (1 << 4) +#define DMA_IB_RPTR 0xd028 +#define DMA_CNTL 0xd02c +# define TRAP_ENABLE (1 << 0) +# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) +# define SEM_WAIT_INT_ENABLE (1 << 2) +# define DATA_SWAP_ENABLE (1 << 3) +# define FENCE_SWAP_ENABLE (1 << 4) +# 
define CTXEMPTY_INT_ENABLE (1 << 28) +#define DMA_TILING_CONFIG 0xd0b8 + +#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ + (((b) & 0x1) << 26) | \ + (((t) & 0x1) << 23) | \ + (((s) & 0x1) << 22) | \ + (((n) & 0xFFFFF) << 0)) +/* async DMA Packet types */ +#define DMA_PACKET_WRITE 0x2 +#define DMA_PACKET_COPY 0x3 +#define DMA_PACKET_INDIRECT_BUFFER 0x4 +#define DMA_PACKET_SEMAPHORE 0x5 +#define DMA_PACKET_FENCE 0x6 +#define DMA_PACKET_TRAP 0x7 +#define DMA_PACKET_SRBM_WRITE 0x9 +#define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_NOP 0xf + #endif -- cgit v1.1 From 009ee7a0d4520d7d7aa810ca3662c03580ceeaae Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Jun 2012 18:45:15 -0400 Subject: drm/radeon/kms: add support for dma rings to radeon_test_moves() Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_test.c | 37 ++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 587c09a..fda09c9 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -26,16 +26,31 @@ #include "radeon_reg.h" #include "radeon.h" +#define RADEON_TEST_COPY_BLIT 1 +#define RADEON_TEST_COPY_DMA 0 + /* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ -void radeon_test_moves(struct radeon_device *rdev) +static void radeon_do_test_moves(struct radeon_device *rdev, int flag) { struct radeon_bo *vram_obj = NULL; struct radeon_bo **gtt_obj = NULL; struct radeon_fence *fence = NULL; uint64_t gtt_addr, vram_addr; unsigned i, n, size; - int r; + int r, ring; + + switch (flag) { + case RADEON_TEST_COPY_DMA: + ring = radeon_copy_dma_ring_index(rdev); + break; + case RADEON_TEST_COPY_BLIT: + ring = radeon_copy_blit_ring_index(rdev); + break; + default: + DRM_ERROR("Unknown copy method\n"); + return; + } size = 1024 * 1024; @@ -106,7 +121,10 @@ void 
radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(gtt_obj[i]); - r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); + if (ring == R600_RING_TYPE_DMA_INDEX) + r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); + else + r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; @@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(vram_obj); - r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); + if (ring == R600_RING_TYPE_DMA_INDEX) + r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); + else + r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; @@ -223,6 +244,14 @@ out_cleanup: } } +void radeon_test_moves(struct radeon_device *rdev) +{ + if (rdev->asic->copy.dma) + radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA); + if (rdev->asic->copy.blit) + radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); +} + void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB) -- cgit v1.1 From 2d6cc7296d4ee128ab0fa3b715f0afde511f49c2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 20 Jul 2012 13:49:49 -0400 Subject: drm/radeon: use async dma for ttm buffer moves on 6xx-SI Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index d455bcb..8505060 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -974,8 +974,8 @@ static struct radeon_asic r600_asic = { .blit_ring_index = 
RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &r600_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1058,8 +1058,8 @@ static struct radeon_asic rs780_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &r600_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1142,8 +1142,8 @@ static struct radeon_asic rv770_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &r600_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &r600_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1226,8 +1226,8 @@ static struct radeon_asic evergreen_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &evergreen_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1310,8 +1310,8 @@ static struct radeon_asic sumo_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &evergreen_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1394,8 +1394,8 @@ static struct radeon_asic btc_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - 
.copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &evergreen_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1519,8 +1519,8 @@ static struct radeon_asic cayman_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &evergreen_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1644,8 +1644,8 @@ static struct radeon_asic trinity_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &evergreen_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = &r600_copy_blit, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &evergreen_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, @@ -1769,8 +1769,8 @@ static struct radeon_asic si_asic = { .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma = &si_copy_dma, .dma_ring_index = R600_RING_TYPE_DMA_INDEX, - .copy = NULL, - .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .copy = &si_copy_dma, + .copy_ring_index = R600_RING_TYPE_DMA_INDEX, }, .surface = { .set_reg = r600_set_surface_reg, -- cgit v1.1 From 3b6b59b610f0c0f351e68ec3eff9ab51ef75fb1a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 22 Oct 2012 12:19:01 -0400 Subject: drm/radeon: add dma engine support for vm pt updates on ni (v5) Async DMA has a special packet for contiguous pt updates which saves overhead. v2: leave the CP method enabled for now as doing the updates in the DMA rings is not working properly yet. v3: update for 2 level pts v4: rebase v5: drop pte/pde packet. doesn't seem to work on NI. 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 69 +++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b81aca4..39e8be1 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1795,30 +1795,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); - - while (count) { - unsigned ndw = 1 + count * 2; - if (ndw > 0x3FFF) - ndw = 0x3FFF; - - radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); - for (; ndw > 1; ndw -= 2, --count, pe += 8) { - uint64_t value = 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; + uint64_t value; + unsigned ndw; + + if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { + while (count) { + ndw = 1 + count * 2; + if (ndw > 0x3FFF) + ndw = 0x3FFF; + + radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); + radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + for (; ndw > 1; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } addr += incr; - - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw)); + 
radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } addr += incr; + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); } - - value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); } } } -- cgit v1.1 From 33e5467871b3007c4e6deea95b2cac38a55ff9f5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 22 Oct 2012 12:22:39 -0400 Subject: drm/radeon: use DMA engine for VM page table updates on cayman/TN DMA engine has special packets to facilitate this and it also keeps the 3D engine free for other things. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 8505060..8b8f218 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1445,7 +1445,7 @@ static struct radeon_asic cayman_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .pt_ring_index = R600_RING_TYPE_DMA_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1570,7 +1570,7 @@ static struct radeon_asic trinity_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .pt_ring_index = R600_RING_TYPE_DMA_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { -- cgit v1.1 From deab48f140d28d788cb2b5705761a92b02e3440d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 22 Oct 2012 12:32:54 -0400 Subject: drm/radeon: add dma engine support for vm pt updates on si (v2) 
Async DMA has a special packet for contiguous pt updates which saves overhead. v2: rebase Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si.c | 104 +++++++++++++++++++++++++++++++++---------- drivers/gpu/drm/radeon/sid.h | 10 +++++ 2 files changed, 90 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 93f7171..f6e7815 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2825,30 +2825,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); - - while (count) { - unsigned ndw = 2 + count * 2; - if (ndw > 0x3FFE) - ndw = 0x3FFE; - - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(1))); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe)); - for (; ndw > 2; ndw -= 2, --count, pe += 8) { - uint64_t value; - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - addr += incr; - value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + uint64_t value; + unsigned ndw; + + if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) { + while (count) { + ndw = 2 + count * 2; + if (ndw > 0x3FFE) + ndw = 0x3FFE; + + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(1))); + radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe)); + for (; ndw > 2; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & 
RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); + } + } + } else { + /* DMA */ + if (flags & RADEON_VM_PAGE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw)); + radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + } else { + value = 0; + } + addr += incr; + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + /* for physically contiguous pages (vram) */ + radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw)); + radeon_ring_write(ring, pe); /* dst addr */ + radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + radeon_ring_write(ring, r600_flags); /* mask */ + radeon_ring_write(ring, 0); + radeon_ring_write(ring, value); /* value */ + radeon_ring_write(ring, upper_32_bits(value)); + radeon_ring_write(ring, incr); /* increment size */ + radeon_ring_write(ring, 0); + pe += ndw * 4; + addr += (ndw / 2) * incr; + count -= ndw / 2; + } } } } diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 0acd327..e153c25 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -972,6 +972,16 @@ (((t) & 0x1) << 23) | \ (((s) & 0x1) << 22) | \ (((n) & 0xFFFFF) << 0)) + +#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \ + (((vmid) & 0xF) << 20) | \ + (((n) & 0xFFFFF) 
<< 0)) + +#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \ + (1 << 26) | \ + (1 << 21) | \ + (((n) & 0xFFFFF) << 0)) + /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 -- cgit v1.1 From bf66a786c92488dfc99cc7f19bc9eda7b4c98fa6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 22 Oct 2012 12:34:39 -0400 Subject: drm/radeon: use DMA engine for VM page table updates on SI DMA engine has special packets to facilitate this and it also keeps the 3D engine free for other things. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 8b8f218..3ea0475 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1695,7 +1695,7 @@ static struct radeon_asic si_asic = { .vm = { .init = &si_vm_init, .fini = &si_vm_fini, - .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .pt_ring_index = R600_RING_TYPE_DMA_INDEX, .set_page = &si_vm_set_page, }, .ring = { -- cgit v1.1 From d025e9e2b890db679f1246037bf65bd4be512627 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 29 Nov 2012 10:35:41 -0500 Subject: drm/radeon: do not move bo to different placement at each cs The bo creation placement is where the bo will be. Instead of trying to move the bo at each command stream, leave this work to another worker thread that will use a more advanced heuristic.
agd5f: remove leftover unused variable Signed-off-by: Jerome Glisse Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_object.c | 18 ++++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5d68346..1b9120a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -318,6 +318,7 @@ struct radeon_bo { struct list_head list; /* Protected by tbo.reserved */ u32 placements[3]; + u32 busy_placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bfb332e..93d3445 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -84,7 +84,6 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->placement.fpfn = 0; rbo->placement.lpfn = 0; rbo->placement.placement = rbo->placements; - rbo->placement.busy_placement = rbo->placements; if (domain & RADEON_GEM_DOMAIN_VRAM) rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; @@ -105,6 +104,14 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) if (!c) rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; rbo->placement.num_placement = c; + + c = 0; + rbo->placement.busy_placement = rbo->busy_placements; + if (rbo->rdev->flags & RADEON_IS_AGP) { + rbo->busy_placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; + } else { + rbo->busy_placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + } rbo->placement.num_busy_placement = c; } @@ -350,7 +357,6 @@ int radeon_bo_list_validate(struct list_head *head) { struct radeon_bo_list *lobj; struct radeon_bo *bo; - u32 domain; int r; r = ttm_eu_reserve_buffers(head); @@ -360,17 +366,9 @@ int 
radeon_bo_list_validate(struct list_head *head) list_for_each_entry(lobj, head, tv.head) { bo = lobj->bo; if (!bo->pin_count) { - domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; - - retry: - radeon_ttm_placement_from_domain(bo, domain); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (unlikely(r)) { - if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) { - domain |= RADEON_GEM_DOMAIN_GTT; - goto retry; - } return r; } } -- cgit v1.1 From bd25f0783dc3fb72e1e2779c2b99b2d34b67fa8a Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 11 Dec 2012 11:56:52 -0500 Subject: drm/radeon: fix amd afusion gpu setup aka sumo v2 Set the proper number of tile pipe that should be a multiple of pipe depending on the number of se engine. Fix: https://bugs.freedesktop.org/show_bug.cgi?id=56405 https://bugs.freedesktop.org/show_bug.cgi?id=56720 v2: Don't change sumo2 Signed-off-by: Jerome Glisse Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen.c | 8 ++++---- drivers/gpu/drm/radeon/evergreend.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c66251e4..8dbc69a 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1821,7 +1821,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) case CHIP_SUMO: rdev->config.evergreen.num_ses = 1; rdev->config.evergreen.max_pipes = 4; - rdev->config.evergreen.max_tile_pipes = 2; + rdev->config.evergreen.max_tile_pipes = 4; if (rdev->pdev->device == 0x9648) rdev->config.evergreen.max_simds = 3; else if ((rdev->pdev->device == 0x9647) || @@ -1844,7 +1844,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.sc_prim_fifo_size = 0x40; rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30; rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = 
REDWOOD_GB_ADDR_CONFIG_GOLDEN; + gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_SUMO2: rdev->config.evergreen.num_ses = 1; @@ -1866,7 +1866,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.sc_prim_fifo_size = 0x40; rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30; rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN; + gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_BARTS: rdev->config.evergreen.num_ses = 2; @@ -1914,7 +1914,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) break; case CHIP_CAICOS: rdev->config.evergreen.num_ses = 1; - rdev->config.evergreen.max_pipes = 4; + rdev->config.evergreen.max_pipes = 2; rdev->config.evergreen.max_tile_pipes = 2; rdev->config.evergreen.max_simds = 2; rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses; diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 7b4a650..a5f3d0d 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -45,6 +45,8 @@ #define TURKS_GB_ADDR_CONFIG_GOLDEN 0x02010002 #define CEDAR_GB_ADDR_CONFIG_GOLDEN 0x02010001 #define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001 +#define SUMO_GB_ADDR_CONFIG_GOLDEN 0x02010002 +#define SUMO2_GB_ADDR_CONFIG_GOLDEN 0x02010002 /* Registers */ -- cgit v1.1 From 6253e4c75d96006c06b9ac8f417eba873de2497b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Dec 2012 14:30:32 -0500 Subject: drm/radeon: improve mc_stop/mc_resume on r5xx-r7xx Along the same lines of what was done for evergreen+ in the last kernel. 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_reg.h | 9 +++ drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/rv515.c | 122 ++++++++++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h index 2b960cb..909219b 100644 --- a/drivers/gpu/drm/radeon/r600_reg.h +++ b/drivers/gpu/drm/radeon/r600_reg.h @@ -96,6 +96,15 @@ #define R600_CONFIG_F0_BASE 0x542C #define R600_CONFIG_APER_SIZE 0x5430 +#define R600_BIF_FB_EN 0x5490 +#define R600_FB_READ_EN (1 << 0) +#define R600_FB_WRITE_EN (1 << 1) + +#define R600_CITF_CNTL 0x200c +#define R600_BLACKOUT_MASK 0x00000003 + +#define R700_MC_CITF_CNTL 0x25c0 + #define R600_ROM_CNTL 0x1600 # define R600_SCK_OVERWRITE (1 << 1) # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index ae56673..c338931 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -263,6 +263,7 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev); struct rv515_mc_save { u32 vga_render_control; u32 vga_hdp_control; + bool crtc_enabled[2]; }; int rv515_init(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 785d095..2bb6d0e 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -40,6 +40,12 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev); static void rv515_gpu_init(struct radeon_device *rdev); int rv515_mc_wait_for_idle(struct radeon_device *rdev); +static const u32 crtc_offsets[2] = +{ + 0, + AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL +}; + void rv515_debugfs(struct radeon_device *rdev) { if (r100_debugfs_rbbm_init(rdev)) { @@ -281,30 +287,114 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev) void rv515_mc_stop(struct 
radeon_device *rdev, struct rv515_mc_save *save) { + u32 crtc_enabled, tmp, frame_count, blackout; + int i, j; + save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL); save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL); - /* Stop all video */ - WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); + /* disable VGA render */ WREG32(R_000300_VGA_RENDER_CONTROL, 0); - WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1); - WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1); - WREG32(R_006080_D1CRTC_CONTROL, 0); - WREG32(R_006880_D2CRTC_CONTROL, 0); - WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0); - WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); - WREG32(R_000330_D1VGA_CONTROL, 0); - WREG32(R_000338_D2VGA_CONTROL, 0); + /* blank the display controllers */ + for (i = 0; i < rdev->num_crtc; i++) { + crtc_enabled = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]) & AVIVO_CRTC_EN; + if (crtc_enabled) { + save->crtc_enabled[i] = true; + tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]); + if (!(tmp & AVIVO_CRTC_DISP_READ_REQUEST_DISABLE)) { + radeon_wait_for_vblank(rdev, i); + tmp |= AVIVO_CRTC_DISP_READ_REQUEST_DISABLE; + WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp); + } + /* wait for the next frame */ + frame_count = radeon_get_vblank_counter(rdev, i); + for (j = 0; j < rdev->usec_timeout; j++) { + if (radeon_get_vblank_counter(rdev, i) != frame_count) + break; + udelay(1); + } + } else { + save->crtc_enabled[i] = false; + } + } + + radeon_mc_wait_for_idle(rdev); + + if (rdev->family >= CHIP_R600) { + if (rdev->family >= CHIP_RV770) + blackout = RREG32(R700_MC_CITF_CNTL); + else + blackout = RREG32(R600_CITF_CNTL); + if ((blackout & R600_BLACKOUT_MASK) != R600_BLACKOUT_MASK) { + /* Block CPU access */ + WREG32(R600_BIF_FB_EN, 0); + /* blackout the MC */ + blackout |= R600_BLACKOUT_MASK; + if (rdev->family >= CHIP_RV770) + WREG32(R700_MC_CITF_CNTL, blackout); + else + WREG32(R600_CITF_CNTL, blackout); + } + } } void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) { - 
WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start); - WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start); - WREG32(R_006910_D2GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start); - WREG32(R_006918_D2GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start); - WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); - /* Unlock host access */ + u32 tmp, frame_count; + int i, j; + + /* update crtc base addresses */ + for (i = 0; i < rdev->num_crtc; i++) { + if (rdev->family >= CHIP_RV770) { + if (i == 1) { + WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, + upper_32_bits(rdev->mc.vram_start)); + WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, + upper_32_bits(rdev->mc.vram_start)); + } else { + WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, + upper_32_bits(rdev->mc.vram_start)); + WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, + upper_32_bits(rdev->mc.vram_start)); + } + } + WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], + (u32)rdev->mc.vram_start); + WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], + (u32)rdev->mc.vram_start); + } + WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start); + + if (rdev->family >= CHIP_R600) { + /* unblackout the MC */ + if (rdev->family >= CHIP_RV770) + tmp = RREG32(R700_MC_CITF_CNTL); + else + tmp = RREG32(R600_CITF_CNTL); + tmp &= ~R600_BLACKOUT_MASK; + if (rdev->family >= CHIP_RV770) + WREG32(R700_MC_CITF_CNTL, tmp); + else + WREG32(R600_CITF_CNTL, tmp); + /* allow CPU access */ + WREG32(R600_BIF_FB_EN, R600_FB_READ_EN | R600_FB_WRITE_EN); + } + + for (i = 0; i < rdev->num_crtc; i++) { + if (save->crtc_enabled[i]) { + tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]); + tmp &= ~AVIVO_CRTC_DISP_READ_REQUEST_DISABLE; + WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp); + /* wait for the next frame */ + frame_count = radeon_get_vblank_counter(rdev, i); + for (j = 0; j < rdev->usec_timeout; j++) { + if (radeon_get_vblank_counter(rdev, 
i) != frame_count) + break; + udelay(1); + } + } + } + /* Unlock vga access */ WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control); mdelay(1); WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control); -- cgit v1.1 From b997a8ba26377895506a26a3f2b8f1e7abc4ed22 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 18:07:25 -0500 Subject: drm/radeon: add register headers for CP DMA on r6xx-SI Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreend.h | 47 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/r600d.h | 32 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 48 +++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index a5f3d0d..cb9baaa 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -1035,6 +1035,53 @@ #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D #define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_CP_DMA 0x41 +/* 1. header + * 2. SRC_ADDR_LO or DATA [31:0] + * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | + * SRC_ADDR_HI [7:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [7:0] + * 6. 
COMMAND [29:22] | BYTE_COUNT [20:0] + */ +# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) + /* 0 - SRC_ADDR + * 1 - GDS + */ +# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR + * 1 - GDS + * 2 - DATA + */ +# define PACKET3_CP_DMA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_CP_DMA_DIS_WC (1 << 21) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) +# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_CB1_DEST_BASE_ENA (1 << 7) diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index a596c55..4a53402 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1186,6 +1186,38 @@ #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D #define PACKET3_INDIRECT_BUFFER 0x32 +#define PACKET3_CP_DMA 0x41 +/* 1. header + * 2. SRC_ADDR_LO [31:0] + * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [7:0] + * 6. 
COMMAND [29:22] | BYTE_COUNT [20:0] + */ +# define PACKET3_CP_DMA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) +# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_TC_ACTION_ENA (1 << 23) diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index e153c25..62b4621 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -849,6 +849,54 @@ #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D #define PACKET3_COPY_DATA 0x40 +#define PACKET3_CP_DMA 0x41 +/* 1. header + * 2. SRC_ADDR_LO or DATA [31:0] + * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | + * SRC_ADDR_HI [7:0] + * 4. DST_ADDR_LO [31:0] + * 5. DST_ADDR_HI [7:0] + * 6. 
COMMAND [30:21] | BYTE_COUNT [20:0] + */ +# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) + /* 0 - SRC_ADDR + * 1 - GDS + */ +# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR + * 1 - GDS + * 2 - DATA + */ +# define PACKET3_CP_DMA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_CP_DMA_DIS_WC (1 << 21) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) + /* 0 - none + * 1 - 8 in 16 + * 2 - 8 in 32 + * 3 - 8 in 64 + */ +# define PACKET3_CP_DMA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) +# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) +# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30) #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_DEST_BASE_0_ENA (1 << 0) -- cgit v1.1 From 6830f585724e1db5609dab0b059ea2e338bd9cc2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 19:03:23 -0500 Subject: drm/radeon: add support for CP DMA packet to r6xx/r7xx CS checker Currently only memory to memory transfers are allowed. 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cs.c | 72 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 211c402..5d6e7f9 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1949,6 +1949,78 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+2] = upper_32_bits(offset) & 0xff; } break; + case PACKET3_CP_DMA: + { + u32 command, size; + u64 offset, tmp; + if (pkt->count != 4) { + DRM_ERROR("bad CP DMA\n"); + return -EINVAL; + } + command = radeon_get_ib_value(p, idx+4); + size = command & 0x1fffff; + if (command & PACKET3_CP_DMA_CMD_SAS) { + /* src address space is register */ + DRM_ERROR("CP DMA SAS not supported\n"); + return -EINVAL; + } else { + if (command & PACKET3_CP_DMA_CMD_SAIC) { + DRM_ERROR("CP DMA SAIC only supported for registers\n"); + return -EINVAL; + } + /* src address space is memory */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad CP DMA SRC\n"); + return -EINVAL; + } + + tmp = radeon_get_ib_value(p, idx) + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + offset = reloc->lobj.gpu_offset + tmp; + + if ((tmp + size) > radeon_bo_size(reloc->robj)) { + dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + + ib[idx] = offset; + ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); + } + if (command & PACKET3_CP_DMA_CMD_DAS) { + /* dst address space is register */ + DRM_ERROR("CP DMA DAS not supported\n"); + return -EINVAL; + } else { + /* dst address space is memory */ + if (command & PACKET3_CP_DMA_CMD_DAIC) { + DRM_ERROR("CP DMA DAIC only supported for registers\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad CP DMA DST\n"); + return -EINVAL; + } + + tmp = 
radeon_get_ib_value(p, idx+2) + + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); + + offset = reloc->lobj.gpu_offset + tmp; + + if ((tmp + size) > radeon_bo_size(reloc->robj)) { + dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + + ib[idx+2] = offset; + ib[idx+3] = upper_32_bits(offset) & 0xff; + } + break; + } case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { DRM_ERROR("bad SURFACE_SYNC\n"); -- cgit v1.1 From 8770b86b3e02c3e30f2ffc42753ff9d62bc428bf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 19:18:30 -0500 Subject: drm/radeon: add support for CP DMA packet to evergreen CS checker Currently only memory and GDS transfers are allowed. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 89 +++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index c042e49..5435879 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -2232,6 +2232,95 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+2] = upper_32_bits(offset) & 0xff; } break; + case PACKET3_CP_DMA: + { + u32 command, size, info; + u64 offset, tmp; + if (pkt->count != 4) { + DRM_ERROR("bad CP DMA\n"); + return -EINVAL; + } + command = radeon_get_ib_value(p, idx+4); + size = command & 0x1fffff; + info = radeon_get_ib_value(p, idx+1); + if (command & PACKET3_CP_DMA_CMD_SAS) { + /* src address space is register */ + /* GDS is ok */ + if (((info & 0x60000000) >> 29) != 1) { + DRM_ERROR("CP DMA SAS not supported\n"); + return -EINVAL; + } + } else { + if (command & PACKET3_CP_DMA_CMD_SAIC) { + DRM_ERROR("CP DMA SAIC only supported for registers\n"); + return -EINVAL; + } + /* src address space is memory */ + if (((info & 0x60000000) >> 29) == 0) { + r = evergreen_cs_packet_next_reloc(p, 
&reloc); + if (r) { + DRM_ERROR("bad CP DMA SRC\n"); + return -EINVAL; + } + + tmp = radeon_get_ib_value(p, idx) + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + + offset = reloc->lobj.gpu_offset + tmp; + + if ((tmp + size) > radeon_bo_size(reloc->robj)) { + dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + + ib[idx] = offset; + ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); + } else if (((info & 0x60000000) >> 29) != 2) { + DRM_ERROR("bad CP DMA SRC_SEL\n"); + return -EINVAL; + } + } + if (command & PACKET3_CP_DMA_CMD_DAS) { + /* dst address space is register */ + /* GDS is ok */ + if (((info & 0x00300000) >> 20) != 1) { + DRM_ERROR("CP DMA DAS not supported\n"); + return -EINVAL; + } + } else { + /* dst address space is memory */ + if (command & PACKET3_CP_DMA_CMD_DAIC) { + DRM_ERROR("CP DMA DAIC only supported for registers\n"); + return -EINVAL; + } + if (((info & 0x00300000) >> 20) == 0) { + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad CP DMA DST\n"); + return -EINVAL; + } + + tmp = radeon_get_ib_value(p, idx+2) + + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); + + offset = reloc->lobj.gpu_offset + tmp; + + if ((tmp + size) > radeon_bo_size(reloc->robj)) { + dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + + ib[idx+2] = offset; + ib[idx+3] = upper_32_bits(offset) & 0xff; + } else { + DRM_ERROR("bad CP DMA DST_SEL\n"); + return -EINVAL; + } + } + break; + } case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { DRM_ERROR("bad SURFACE_SYNC\n"); -- cgit v1.1 From 94e014ee98e98dedb080ed1cdf510a583ed0514b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Dec 2012 19:32:54 -0500 Subject: drm/radeon/cayman: add VM CS checker support for CP DMA Need to verify for copies involving registers. 
Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 5435879..62c2271 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -2932,6 +2932,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, u32 idx = pkt->idx + 1; u32 idx_value = ib[idx]; u32 start_reg, end_reg, reg, i; + u32 command, info; switch (pkt->opcode) { case PACKET3_NOP: @@ -3006,6 +3007,52 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, return -EINVAL; } break; + case PACKET3_CP_DMA: + command = ib[idx + 4]; + info = ib[idx + 1]; + if (command & PACKET3_CP_DMA_CMD_SAS) { + /* src address space is register */ + if (((info & 0x60000000) >> 29) == 0) { + start_reg = idx_value << 2; + if (command & PACKET3_CP_DMA_CMD_SAIC) { + reg = start_reg; + if (!evergreen_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!evergreen_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } + } + } + } + if (command & PACKET3_CP_DMA_CMD_DAS) { + /* dst address space is register */ + if (((info & 0x00300000) >> 20) == 0) { + start_reg = ib[idx + 2]; + if (command & PACKET3_CP_DMA_CMD_DAIC) { + reg = start_reg; + if (!evergreen_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!evergreen_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } + } + } + } + break; default: return -EINVAL; } -- cgit v1.1 From 5aa709be7e60a8296859766935f92bce51465341 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 
Dec 2012 19:42:37 -0500 Subject: drm/radeon/si: add VM CS checker support for CP DMA Need to verify for copies involving registers. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f6e7815..7e835d9 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2550,6 +2550,7 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev, u32 idx = pkt->idx + 1; u32 idx_value = ib[idx]; u32 start_reg, end_reg, reg, i; + u32 command, info; switch (pkt->opcode) { case PACKET3_NOP: @@ -2649,6 +2650,52 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev, return -EINVAL; } break; + case PACKET3_CP_DMA: + command = ib[idx + 4]; + info = ib[idx + 1]; + if (command & PACKET3_CP_DMA_CMD_SAS) { + /* src address space is register */ + if (((info & 0x60000000) >> 29) == 0) { + start_reg = idx_value << 2; + if (command & PACKET3_CP_DMA_CMD_SAIC) { + reg = start_reg; + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad SRC register\n"); + return -EINVAL; + } + } + } + } + } + if (command & PACKET3_CP_DMA_CMD_DAS) { + /* dst address space is register */ + if (((info & 0x00300000) >> 20) == 0) { + start_reg = ib[idx + 2]; + if (command & PACKET3_CP_DMA_CMD_DAIC) { + reg = start_reg; + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } else { + for (i = 0; i < (command & 0x1fffff); i++) { + reg = start_reg + (4 * i); + if (!si_vm_reg_valid(reg)) { + DRM_ERROR("CP DMA Bad DST register\n"); + return -EINVAL; + } + } + } + } + } + break; default: DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode); return -EINVAL; -- cgit 
v1.1 From 86a1881d08f65a42c17071a59c0088dbe2870246 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 12 Dec 2012 16:43:15 -0500 Subject: drm/radeon: fix fence driver for dma ring when wb is disabled The dma ring can't write to registers and thus has to write its fence value to memory. This ensures that it doesn't try to use a scratch register for the dma ring fence driver. Should fix: https://bugs.freedesktop.org/show_bug.cgi?id=58166 Signed-off-by: Jerome Glisse Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 3 ++- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index a76eca1..2aaf147 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2533,11 +2533,12 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev, { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + /* write the fence */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); - radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, lower_32_bits(fence->seq)); /* generate an interrupt */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 22bd6c2..410a975 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -772,7 +772,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) int r; radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); - if (rdev->wb.use_event) { + if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) { rdev->fence_drv[ring].scratch_reg = 0; index = R600_WB_EVENT_OFFSET + ring * 4; } else { -- cgit v1.1 From
2ef9bdfe64079c9d0b98dc89af3af52918b818a0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 2 Dec 2012 14:02:51 +0100 Subject: drm/radeon: add W|RREG32_IDX for MM_INDEX|DATA based mmio accesss Just refactoring to make the next patch simpler. Now all indirect register access in the new modesetting driver should go through the r100_mm_(w|r)reg functions. RADEON_READ_MM from the old driver seems to be totally unused, so just kill it. Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r100.c | 10 ++++++---- drivers/gpu/drm/radeon/radeon.h | 16 ++++++++++------ drivers/gpu/drm/radeon/radeon_combios.c | 6 ++---- drivers/gpu/drm/radeon/radeon_cp.c | 14 -------------- drivers/gpu/drm/radeon/radeon_cursor.c | 17 +++++++++-------- drivers/gpu/drm/radeon/radeon_drv.h | 1 - 6 files changed, 27 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 376884f..ae4c857 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4135,9 +4135,10 @@ int r100_init(struct radeon_device *rdev) return 0; } -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, + bool always_indirect) { - if (reg < rdev->rmmio_size) + if (reg < rdev->rmmio_size && !always_indirect) return readl(((void __iomem *)rdev->rmmio) + reg); else { writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); @@ -4145,9 +4146,10 @@ uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) } } -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, + bool always_indirect) { - if (reg < rdev->rmmio_size) + if (reg < rdev->rmmio_size && !always_indirect) writel(v, ((void __iomem *)rdev->rmmio) + reg); else { writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); diff --git
a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1b9120a..609bb18 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1631,8 +1631,10 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg); -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, + bool always_indirect); +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, + bool always_indirect); u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); @@ -1648,9 +1650,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); #define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg)) #define RREG16(reg) readw((rdev->rmmio) + (reg)) #define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg)) -#define RREG32(reg) r100_mm_rreg(rdev, (reg)) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) -#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) +#define RREG32(reg) r100_mm_rreg(rdev, (reg), false) +#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false)) +#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false) +#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg)) @@ -1675,7 +1679,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) -#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 
0x%08X\n", r100_mm_rreg((rdev), (reg))) +#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false)) #define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 45b660b..4af8912 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -3246,11 +3246,9 @@ static uint32_t combios_detect_ram(struct drm_device *dev, int ram, while (ram--) { addr = ram * 1024 * 1024; /* write to each page */ - WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); - WREG32(RADEON_MM_DATA, 0xdeadbeef); + WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef); /* read back and verify */ - WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); - if (RREG32(RADEON_MM_DATA) != 0xdeadbeef) + if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef) return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index 8b2797d..9143fc4 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -116,20 +116,6 @@ u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index) } } -u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr) -{ - u32 ret; - - if (addr < 0x10000) - ret = DRM_READ32(dev_priv->mmio, addr); - else { - DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr); - ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA); - } - - return ret; -} - static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) { u32 ret; diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 0fe56c9f..ad6df62 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -66,24 +66,25 @@ static void radeon_hide_cursor(struct drm_crtc *crtc) struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { - 
WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); - WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | - EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); + WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset, + EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | + EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { - WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); - WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); + WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset, + (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { + u32 reg; switch (radeon_crtc->crtc_id) { case 0: - WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); + reg = RADEON_CRTC_GEN_CNTL; break; case 1: - WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL); + reg = RADEON_CRTC2_GEN_CNTL; break; default: return; } - WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN); + WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN); } } diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index a1b59ca..e7fdf16 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -366,7 +366,6 @@ extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv); extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc); extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base); -extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr); extern void radeon_freelist_reset(struct drm_device * dev); extern struct drm_buf *radeon_freelist_get(struct drm_device * dev); -- cgit v1.1 From 2c385151ed6db8ded2faa3328f0377e6c5fa1e89 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 2 Dec 2012 14:06:15 +0100 Subject: drm/radeon: make 
indirect register access concurrency-safe With the new per-crtc locking multiple set-cursor calls could happen in parallel. Out of sheer paranoia I've opted for an irqsave spinlock. But if there's indeed an access from interrupt contexts to these regs it's already broken with the old code, so this can likely just be reduced to a normal spinlock. Otoh the pageflip completion happens from the vblank irq handler ... Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r100.c | 13 ++++++++++++- drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_device.c | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ae4c857..8ff7cac 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4141,8 +4141,15 @@ uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, if (reg < rdev->rmmio_size && !always_indirect) return readl(((void __iomem *)rdev->rmmio) + reg); else { + unsigned long flags; + uint32_t ret; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + + return ret; } } @@ -4152,8 +4159,12 @@ void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, if (reg < rdev->rmmio_size && !always_indirect) writel(v, ((void __iomem *)rdev->rmmio) + reg); else { + unsigned long flags; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); } } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 609bb18..285fb3f 100644 ---
a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1556,6 +1556,8 @@ struct radeon_device { /* Register mmio */ resource_size_t rmmio_base; resource_size_t rmmio_size; + /* protects concurrent MM_INDEX/DATA based register access */ + spinlock_t mmio_idx_lock; void __iomem *rmmio; radeon_rreg_t mc_rreg; radeon_wreg_t mc_wreg; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e2f5f88..49b0659 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1059,6 +1059,7 @@ int radeon_device_init(struct radeon_device *rdev, /* Registers mapping */ /* TODO: block userspace mapping of io register */ + spin_lock_init(&rdev->mmio_idx_lock); rdev->rmmio_base = pci_resource_start(rdev->pdev, 2); rdev->rmmio_size = pci_resource_len(rdev->pdev, 2); rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size); -- cgit v1.1 From 9af20792124850369e764965690b99b20623dfc4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 11 Dec 2012 23:42:24 +0100 Subject: drm/radeon: fix fence locking in the pageflip callback We need to hold bdev->fence_lock while grabbing a reference to the fence, to prevent concurrent clearing/changing of the ttm_bo->sync_obj field. 
Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bfa2a60..310c0e5 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -378,8 +378,12 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, work->old_rbo = rbo; obj = new_radeon_fb->obj; rbo = gem_to_radeon_bo(obj); + + spin_lock(&rbo->tbo.bdev->fence_lock); if (rbo->tbo.sync_obj) work->fence = radeon_fence_ref(rbo->tbo.sync_obj); + spin_unlock(&rbo->tbo.bdev->fence_lock); + INIT_WORK(&work->work, radeon_unpin_work_func); /* We borrow the event spin lock for protecting unpin_work */ -- cgit v1.1 From 4ac0533abaec2b83a7f2c675010eedd55664bc26 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 13 Dec 2012 12:08:11 -0500 Subject: drm/radeon: fix htile buffer size computation for command stream checker Fix the size computation of the htile buffer. 
Signed-off-by: Jerome Glisse Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 17 +++++-- drivers/gpu/drm/radeon/r600_cs.c | 92 ++++++++--------------------------- drivers/gpu/drm/radeon/radeon_drv.c | 3 +- 3 files changed, 35 insertions(+), 77 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 62c2271..fc7e613 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -507,20 +507,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, /* height is npipes htiles aligned == npipes * 8 pixel aligned */ nby = round_up(nby, track->npipes * 8); } else { + /* always assume 8x8 htile */ + /* align is htile align * 8, htile align vary according to + * number of pipe and tile width and nby + */ switch (track->npipes) { case 8: + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ nbx = round_up(nbx, 64 * 8); nby = round_up(nby, 64 * 8); break; case 4: + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ nbx = round_up(nbx, 64 * 8); nby = round_up(nby, 32 * 8); break; case 2: + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ nbx = round_up(nbx, 32 * 8); nby = round_up(nby, 32 * 8); break; case 1: + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ nbx = round_up(nbx, 32 * 8); nby = round_up(nby, 16 * 8); break; @@ -531,9 +539,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, } } /* compute number of htile */ - nbx = nbx / 8; - nby = nby / 8; - size = nbx * nby * 4; + nbx = nbx >> 3; + nby = nby >> 3; + /* size must be aligned on npipes * 2K boundary */ + size = roundup(nbx * nby * 4, track->npipes * (2 << 10)); size += track->htile_offset; if (size > radeon_bo_size(track->htile_bo)) { @@ -1790,6 +1799,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_HTILE_SURFACE: /* 8x8 only */ track->htile_surface = radeon_get_ib_value(p, idx); + /* force 8x8 htile width and 
height */ + ib[idx] |= 3; track->db_dirty = true; break; case CB_IMMED0_BASE: diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 5d6e7f9..0b4d833 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) /* nby is npipes htiles aligned == npipes * 8 pixel aligned */ nby = round_up(nby, track->npipes * 8); } else { - /* htile widht & nby (8 or 4) make 2 bits number */ - tmp = track->htile_surface & 3; + /* always assume 8x8 htile */ /* align is htile align * 8, htile align vary according to * number of pipe and tile width and nby */ switch (track->npipes) { case 8: - switch (tmp) { - case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ - nbx = round_up(nbx, 64 * 8); - nby = round_up(nby, 64 * 8); - break; - case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ - case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 64 * 8); - nby = round_up(nby, 32 * 8); - break; - case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 32 * 8); - nby = round_up(nby, 32 * 8); - break; - default: - return -EINVAL; - } + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 64 * 8); break; case 4: - switch (tmp) { - case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ - nbx = round_up(nbx, 64 * 8); - nby = round_up(nby, 32 * 8); - break; - case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ - case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 32 * 8); - nby = round_up(nby, 32 * 8); - break; - case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 32 * 8); - nby = round_up(nby, 16 * 8); - break; - default: - return -EINVAL; - } + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 64 * 8); + nby = round_up(nby, 32 * 8); break; case 2: - switch (tmp) { - case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ - nbx = round_up(nbx, 32 * 8); - nby = 
round_up(nby, 32 * 8); - break; - case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ - case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 32 * 8); - nby = round_up(nby, 16 * 8); - break; - case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 16 * 8); - nby = round_up(nby, 16 * 8); - break; - default: - return -EINVAL; - } + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 32 * 8); break; case 1: - switch (tmp) { - case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ - nbx = round_up(nbx, 32 * 8); - nby = round_up(nby, 16 * 8); - break; - case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/ - case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 16 * 8); - nby = round_up(nby, 16 * 8); - break; - case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/ - nbx = round_up(nbx, 16 * 8); - nby = round_up(nby, 8 * 8); - break; - default: - return -EINVAL; - } + /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ + nbx = round_up(nbx, 32 * 8); + nby = round_up(nby, 16 * 8); break; default: dev_warn(p->dev, "%s:%d invalid num pipes %d\n", @@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) } } /* compute number of htile */ - nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4; - nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? 
nby / 8 : nby / 4; - size = nbx * nby * 4; + nbx = nbx >> 3; + nby = nby >> 3; + /* size must be aligned on npipes * 2K boundary */ + size = roundup(nbx * nby * 4, track->npipes * (2 << 10)); size += track->htile_offset; if (size > radeon_bo_size(track->htile_bo)) { @@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case DB_HTILE_SURFACE: track->htile_surface = radeon_get_ib_value(p, idx); + /* force 8x8 htile width and height */ + ib[idx] |= 3; track->db_dirty = true; break; case SQ_PGM_START_FS: diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 91b6427..12e9912 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -66,9 +66,10 @@ * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures * 2.25.0 - eg+: new info request for num SE and num SH + * 2.26.0 - r600-eg: fix htile size computation */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 25 +#define KMS_DRIVER_MINOR 26 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); -- cgit v1.1 From cf4ccd016bae1a03bb38170eb54b5db4b04e0545 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Nov 2011 10:19:47 -0500 Subject: drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) Allows us to use the DMA ring from userspace. DMA doesn't have a good NOP packet in which to embed the reloc idx, so userspace has to add a reloc for each buffer used and order them to match the command stream. 
v2: fix address bounds checking, reloc indexing Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cs.c | 193 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_asic.c | 6 +- drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/radeon_cs.c | 1 + 5 files changed, 199 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 0b4d833..0be768b 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -2514,3 +2514,196 @@ void r600_cs_legacy_init(void) { r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm; } + +/* + * DMA + */ +/** + * r600_dma_cs_next_reloc() - parse next reloc + * @p: parser structure holding parsing context. + * @cs_reloc: reloc informations + * + * Return the next reloc, do bo validation and compute + * GPU offset using the provided start. + **/ +int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc) +{ + struct radeon_cs_chunk *relocs_chunk; + unsigned idx; + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + *cs_reloc = NULL; + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + idx = p->dma_reloc_idx; + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + *cs_reloc = p->relocs_ptr[idx]; + p->dma_reloc_idx++; + return 0; +} + +#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28) +#define GET_DMA_COUNT(h) ((h) & 0x0000ffff) +#define GET_DMA_T(h) (((h) & 0x00800000) >> 23) + +/** + * r600_dma_cs_parse() - parse the DMA IB + * @p: parser structure holding parsing context. + * + * Parses the DMA IB from the CS ioctl and updates + * the GPU addresses based on the reloc information and + * checks for errors. 
(R6xx-R7xx) + * Returns 0 for success and an error on failure. + **/ +int r600_dma_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; + struct radeon_cs_reloc *src_reloc, *dst_reloc; + u32 header, cmd, count, tiled; + volatile u32 *ib = p->ib.ptr; + u32 idx, idx_value; + u64 src_offset, dst_offset; + int r; + + do { + if (p->idx >= ib_chunk->length_dw) { + DRM_ERROR("Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); + return -EINVAL; + } + idx = p->idx; + header = radeon_get_ib_value(p, idx); + cmd = GET_DMA_CMD(header); + count = GET_DMA_COUNT(header); + tiled = GET_DMA_T(header); + + switch (cmd) { + case DMA_PACKET_WRITE: + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_WRITE\n"); + return -EINVAL; + } + if (tiled) { + dst_offset = ib[idx+1]; + dst_offset <<= 8; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + p->idx += count + 5; + } else { + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + p->idx += count + 3; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + break; + case DMA_PACKET_COPY: + r = r600_dma_cs_next_reloc(p, &src_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_COPY\n"); + return -EINVAL; + } + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_COPY\n"); + return -EINVAL; + } + if (tiled) { + idx_value = radeon_get_ib_value(p, idx + 2); + /* detile bit */ + if (idx_value & (1 << 31)) { + /* tiled src, linear dst */ + src_offset = ib[idx+1]; + src_offset <<= 8; + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + + dst_offset = ib[idx+5]; + dst_offset |= 
((u64)(ib[idx+6] & 0xff)) << 32; + ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + } else { + /* linear src, tiled dst */ + src_offset = ib[idx+5]; + src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + + dst_offset = ib[idx+1]; + dst_offset <<= 8; + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + } + p->idx += 7; + } else { + src_offset = ib[idx+2]; + src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 5; + } + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + break; + case DMA_PACKET_CONSTANT_FILL: + if (p->family < CHIP_RV770) { + DRM_ERROR("Constant Fill is 7xx only !\n"); + return -EINVAL; + } + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_WRITE\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + p->idx += 4; + break; + case DMA_PACKET_NOP: + p->idx += 1; + break; + default: + DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + return -EINVAL; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); +#if 0 + for (r = 0; r < p->ib->length_dw; r++) { + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); + mdelay(1); + } +#endif + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 285fb3f..5dc744d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -839,6 +839,7 @@ struct radeon_cs_parser { struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; struct list_head validated; + unsigned dma_reloc_idx; /* indices of various chunks */ int chunk_ib_idx; int chunk_relocs_idx; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 3ea0475..d360341 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = { .ib_execute = &r600_dma_ring_ib_execute, .emit_fence = &r600_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &r600_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, @@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = { .ib_execute = &r600_dma_ring_ib_execute, .emit_fence = &r600_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &r600_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, @@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = { .ib_execute = &r600_dma_ring_ib_execute, .emit_fence = &r600_dma_fence_ring_emit, .emit_semaphore = 
&r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &r600_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index c338931..b311c0a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev); uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int r600_cs_parse(struct radeon_cs_parser *p); +int r600_dma_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void r600_semaphore_ring_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 41672cc..1b32a5a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) return 0; } chunk = &p->chunks[p->chunk_relocs_idx]; + p->dma_reloc_idx = 0; /* FIXME: we assume that each relocs use 4 dwords */ p->nrelocs = chunk->length_dw / 4; p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL); -- cgit v1.1 From d2ead3eaf8a4bf92129eda69189ce18a6c1cc8bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Dec 2012 09:55:45 -0500 Subject: drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2) Allows us to use the DMA ring from userspace. DMA doesn't have a good NOP packet in which to embed the reloc idx, so userspace has to add a reloc for each buffer used and order them to match the command stream. 
v2: fix address bounds checking Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 451 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_asic.c | 14 +- drivers/gpu/drm/radeon/radeon_asic.h | 1 + 3 files changed, 459 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index fc7e613..0a1ec4e 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -34,6 +34,8 @@ #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b)) +int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, + struct radeon_cs_reloc **cs_reloc); static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, struct radeon_cs_reloc **cs_reloc); @@ -2815,6 +2817,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) return 0; } +/* + * DMA + */ + +#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28) +#define GET_DMA_COUNT(h) ((h) & 0x000fffff) +#define GET_DMA_T(h) (((h) & 0x00800000) >> 23) +#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26) +#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20) + +/** + * evergreen_dma_cs_parse() - parse the DMA IB + * @p: parser structure holding parsing context. + * + * Parses the DMA IB from the CS ioctl and updates + * the GPU addresses based on the reloc information and + * checks for errors. (Evergreen-Cayman) + * Returns 0 for success and an error on failure. 
+ **/ +int evergreen_dma_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; + struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc; + u32 header, cmd, count, tiled, new_cmd, misc; + volatile u32 *ib = p->ib.ptr; + u32 idx, idx_value; + u64 src_offset, dst_offset, dst2_offset; + int r; + + do { + if (p->idx >= ib_chunk->length_dw) { + DRM_ERROR("Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); + return -EINVAL; + } + idx = p->idx; + header = radeon_get_ib_value(p, idx); + cmd = GET_DMA_CMD(header); + count = GET_DMA_COUNT(header); + tiled = GET_DMA_T(header); + new_cmd = GET_DMA_NEW(header); + misc = GET_DMA_MISC(header); + + switch (cmd) { + case DMA_PACKET_WRITE: + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_WRITE\n"); + return -EINVAL; + } + if (tiled) { + dst_offset = ib[idx+1]; + dst_offset <<= 8; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + p->idx += count + 7; + } else { + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + p->idx += count + 3; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + break; + case DMA_PACKET_COPY: + r = r600_dma_cs_next_reloc(p, &src_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_COPY\n"); + return -EINVAL; + } + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_COPY\n"); + return -EINVAL; + } + if (tiled) { + idx_value = radeon_get_ib_value(p, idx + 2); + if (new_cmd) { + switch (misc) { + case 0: + /* L2T, frame to fields */ + if (idx_value & (1 << 31)) { + DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + return -EINVAL; + } + r = 
r600_dma_cs_next_reloc(p, &dst2_reloc); + if (r) { + DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset <<= 8; + dst2_offset = ib[idx+2]; + dst2_offset <<= 8; + src_offset = ib[idx+8]; + src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 10; + break; + case 1: + /* L2T, T2L partial */ + if (p->family < CHIP_CAYMAN) { + DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + return -EINVAL; + } + /* detile bit */ + if (idx_value & (1 << 31)) { + /* tiled src, linear dst */ + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + + ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + } else { + /* linear src, tiled dst */ + ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + } + p->idx += 12; + break; + case 3: + /* L2T, broadcast */ + if (idx_value & (1 << 31)) { + 
DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + return -EINVAL; + } + r = r600_dma_cs_next_reloc(p, &dst2_reloc); + if (r) { + DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset <<= 8; + dst2_offset = ib[idx+2]; + dst2_offset <<= 8; + src_offset = ib[idx+8]; + src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 10; + break; + case 4: + /* L2T, T2L */ + /* detile bit */ + if (idx_value & (1 << 31)) { + /* tiled src, linear dst */ + src_offset = ib[idx+1]; + src_offset <<= 8; + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + + dst_offset = ib[idx+7]; + dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + } else { + /* linear src, tiled dst */ + src_offset = ib[idx+7]; + src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + + 
dst_offset = ib[idx+1]; + dst_offset <<= 8; + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + } + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + p->idx += 9; + break; + case 5: + /* T2T partial */ + if (p->family < CHIP_CAYMAN) { + DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + p->idx += 13; + break; + case 7: + /* L2T, broadcast */ + if (idx_value & (1 << 31)) { + DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + return -EINVAL; + } + r = r600_dma_cs_next_reloc(p, &dst2_reloc); + if (r) { + DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset <<= 8; + dst2_offset = ib[idx+2]; + dst2_offset <<= 8; + src_offset = ib[idx+8]; + src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 10; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } else { + switch (misc) { + case 0: + /* detile bit */ + if (idx_value & (1 << 31)) { + /* tiled src, linear dst */ + src_offset = ib[idx+1]; + src_offset <<= 8; + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + + dst_offset = ib[idx+7]; + dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + } else { + /* linear src, tiled dst */ + src_offset = ib[idx+7]; + src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + + dst_offset = ib[idx+1]; + dst_offset <<= 8; + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + } + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + p->idx += 9; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } + } else { + if (new_cmd) { + switch (misc) { + case 0: + /* L2L, byte */ + src_offset = ib[idx+2]; + src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, 
byte src buffer too small (%llu %lu)\n", + src_offset + count, radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", + dst_offset + count, radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 5; + break; + case 1: + /* L2L, partial */ + if (p->family < CHIP_CAYMAN) { + DRM_ERROR("L2L Partial is cayman only !\n"); + return -EINVAL; + } + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + + p->idx += 9; + break; + case 4: + /* L2L, dw, broadcast */ + r = r600_dma_cs_next_reloc(p, &dst2_reloc); + if (r) { + DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; + dst2_offset = ib[idx+2]; + dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32; + src_offset = ib[idx+3]; + src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, dw, 
broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff; + ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 7; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } else { + /* L2L, dw */ + src_offset = ib[idx+2]; + src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + return -EINVAL; + } + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + p->idx += 5; + } + } + break; + case DMA_PACKET_CONSTANT_FILL: + r = r600_dma_cs_next_reloc(p, &dst_reloc); + if (r) { + DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); + return -EINVAL; + } + dst_offset = ib[idx+1]; + dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { + dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); + return -EINVAL; + } + ib[idx+1] 
+= (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + p->idx += 4; + break; + case DMA_PACKET_NOP: + p->idx += 1; + break; + default: + DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + return -EINVAL; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); +#if 0 + for (r = 0; r < p->ib->length_dw; r++) { + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); + mdelay(1); + } +#endif + return 0; +} + /* vm parser */ static bool evergreen_vm_reg_valid(u32 reg) { diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index d360341..ac1d570 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1204,7 +1204,7 @@ static struct radeon_asic evergreen_asic = { .ib_execute = &evergreen_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, @@ -1288,7 +1288,7 @@ static struct radeon_asic sumo_asic = { .ib_execute = &evergreen_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, @@ -1372,7 +1372,7 @@ static struct radeon_asic btc_asic = { .ib_execute = &evergreen_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, @@ -1486,7 +1486,7 @@ static struct radeon_asic cayman_asic = { .ib_execute = &cayman_dma_ring_ib_execute, .emit_fence = 
&evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, @@ -1496,7 +1496,7 @@ static struct radeon_asic cayman_asic = { .ib_execute = &cayman_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, @@ -1611,7 +1611,7 @@ static struct radeon_asic trinity_asic = { .ib_execute = &cayman_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, @@ -1621,7 +1621,7 @@ static struct radeon_asic trinity_asic = { .ib_execute = &cayman_dma_ring_ib_execute, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, - .cs_parse = NULL, + .cs_parse = &evergreen_dma_cs_parse, .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b311c0a..d2ac646 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -431,6 +431,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc); int evergreen_irq_set(struct radeon_device *rdev); int evergreen_irq_process(struct radeon_device *rdev); extern int evergreen_cs_parse(struct radeon_cs_parser *p); +extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p); extern void evergreen_pm_misc(struct radeon_device *rdev); extern void evergreen_pm_prepare(struct radeon_device *rdev); extern void 
evergreen_pm_finish(struct radeon_device *rdev); -- cgit v1.1 From cd459e525f4faeefa0bf78e1bcba3e04496b2cb5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Dec 2012 12:17:38 -0500 Subject: drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI Allows us to use async DMA from userspace. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 111 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_asic.c | 6 ++ drivers/gpu/drm/radeon/radeon_asic.h | 1 + 3 files changed, 118 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 0a1ec4e..9a9d3ae 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -3556,3 +3556,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) return ret; } + +/** + * evergreen_dma_ib_parse() - parse the DMA IB for VM + * @rdev: radeon_device pointer + * @ib: radeon_ib pointer + * + * Parses the DMA IB from the VM CS ioctl + * checks for errors. (Cayman-SI) + * Returns 0 for success and an error on failure. 
+ **/ +int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) +{ + u32 idx = 0; + u32 header, cmd, count, tiled, new_cmd, misc; + + do { + header = ib->ptr[idx]; + cmd = GET_DMA_CMD(header); + count = GET_DMA_COUNT(header); + tiled = GET_DMA_T(header); + new_cmd = GET_DMA_NEW(header); + misc = GET_DMA_MISC(header); + + switch (cmd) { + case DMA_PACKET_WRITE: + if (tiled) + idx += count + 7; + else + idx += count + 3; + break; + case DMA_PACKET_COPY: + if (tiled) { + if (new_cmd) { + switch (misc) { + case 0: + /* L2T, frame to fields */ + idx += 10; + break; + case 1: + /* L2T, T2L partial */ + idx += 12; + break; + case 3: + /* L2T, broadcast */ + idx += 10; + break; + case 4: + /* L2T, T2L */ + idx += 9; + break; + case 5: + /* T2T partial */ + idx += 13; + break; + case 7: + /* L2T, broadcast */ + idx += 10; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } else { + switch (misc) { + case 0: + idx += 9; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } + } else { + if (new_cmd) { + switch (misc) { + case 0: + /* L2L, byte */ + idx += 5; + break; + case 1: + /* L2L, partial */ + idx += 9; + break; + case 4: + /* L2L, dw, broadcast */ + idx += 7; + break; + default: + DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc); + return -EINVAL; + } + } else { + /* L2L, dw */ + idx += 5; + } + } + break; + case DMA_PACKET_CONSTANT_FILL: + idx += 4; + break; + case DMA_PACKET_NOP: + idx += 1; + break; + default: + DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + return -EINVAL; + } + } while (idx < ib->length_dw); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index ac1d570..596bcbe 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1484,6 +1484,7 @@ static struct radeon_asic cayman_asic = { }, [R600_RING_TYPE_DMA_INDEX] = { .ib_execute = 
&cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = &evergreen_dma_cs_parse, @@ -1494,6 +1495,7 @@ static struct radeon_asic cayman_asic = { }, [CAYMAN_RING_TYPE_DMA1_INDEX] = { .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = &evergreen_dma_cs_parse, @@ -1609,6 +1611,7 @@ static struct radeon_asic trinity_asic = { }, [R600_RING_TYPE_DMA_INDEX] = { .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = &evergreen_dma_cs_parse, @@ -1619,6 +1622,7 @@ static struct radeon_asic trinity_asic = { }, [CAYMAN_RING_TYPE_DMA1_INDEX] = { .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = &evergreen_dma_cs_parse, @@ -1734,6 +1738,7 @@ static struct radeon_asic si_asic = { }, [R600_RING_TYPE_DMA_INDEX] = { .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = NULL, @@ -1744,6 +1749,7 @@ static struct radeon_asic si_asic = { }, [CAYMAN_RING_TYPE_DMA1_INDEX] = { .ib_execute = &cayman_dma_ring_ib_execute, + .ib_parse = &evergreen_dma_ib_parse, .emit_fence = &evergreen_dma_fence_ring_emit, .emit_semaphore = &r600_dma_semaphore_ring_emit, .cs_parse = NULL, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index d2ac646..5f4882c 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -473,6 +473,7 @@ void cayman_vm_set_page(struct radeon_device 
*rdev, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring); -- cgit v1.1 From 278a334cbc96d3da66d56235b8ce84081e9a1892 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Dec 2012 12:27:28 -0500 Subject: drm/radeon: enable the async DMA rings in the CS ioctl This enables the functionality added in the previous patches. Userspace acceleration drivers can use the CS ioctl to submit command buffers to the async DMA rings. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 1b32a5a..396baba 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -112,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority } else p->ring = RADEON_RING_TYPE_GFX_INDEX; break; + case RADEON_CS_RING_DMA: + if (p->rdev->family >= CHIP_CAYMAN) { + if (p->priority > 0) + p->ring = R600_RING_TYPE_DMA_INDEX; + else + p->ring = CAYMAN_RING_TYPE_DMA1_INDEX; + } else if (p->rdev->family >= CHIP_R600) { + p->ring = R600_RING_TYPE_DMA_INDEX; + } else { + return -EINVAL; + } + break; } return 0; } -- cgit v1.1 From 8696e33f06b0c52195152cc6a0e3d52233f486c1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Dec 2012 18:57:07 -0500 Subject: drm/radeon: bump version for CS ioctl support for async DMA Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git 
a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 12e9912..9b1a727 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -67,9 +67,10 @@ * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures * 2.25.0 - eg+: new info request for num SE and num SH * 2.26.0 - r600-eg: fix htile size computation + * 2.27.0 - r600-SI: Add CS ioctl support for async DMA */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 26 +#define KMS_DRIVER_MINOR 27 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); -- cgit v1.1 From 9d89d78e3a20980205966fba6345645547e59ceb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 14 Dec 2012 00:23:06 -0500 Subject: drm/radeon: add more pedantic checks in the CP DMA checker non-mem-to-mem transfers require dw aligned byte count. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 9a9d3ae..74c6b42 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -2256,6 +2256,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, command = radeon_get_ib_value(p, idx+4); size = command & 0x1fffff; info = radeon_get_ib_value(p, idx+1); + if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ + (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ + ((((info & 0x00300000) >> 20) == 0) && + (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ + ((((info & 0x60000000) >> 29) == 0) && + (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ + /* non mem to mem copies requires dw aligned count */ + if (size % 4) { + DRM_ERROR("CP DMA command requires dw count alignment\n"); + return -EINVAL; + } + 
} if (command & PACKET3_CP_DMA_CMD_SAS) { /* src address space is register */ /* GDS is ok */ @@ -3472,6 +3484,18 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, case PACKET3_CP_DMA: command = ib[idx + 4]; info = ib[idx + 1]; + if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ + (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ + ((((info & 0x00300000) >> 20) == 0) && + (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ + ((((info & 0x60000000) >> 29) == 0) && + (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ + /* non mem to mem copies requires dw aligned count */ + if ((command & 0x1fffff) % 4) { + DRM_ERROR("CP DMA command requires dw count alignment\n"); + return -EINVAL; + } + } if (command & PACKET3_CP_DMA_CMD_SAS) { /* src address space is register */ if (((info & 0x60000000) >> 29) == 0) { -- cgit v1.1 From dd54fee7d440c4a9756cce2c24a50c15e4c17ccb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 14 Dec 2012 21:04:46 +1000 Subject: radeon: fix regression with eviction since evict caching changes Since 0d0b3e7443bed6b49cb90fe7ddc4b5578a83a88d drm/radeon: use cached memory when evicting for vram on non agp evicting from TTM would try and evict to TTM instead of system, not so good. 
This should fix: https://bugs.freedesktop.org/show_bug.cgi?id=58272 Signed-off-by: Dave Airlie Signed-off-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 93d3445..883c95d 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -96,9 +96,9 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) } if (domain & RADEON_GEM_DOMAIN_CPU) { if (rbo->rdev->flags & RADEON_IS_AGP) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; + rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; } } if (!c) -- cgit v1.1