summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/radeon/Makefile5
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c1
-rw-r--r--drivers/gpu/drm/radeon/avivod.h (renamed from drivers/gpu/drm/radeon/r300.h)42
-rw-r--r--drivers/gpu/drm/radeon/r100.c135
-rw-r--r--drivers/gpu/drm/radeon/r100d.h76
-rw-r--r--drivers/gpu/drm/radeon/r300.c3
-rw-r--r--drivers/gpu/drm/radeon/r300d.h76
-rw-r--r--drivers/gpu/drm/radeon/r600.c1714
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c855
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c777
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.c1072
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.h14
-rw-r--r--drivers/gpu/drm/radeon/r600_cp.c252
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c658
-rw-r--r--drivers/gpu/drm/radeon/r600d.h661
-rw-r--r--drivers/gpu/drm/radeon/radeon.h120
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h156
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_clocks.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c340
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h141
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c54
-rw-r--r--drivers/gpu/drm/radeon/radeon_reg.h18
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c119
-rw-r--r--drivers/gpu/drm/radeon/radeon_share.h77
-rw-r--r--drivers/gpu/drm/radeon/radeon_state.c18
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c7
-rw-r--r--drivers/gpu/drm/radeon/rs400.c2
-rw-r--r--drivers/gpu/drm/radeon/rs780.c102
-rw-r--r--drivers/gpu/drm/radeon/rv515.c2
-rw-r--r--drivers/gpu/drm/radeon/rv515d.h (renamed from drivers/gpu/drm/radeon/rv515r.h)56
-rw-r--r--drivers/gpu/drm/radeon/rv770.c987
-rw-r--r--drivers/gpu/drm/radeon/rv770d.h341
33 files changed, 8289 insertions, 606 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index c5db0c4..14c3fe6 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
- rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
- radeon_test.o r200.o radeon_legacy_tv.o
+ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
+ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
+ r600_blit_kms.o
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 8e31e99..a7edd0f 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
pll_flags |= RADEON_PLL_USE_REF_DIV;
}
radeon_encoder = to_radeon_encoder(encoder);
+ break;
}
}
diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/avivod.h
index 8486b4d..d4e6e6e 100644
--- a/drivers/gpu/drm/radeon/r300.h
+++ b/drivers/gpu/drm/radeon/avivod.h
@@ -1,7 +1,6 @@
/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,12 +24,37 @@
* Alex Deucher
* Jerome Glisse
*/
-#ifndef R300_H
-#define R300_H
+#ifndef AVIVOD_H
+#define AVIVOD_H
-struct r300_asic {
- const unsigned *reg_safe_bm;
- unsigned reg_safe_bm_size;
-};
+
+#define D1CRTC_CONTROL 0x6080
+#define CRTC_EN (1 << 0)
+#define D1CRTC_UPDATE_LOCK 0x60E8
+#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110
+#define D1GRPH_SECONDARY_SURFACE_ADDRESS 0x6118
+
+#define D2CRTC_CONTROL 0x6880
+#define D2CRTC_UPDATE_LOCK 0x68E8
+#define D2GRPH_PRIMARY_SURFACE_ADDRESS 0x6910
+#define D2GRPH_SECONDARY_SURFACE_ADDRESS 0x6918
+
+#define D1VGA_CONTROL 0x0330
+#define DVGA_CONTROL_MODE_ENABLE (1 << 0)
+#define DVGA_CONTROL_TIMING_SELECT (1 << 8)
+#define DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
+#define DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
+#define DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
+#define DVGA_CONTROL_ROTATE (1 << 24)
+#define D2VGA_CONTROL 0x0338
+
+#define VGA_HDP_CONTROL 0x328
+#define VGA_MEM_PAGE_SELECT_EN (1 << 0)
+#define VGA_MEMORY_DISABLE (1 << 4)
+#define VGA_RBBM_LOCK_DISABLE (1 << 8)
+#define VGA_SOFT_RESET (1 << 16)
+#define VGA_MEMORY_BASE_ADDRESS 0x0310
+#define VGA_RENDER_CONTROL 0x0300
+#define VGA_VSTATUS_CNTL_MASK 0x00030000
#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ee3ab62..5708c07 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -31,6 +31,8 @@
#include "radeon_drm.h"
#include "radeon_reg.h"
#include "radeon.h"
+#include "r100d.h"
+
#include <linux/firmware.h>
#include <linux/platform_device.h>
@@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev)
return r;
}
}
- WREG32(0x774, rdev->wb.gpu_addr);
- WREG32(0x70C, rdev->wb.gpu_addr + 1024);
- WREG32(0x770, 0xff);
+ WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr);
+ WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024);
+ WREG32(RADEON_SCRATCH_UMSK, 0xff);
return 0;
}
@@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev)
fw_name = FIRMWARE_R520;
}
- err = request_firmware(&rdev->fw, fw_name, &pdev->dev);
+ err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
platform_device_unregister(pdev);
if (err) {
printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
fw_name);
- } else if (rdev->fw->size % 8) {
+ } else if (rdev->me_fw->size % 8) {
printk(KERN_ERR
"radeon_cp: Bogus length %zu in firmware \"%s\"\n",
- rdev->fw->size, fw_name);
+ rdev->me_fw->size, fw_name);
err = -EINVAL;
- release_firmware(rdev->fw);
- rdev->fw = NULL;
+ release_firmware(rdev->me_fw);
+ rdev->me_fw = NULL;
}
return err;
}
@@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev)
"programming pipes. Bad things might happen.\n");
}
- if (rdev->fw) {
- size = rdev->fw->size / 4;
- fw_data = (const __be32 *)&rdev->fw->data[0];
+ if (rdev->me_fw) {
+ size = rdev->me_fw->size / 4;
+ fw_data = (const __be32 *)&rdev->me_fw->data[0];
WREG32(RADEON_CP_ME_RAM_ADDR, 0);
for (i = 0; i < size; i += 2) {
WREG32(RADEON_CP_ME_RAM_DATAH,
@@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
}
- if (!rdev->fw) {
+ if (!rdev->me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
DRM_ERROR("Failed to load firmware!\n");
@@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev)
return -1;
}
+void r100_cp_commit(struct radeon_device *rdev)
+{
+ WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
+ (void)RREG32(RADEON_CP_RB_WPTR);
+}
+
/*
* CS functions
@@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
}
}
}
+
+int r100_ring_test(struct radeon_device *rdev)
+{
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = radeon_scratch_get(rdev, &scratch);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ r = radeon_ring_lock(rdev, 2);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+ radeon_scratch_free(rdev, scratch);
+ return r;
+ }
+ radeon_ring_write(rdev, PACKET0(scratch, 0));
+ radeon_ring_write(rdev, 0xDEADBEEF);
+ radeon_ring_unlock_commit(rdev);
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF) {
+ break;
+ }
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ring test succeeded in %d usecs\n", i);
+ } else {
+ DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+ radeon_scratch_free(rdev, scratch);
+ return r;
+}
+
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
+ radeon_ring_write(rdev, ib->gpu_addr);
+ radeon_ring_write(rdev, ib->length_dw);
+}
+
+int r100_ib_test(struct radeon_device *rdev)
+{
+ struct radeon_ib *ib;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = radeon_scratch_get(rdev, &scratch);
+ if (r) {
+ DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ r = radeon_ib_get(rdev, &ib);
+ if (r) {
+ return r;
+ }
+ ib->ptr[0] = PACKET0(scratch, 0);
+ ib->ptr[1] = 0xDEADBEEF;
+ ib->ptr[2] = PACKET2(0);
+ ib->ptr[3] = PACKET2(0);
+ ib->ptr[4] = PACKET2(0);
+ ib->ptr[5] = PACKET2(0);
+ ib->ptr[6] = PACKET2(0);
+ ib->ptr[7] = PACKET2(0);
+ ib->length_dw = 8;
+ r = radeon_ib_schedule(rdev, ib);
+ if (r) {
+ radeon_scratch_free(rdev, scratch);
+ radeon_ib_free(rdev, &ib);
+ return r;
+ }
+ r = radeon_fence_wait(ib->fence, false);
+ if (r) {
+ return r;
+ }
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF) {
+ break;
+ }
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ib test succeeded in %u usecs\n", i);
+ } else {
+ DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+ radeon_scratch_free(rdev, scratch);
+ radeon_ib_free(rdev, &ib);
+ return r;
+}
diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h
new file mode 100644
index 0000000..6da7d92
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r100d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __R100D_H__
+#define __R100D_H__
+
+#define CP_PACKET0 0x00000000
+#define PACKET0_BASE_INDEX_SHIFT 0
+#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0)
+#define PACKET0_COUNT_SHIFT 16
+#define PACKET0_COUNT_MASK (0x3fff << 16)
+#define CP_PACKET1 0x40000000
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+#define CP_PACKET3 0xC0000000
+#define PACKET3_IT_OPCODE_SHIFT 8
+#define PACKET3_IT_OPCODE_MASK (0xff << 8)
+#define PACKET3_COUNT_SHIFT 16
+#define PACKET3_COUNT_MASK (0x3fff << 16)
+/* PACKET3 op code */
+#define PACKET3_NOP 0x10
+#define PACKET3_3D_DRAW_VBUF 0x28
+#define PACKET3_3D_DRAW_IMMD 0x29
+#define PACKET3_3D_DRAW_INDX 0x2A
+#define PACKET3_3D_LOAD_VBPNTR 0x2F
+#define PACKET3_INDX_BUFFER 0x33
+#define PACKET3_3D_DRAW_VBUF_2 0x34
+#define PACKET3_3D_DRAW_IMMD_2 0x35
+#define PACKET3_3D_DRAW_INDX_2 0x36
+#define PACKET3_BITBLT_MULTI 0x9B
+
+#define PACKET0(reg, n) (CP_PACKET0 | \
+ REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \
+ REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n) (CP_PACKET3 | \
+ REG_SET(PACKET3_IT_OPCODE, (op)) | \
+ REG_SET(PACKET3_COUNT, (n)))
+
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 33a2c55..a5f82f7 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -33,6 +33,7 @@
#include "radeon_drm.h"
#include "radeon_share.h"
#include "r100_track.h"
+#include "r300d.h"
#include "r300_reg_safe.h"
@@ -127,7 +128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
rv370_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
- rdev->mc.gtt_size >> 20, table_addr);
+ (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
rdev->gart.ready = true;
return 0;
}
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
new file mode 100644
index 0000000..63ec076
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r300d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __R300D_H__
+#define __R300D_H__
+
+#define CP_PACKET0 0x00000000
+#define PACKET0_BASE_INDEX_SHIFT 0
+#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0)
+#define PACKET0_COUNT_SHIFT 16
+#define PACKET0_COUNT_MASK (0x3fff << 16)
+#define CP_PACKET1 0x40000000
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+#define CP_PACKET3 0xC0000000
+#define PACKET3_IT_OPCODE_SHIFT 8
+#define PACKET3_IT_OPCODE_MASK (0xff << 8)
+#define PACKET3_COUNT_SHIFT 16
+#define PACKET3_COUNT_MASK (0x3fff << 16)
+/* PACKET3 op code */
+#define PACKET3_NOP 0x10
+#define PACKET3_3D_DRAW_VBUF 0x28
+#define PACKET3_3D_DRAW_IMMD 0x29
+#define PACKET3_3D_DRAW_INDX 0x2A
+#define PACKET3_3D_LOAD_VBPNTR 0x2F
+#define PACKET3_INDX_BUFFER 0x33
+#define PACKET3_3D_DRAW_VBUF_2 0x34
+#define PACKET3_3D_DRAW_IMMD_2 0x35
+#define PACKET3_3D_DRAW_INDX_2 0x36
+#define PACKET3_BITBLT_MULTI 0x9B
+
+#define PACKET0(reg, n) (CP_PACKET0 | \
+ REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \
+ REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n) (CP_PACKET3 | \
+ REG_SET(PACKET3_IT_OPCODE, (op)) | \
+ REG_SET(PACKET3_COUNT, (n)))
+
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 538cd90..d8fcef4 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -25,12 +25,46 @@
* Alex Deucher
* Jerome Glisse
*/
+#include <linux/seq_file.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
#include "drmP.h"
-#include "radeon_reg.h"
+#include "radeon_drm.h"
#include "radeon.h"
+#include "radeon_mode.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+#include "atom.h"
-/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define PFP_UCODE_SIZE 576
+#define PM4_UCODE_SIZE 1792
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
+
+/* Firmware Names */
+MODULE_FIRMWARE("radeon/R600_pfp.bin");
+MODULE_FIRMWARE("radeon/R600_me.bin");
+MODULE_FIRMWARE("radeon/RV610_pfp.bin");
+MODULE_FIRMWARE("radeon/RV610_me.bin");
+MODULE_FIRMWARE("radeon/RV630_pfp.bin");
+MODULE_FIRMWARE("radeon/RV630_me.bin");
+MODULE_FIRMWARE("radeon/RV620_pfp.bin");
+MODULE_FIRMWARE("radeon/RV620_me.bin");
+MODULE_FIRMWARE("radeon/RV635_pfp.bin");
+MODULE_FIRMWARE("radeon/RV635_me.bin");
+MODULE_FIRMWARE("radeon/RV670_pfp.bin");
+MODULE_FIRMWARE("radeon/RV670_me.bin");
+MODULE_FIRMWARE("radeon/RS780_pfp.bin");
+MODULE_FIRMWARE("radeon/RS780_me.bin");
+MODULE_FIRMWARE("radeon/RV770_pfp.bin");
+MODULE_FIRMWARE("radeon/RV770_me.bin");
+MODULE_FIRMWARE("radeon/RV730_pfp.bin");
+MODULE_FIRMWARE("radeon/RV730_me.bin");
+MODULE_FIRMWARE("radeon/RV710_pfp.bin");
+MODULE_FIRMWARE("radeon/RV710_me.bin");
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev);
/* This files gather functions specifics to:
* r600,rv610,rv630,rv620,rv635,rv670
@@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev);
*/
int r600_mc_wait_for_idle(struct radeon_device *rdev);
void r600_gpu_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
/*
- * MC
+ * R600 PCIE GART
*/
-int r600_mc_init(struct radeon_device *rdev)
+int r600_gart_clear_page(struct radeon_device *rdev, int i)
{
- uint32_t tmp;
+ void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+ u64 pte;
- r600_gpu_init(rdev);
+ if (i < 0 || i > rdev->gart.num_gpu_pages)
+ return -EINVAL;
+ pte = 0;
+ writeq(pte, ((void __iomem *)ptr) + (i * 8));
+ return 0;
+}
- /* setup the gart before changing location so we can ask to
- * discard unmapped mc request
- */
- /* FIXME: disable out of gart access */
- tmp = rdev->mc.gtt_location / 4096;
- tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
- WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
- tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
- tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
- WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
- rs600_mc_disable_clients(rdev);
- if (r600_mc_wait_for_idle(rdev)) {
- printk(KERN_WARNING "Failed to wait MC idle while "
- "programming pipes. Bad things might happen.\n");
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+ unsigned i;
+ u32 tmp;
+
+ WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
+ WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+ WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE);
+ tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT;
+ if (tmp == 2) {
+ printk(KERN_WARNING "[drm] r600 flush TLB failed\n");
+ return;
+ }
+ if (tmp) {
+ return;
+ }
+ udelay(1);
+ }
+}
+
+int r600_pcie_gart_enable(struct radeon_device *rdev)
+{
+ u32 tmp;
+ int r, i;
+
+ /* Initialize common gart structure */
+ r = radeon_gart_init(rdev);
+ if (r) {
+ return r;
+ }
+ rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+ r = radeon_gart_table_vram_alloc(rdev);
+ if (r) {
+ return r;
}
+ for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+ r600_gart_clear_page(rdev, i);
+ /* Setup L2 cache */
+ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+ ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+ EFFECTIVE_L2_QUEUE_SIZE(7));
+ WREG32(VM_L2_CNTL2, 0);
+ WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+ /* Setup TLB control */
+ tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+ SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+ EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+ ENABLE_WAIT_L2_QUERY;
+ WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
+ WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+ WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+ WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(rdev->dummy_page.addr >> 12));
+ for (i = 1; i < 7; i++)
+ WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
- tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
- tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
- tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
- WREG32(R600_MC_VM_FB_LOCATION, tmp);
- tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
- tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22);
- WREG32(R600_MC_VM_AGP_TOP, tmp);
- tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
- WREG32(R600_MC_VM_AGP_BOT, tmp);
+ r600_pcie_gart_tlb_flush(rdev);
+ rdev->gart.ready = true;
return 0;
}
-void r600_mc_fini(struct radeon_device *rdev)
+void r600_pcie_gart_disable(struct radeon_device *rdev)
{
- /* FIXME: implement */
-}
+ u32 tmp;
+ int i;
+ /* Clear ptes*/
+ for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+ r600_gart_clear_page(rdev, i);
+ r600_pcie_gart_tlb_flush(rdev);
+ /* Disable all tables */
+ for (i = 0; i < 7; i++)
+ WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
-/*
- * Global GPU functions
- */
-void r600_errata(struct radeon_device *rdev)
-{
- rdev->pll_errata = 0;
+ /* Disable L2 cache */
+ WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+ EFFECTIVE_L2_QUEUE_SIZE(7));
+ WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+ /* Setup L1 TLB control */
+ tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+ ENABLE_WAIT_L2_QUERY;
+ WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
+ WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
}
int r600_mc_wait_for_idle(struct radeon_device *rdev)
{
- /* FIXME: implement */
- return 0;
+ unsigned i;
+ u32 tmp;
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00;
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -1;
}
-void r600_gpu_init(struct radeon_device *rdev)
+static void r600_mc_resume(struct radeon_device *rdev)
{
- /* FIXME: implement */
-}
+ u32 d1vga_control, d2vga_control;
+ u32 vga_render_control, vga_hdp_control;
+ u32 d1crtc_control, d2crtc_control;
+ u32 new_d1grph_primary, new_d1grph_secondary;
+ u32 new_d2grph_primary, new_d2grph_secondary;
+ u64 old_vram_start;
+ u32 tmp;
+ int i, j;
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+ WREG32((0x2c14 + j), 0x00000000);
+ WREG32((0x2c18 + j), 0x00000000);
+ WREG32((0x2c1c + j), 0x00000000);
+ WREG32((0x2c20 + j), 0x00000000);
+ WREG32((0x2c24 + j), 0x00000000);
+ }
+ WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
-/*
- * VRAM info
- */
-void r600_vram_get_type(struct radeon_device *rdev)
+ d1vga_control = RREG32(D1VGA_CONTROL);
+ d2vga_control = RREG32(D2VGA_CONTROL);
+ vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+ d1crtc_control = RREG32(D1CRTC_CONTROL);
+ d2crtc_control = RREG32(D2CRTC_CONTROL);
+ old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+ new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+ new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+ new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+ new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+ new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+ new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+ new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+ new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+ /* Stop all video */
+ WREG32(D1VGA_CONTROL, 0);
+ WREG32(D2VGA_CONTROL, 0);
+ WREG32(VGA_RENDER_CONTROL, 0);
+ WREG32(D1CRTC_UPDATE_LOCK, 1);
+ WREG32(D2CRTC_UPDATE_LOCK, 1);
+ WREG32(D1CRTC_CONTROL, 0);
+ WREG32(D2CRTC_CONTROL, 0);
+ WREG32(D1CRTC_UPDATE_LOCK, 0);
+ WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+ mdelay(1);
+ if (r600_mc_wait_for_idle(rdev)) {
+ printk(KERN_WARNING "[drm] MC not idle !\n");
+ }
+
+ /* Lockout access through VGA aperture*/
+ WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+ /* Update configuration */
+ WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+ WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+ WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+ tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+ tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+ WREG32(MC_VM_FB_LOCATION, tmp);
+ WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+ WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+ WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+ if (rdev->flags & RADEON_IS_AGP) {
+ WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+ WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+ WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+ } else {
+ WREG32(MC_VM_AGP_BASE, 0);
+ WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+ WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+ }
+ WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+ WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+ WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+ WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+ WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+ /* Unlock host access */
+ WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+ mdelay(1);
+ if (r600_mc_wait_for_idle(rdev)) {
+ printk(KERN_WARNING "[drm] MC not idle !\n");
+ }
+
+ /* Restore video state */
+ WREG32(D1CRTC_UPDATE_LOCK, 1);
+ WREG32(D2CRTC_UPDATE_LOCK, 1);
+ WREG32(D1CRTC_CONTROL, d1crtc_control);
+ WREG32(D2CRTC_CONTROL, d2crtc_control);
+ WREG32(D1CRTC_UPDATE_LOCK, 0);
+ WREG32(D2CRTC_UPDATE_LOCK, 0);
+ WREG32(D1VGA_CONTROL, d1vga_control);
+ WREG32(D2VGA_CONTROL, d2vga_control);
+ WREG32(VGA_RENDER_CONTROL, vga_render_control);
+}
+
+int r600_mc_init(struct radeon_device *rdev)
{
- uint32_t tmp;
+ fixed20_12 a;
+ u32 tmp;
int chansize;
+ int r;
+ /* Get VRAM informations */
rdev->mc.vram_width = 128;
rdev->mc.vram_is_ddr = true;
-
- tmp = RREG32(R600_RAMCFG);
- if (tmp & R600_CHANSIZE_OVERRIDE) {
+ tmp = RREG32(RAMCFG);
+ if (tmp & CHANSIZE_OVERRIDE) {
chansize = 16;
- } else if (tmp & R600_CHANSIZE) {
+ } else if (tmp & CHANSIZE_MASK) {
chansize = 64;
} else {
chansize = 32;
@@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev)
(rdev->family == CHIP_RV635)) {
rdev->mc.vram_width = 2 * chansize;
}
+ /* Could aper size report 0 ? */
+ rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+ rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+ /* Setup GPU memory space */
+ rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+ rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+ if (rdev->flags & RADEON_IS_AGP) {
+ r = radeon_agp_init(rdev);
+ if (r)
+ return r;
+ /* gtt_size is setup by radeon_agp_init */
+ rdev->mc.gtt_location = rdev->mc.agp_base;
+ tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+ /* Try to put vram before or after AGP because we
+ * we want SYSTEM_APERTURE to cover both VRAM and
+ * AGP so that GPU can catch out of VRAM/AGP access
+ */
+ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+ /* Enought place before */
+ rdev->mc.vram_location = rdev->mc.gtt_location -
+ rdev->mc.mc_vram_size;
+ } else if (tmp > rdev->mc.mc_vram_size) {
+ /* Enought place after */
+ rdev->mc.vram_location = rdev->mc.gtt_location +
+ rdev->mc.gtt_size;
+ } else {
+ /* Try to setup VRAM then AGP might not
+ * not work on some card
+ */
+ rdev->mc.vram_location = 0x00000000UL;
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+ }
+ } else {
+ if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+ rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
+ 0xFFFF) << 24;
+ rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+ tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+ if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
+ /* Enough place after vram */
+ rdev->mc.gtt_location = tmp;
+ } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
+ /* Enough place before vram */
+ rdev->mc.gtt_location = 0;
+ } else {
+ /* Not enough place after or before shrink
+ * gart size
+ */
+ if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
+ rdev->mc.gtt_location = 0;
+ rdev->mc.gtt_size = rdev->mc.vram_location;
+ } else {
+ rdev->mc.gtt_location = tmp;
+ rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
+ }
+ }
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+ } else {
+ rdev->mc.vram_location = 0x00000000UL;
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+ rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+ }
+ }
+ rdev->mc.vram_start = rdev->mc.vram_location;
+ rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+ rdev->mc.gtt_start = rdev->mc.gtt_location;
+ rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+ /* FIXME: we should enforce default clock in case GPU is not in
+ * default setup
+ */
+ a.full = rfixed_const(100);
+ rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+ rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+ return 0;
}
-void r600_vram_info(struct radeon_device *rdev)
+/* We doesn't check that the GPU really needs a reset we simply do the
+ * reset, it's up to the caller to determine if the GPU needs one. We
+ * might add an helper function to check that.
+ */
+int r600_gpu_soft_reset(struct radeon_device *rdev)
{
- r600_vram_get_type(rdev);
- rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
- rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
+ u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
+ S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
+ S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
+ S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) |
+ S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) |
+ S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) |
+ S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) |
+ S_008010_GUI_ACTIVE(1);
+ u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) |
+ S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) |
+ S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) |
+ S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) |
+ S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) |
+ S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) |
+ S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) |
+ S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1);
+ u32 srbm_reset = 0;
- /* Could aper size report 0 ? */
- rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
- rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+ /* Disable CP parsing/prefetching */
+ WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff));
+ /* Check if any of the rendering block is busy and reset it */
+ if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
+ (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
+ WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) |
+ S_008020_SOFT_RESET_DB(1) |
+ S_008020_SOFT_RESET_CB(1) |
+ S_008020_SOFT_RESET_PA(1) |
+ S_008020_SOFT_RESET_SC(1) |
+ S_008020_SOFT_RESET_SMX(1) |
+ S_008020_SOFT_RESET_SPI(1) |
+ S_008020_SOFT_RESET_SX(1) |
+ S_008020_SOFT_RESET_SH(1) |
+ S_008020_SOFT_RESET_TC(1) |
+ S_008020_SOFT_RESET_TA(1) |
+ S_008020_SOFT_RESET_VC(1) |
+ S_008020_SOFT_RESET_VGT(1));
+ (void)RREG32(R_008020_GRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(R_008020_GRBM_SOFT_RESET, 0);
+ (void)RREG32(R_008020_GRBM_SOFT_RESET);
+ }
+ /* Reset CP (we always reset CP) */
+ WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1));
+ (void)RREG32(R_008020_GRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(R_008020_GRBM_SOFT_RESET, 0);
+ (void)RREG32(R_008020_GRBM_SOFT_RESET);
+ /* Reset others GPU block if necessary */
+ if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+ if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_GRBM(1);
+ if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_IH(1);
+ if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_VMC(1);
+ if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+ if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+ if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+ if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+ if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+ if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+ if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+ srbm_reset |= S_000E60_SOFT_RESET_SEM(1);
+ WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
+ (void)RREG32(R_000E60_SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(R_000E60_SRBM_SOFT_RESET, 0);
+ (void)RREG32(R_000E60_SRBM_SOFT_RESET);
+ /* Wait a little for things to settle down */
+ udelay(50);
+ return 0;
+}
+
+int r600_gpu_reset(struct radeon_device *rdev)
+{
+ return r600_gpu_soft_reset(rdev);
+}
+
+static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+ u32 num_backends,
+ u32 backend_disable_mask)
+{
+ u32 backend_map = 0;
+ u32 enabled_backends_mask;
+ u32 enabled_backends_count;
+ u32 cur_pipe;
+ u32 swizzle_pipe[R6XX_MAX_PIPES];
+ u32 cur_backend;
+ u32 i;
+
+ if (num_tile_pipes > R6XX_MAX_PIPES)
+ num_tile_pipes = R6XX_MAX_PIPES;
+ if (num_tile_pipes < 1)
+ num_tile_pipes = 1;
+ if (num_backends > R6XX_MAX_BACKENDS)
+ num_backends = R6XX_MAX_BACKENDS;
+ if (num_backends < 1)
+ num_backends = 1;
+
+ enabled_backends_mask = 0;
+ enabled_backends_count = 0;
+ for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
+ if (((backend_disable_mask >> i) & 1) == 0) {
+ enabled_backends_mask |= (1 << i);
+ ++enabled_backends_count;
+ }
+ if (enabled_backends_count == num_backends)
+ break;
+ }
+
+ if (enabled_backends_count == 0) {
+ enabled_backends_mask = 1;
+ enabled_backends_count = 1;
+ }
+
+ if (enabled_backends_count != num_backends)
+ num_backends = enabled_backends_count;
+
+ memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
+ switch (num_tile_pipes) {
+ case 1:
+ swizzle_pipe[0] = 0;
+ break;
+ case 2:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 1;
+ break;
+ case 3:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 1;
+ swizzle_pipe[2] = 2;
+ break;
+ case 4:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 1;
+ swizzle_pipe[2] = 2;
+ swizzle_pipe[3] = 3;
+ break;
+ case 5:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 1;
+ swizzle_pipe[2] = 2;
+ swizzle_pipe[3] = 3;
+ swizzle_pipe[4] = 4;
+ break;
+ case 6:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 5;
+ swizzle_pipe[4] = 1;
+ swizzle_pipe[5] = 3;
+ break;
+ case 7:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 6;
+ swizzle_pipe[4] = 1;
+ swizzle_pipe[5] = 3;
+ swizzle_pipe[6] = 5;
+ break;
+ case 8:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 6;
+ swizzle_pipe[4] = 1;
+ swizzle_pipe[5] = 3;
+ swizzle_pipe[6] = 5;
+ swizzle_pipe[7] = 7;
+ break;
+ }
+
+ cur_backend = 0;
+ for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
+ while (((1 << cur_backend) & enabled_backends_mask) == 0)
+ cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+
+ backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
+
+ cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+ }
+
+ return backend_map;
+}
+
+int r600_count_pipe_bits(uint32_t val)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < 32; i++) {
+ ret += val & 1;
+ val >>= 1;
+ }
+ return ret;
}
+void r600_gpu_init(struct radeon_device *rdev)
+{
+ u32 tiling_config;
+ u32 ramcfg;
+ u32 tmp;
+ int i, j;
+ u32 sq_config;
+ u32 sq_gpr_resource_mgmt_1 = 0;
+ u32 sq_gpr_resource_mgmt_2 = 0;
+ u32 sq_thread_resource_mgmt = 0;
+ u32 sq_stack_resource_mgmt_1 = 0;
+ u32 sq_stack_resource_mgmt_2 = 0;
+
+ /* FIXME: implement */
+ switch (rdev->family) {
+ case CHIP_R600:
+ rdev->config.r600.max_pipes = 4;
+ rdev->config.r600.max_tile_pipes = 8;
+ rdev->config.r600.max_simds = 4;
+ rdev->config.r600.max_backends = 4;
+ rdev->config.r600.max_gprs = 256;
+ rdev->config.r600.max_threads = 192;
+ rdev->config.r600.max_stack_entries = 256;
+ rdev->config.r600.max_hw_contexts = 8;
+ rdev->config.r600.max_gs_threads = 16;
+ rdev->config.r600.sx_max_export_size = 128;
+ rdev->config.r600.sx_max_export_pos_size = 16;
+ rdev->config.r600.sx_max_export_smx_size = 128;
+ rdev->config.r600.sq_num_cf_insts = 2;
+ break;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ rdev->config.r600.max_pipes = 2;
+ rdev->config.r600.max_tile_pipes = 2;
+ rdev->config.r600.max_simds = 3;
+ rdev->config.r600.max_backends = 1;
+ rdev->config.r600.max_gprs = 128;
+ rdev->config.r600.max_threads = 192;
+ rdev->config.r600.max_stack_entries = 128;
+ rdev->config.r600.max_hw_contexts = 8;
+ rdev->config.r600.max_gs_threads = 4;
+ rdev->config.r600.sx_max_export_size = 128;
+ rdev->config.r600.sx_max_export_pos_size = 16;
+ rdev->config.r600.sx_max_export_smx_size = 128;
+ rdev->config.r600.sq_num_cf_insts = 2;
+ break;
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ rdev->config.r600.max_pipes = 1;
+ rdev->config.r600.max_tile_pipes = 1;
+ rdev->config.r600.max_simds = 2;
+ rdev->config.r600.max_backends = 1;
+ rdev->config.r600.max_gprs = 128;
+ rdev->config.r600.max_threads = 192;
+ rdev->config.r600.max_stack_entries = 128;
+ rdev->config.r600.max_hw_contexts = 4;
+ rdev->config.r600.max_gs_threads = 4;
+ rdev->config.r600.sx_max_export_size = 128;
+ rdev->config.r600.sx_max_export_pos_size = 16;
+ rdev->config.r600.sx_max_export_smx_size = 128;
+ rdev->config.r600.sq_num_cf_insts = 1;
+ break;
+ case CHIP_RV670:
+ rdev->config.r600.max_pipes = 4;
+ rdev->config.r600.max_tile_pipes = 4;
+ rdev->config.r600.max_simds = 4;
+ rdev->config.r600.max_backends = 4;
+ rdev->config.r600.max_gprs = 192;
+ rdev->config.r600.max_threads = 192;
+ rdev->config.r600.max_stack_entries = 256;
+ rdev->config.r600.max_hw_contexts = 8;
+ rdev->config.r600.max_gs_threads = 16;
+ rdev->config.r600.sx_max_export_size = 128;
+ rdev->config.r600.sx_max_export_pos_size = 16;
+ rdev->config.r600.sx_max_export_smx_size = 128;
+ rdev->config.r600.sq_num_cf_insts = 2;
+ break;
+ default:
+ break;
+ }
+
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+ WREG32((0x2c14 + j), 0x00000000);
+ WREG32((0x2c18 + j), 0x00000000);
+ WREG32((0x2c1c + j), 0x00000000);
+ WREG32((0x2c20 + j), 0x00000000);
+ WREG32((0x2c24 + j), 0x00000000);
+ }
+
+ WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+ /* Setup tiling */
+ tiling_config = 0;
+ ramcfg = RREG32(RAMCFG);
+ switch (rdev->config.r600.max_tile_pipes) {
+ case 1:
+ tiling_config |= PIPE_TILING(0);
+ break;
+ case 2:
+ tiling_config |= PIPE_TILING(1);
+ break;
+ case 4:
+ tiling_config |= PIPE_TILING(2);
+ break;
+ case 8:
+ tiling_config |= PIPE_TILING(3);
+ break;
+ default:
+ break;
+ }
+ tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
+ tiling_config |= GROUP_SIZE(0);
+ tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
+ if (tmp > 3) {
+ tiling_config |= ROW_TILING(3);
+ tiling_config |= SAMPLE_SPLIT(3);
+ } else {
+ tiling_config |= ROW_TILING(tmp);
+ tiling_config |= SAMPLE_SPLIT(tmp);
+ }
+ tiling_config |= BANK_SWAPS(1);
+ tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
+ rdev->config.r600.max_backends,
+ (0xff << rdev->config.r600.max_backends) & 0xff);
+ tiling_config |= BACKEND_MAP(tmp);
+ WREG32(GB_TILING_CONFIG, tiling_config);
+ WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
+ WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+
+ tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
+ WREG32(CC_RB_BACKEND_DISABLE, tmp);
+
+ /* Setup pipes */
+ tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
+ tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
+ WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
+ WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);
+
+ tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK);
+ WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
+ WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+ /* Setup some CP states */
+ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b)));
+ WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40)));
+
+ WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT |
+ SYNC_WALKER | SYNC_ALIGNER));
+ /* Setup various GPU states */
+ if (rdev->family == CHIP_RV670)
+ WREG32(ARB_GDEC_RD_CNTL, 0x00000021);
+
+ tmp = RREG32(SX_DEBUG_1);
+ tmp |= SMX_EVENT_RELEASE;
+ if ((rdev->family > CHIP_R600))
+ tmp |= ENABLE_NEW_SMX_ADDRESS;
+ WREG32(SX_DEBUG_1, tmp);
+
+ if (((rdev->family) == CHIP_R600) ||
+ ((rdev->family) == CHIP_RV630) ||
+ ((rdev->family) == CHIP_RV610) ||
+ ((rdev->family) == CHIP_RV620) ||
+ ((rdev->family) == CHIP_RS780)) {
+ WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+ } else {
+ WREG32(DB_DEBUG, 0);
+ }
+ WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) |
+ DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4)));
+
+ WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+ WREG32(VGT_NUM_INSTANCES, 0);
+
+ WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+ WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0));
+
+ tmp = RREG32(SQ_MS_FIFO_SIZES);
+ if (((rdev->family) == CHIP_RV610) ||
+ ((rdev->family) == CHIP_RV620) ||
+ ((rdev->family) == CHIP_RS780)) {
+ tmp = (CACHE_FIFO_SIZE(0xa) |
+ FETCH_FIFO_HIWATER(0xa) |
+ DONE_FIFO_HIWATER(0xe0) |
+ ALU_UPDATE_FIFO_HIWATER(0x8));
+ } else if (((rdev->family) == CHIP_R600) ||
+ ((rdev->family) == CHIP_RV630)) {
+ tmp &= ~DONE_FIFO_HIWATER(0xff);
+ tmp |= DONE_FIFO_HIWATER(0x4);
+ }
+ WREG32(SQ_MS_FIFO_SIZES, tmp);
+
+ /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+ * should be adjusted as needed by the 2D/3D drivers. This just sets default values
+ */
+ sq_config = RREG32(SQ_CONFIG);
+ sq_config &= ~(PS_PRIO(3) |
+ VS_PRIO(3) |
+ GS_PRIO(3) |
+ ES_PRIO(3));
+ sq_config |= (DX9_CONSTS |
+ VC_ENABLE |
+ PS_PRIO(0) |
+ VS_PRIO(1) |
+ GS_PRIO(2) |
+ ES_PRIO(3));
+
+ if ((rdev->family) == CHIP_R600) {
+ sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) |
+ NUM_VS_GPRS(124) |
+ NUM_CLAUSE_TEMP_GPRS(4));
+ sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) |
+ NUM_ES_GPRS(0));
+ sq_thread_resource_mgmt = (NUM_PS_THREADS(136) |
+ NUM_VS_THREADS(48) |
+ NUM_GS_THREADS(4) |
+ NUM_ES_THREADS(4));
+ sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) |
+ NUM_VS_STACK_ENTRIES(128));
+ sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) |
+ NUM_ES_STACK_ENTRIES(0));
+ } else if (((rdev->family) == CHIP_RV610) ||
+ ((rdev->family) == CHIP_RV620) ||
+ ((rdev->family) == CHIP_RS780)) {
+ /* no vertex cache */
+ sq_config &= ~VC_ENABLE;
+
+ sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+ NUM_VS_GPRS(44) |
+ NUM_CLAUSE_TEMP_GPRS(2));
+ sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+ NUM_ES_GPRS(17));
+ sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+ NUM_VS_THREADS(78) |
+ NUM_GS_THREADS(4) |
+ NUM_ES_THREADS(31));
+ sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+ NUM_VS_STACK_ENTRIES(40));
+ sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+ NUM_ES_STACK_ENTRIES(16));
+ } else if (((rdev->family) == CHIP_RV630) ||
+ ((rdev->family) == CHIP_RV635)) {
+ sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+ NUM_VS_GPRS(44) |
+ NUM_CLAUSE_TEMP_GPRS(2));
+ sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) |
+ NUM_ES_GPRS(18));
+ sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+ NUM_VS_THREADS(78) |
+ NUM_GS_THREADS(4) |
+ NUM_ES_THREADS(31));
+ sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+ NUM_VS_STACK_ENTRIES(40));
+ sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+ NUM_ES_STACK_ENTRIES(16));
+ } else if ((rdev->family) == CHIP_RV670) {
+ sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+ NUM_VS_GPRS(44) |
+ NUM_CLAUSE_TEMP_GPRS(2));
+ sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+ NUM_ES_GPRS(17));
+ sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+ NUM_VS_THREADS(78) |
+ NUM_GS_THREADS(4) |
+ NUM_ES_THREADS(31));
+ sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) |
+ NUM_VS_STACK_ENTRIES(64));
+ sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) |
+ NUM_ES_STACK_ENTRIES(64));
+ }
+
+ WREG32(SQ_CONFIG, sq_config);
+ WREG32(SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1);
+ WREG32(SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2);
+ WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+ WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
+ WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
+
+ if (((rdev->family) == CHIP_RV610) ||
+ ((rdev->family) == CHIP_RV620) ||
+ ((rdev->family) == CHIP_RS780)) {
+ WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY));
+ } else {
+ WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC));
+ }
+
+ /* More default values. 2D/3D driver should adjust as needed */
+ WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) |
+ S1_X(0x4) | S1_Y(0xc)));
+ WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) |
+ S1_X(0x2) | S1_Y(0x2) |
+ S2_X(0xa) | S2_Y(0x6) |
+ S3_X(0x6) | S3_Y(0xa)));
+ WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) |
+ S1_X(0x4) | S1_Y(0xc) |
+ S2_X(0x1) | S2_Y(0x6) |
+ S3_X(0xa) | S3_Y(0xe)));
+ WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) |
+ S5_X(0x0) | S5_Y(0x0) |
+ S6_X(0xb) | S6_Y(0x4) |
+ S7_X(0x7) | S7_Y(0x8)));
+
+ WREG32(VGT_STRMOUT_EN, 0);
+ tmp = rdev->config.r600.max_pipes * 16;
+ switch (rdev->family) {
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ tmp += 32;
+ break;
+ case CHIP_RV670:
+ tmp += 128;
+ break;
+ default:
+ break;
+ }
+ if (tmp > 256) {
+ tmp = 256;
+ }
+ WREG32(VGT_ES_PER_GS, 128);
+ WREG32(VGT_GS_PER_ES, tmp);
+ WREG32(VGT_GS_PER_VS, 2);
+ WREG32(VGT_GS_VERTEX_REUSE, 16);
+
+ /* more default values. 2D/3D driver should adjust as needed */
+ WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+ WREG32(VGT_STRMOUT_EN, 0);
+ WREG32(SX_MISC, 0);
+ WREG32(PA_SC_MODE_CNTL, 0);
+ WREG32(PA_SC_AA_CONFIG, 0);
+ WREG32(PA_SC_LINE_STIPPLE, 0);
+ WREG32(SPI_INPUT_Z, 0);
+ WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+ WREG32(CB_COLOR7_FRAG, 0);
+
+ /* Clear render buffer base addresses */
+ WREG32(CB_COLOR0_BASE, 0);
+ WREG32(CB_COLOR1_BASE, 0);
+ WREG32(CB_COLOR2_BASE, 0);
+ WREG32(CB_COLOR3_BASE, 0);
+ WREG32(CB_COLOR4_BASE, 0);
+ WREG32(CB_COLOR5_BASE, 0);
+ WREG32(CB_COLOR6_BASE, 0);
+ WREG32(CB_COLOR7_BASE, 0);
+ WREG32(CB_COLOR7_FRAG, 0);
+
+ switch (rdev->family) {
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ tmp = TC_L2_SIZE(8);
+ break;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ tmp = TC_L2_SIZE(4);
+ break;
+ case CHIP_R600:
+ tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT;
+ break;
+ default:
+ tmp = TC_L2_SIZE(0);
+ break;
+ }
+ WREG32(TC_CNTL, tmp);
+
+ tmp = RREG32(HDP_HOST_PATH_CNTL);
+ WREG32(HDP_HOST_PATH_CNTL, tmp);
+
+ tmp = RREG32(ARB_POP);
+ tmp |= ENABLE_TC128;
+ WREG32(ARB_POP, tmp);
+
+ WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+ WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+ NUM_CLIP_SEQ(3)));
+ WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
+}
+
+
/*
* Indirect registers accessor
*/
-uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg)
+u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg)
+{
+ u32 r;
+
+ WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+ (void)RREG32(PCIE_PORT_INDEX);
+ r = RREG32(PCIE_PORT_DATA);
+ return r;
+}
+
+void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+ (void)RREG32(PCIE_PORT_INDEX);
+ WREG32(PCIE_PORT_DATA, (v));
+ (void)RREG32(PCIE_PORT_DATA);
+}
+
+
+/*
+ * CP & Ring
+ */
+void r600_cp_stop(struct radeon_device *rdev)
+{
+ WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
+}
+
+int r600_cp_init_microcode(struct radeon_device *rdev)
+{
+ struct platform_device *pdev;
+ const char *chip_name;
+ size_t pfp_req_size, me_req_size;
+ char fw_name[30];
+ int err;
+
+ DRM_DEBUG("\n");
+
+ pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
+ err = IS_ERR(pdev);
+ if (err) {
+ printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
+ return -EINVAL;
+ }
+
+ switch (rdev->family) {
+ case CHIP_R600: chip_name = "R600"; break;
+ case CHIP_RV610: chip_name = "RV610"; break;
+ case CHIP_RV630: chip_name = "RV630"; break;
+ case CHIP_RV620: chip_name = "RV620"; break;
+ case CHIP_RV635: chip_name = "RV635"; break;
+ case CHIP_RV670: chip_name = "RV670"; break;
+ case CHIP_RS780:
+ case CHIP_RS880: chip_name = "RS780"; break;
+ case CHIP_RV770: chip_name = "RV770"; break;
+ case CHIP_RV730:
+ case CHIP_RV740: chip_name = "RV730"; break;
+ case CHIP_RV710: chip_name = "RV710"; break;
+ default: BUG();
+ }
+
+ if (rdev->family >= CHIP_RV770) {
+ pfp_req_size = R700_PFP_UCODE_SIZE * 4;
+ me_req_size = R700_PM4_UCODE_SIZE * 4;
+ } else {
+ pfp_req_size = PFP_UCODE_SIZE * 4;
+ me_req_size = PM4_UCODE_SIZE * 12;
+ }
+
+ DRM_INFO("Loading %s CP Microcode\n", chip_name);
+
+ snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+ err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+ if (err)
+ goto out;
+ if (rdev->pfp_fw->size != pfp_req_size) {
+ printk(KERN_ERR
+ "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+ rdev->pfp_fw->size, fw_name);
+ err = -EINVAL;
+ goto out;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+ err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+ if (err)
+ goto out;
+ if (rdev->me_fw->size != me_req_size) {
+ printk(KERN_ERR
+ "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+ rdev->me_fw->size, fw_name);
+ err = -EINVAL;
+ }
+out:
+ platform_device_unregister(pdev);
+
+ if (err) {
+ if (err != -EINVAL)
+ printk(KERN_ERR
+ "r600_cp: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(rdev->pfp_fw);
+ rdev->pfp_fw = NULL;
+ release_firmware(rdev->me_fw);
+ rdev->me_fw = NULL;
+ }
+ return err;
+}
+
+static int r600_cp_load_microcode(struct radeon_device *rdev)
+{
+ const __be32 *fw_data;
+ int i;
+
+ if (!rdev->me_fw || !rdev->pfp_fw)
+ return -EINVAL;
+
+ r600_cp_stop(rdev);
+
+ WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
+
+ /* Reset cp */
+ WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+ RREG32(GRBM_SOFT_RESET);
+ mdelay(15);
+ WREG32(GRBM_SOFT_RESET, 0);
+
+ WREG32(CP_ME_RAM_WADDR, 0);
+
+ fw_data = (const __be32 *)rdev->me_fw->data;
+ WREG32(CP_ME_RAM_WADDR, 0);
+ for (i = 0; i < PM4_UCODE_SIZE * 3; i++)
+ WREG32(CP_ME_RAM_DATA,
+ be32_to_cpup(fw_data++));
+
+ fw_data = (const __be32 *)rdev->pfp_fw->data;
+ WREG32(CP_PFP_UCODE_ADDR, 0);
+ for (i = 0; i < PFP_UCODE_SIZE; i++)
+ WREG32(CP_PFP_UCODE_DATA,
+ be32_to_cpup(fw_data++));
+
+ WREG32(CP_PFP_UCODE_ADDR, 0);
+ WREG32(CP_ME_RAM_WADDR, 0);
+ WREG32(CP_ME_RAM_RADDR, 0);
+ return 0;
+}
+
+int r600_cp_start(struct radeon_device *rdev)
+{
+ int r;
+ uint32_t cp_me;
+
+ r = radeon_ring_lock(rdev, 7);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+ radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
+ radeon_ring_write(rdev, 0x1);
+ if (rdev->family < CHIP_RV770) {
+ radeon_ring_write(rdev, 0x3);
+ radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1);
+ } else {
+ radeon_ring_write(rdev, 0x0);
+ radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1);
+ }
+ radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+ radeon_ring_write(rdev, 0);
+ radeon_ring_write(rdev, 0);
+ radeon_ring_unlock_commit(rdev);
+
+ cp_me = 0xff;
+ WREG32(R_0086D8_CP_ME_CNTL, cp_me);
+ return 0;
+}
+
+int r600_cp_resume(struct radeon_device *rdev)
+{
+ u32 tmp;
+ u32 rb_bufsz;
+ int r;
+
+ /* Reset cp */
+ WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+ RREG32(GRBM_SOFT_RESET);
+ mdelay(15);
+ WREG32(GRBM_SOFT_RESET, 0);
+
+ /* Set ring buffer size */
+ rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+#ifdef __BIG_ENDIAN
+ WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE |
+ (drm_order(4096/8) << 8) | rb_bufsz);
+#else
+ WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz);
+#endif
+ WREG32(CP_SEM_WAIT_TIMER, 0x4);
+
+ /* Set the write pointer delay */
+ WREG32(CP_RB_WPTR_DELAY, 0);
+
+ /* Initialize the ring buffer's read and write pointers */
+ tmp = RREG32(CP_RB_CNTL);
+ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
+ WREG32(CP_RB_RPTR_WR, 0);
+ WREG32(CP_RB_WPTR, 0);
+ WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF);
+ WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr));
+ mdelay(1);
+ WREG32(CP_RB_CNTL, tmp);
+
+ WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+ WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
+
+ rdev->cp.rptr = RREG32(CP_RB_RPTR);
+ rdev->cp.wptr = RREG32(CP_RB_WPTR);
+
+ r600_cp_start(rdev);
+ rdev->cp.ready = true;
+ r = radeon_ring_test(rdev);
+ if (r) {
+ rdev->cp.ready = false;
+ return r;
+ }
+ return 0;
+}
+
+void r600_cp_commit(struct radeon_device *rdev)
+{
+ WREG32(CP_RB_WPTR, rdev->cp.wptr);
+ (void)RREG32(CP_RB_WPTR);
+}
+
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
+{
+ u32 rb_bufsz;
+
+ /* Align ring size */
+ rb_bufsz = drm_order(ring_size / 8);
+ ring_size = (1 << (rb_bufsz + 1)) * 4;
+ rdev->cp.ring_size = ring_size;
+ rdev->cp.align_mask = 16 - 1;
+}
+
+
+/*
+ * GPU scratch registers helpers function.
+ */
+void r600_scratch_init(struct radeon_device *rdev)
+{
+ int i;
+
+ rdev->scratch.num_reg = 7;
+ for (i = 0; i < rdev->scratch.num_reg; i++) {
+ rdev->scratch.free[i] = true;
+ rdev->scratch.reg[i] = SCRATCH_REG0 + (i * 4);
+ }
+}
+
+int r600_ring_test(struct radeon_device *rdev)
+{
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = radeon_scratch_get(rdev, &scratch);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ r = radeon_ring_lock(rdev, 3);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+ radeon_scratch_free(rdev, scratch);
+ return r;
+ }
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+ radeon_ring_write(rdev, 0xDEADBEEF);
+ radeon_ring_unlock_commit(rdev);
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ring test succeeded in %d usecs\n", i);
+ } else {
+ DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+ radeon_scratch_free(rdev, scratch);
+ return r;
+}
+
+/*
+ * Writeback
+ */
+int r600_wb_init(struct radeon_device *rdev)
+{
+ int r;
+
+ if (rdev->wb.wb_obj == NULL) {
+ r = radeon_object_create(rdev, NULL, 4096,
+ true,
+ RADEON_GEM_DOMAIN_GTT,
+ false, &rdev->wb.wb_obj);
+ if (r) {
+ DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+ return r;
+ }
+ r = radeon_object_pin(rdev->wb.wb_obj,
+ RADEON_GEM_DOMAIN_GTT,
+ &rdev->wb.gpu_addr);
+ if (r) {
+ DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+ return r;
+ }
+ r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+ if (r) {
+ DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+ return r;
+ }
+ }
+ WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
+ WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
+ WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
+ WREG32(SCRATCH_UMSK, 0xff);
+ return 0;
+}
+
+void r600_wb_fini(struct radeon_device *rdev)
+{
+ if (rdev->wb.wb_obj) {
+ radeon_object_kunmap(rdev->wb.wb_obj);
+ radeon_object_unpin(rdev->wb.wb_obj);
+ radeon_object_unref(&rdev->wb.wb_obj);
+ rdev->wb.wb = NULL;
+ rdev->wb.wb_obj = NULL;
+ }
+}
+
+
+/*
+ * CS
+ */
+void r600_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ /* Emit fence sequence & fire IRQ */
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+ radeon_ring_write(rdev, fence->seq);
+}
+
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ unsigned num_pages,
+ struct radeon_fence *fence)
+{
+ /* FIXME: implement */
+ return 0;
+}
+
+int r600_copy_blit(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_pages, struct radeon_fence *fence)
+{
+ r600_blit_prepare_copy(rdev, num_pages * 4096);
+ r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096);
+ r600_blit_done_copy(rdev, fence);
+ return 0;
+}
+
+int r600_irq_process(struct radeon_device *rdev)
+{
+ /* FIXME: implement */
+ return 0;
+}
+
+int r600_irq_set(struct radeon_device *rdev)
+{
+ /* FIXME: implement */
+ return 0;
+}
+
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+ uint32_t tiling_flags, uint32_t pitch,
+ uint32_t offset, uint32_t obj_size)
+{
+ /* FIXME: implement */
+ return 0;
+}
+
+void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+ /* FIXME: implement */
+}
+
+
+bool r600_card_posted(struct radeon_device *rdev)
+{
+ uint32_t reg;
+
+ /* first check CRTCs */
+ reg = RREG32(D1CRTC_CONTROL) |
+ RREG32(D2CRTC_CONTROL);
+ if (reg & CRTC_EN)
+ return true;
+
+ /* then check MEM_SIZE, in case the crtcs are off */
+ if (RREG32(CONFIG_MEMSIZE))
+ return true;
+
+ return false;
+}
+
+int r600_resume(struct radeon_device *rdev)
+{
+ int r;
+
+ r600_gpu_reset(rdev);
+ r600_mc_resume(rdev);
+ r = r600_pcie_gart_enable(rdev);
+ if (r)
+ return r;
+ r600_gpu_init(rdev);
+ r = radeon_ring_init(rdev, rdev->cp.ring_size);
+ if (r)
+ return r;
+ r = r600_cp_load_microcode(rdev);
+ if (r)
+ return r;
+ r = r600_cp_resume(rdev);
+ if (r)
+ return r;
+ r = r600_wb_init(rdev);
+ if (r)
+ return r;
+ return 0;
+}
+
+int r600_suspend(struct radeon_device *rdev)
+{
+ /* FIXME: we should wait for ring to be empty */
+ r600_cp_stop(rdev);
+ return 0;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int r600_init(struct radeon_device *rdev)
{
- uint32_t r;
+ int r;
- WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
- (void)RREG32(R600_PCIE_PORT_INDEX);
- r = RREG32(R600_PCIE_PORT_DATA);
+ rdev->new_init_path = true;
+ r = radeon_dummy_page_init(rdev);
+ if (r)
+ return r;
+ if (r600_debugfs_mc_info_init(rdev)) {
+ DRM_ERROR("Failed to register debugfs file for mc !\n");
+ }
+ /* This don't do much */
+ r = radeon_gem_init(rdev);
+ if (r)
+ return r;
+ /* Read BIOS */
+ if (!radeon_get_bios(rdev)) {
+ if (ASIC_IS_AVIVO(rdev))
+ return -EINVAL;
+ }
+ /* Must be an ATOMBIOS */
+ if (!rdev->is_atom_bios)
+ return -EINVAL;
+ r = radeon_atombios_init(rdev);
+ if (r)
+ return r;
+ /* Post card if necessary */
+ if (!r600_card_posted(rdev) && rdev->bios) {
+ DRM_INFO("GPU not posted. posting now...\n");
+ atom_asic_init(rdev->mode_info.atom_context);
+ }
+ /* Initialize scratch registers */
+ r600_scratch_init(rdev);
+ /* Initialize surface registers */
+ radeon_surface_init(rdev);
+ r = radeon_clocks_init(rdev);
+ if (r)
+ return r;
+ /* Fence driver */
+ r = radeon_fence_driver_init(rdev);
+ if (r)
+ return r;
+ r = r600_mc_init(rdev);
+ if (r) {
+ if (rdev->flags & RADEON_IS_AGP) {
+ /* Retry with disabling AGP */
+ r600_fini(rdev);
+ rdev->flags &= ~RADEON_IS_AGP;
+ return r600_init(rdev);
+ }
+ return r;
+ }
+ /* Memory manager */
+ r = radeon_object_init(rdev);
+ if (r)
+ return r;
+ rdev->cp.ring_obj = NULL;
+ r600_ring_init(rdev, 1024 * 1024);
+
+ if (!rdev->me_fw || !rdev->pfp_fw) {
+ r = r600_cp_init_microcode(rdev);
+ if (r) {
+ DRM_ERROR("Failed to load firmware!\n");
+ return r;
+ }
+ }
+
+ r = r600_resume(rdev);
+ if (r) {
+ if (rdev->flags & RADEON_IS_AGP) {
+ /* Retry with disabling AGP */
+ r600_fini(rdev);
+ rdev->flags &= ~RADEON_IS_AGP;
+ return r600_init(rdev);
+ }
+ return r;
+ }
+ r = radeon_ib_pool_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+ return r;
+ }
+ r = r600_blit_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled blitter (%d).\n", r);
+ return r;
+ }
+ r = radeon_ib_test(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+ return r;
+ }
+ return 0;
+}
+
+void r600_fini(struct radeon_device *rdev)
+{
+ /* Suspend operations */
+ r600_suspend(rdev);
+
+ r600_blit_fini(rdev);
+ radeon_ring_fini(rdev);
+ r600_pcie_gart_disable(rdev);
+ radeon_gart_table_vram_free(rdev);
+ radeon_gart_fini(rdev);
+ radeon_gem_fini(rdev);
+ radeon_fence_driver_fini(rdev);
+ radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+ if (rdev->flags & RADEON_IS_AGP)
+ radeon_agp_fini(rdev);
+#endif
+ radeon_object_fini(rdev);
+ if (rdev->is_atom_bios)
+ radeon_atombios_fini(rdev);
+ else
+ radeon_combios_fini(rdev);
+ kfree(rdev->bios);
+ rdev->bios = NULL;
+ radeon_dummy_page_fini(rdev);
+}
+
+
+/*
+ * CS stuff
+ */
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ /* FIXME: implement */
+ radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC);
+ radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
+ radeon_ring_write(rdev, ib->length_dw);
+}
+
+int r600_ib_test(struct radeon_device *rdev)
+{
+ struct radeon_ib *ib;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = radeon_scratch_get(rdev, &scratch);
+ if (r) {
+ DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+ WREG32(scratch, 0xCAFEDEAD);
+ r = radeon_ib_get(rdev, &ib);
+ if (r) {
+ DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+ return r;
+ }
+ ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+ ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+ ib->ptr[2] = 0xDEADBEEF;
+ ib->ptr[3] = PACKET2(0);
+ ib->ptr[4] = PACKET2(0);
+ ib->ptr[5] = PACKET2(0);
+ ib->ptr[6] = PACKET2(0);
+ ib->ptr[7] = PACKET2(0);
+ ib->ptr[8] = PACKET2(0);
+ ib->ptr[9] = PACKET2(0);
+ ib->ptr[10] = PACKET2(0);
+ ib->ptr[11] = PACKET2(0);
+ ib->ptr[12] = PACKET2(0);
+ ib->ptr[13] = PACKET2(0);
+ ib->ptr[14] = PACKET2(0);
+ ib->ptr[15] = PACKET2(0);
+ ib->length_dw = 16;
+ r = radeon_ib_schedule(rdev, ib);
+ if (r) {
+ radeon_scratch_free(rdev, scratch);
+ radeon_ib_free(rdev, &ib);
+ DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+ return r;
+ }
+ r = radeon_fence_wait(ib->fence, false);
+ if (r) {
+ DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+ return r;
+ }
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ib test succeeded in %u usecs\n", i);
+ } else {
+ DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+ radeon_scratch_free(rdev, scratch);
+ radeon_ib_free(rdev, &ib);
return r;
}
-void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
- WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
- (void)RREG32(R600_PCIE_PORT_INDEX);
- WREG32(R600_PCIE_PORT_DATA, (v));
- (void)RREG32(R600_PCIE_PORT_DATA);
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct radeon_device *rdev = dev->dev_private;
+ uint32_t rdp, wdp;
+ unsigned count, i, j;
+
+ radeon_ring_free_size(rdev);
+ rdp = RREG32(CP_RB_RPTR);
+ wdp = RREG32(CP_RB_WPTR);
+ count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+ seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
+ seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
+ seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
+ seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+ seq_printf(m, "%u dwords in ring\n", count);
+ for (j = 0; j <= count; j++) {
+ i = (rdp + j) & rdev->cp.ptr_mask;
+ seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+ }
+ return 0;
+}
+
+static int r600_debugfs_mc_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct radeon_device *rdev = dev->dev_private;
+
+ DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
+ DREG32_SYS(m, rdev, VM_L2_STATUS);
+ return 0;
+}
+
+static struct drm_info_list r600_mc_info_list[] = {
+ {"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
+ {"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
+};
+#endif
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
+#else
+ return 0;
+#endif
}
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode 100644
index 0000000..c51402e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Alex Deucher <alexander.deucher@amd.com>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_drv.h"
+
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST 0x11
+#define DI_INDEX_SIZE_16_BIT 0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8 0x1
+#define FMT_5_6_5 0x8
+#define FMT_8_8_8_8 0x1a
+#define COLOR_8 0x1
+#define COLOR_5_6_5 0x8
+#define COLOR_8_8_8_8 0x1a
+
+static inline void
+set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
+{
+ u32 cb_color_info;
+ int pitch, slice;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ h = (h + 7) & ~7;
+ if (h < 8)
+ h = 8;
+
+ cb_color_info = ((format << 2) | (1 << 27));
+ pitch = (w / 8) - 1;
+ slice = ((w * h) / 64) - 1;
+
+ if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
+ ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
+ BEGIN_RING(21 + 2);
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(gpu_addr >> 8);
+ OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+ OUT_RING(2 << 0);
+ } else {
+ BEGIN_RING(21);
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(gpu_addr >> 8);
+ }
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING((pitch << 0) | (slice << 10));
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(cb_color_info);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+
+ ADVANCE_RING();
+}
+
+static inline void
+cp_set_surface_sync(drm_radeon_private_t *dev_priv,
+ u32 sync_type, u32 size, u64 mc_addr)
+{
+ u32 cp_coher_size;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ if (size == 0xffffffff)
+ cp_coher_size = 0xffffffff;
+ else
+ cp_coher_size = ((size + 255) >> 8);
+
+ BEGIN_RING(5);
+ OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+ OUT_RING(sync_type);
+ OUT_RING(cp_coher_size);
+ OUT_RING((mc_addr >> 8));
+ OUT_RING(10); /* poll interval */
+ ADVANCE_RING();
+}
+
+static inline void
+set_shaders(struct drm_device *dev)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ u64 gpu_addr;
+ int shader_size, i;
+ u32 *vs, *ps;
+ uint32_t sq_pgm_resources;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ /* load shaders */
+ vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
+ ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
+
+ shader_size = r6xx_vs_size;
+ for (i = 0; i < shader_size; i++)
+ vs[i] = r6xx_vs[i];
+ shader_size = r6xx_ps_size;
+ for (i = 0; i < shader_size; i++)
+ ps[i] = r6xx_ps[i];
+
+ dev_priv->blit_vb->used = 512;
+
+ gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
+
+ /* setup shader regs */
+ sq_pgm_resources = (1 << 0);
+
+ BEGIN_RING(9 + 12);
+ /* VS */
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(gpu_addr >> 8);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(sq_pgm_resources);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+
+ /* PS */
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING((gpu_addr + 256) >> 8);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(sq_pgm_resources | (1 << 28));
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(2);
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+ OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING(0);
+ ADVANCE_RING();
+
+ cp_set_surface_sync(dev_priv,
+ R600_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+static inline void
+set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
+{
+ uint32_t sq_vtx_constant_word2;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
+
+ BEGIN_RING(9);
+ OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ OUT_RING(0x460);
+ OUT_RING(gpu_addr & 0xffffffff);
+ OUT_RING(48 - 1);
+ OUT_RING(sq_vtx_constant_word2);
+ OUT_RING(1 << 0);
+ OUT_RING(0);
+ OUT_RING(0);
+ OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
+ ADVANCE_RING();
+
+ if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
+ cp_set_surface_sync(dev_priv,
+ R600_TC_ACTION_ENA, 48, gpu_addr);
+ else
+ cp_set_surface_sync(dev_priv,
+ R600_VC_ACTION_ENA, 48, gpu_addr);
+}
+
+static inline void
+set_tex_resource(drm_radeon_private_t *dev_priv,
+ int format, int w, int h, int pitch, u64 gpu_addr)
+{
+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ if (h < 1)
+ h = 1;
+
+ sq_tex_resource_word0 = (1 << 0);
+ sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
+ ((w - 1) << 19));
+
+ sq_tex_resource_word1 = (format << 26);
+ sq_tex_resource_word1 |= ((h - 1) << 0);
+
+ sq_tex_resource_word4 = ((1 << 14) |
+ (0 << 16) |
+ (1 << 19) |
+ (2 << 22) |
+ (3 << 25));
+
+ BEGIN_RING(9);
+ OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ OUT_RING(0);
+ OUT_RING(sq_tex_resource_word0);
+ OUT_RING(sq_tex_resource_word1);
+ OUT_RING(gpu_addr >> 8);
+ OUT_RING(gpu_addr >> 8);
+ OUT_RING(sq_tex_resource_word4);
+ OUT_RING(0);
+ OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
+ ADVANCE_RING();
+
+}
+
+static inline void
+set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
+{
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ BEGIN_RING(12);
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+ OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING((x1 << 0) | (y1 << 16));
+ OUT_RING((x2 << 0) | (y2 << 16));
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+ OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
+ OUT_RING((x2 << 0) | (y2 << 16));
+
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+ OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+ OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
+ OUT_RING((x2 << 0) | (y2 << 16));
+ ADVANCE_RING();
+}
+
+static inline void
+draw_auto(drm_radeon_private_t *dev_priv)
+{
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ BEGIN_RING(10);
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
+ OUT_RING(DI_PT_RECTLIST);
+
+ OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ OUT_RING(DI_INDEX_SIZE_16_BIT);
+
+ OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ OUT_RING(1);
+
+ OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+ OUT_RING(3);
+ OUT_RING(DI_SRC_SEL_AUTO_INDEX);
+
+ ADVANCE_RING();
+ COMMIT_RING();
+}
+
+static inline void
+set_default_state(drm_radeon_private_t *dev_priv)
+{
+ int default_state_dw, i;
+ u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+ u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+ int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+ int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+ int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+ RING_LOCALS;
+
+ switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
+ case CHIP_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ default:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV730:
+ case CHIP_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ }
+
+ if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
+ ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
+ sq_config = 0;
+ else
+ sq_config = R600_VC_ENABLE;
+
+ sq_config |= (R600_DX9_CONSTS |
+ R600_ALU_INST_PREFER_VECTOR |
+ R600_PS_PRIO(0) |
+ R600_VS_PRIO(1) |
+ R600_GS_PRIO(2) |
+ R600_ES_PRIO(3));
+
+ sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
+ R600_NUM_VS_GPRS(num_vs_gprs) |
+ R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+ sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
+ R600_NUM_ES_GPRS(num_es_gprs));
+ sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
+ R600_NUM_VS_THREADS(num_vs_threads) |
+ R600_NUM_GS_THREADS(num_gs_threads) |
+ R600_NUM_ES_THREADS(num_es_threads));
+ sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+ R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+ sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+ R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+ if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
+ default_state_dw = r7xx_default_size * 4;
+ BEGIN_RING(default_state_dw + 10);
+ for (i = 0; i < default_state_dw; i++)
+ OUT_RING(r7xx_default_state[i]);
+ } else {
+ default_state_dw = r6xx_default_size * 4;
+ BEGIN_RING(default_state_dw + 10);
+ for (i = 0; i < default_state_dw; i++)
+ OUT_RING(r6xx_default_state[i]);
+ }
+ OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+ OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+ /* SQ config */
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
+ OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
+ OUT_RING(sq_config);
+ OUT_RING(sq_gpr_resource_mgmt_1);
+ OUT_RING(sq_gpr_resource_mgmt_2);
+ OUT_RING(sq_thread_resource_mgmt);
+ OUT_RING(sq_stack_resource_mgmt_1);
+ OUT_RING(sq_stack_resource_mgmt_2);
+ ADVANCE_RING();
+}
+
+static inline uint32_t i2f(uint32_t input)
+{
+ u32 result, i, exponent, fraction;
+
+ if ((input & 0x3fff) == 0)
+ result = 0; /* 0 is a special case */
+ else {
+ exponent = 140; /* exponent biased by 127; */
+ fraction = (input & 0x3fff) << 10; /* cheat and only
+ handle numbers below 2^^15 */
+ for (i = 0; i < 14; i++) {
+ if (fraction & 0x800000)
+ break;
+ else {
+ fraction = fraction << 1; /* keep
+ shifting left until top bit = 1 */
+ exponent = exponent - 1;
+ }
+ }
+ result = exponent << 23 | (fraction & 0x7fffff); /* mask
+ off top bit; assumed 1 */
+ }
+ return result;
+}
+
+
+int r600_nomm_get_vb(struct drm_device *dev)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ dev_priv->blit_vb = radeon_freelist_get(dev);
+ if (!dev_priv->blit_vb) {
+ DRM_ERROR("Unable to allocate vertex buffer for blit\n");
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+void r600_nomm_put_vb(struct drm_device *dev)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+
+ dev_priv->blit_vb->used = 0;
+ radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
+}
+
+void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ return (((char *)dev->agp_buffer_map->handle +
+ dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
+}
+
+int
+r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ DRM_DEBUG("\n");
+
+ r600_nomm_get_vb(dev);
+
+ dev_priv->blit_vb->file_priv = file_priv;
+
+ set_default_state(dev_priv);
+ set_shaders(dev);
+
+ return 0;
+}
+
+
+void
+r600_done_blit_copy(struct drm_device *dev)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ BEGIN_RING(5);
+ OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+ OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+ /* wait for 3D idle clean */
+ OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
+ OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
+
+ ADVANCE_RING();
+ COMMIT_RING();
+
+ r600_nomm_put_vb(dev);
+}
+
+void
+r600_blit_copy(struct drm_device *dev,
+ uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+ int size_bytes)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ int max_bytes;
+ u64 vb_addr;
+ u32 *vb;
+
+ vb = r600_nomm_get_vb_ptr(dev);
+
+ if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
+ max_bytes = 8192;
+
+ while (size_bytes) {
+ int cur_size = size_bytes;
+ int src_x = src_gpu_addr & 255;
+ int dst_x = dst_gpu_addr & 255;
+ int h = 1;
+ src_gpu_addr = src_gpu_addr & ~255;
+ dst_gpu_addr = dst_gpu_addr & ~255;
+
+ if (!src_x && !dst_x) {
+ h = (cur_size / max_bytes);
+ if (h > 8192)
+ h = 8192;
+ if (h == 0)
+ h = 1;
+ else
+ cur_size = max_bytes;
+ } else {
+ if (cur_size > max_bytes)
+ cur_size = max_bytes;
+ if (cur_size > (max_bytes - dst_x))
+ cur_size = (max_bytes - dst_x);
+ if (cur_size > (max_bytes - src_x))
+ cur_size = (max_bytes - src_x);
+ }
+
+ if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+
+ r600_nomm_put_vb(dev);
+ r600_nomm_get_vb(dev);
+ if (!dev_priv->blit_vb)
+ return;
+ set_shaders(dev);
+ vb = r600_nomm_get_vb_ptr(dev);
+ }
+
+ vb[0] = i2f(dst_x);
+ vb[1] = 0;
+ vb[2] = i2f(src_x);
+ vb[3] = 0;
+
+ vb[4] = i2f(dst_x);
+ vb[5] = i2f(h);
+ vb[6] = i2f(src_x);
+ vb[7] = i2f(h);
+
+ vb[8] = i2f(dst_x + cur_size);
+ vb[9] = i2f(h);
+ vb[10] = i2f(src_x + cur_size);
+ vb[11] = i2f(h);
+
+ /* src */
+ set_tex_resource(dev_priv, FMT_8,
+ src_x + cur_size, h, src_x + cur_size,
+ src_gpu_addr);
+
+ cp_set_surface_sync(dev_priv,
+ R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+ /* dst */
+ set_render_target(dev_priv, COLOR_8,
+ dst_x + cur_size, h,
+ dst_gpu_addr);
+
+ /* scissors */
+ set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
+
+ /* Vertex buffer setup */
+ vb_addr = dev_priv->gart_buffers_offset +
+ dev_priv->blit_vb->offset +
+ dev_priv->blit_vb->used;
+ set_vtx_resource(dev_priv, vb_addr);
+
+ /* draw */
+ draw_auto(dev_priv);
+
+ cp_set_surface_sync(dev_priv,
+ R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+ cur_size * h, dst_gpu_addr);
+
+ vb += 12;
+ dev_priv->blit_vb->used += 12 * 4;
+
+ src_gpu_addr += cur_size * h;
+ dst_gpu_addr += cur_size * h;
+ size_bytes -= cur_size * h;
+ }
+ } else {
+ max_bytes = 8192 * 4;
+
+ while (size_bytes) {
+ int cur_size = size_bytes;
+ int src_x = (src_gpu_addr & 255);
+ int dst_x = (dst_gpu_addr & 255);
+ int h = 1;
+ src_gpu_addr = src_gpu_addr & ~255;
+ dst_gpu_addr = dst_gpu_addr & ~255;
+
+ if (!src_x && !dst_x) {
+ h = (cur_size / max_bytes);
+ if (h > 8192)
+ h = 8192;
+ if (h == 0)
+ h = 1;
+ else
+ cur_size = max_bytes;
+ } else {
+ if (cur_size > max_bytes)
+ cur_size = max_bytes;
+ if (cur_size > (max_bytes - dst_x))
+ cur_size = (max_bytes - dst_x);
+ if (cur_size > (max_bytes - src_x))
+ cur_size = (max_bytes - src_x);
+ }
+
+ if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+ r600_nomm_put_vb(dev);
+ r600_nomm_get_vb(dev);
+ if (!dev_priv->blit_vb)
+ return;
+
+ set_shaders(dev);
+ vb = r600_nomm_get_vb_ptr(dev);
+ }
+
+ vb[0] = i2f(dst_x / 4);
+ vb[1] = 0;
+ vb[2] = i2f(src_x / 4);
+ vb[3] = 0;
+
+ vb[4] = i2f(dst_x / 4);
+ vb[5] = i2f(h);
+ vb[6] = i2f(src_x / 4);
+ vb[7] = i2f(h);
+
+ vb[8] = i2f((dst_x + cur_size) / 4);
+ vb[9] = i2f(h);
+ vb[10] = i2f((src_x + cur_size) / 4);
+ vb[11] = i2f(h);
+
+ /* src */
+ set_tex_resource(dev_priv, FMT_8_8_8_8,
+ (src_x + cur_size) / 4,
+ h, (src_x + cur_size) / 4,
+ src_gpu_addr);
+
+ cp_set_surface_sync(dev_priv,
+ R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+ /* dst */
+ set_render_target(dev_priv, COLOR_8_8_8_8,
+ dst_x + cur_size, h,
+ dst_gpu_addr);
+
+ /* scissors */
+ set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
+
+ /* Vertex buffer setup */
+ vb_addr = dev_priv->gart_buffers_offset +
+ dev_priv->blit_vb->offset +
+ dev_priv->blit_vb->used;
+ set_vtx_resource(dev_priv, vb_addr);
+
+ /* draw */
+ draw_auto(dev_priv);
+
+ cp_set_surface_sync(dev_priv,
+ R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+ cur_size * h, dst_gpu_addr);
+
+ vb += 12;
+ dev_priv->blit_vb->used += 12 * 4;
+
+ src_gpu_addr += cur_size * h;
+ dst_gpu_addr += cur_size * h;
+ size_bytes -= cur_size * h;
+ }
+ }
+}
+
+void
+r600_blit_swap(struct drm_device *dev,
+ uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+ int sx, int sy, int dx, int dy,
+ int w, int h, int src_pitch, int dst_pitch, int cpp)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ int cb_format, tex_format;
+ u64 vb_addr;
+ u32 *vb;
+
+ vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
+ dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
+
+ if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+
+ r600_nomm_put_vb(dev);
+ r600_nomm_get_vb(dev);
+ if (!dev_priv->blit_vb)
+ return;
+
+ set_shaders(dev);
+ vb = r600_nomm_get_vb_ptr(dev);
+ }
+
+ if (cpp == 4) {
+ cb_format = COLOR_8_8_8_8;
+ tex_format = FMT_8_8_8_8;
+ } else if (cpp == 2) {
+ cb_format = COLOR_5_6_5;
+ tex_format = FMT_5_6_5;
+ } else {
+ cb_format = COLOR_8;
+ tex_format = FMT_8;
+ }
+
+ vb[0] = i2f(dx);
+ vb[1] = i2f(dy);
+ vb[2] = i2f(sx);
+ vb[3] = i2f(sy);
+
+ vb[4] = i2f(dx);
+ vb[5] = i2f(dy + h);
+ vb[6] = i2f(sx);
+ vb[7] = i2f(sy + h);
+
+ vb[8] = i2f(dx + w);
+ vb[9] = i2f(dy + h);
+ vb[10] = i2f(sx + w);
+ vb[11] = i2f(sy + h);
+
+ /* src */
+ set_tex_resource(dev_priv, tex_format,
+ src_pitch / cpp,
+ sy + h, src_pitch / cpp,
+ src_gpu_addr);
+
+ cp_set_surface_sync(dev_priv,
+ R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr);
+
+ /* dst */
+ set_render_target(dev_priv, cb_format,
+ dst_pitch / cpp, dy + h,
+ dst_gpu_addr);
+
+ /* scissors */
+ set_scissors(dev_priv, dx, dy, dx + w, dy + h);
+
+ /* Vertex buffer setup */
+ vb_addr = dev_priv->gart_buffers_offset +
+ dev_priv->blit_vb->offset +
+ dev_priv->blit_vb->used;
+ set_vtx_resource(dev_priv, vb_addr);
+
+ /* draw */
+ draw_auto(dev_priv);
+
+ cp_set_surface_sync(dev_priv,
+ R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+ dst_pitch * (dy + h), dst_gpu_addr);
+
+ dev_priv->blit_vb->used += 12 * 4;
+}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
new file mode 100644
index 0000000..5755647
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -0,0 +1,777 @@
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "r600d.h"
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST 0x11
+#define DI_INDEX_SIZE_16_BIT 0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8 0x1
+#define FMT_5_6_5 0x8
+#define FMT_8_8_8_8 0x1a
+#define COLOR_8 0x1
+#define COLOR_5_6_5 0x8
+#define COLOR_8_8_8_8 0x1a
+
+/* emits 21 on rv770+, 23 on r600 */
+static void
+set_render_target(struct radeon_device *rdev, int format,
+ int w, int h, u64 gpu_addr)
+{
+ u32 cb_color_info;
+ int pitch, slice;
+
+ h = (h + 7) & ~7;
+ if (h < 8)
+ h = 8;
+
+ cb_color_info = ((format << 2) | (1 << 27));
+ pitch = (w / 8) - 1;
+ slice = ((w * h) / 64) - 1;
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, gpu_addr >> 8);
+
+ if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
+ radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
+ radeon_ring_write(rdev, 2 << 0);
+ }
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, (pitch << 0) | (slice << 10));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, cb_color_info);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+}
+
+/* emits 5dw */
+static void
+cp_set_surface_sync(struct radeon_device *rdev,
+ u32 sync_type, u32 size,
+ u64 mc_addr)
+{
+ u32 cp_coher_size;
+
+ if (size == 0xffffffff)
+ cp_coher_size = 0xffffffff;
+ else
+ cp_coher_size = ((size + 255) >> 8);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ radeon_ring_write(rdev, sync_type);
+ radeon_ring_write(rdev, cp_coher_size);
+ radeon_ring_write(rdev, mc_addr >> 8);
+ radeon_ring_write(rdev, 10); /* poll interval */
+}
+
+/* emits 21dw + 1 surface sync = 26dw */
+static void
+set_shaders(struct radeon_device *rdev)
+{
+ u64 gpu_addr;
+ u32 sq_pgm_resources;
+
+ /* setup shader regs */
+ sq_pgm_resources = (1 << 0);
+
+ /* VS */
+ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, gpu_addr >> 8);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, sq_pgm_resources);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+
+ /* PS */
+ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, gpu_addr >> 8);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 2);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, 0);
+
+ cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+/* emits 9 + 1 sync (5) = 14*/
+static void
+set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
+{
+ u32 sq_vtx_constant_word2;
+
+ sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+ radeon_ring_write(rdev, 0x460);
+ radeon_ring_write(rdev, gpu_addr & 0xffffffff);
+ radeon_ring_write(rdev, 48 - 1);
+ radeon_ring_write(rdev, sq_vtx_constant_word2);
+ radeon_ring_write(rdev, 1 << 0);
+ radeon_ring_write(rdev, 0);
+ radeon_ring_write(rdev, 0);
+ radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+
+ if ((rdev->family == CHIP_RV610) ||
+ (rdev->family == CHIP_RV620) ||
+ (rdev->family == CHIP_RS780) ||
+ (rdev->family == CHIP_RS880) ||
+ (rdev->family == CHIP_RV710))
+ cp_set_surface_sync(rdev,
+ PACKET3_TC_ACTION_ENA, 48, gpu_addr);
+ else
+ cp_set_surface_sync(rdev,
+ PACKET3_VC_ACTION_ENA, 48, gpu_addr);
+}
+
+/* emits 9 */
+static void
+set_tex_resource(struct radeon_device *rdev,
+ int format, int w, int h, int pitch,
+ u64 gpu_addr)
+{
+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+
+ if (h < 1)
+ h = 1;
+
+ sq_tex_resource_word0 = (1 << 0);
+ sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
+ ((w - 1) << 19));
+
+ sq_tex_resource_word1 = (format << 26);
+ sq_tex_resource_word1 |= ((h - 1) << 0);
+
+ sq_tex_resource_word4 = ((1 << 14) |
+ (0 << 16) |
+ (1 << 19) |
+ (2 << 22) |
+ (3 << 25));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+ radeon_ring_write(rdev, 0);
+ radeon_ring_write(rdev, sq_tex_resource_word0);
+ radeon_ring_write(rdev, sq_tex_resource_word1);
+ radeon_ring_write(rdev, gpu_addr >> 8);
+ radeon_ring_write(rdev, gpu_addr >> 8);
+ radeon_ring_write(rdev, sq_tex_resource_word4);
+ radeon_ring_write(rdev, 0);
+ radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
+}
+
+/* emits 12 */
+static void
+set_scissors(struct radeon_device *rdev, int x1, int y1,
+ int x2, int y2)
+{
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
+ radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
+ radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
+ radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
+}
+
+/* emits 10 */
+static void
+draw_auto(struct radeon_device *rdev)
+{
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, DI_PT_RECTLIST);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
+ radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
+ radeon_ring_write(rdev, 1);
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+ radeon_ring_write(rdev, 3);
+ radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+
+}
+
+/* emits 14 */
+static void
+set_default_state(struct radeon_device *rdev)
+{
+ u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+ u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+ int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+ int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+ int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+ u64 gpu_addr;
+
+ switch (rdev->family) {
+ case CHIP_R600:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 40;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ default:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV670:
+ num_ps_gprs = 144;
+ num_vs_gprs = 40;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 136;
+ num_vs_threads = 48;
+ num_gs_threads = 4;
+ num_es_threads = 4;
+ num_ps_stack_entries = 40;
+ num_vs_stack_entries = 40;
+ num_gs_stack_entries = 32;
+ num_es_stack_entries = 16;
+ break;
+ case CHIP_RV770:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 256;
+ num_vs_stack_entries = 256;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV730:
+ case CHIP_RV740:
+ num_ps_gprs = 84;
+ num_vs_gprs = 36;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 188;
+ num_vs_threads = 60;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ case CHIP_RV710:
+ num_ps_gprs = 192;
+ num_vs_gprs = 56;
+ num_temp_gprs = 4;
+ num_gs_gprs = 0;
+ num_es_gprs = 0;
+ num_ps_threads = 144;
+ num_vs_threads = 48;
+ num_gs_threads = 0;
+ num_es_threads = 0;
+ num_ps_stack_entries = 128;
+ num_vs_stack_entries = 128;
+ num_gs_stack_entries = 0;
+ num_es_stack_entries = 0;
+ break;
+ }
+
+ if ((rdev->family == CHIP_RV610) ||
+ (rdev->family == CHIP_RV620) ||
+ (rdev->family == CHIP_RS780) ||
+ (rdev->family == CHIP_RS780) ||
+ (rdev->family == CHIP_RV710))
+ sq_config = 0;
+ else
+ sq_config = VC_ENABLE;
+
+ sq_config |= (DX9_CONSTS |
+ ALU_INST_PREFER_VECTOR |
+ PS_PRIO(0) |
+ VS_PRIO(1) |
+ GS_PRIO(2) |
+ ES_PRIO(3));
+
+ sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+ NUM_VS_GPRS(num_vs_gprs) |
+ NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+ sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+ NUM_ES_GPRS(num_es_gprs));
+ sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+ NUM_VS_THREADS(num_vs_threads) |
+ NUM_GS_THREADS(num_gs_threads) |
+ NUM_ES_THREADS(num_es_threads));
+ sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+ NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+ sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+ NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+ /* emit an IB pointing at default state */
+ gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
+ radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
+ radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
+ radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4));
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+ radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+ /* SQ config */
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
+ radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, sq_config);
+ radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+ radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+ radeon_ring_write(rdev, sq_thread_resource_mgmt);
+ radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+ radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+}
+
+static inline uint32_t i2f(uint32_t input)
+{
+ u32 result, i, exponent, fraction;
+
+ if ((input & 0x3fff) == 0)
+ result = 0; /* 0 is a special case */
+ else {
+ exponent = 140; /* exponent biased by 127; */
+ fraction = (input & 0x3fff) << 10; /* cheat and only
+ handle numbers below 2^^15 */
+ for (i = 0; i < 14; i++) {
+ if (fraction & 0x800000)
+ break;
+ else {
+ fraction = fraction << 1; /* keep
+ shifting left until top bit = 1 */
+ exponent = exponent - 1;
+ }
+ }
+ result = exponent << 23 | (fraction & 0x7fffff); /* mask
+ off top bit; assumed 1 */
+ }
+ return result;
+}
+
+int r600_blit_init(struct radeon_device *rdev)
+{
+ u32 obj_size;
+ int r;
+ void *ptr;
+
+ rdev->r600_blit.state_offset = 0;
+
+ if (rdev->family >= CHIP_RV770)
+ rdev->r600_blit.state_len = r7xx_default_size * 4;
+ else
+ rdev->r600_blit.state_len = r6xx_default_size * 4;
+
+ obj_size = rdev->r600_blit.state_len;
+ obj_size = ALIGN(obj_size, 256);
+
+ rdev->r600_blit.vs_offset = obj_size;
+ obj_size += r6xx_vs_size * 4;
+ obj_size = ALIGN(obj_size, 256);
+
+ rdev->r600_blit.ps_offset = obj_size;
+ obj_size += r6xx_ps_size * 4;
+ obj_size = ALIGN(obj_size, 256);
+
+ r = radeon_object_create(rdev, NULL, obj_size,
+ true, RADEON_GEM_DOMAIN_VRAM,
+ false, &rdev->r600_blit.shader_obj);
+ if (r) {
+ DRM_ERROR("r600 failed to allocate shader\n");
+ return r;
+ }
+
+ r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
+ &rdev->r600_blit.shader_gpu_addr);
+ if (r) {
+ DRM_ERROR("failed to pin blit object %d\n", r);
+ return r;
+ }
+
+ DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n",
+ rdev->r600_blit.shader_gpu_addr, obj_size,
+ rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
+
+ r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr);
+ if (r) {
+ DRM_ERROR("failed to map blit object %d\n", r);
+ return r;
+ }
+
+ if (rdev->family >= CHIP_RV770)
+ memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len);
+ else
+ memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len);
+
+ memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4);
+ memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4);
+
+ radeon_object_kunmap(rdev->r600_blit.shader_obj);
+ return 0;
+}
+
+void r600_blit_fini(struct radeon_device *rdev)
+{
+ radeon_object_unpin(rdev->r600_blit.shader_obj);
+ radeon_object_unref(&rdev->r600_blit.shader_obj);
+}
+
+int r600_vb_ib_get(struct radeon_device *rdev)
+{
+ int r;
+ r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
+ if (r) {
+ DRM_ERROR("failed to get IB for vertex buffer\n");
+ return r;
+ }
+
+ rdev->r600_blit.vb_total = 64*1024;
+ rdev->r600_blit.vb_used = 0;
+ return 0;
+}
+
+void r600_vb_ib_put(struct radeon_device *rdev)
+{
+ mutex_lock(&rdev->ib_pool.mutex);
+ radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
+ list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs);
+ mutex_unlock(&rdev->ib_pool.mutex);
+ radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
+}
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+{
+ int r;
+ int ring_size;
+ const int max_size = 8192*8192;
+
+ r = r600_vb_ib_get(rdev);
+ WARN_ON(r);
+
+ /* loops of emits 64 + fence emit possible */
+ ring_size = ((size_bytes + max_size) / max_size) * 78;
+ /* set default + shaders */
+ ring_size += 40; /* shaders + def state */
+ ring_size += 3; /* fence emit for VB IB */
+ ring_size += 5; /* done copy */
+ ring_size += 3; /* fence emit for done copy */
+ r = radeon_ring_lock(rdev, ring_size);
+ WARN_ON(r);
+
+ set_default_state(rdev); /* 14 */
+ set_shaders(rdev); /* 26 */
+ return 0;
+}
+
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
+{
+ int r;
+
+ radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+ radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+ /* wait for 3D idle clean */
+ radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+ radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+ if (rdev->r600_blit.vb_ib)
+ r600_vb_ib_put(rdev);
+
+ if (fence)
+ r = radeon_fence_emit(rdev, fence);
+
+ radeon_ring_unlock_commit(rdev);
+}
+
+void r600_kms_blit_copy(struct radeon_device *rdev,
+ u64 src_gpu_addr, u64 dst_gpu_addr,
+ int size_bytes)
+{
+ int max_bytes;
+ u64 vb_gpu_addr;
+ u32 *vb;
+
+ DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
+ size_bytes, rdev->r600_blit.vb_used);
+ vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
+ if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
+ max_bytes = 8192;
+
+ while (size_bytes) {
+ int cur_size = size_bytes;
+ int src_x = src_gpu_addr & 255;
+ int dst_x = dst_gpu_addr & 255;
+ int h = 1;
+ src_gpu_addr = src_gpu_addr & ~255;
+ dst_gpu_addr = dst_gpu_addr & ~255;
+
+ if (!src_x && !dst_x) {
+ h = (cur_size / max_bytes);
+ if (h > 8192)
+ h = 8192;
+ if (h == 0)
+ h = 1;
+ else
+ cur_size = max_bytes;
+ } else {
+ if (cur_size > max_bytes)
+ cur_size = max_bytes;
+ if (cur_size > (max_bytes - dst_x))
+ cur_size = (max_bytes - dst_x);
+ if (cur_size > (max_bytes - src_x))
+ cur_size = (max_bytes - src_x);
+ }
+
+ if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+ WARN_ON(1);
+
+#if 0
+ r600_vb_ib_put(rdev);
+
+ r600_nomm_put_vb(dev);
+ r600_nomm_get_vb(dev);
+ if (!dev_priv->blit_vb)
+ return;
+ set_shaders(dev);
+ vb = r600_nomm_get_vb_ptr(dev);
+#endif
+ }
+
+ vb[0] = i2f(dst_x);
+ vb[1] = 0;
+ vb[2] = i2f(src_x);
+ vb[3] = 0;
+
+ vb[4] = i2f(dst_x);
+ vb[5] = i2f(h);
+ vb[6] = i2f(src_x);
+ vb[7] = i2f(h);
+
+ vb[8] = i2f(dst_x + cur_size);
+ vb[9] = i2f(h);
+ vb[10] = i2f(src_x + cur_size);
+ vb[11] = i2f(h);
+
+ /* src 9 */
+ set_tex_resource(rdev, FMT_8,
+ src_x + cur_size, h, src_x + cur_size,
+ src_gpu_addr);
+
+ /* 5 */
+ cp_set_surface_sync(rdev,
+ PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+ /* dst 23 */
+ set_render_target(rdev, COLOR_8,
+ dst_x + cur_size, h,
+ dst_gpu_addr);
+
+ /* scissors 12 */
+ set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+
+ /* 14 */
+ vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+ set_vtx_resource(rdev, vb_gpu_addr);
+
+ /* draw 10 */
+ draw_auto(rdev);
+
+ /* 5 */
+ cp_set_surface_sync(rdev,
+ PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+ cur_size * h, dst_gpu_addr);
+
+ vb += 12;
+ rdev->r600_blit.vb_used += 12 * 4;
+
+ src_gpu_addr += cur_size * h;
+ dst_gpu_addr += cur_size * h;
+ size_bytes -= cur_size * h;
+ }
+ } else {
+ max_bytes = 8192 * 4;
+
+ while (size_bytes) {
+ int cur_size = size_bytes;
+ int src_x = (src_gpu_addr & 255);
+ int dst_x = (dst_gpu_addr & 255);
+ int h = 1;
+ src_gpu_addr = src_gpu_addr & ~255;
+ dst_gpu_addr = dst_gpu_addr & ~255;
+
+ if (!src_x && !dst_x) {
+ h = (cur_size / max_bytes);
+ if (h > 8192)
+ h = 8192;
+ if (h == 0)
+ h = 1;
+ else
+ cur_size = max_bytes;
+ } else {
+ if (cur_size > max_bytes)
+ cur_size = max_bytes;
+ if (cur_size > (max_bytes - dst_x))
+ cur_size = (max_bytes - dst_x);
+ if (cur_size > (max_bytes - src_x))
+ cur_size = (max_bytes - src_x);
+ }
+
+ if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+ WARN_ON(1);
+ }
+#if 0
+ if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) {
+ r600_nomm_put_vb(dev);
+ r600_nomm_get_vb(dev);
+ if (!rdev->blit_vb)
+ return;
+
+ set_shaders(dev);
+ vb = r600_nomm_get_vb_ptr(dev);
+ }
+#endif
+
+ vb[0] = i2f(dst_x / 4);
+ vb[1] = 0;
+ vb[2] = i2f(src_x / 4);
+ vb[3] = 0;
+
+ vb[4] = i2f(dst_x / 4);
+ vb[5] = i2f(h);
+ vb[6] = i2f(src_x / 4);
+ vb[7] = i2f(h);
+
+ vb[8] = i2f((dst_x + cur_size) / 4);
+ vb[9] = i2f(h);
+ vb[10] = i2f((src_x + cur_size) / 4);
+ vb[11] = i2f(h);
+
+ /* src 9 */
+ set_tex_resource(rdev, FMT_8_8_8_8,
+ (src_x + cur_size) / 4,
+ h, (src_x + cur_size) / 4,
+ src_gpu_addr);
+ /* 5 */
+ cp_set_surface_sync(rdev,
+ PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+ /* dst 23 */
+ set_render_target(rdev, COLOR_8_8_8_8,
+ dst_x + cur_size, h,
+ dst_gpu_addr);
+
+ /* scissors 12 */
+ set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
+
+ /* Vertex buffer setup 14 */
+ vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+ set_vtx_resource(rdev, vb_gpu_addr);
+
+ /* draw 10 */
+ draw_auto(rdev);
+
+ /* 5 */
+ cp_set_surface_sync(rdev,
+ PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+ cur_size * h, dst_gpu_addr);
+
+ /* 78 ring dwords per loop */
+ vb += 12;
+ rdev->r600_blit.vb_used += 12 * 4;
+
+ src_gpu_addr += cur_size * h;
+ dst_gpu_addr += cur_size * h;
+ size_bytes -= cur_size * h;
+ }
+ }
+}
+
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
new file mode 100644
index 0000000..d745e81
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c
@@ -0,0 +1,1072 @@
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+const u32 r6xx_default_state[] =
+{
+ 0xc0002400,
+ 0x00000000,
+ 0xc0012800,
+ 0x80000000,
+ 0x80000000,
+ 0xc0004600,
+ 0x00000016,
+ 0xc0016800,
+ 0x00000010,
+ 0x00028000,
+ 0xc0016800,
+ 0x00000010,
+ 0x00008000,
+ 0xc0016800,
+ 0x00000542,
+ 0x07000003,
+ 0xc0016800,
+ 0x000005c5,
+ 0x00000000,
+ 0xc0016800,
+ 0x00000363,
+ 0x00000000,
+ 0xc0016800,
+ 0x0000060c,
+ 0x82000000,
+ 0xc0016800,
+ 0x0000060e,
+ 0x01020204,
+ 0xc0016f00,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016f00,
+ 0x00000001,
+ 0x00000000,
+ 0xc0096900,
+ 0x0000022a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000004,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000000a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000000b,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010c,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010d,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000200,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000343,
+ 0x00000060,
+ 0xc0016900,
+ 0x00000344,
+ 0x00000040,
+ 0xc0016900,
+ 0x00000351,
+ 0x0000aa00,
+ 0xc0016900,
+ 0x00000104,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010e,
+ 0x00000000,
+ 0xc0046900,
+ 0x00000105,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0036900,
+ 0x00000109,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0046900,
+ 0x0000030c,
+ 0x01000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0046900,
+ 0x00000048,
+ 0x3f800000,
+ 0x00000000,
+ 0x3f800000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000008e,
+ 0x0000000f,
+ 0xc0016900,
+ 0x00000080,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000083,
+ 0x0000ffff,
+ 0xc0016900,
+ 0x00000084,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000085,
+ 0x20002000,
+ 0xc0016900,
+ 0x00000086,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000087,
+ 0x20002000,
+ 0xc0016900,
+ 0x00000088,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000089,
+ 0x20002000,
+ 0xc0016900,
+ 0x0000008a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000008b,
+ 0x20002000,
+ 0xc0016900,
+ 0x0000008c,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000094,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000095,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b4,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000096,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000097,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b6,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000098,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000099,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b8,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009a,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009b,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ba,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009c,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009d,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000bc,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009e,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009f,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000be,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a0,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a1,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c0,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a2,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a3,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c2,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a4,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a5,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c4,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a6,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a7,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c6,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a8,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a9,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c8,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000aa,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000ab,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ca,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000ac,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000ad,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000cc,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000ae,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000af,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ce,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000b0,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000b1,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000d0,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000b2,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000b3,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000d2,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000293,
+ 0x00004010,
+ 0xc0016900,
+ 0x00000300,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000301,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000312,
+ 0xffffffff,
+ 0xc0016900,
+ 0x00000307,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000308,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000283,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000292,
+ 0x00000000,
+ 0xc0066900,
+ 0x0000010f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000206,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000207,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000208,
+ 0x00000000,
+ 0xc0046900,
+ 0x00000303,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000205,
+ 0x00000004,
+ 0xc0016900,
+ 0x00000280,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000281,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000037e,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000382,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000380,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000383,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000381,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000282,
+ 0x00000008,
+ 0xc0016900,
+ 0x00000302,
+ 0x0000002d,
+ 0xc0016900,
+ 0x0000037f,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b2,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b6,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b7,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b8,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b9,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000225,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000229,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000237,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000100,
+ 0x00000800,
+ 0xc0016900,
+ 0x00000101,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000102,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a8,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a9,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000103,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000284,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000290,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000285,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000286,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000287,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000288,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000289,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028b,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028c,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028d,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028e,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028f,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a1,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a5,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ac,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ad,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ae,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002c8,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000206,
+ 0x00000100,
+ 0xc0016900,
+ 0x00000204,
+ 0x00010000,
+ 0xc0036e00,
+ 0x00000000,
+ 0x00000012,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000008f,
+ 0x0000000f,
+ 0xc0016900,
+ 0x000001e8,
+ 0x00000001,
+ 0xc0016900,
+ 0x00000202,
+ 0x00cc0000,
+ 0xc0016900,
+ 0x00000205,
+ 0x00000244,
+ 0xc0016900,
+ 0x00000203,
+ 0x00000210,
+ 0xc0016900,
+ 0x000001b1,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000185,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b3,
+ 0x00000001,
+ 0xc0016900,
+ 0x000001b4,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000191,
+ 0x00000b00,
+ 0xc0016900,
+ 0x000001b5,
+ 0x00000000,
+};
+
+const u32 r7xx_default_state[] =
+{
+ 0xc0012800,
+ 0x80000000,
+ 0x80000000,
+ 0xc0004600,
+ 0x00000016,
+ 0xc0016800,
+ 0x00000010,
+ 0x00028000,
+ 0xc0016800,
+ 0x00000010,
+ 0x00008000,
+ 0xc0016800,
+ 0x00000542,
+ 0x07000002,
+ 0xc0016800,
+ 0x000005c5,
+ 0x00000000,
+ 0xc0016800,
+ 0x00000363,
+ 0x00004000,
+ 0xc0016800,
+ 0x0000060c,
+ 0x00000000,
+ 0xc0016800,
+ 0x0000060e,
+ 0x00420204,
+ 0xc0016f00,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016f00,
+ 0x00000001,
+ 0x00000000,
+ 0xc0096900,
+ 0x0000022a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000004,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000000a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000000b,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010c,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010d,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000200,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000343,
+ 0x00000060,
+ 0xc0016900,
+ 0x00000344,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000351,
+ 0x0000aa00,
+ 0xc0016900,
+ 0x00000104,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000010e,
+ 0x00000000,
+ 0xc0046900,
+ 0x00000105,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0046900,
+ 0x0000030c,
+ 0x01000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000008e,
+ 0x0000000f,
+ 0xc0016900,
+ 0x00000080,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000083,
+ 0x0000ffff,
+ 0xc0016900,
+ 0x00000084,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000085,
+ 0x20002000,
+ 0xc0016900,
+ 0x00000086,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000087,
+ 0x20002000,
+ 0xc0016900,
+ 0x00000088,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000089,
+ 0x20002000,
+ 0xc0016900,
+ 0x0000008a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000008b,
+ 0x20002000,
+ 0xc0016900,
+ 0x0000008c,
+ 0xaaaaaaaa,
+ 0xc0016900,
+ 0x00000094,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000095,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b4,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000096,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000097,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b6,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000098,
+ 0x80000000,
+ 0xc0016900,
+ 0x00000099,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000b8,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009a,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009b,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ba,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009c,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009d,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000bc,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x0000009e,
+ 0x80000000,
+ 0xc0016900,
+ 0x0000009f,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000be,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a0,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a1,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c0,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a2,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a3,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c2,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a4,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a5,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c4,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a6,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a7,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c6,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000a8,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000a9,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000c8,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000aa,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000ab,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ca,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000ac,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000ad,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000cc,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000ae,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000af,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000ce,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000b0,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000b1,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000d0,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x000000b2,
+ 0x80000000,
+ 0xc0016900,
+ 0x000000b3,
+ 0x20002000,
+ 0xc0026900,
+ 0x000000d2,
+ 0x00000000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000293,
+ 0x00514000,
+ 0xc0016900,
+ 0x00000300,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000301,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000312,
+ 0xffffffff,
+ 0xc0016900,
+ 0x00000307,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000308,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000283,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000292,
+ 0x00000000,
+ 0xc0066900,
+ 0x0000010f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000206,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000207,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000208,
+ 0x00000000,
+ 0xc0046900,
+ 0x00000303,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0xc0016900,
+ 0x00000205,
+ 0x00000004,
+ 0xc0016900,
+ 0x00000280,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000281,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000037e,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000382,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000380,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000383,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000381,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000282,
+ 0x00000008,
+ 0xc0016900,
+ 0x00000302,
+ 0x0000002d,
+ 0xc0016900,
+ 0x0000037f,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b2,
+ 0x00000001,
+ 0xc0016900,
+ 0x000001b6,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b7,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b8,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b9,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000225,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000229,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000237,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000100,
+ 0x00000800,
+ 0xc0016900,
+ 0x00000101,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000102,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a8,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a9,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000103,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000284,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000290,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000285,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000286,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000287,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000288,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000289,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028a,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028b,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028c,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028d,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028e,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000028f,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a1,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002a5,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ac,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ad,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002ae,
+ 0x00000000,
+ 0xc0016900,
+ 0x000002c8,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000206,
+ 0x00000100,
+ 0xc0016900,
+ 0x00000204,
+ 0x00010000,
+ 0xc0036e00,
+ 0x00000000,
+ 0x00000012,
+ 0x00000000,
+ 0x00000000,
+ 0xc0016900,
+ 0x0000008f,
+ 0x0000000f,
+ 0xc0016900,
+ 0x000001e8,
+ 0x00000001,
+ 0xc0016900,
+ 0x00000202,
+ 0x00cc0000,
+ 0xc0016900,
+ 0x00000205,
+ 0x00000244,
+ 0xc0016900,
+ 0x00000203,
+ 0x00000210,
+ 0xc0016900,
+ 0x000001b1,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000185,
+ 0x00000000,
+ 0xc0016900,
+ 0x000001b3,
+ 0x00000001,
+ 0xc0016900,
+ 0x000001b4,
+ 0x00000000,
+ 0xc0016900,
+ 0x00000191,
+ 0x00000b00,
+ 0xc0016900,
+ 0x000001b5,
+ 0x00000000,
+};
+
+/* same for r6xx/r7xx */
+const u32 r6xx_vs[] =
+{
+ 0x00000004,
+ 0x81000000,
+ 0x0000203c,
+ 0x94000b08,
+ 0x00004000,
+ 0x14200b1a,
+ 0x00000000,
+ 0x00000000,
+ 0x3c000000,
+ 0x68cd1000,
+ 0x00080000,
+ 0x00000000,
+};
+
+const u32 r6xx_ps[] =
+{
+ 0x00000002,
+ 0x80800000,
+ 0x00000000,
+ 0x94200688,
+ 0x00000010,
+ 0x000d1000,
+ 0xb0800000,
+ 0x00000000,
+};
+
+const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps);
+const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs);
+const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state);
+const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state);
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
new file mode 100644
index 0000000..fdc3b37
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h
@@ -0,0 +1,14 @@
+
+#ifndef R600_BLIT_SHADERS_H
+#define R600_BLIT_SHADERS_H
+
+extern const u32 r6xx_ps[];
+extern const u32 r6xx_vs[];
+extern const u32 r7xx_default_state[];
+extern const u32 r6xx_default_state[];
+
+
+extern const u32 r6xx_ps_size, r6xx_vs_size;
+extern const u32 r6xx_default_size, r7xx_default_size;
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 8327912..6d5a711 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin");
MODULE_FIRMWARE("radeon/RV710_pfp.bin");
MODULE_FIRMWARE("radeon/RV710_me.bin");
+
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+ unsigned family, u32 *ib, int *l);
+void r600_cs_legacy_init(void);
+
+
# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */
# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1))
@@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
DRM_DEBUG("\n");
+ mutex_init(&dev_priv->cs_mutex);
+ r600_cs_legacy_init();
/* if we require new memory map but we don't have it fail */
if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
@@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
/* Enable vblank on CRTC1 for older X servers
*/
dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
-
+ dev_priv->do_boxes = 0;
dev_priv->cp_mode = init->cp_mode;
/* We don't support anything other than bus-mastering ring mode,
@@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
} else
#endif
{
- dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
+ dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
dev_priv->ring_rptr->handle =
- (void *)dev_priv->ring_rptr->offset;
+ (void *)(unsigned long)dev_priv->ring_rptr->offset;
dev->agp_buffer_map->handle =
- (void *)dev->agp_buffer_map->offset;
+ (void *)(unsigned long)dev->agp_buffer_map->offset;
DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
dev_priv->cp_ring->handle);
@@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev,
return 0;
}
+
+void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ struct drm_master *master = file_priv->master;
+ struct drm_radeon_master_private *master_priv = master->driver_priv;
+ drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
+ int nbox = sarea_priv->nbox;
+ struct drm_clip_rect *pbox = sarea_priv->boxes;
+ int i, cpp, src_pitch, dst_pitch;
+ uint64_t src, dst;
+ RING_LOCALS;
+ DRM_DEBUG("\n");
+
+ if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
+ cpp = 4;
+ else
+ cpp = 2;
+
+ if (sarea_priv->pfCurrentPage == 0) {
+ src_pitch = dev_priv->back_pitch;
+ dst_pitch = dev_priv->front_pitch;
+ src = dev_priv->back_offset + dev_priv->fb_location;
+ dst = dev_priv->front_offset + dev_priv->fb_location;
+ } else {
+ src_pitch = dev_priv->front_pitch;
+ dst_pitch = dev_priv->back_pitch;
+ src = dev_priv->front_offset + dev_priv->fb_location;
+ dst = dev_priv->back_offset + dev_priv->fb_location;
+ }
+
+ if (r600_prepare_blit_copy(dev, file_priv)) {
+ DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+ return;
+ }
+ for (i = 0; i < nbox; i++) {
+ int x = pbox[i].x1;
+ int y = pbox[i].y1;
+ int w = pbox[i].x2 - x;
+ int h = pbox[i].y2 - y;
+
+ DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
+
+ r600_blit_swap(dev,
+ src, dst,
+ x, y, x, y, w, h,
+ src_pitch, dst_pitch, cpp);
+ }
+ r600_done_blit_copy(dev);
+
+ /* Increment the frame counter. The client-side 3D driver must
+ * throttle the framerate by waiting for this value before
+ * performing the swapbuffer ioctl.
+ */
+ sarea_priv->last_frame++;
+
+ BEGIN_RING(3);
+ R600_FRAME_AGE(sarea_priv->last_frame);
+ ADVANCE_RING();
+}
+
+int r600_cp_dispatch_texture(struct drm_device *dev,
+ struct drm_file *file_priv,
+ drm_radeon_texture_t *tex,
+ drm_radeon_tex_image_t *image)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ struct drm_buf *buf;
+ u32 *buffer;
+ const u8 __user *data;
+ int size, pass_size;
+ u64 src_offset, dst_offset;
+
+ if (!radeon_check_offset(dev_priv, tex->offset)) {
+ DRM_ERROR("Invalid destination offset\n");
+ return -EINVAL;
+ }
+
+ /* this might fail for zero-sized uploads - are those illegal? */
+ if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
+ DRM_ERROR("Invalid final destination offset\n");
+ return -EINVAL;
+ }
+
+ size = tex->height * tex->pitch;
+
+ if (size == 0)
+ return 0;
+
+ dst_offset = tex->offset;
+
+ if (r600_prepare_blit_copy(dev, file_priv)) {
+ DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+ return -EAGAIN;
+ }
+ do {
+ data = (const u8 __user *)image->data;
+ pass_size = size;
+
+ buf = radeon_freelist_get(dev);
+ if (!buf) {
+ DRM_DEBUG("EAGAIN\n");
+ if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+
+ if (pass_size > buf->total)
+ pass_size = buf->total;
+
+ /* Dispatch the indirect buffer.
+ */
+ buffer =
+ (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
+
+ if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
+ DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
+ return -EFAULT;
+ }
+
+ buf->file_priv = file_priv;
+ buf->used = pass_size;
+ src_offset = dev_priv->gart_buffers_offset + buf->offset;
+
+ r600_blit_copy(dev, src_offset, dst_offset, pass_size);
+
+ radeon_cp_discard_buffer(dev, file_priv->master, buf);
+
+ /* Update the input parameters for next time */
+ image->data = (const u8 __user *)image->data + pass_size;
+ dst_offset += pass_size;
+ size -= pass_size;
+ } while (size > 0);
+ r600_done_blit_copy(dev);
+
+ return 0;
+}
+
+/*
+ * Legacy cs ioctl
+ */
+static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
+{
+ /* FIXME: check if wrap affect last reported wrap & sequence */
+ radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
+ if (!radeon->cs_id_scnt) {
+ /* increment wrap counter */
+ radeon->cs_id_wcnt += 0x01000000;
+ /* valid sequence counter start at 1 */
+ radeon->cs_id_scnt = 1;
+ }
+ return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
+}
+
+static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
+{
+ RING_LOCALS;
+
+ *id = radeon_cs_id_get(dev_priv);
+
+ /* SCRATCH 2 */
+ BEGIN_RING(3);
+ R600_CLEAR_AGE(*id);
+ ADVANCE_RING();
+ COMMIT_RING();
+}
+
+static int r600_ib_get(struct drm_device *dev,
+ struct drm_file *fpriv,
+ struct drm_buf **buffer)
+{
+ struct drm_buf *buf;
+
+ *buffer = NULL;
+ buf = radeon_freelist_get(dev);
+ if (!buf) {
+ return -EBUSY;
+ }
+ buf->file_priv = fpriv;
+ *buffer = buf;
+ return 0;
+}
+
+static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
+ struct drm_file *fpriv, int l, int r)
+{
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+
+ if (buf) {
+ if (!r)
+ r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
+ radeon_cp_discard_buffer(dev, fpriv->master, buf);
+ COMMIT_RING();
+ }
+}
+
+int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
+{
+ struct drm_radeon_private *dev_priv = dev->dev_private;
+ struct drm_radeon_cs *cs = data;
+ struct drm_buf *buf;
+ unsigned family;
+ int l, r = 0;
+ u32 *ib, cs_id = 0;
+
+ if (dev_priv == NULL) {
+ DRM_ERROR("called with no initialization\n");
+ return -EINVAL;
+ }
+ family = dev_priv->flags & RADEON_FAMILY_MASK;
+ if (family < CHIP_R600) {
+ DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
+ return -EINVAL;
+ }
+ mutex_lock(&dev_priv->cs_mutex);
+ /* get ib */
+ r = r600_ib_get(dev, fpriv, &buf);
+ if (r) {
+ DRM_ERROR("ib_get failed\n");
+ goto out;
+ }
+ ib = dev->agp_buffer_map->handle + buf->offset;
+ /* now parse command stream */
+ r = r600_cs_legacy(dev, data, fpriv, family, ib, &l);
+ if (r) {
+ goto out;
+ }
+
+out:
+ r600_ib_free(dev, buf, fpriv, l, r);
+ /* emit cs id sequence */
+ r600_cs_id_emit(dev_priv, &cs_id);
+ cs->cs_id = cs_id;
+ mutex_unlock(&dev_priv->cs_mutex);
+ return r;
+}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
new file mode 100644
index 0000000..39bf634
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
+typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
+static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
+
+/**
+ * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
+ * @parser: parser structure holding parsing context.
+ * @pkt: where to store packet informations
+ *
+ * Assume that chunk_ib_index is properly set. Will return -EINVAL
+ * if packet is bigger than remaining ib size. or if packets is unknown.
+ **/
+int r600_cs_packet_parse(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ uint32_t header;
+
+ if (idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ header = ib_chunk->kdata[idx];
+ pkt->idx = idx;
+ pkt->type = CP_PACKET_GET_TYPE(header);
+ pkt->count = CP_PACKET_GET_COUNT(header);
+ pkt->one_reg_wr = 0;
+ switch (pkt->type) {
+ case PACKET_TYPE0:
+ pkt->reg = CP_PACKET0_GET_REG(header);
+ break;
+ case PACKET_TYPE3:
+ pkt->opcode = CP_PACKET3_GET_OPCODE(header);
+ break;
+ case PACKET_TYPE2:
+ pkt->count = -1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+ return -EINVAL;
+ }
+ if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
+ DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+ pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * @parser: parser structure holding parsing context.
+ * @data: pointer to relocation data
+ * @offset_start: starting offset
+ * @offset_mask: offset mask (to align start offset on)
+ * @reloc: reloc informations
+ *
+ * Check next packet is relocation packet3, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc)
+{
+ struct radeon_cs_chunk *ib_chunk;
+ struct radeon_cs_chunk *relocs_chunk;
+ struct radeon_cs_packet p3reloc;
+ unsigned idx;
+ int r;
+
+ if (p->chunk_relocs_idx == -1) {
+ DRM_ERROR("No relocation chunk !\n");
+ return -EINVAL;
+ }
+ *cs_reloc = NULL;
+ ib_chunk = &p->chunks[p->chunk_ib_idx];
+ relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+ if (r) {
+ return r;
+ }
+ p->idx += p3reloc.count + 2;
+ if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+ DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+ p3reloc.idx);
+ return -EINVAL;
+ }
+ idx = ib_chunk->kdata[p3reloc.idx + 1];
+ if (idx >= relocs_chunk->length_dw) {
+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+ idx, relocs_chunk->length_dw);
+ return -EINVAL;
+ }
+ /* FIXME: we assume reloc size is 4 dwords */
+ *cs_reloc = p->relocs_ptr[(idx / 4)];
+ return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
+ * @parser: parser structure holding parsing context.
+ * @data: pointer to relocation data
+ * @offset_start: starting offset
+ * @offset_mask: offset mask (to align start offset on)
+ * @reloc: reloc informations
+ *
+ * Check next packet is relocation packet3, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc)
+{
+ struct radeon_cs_chunk *ib_chunk;
+ struct radeon_cs_chunk *relocs_chunk;
+ struct radeon_cs_packet p3reloc;
+ unsigned idx;
+ int r;
+
+ if (p->chunk_relocs_idx == -1) {
+ DRM_ERROR("No relocation chunk !\n");
+ return -EINVAL;
+ }
+ *cs_reloc = NULL;
+ ib_chunk = &p->chunks[p->chunk_ib_idx];
+ relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+ if (r) {
+ return r;
+ }
+ p->idx += p3reloc.count + 2;
+ if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+ DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+ p3reloc.idx);
+ return -EINVAL;
+ }
+ idx = ib_chunk->kdata[p3reloc.idx + 1];
+ if (idx >= relocs_chunk->length_dw) {
+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+ idx, relocs_chunk->length_dw);
+ return -EINVAL;
+ }
+ *cs_reloc = &p->relocs[0];
+ (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
+ (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+ return 0;
+}
+
+static int r600_packet0_check(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx, unsigned reg)
+{
+ switch (reg) {
+ case AVIVO_D1MODE_VLINE_START_END:
+ case AVIVO_D2MODE_VLINE_START_END:
+ break;
+ default:
+ printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
+ reg, idx);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt)
+{
+ unsigned reg, i;
+ unsigned idx;
+ int r;
+
+ idx = pkt->idx + 1;
+ reg = pkt->reg;
+ for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
+ r = r600_packet0_check(p, pkt, idx, reg);
+ if (r) {
+ return r;
+ }
+ }
+ return 0;
+}
+
+static int r600_packet3_check(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt)
+{
+ struct radeon_cs_chunk *ib_chunk;
+ struct radeon_cs_reloc *reloc;
+ volatile u32 *ib;
+ unsigned idx;
+ unsigned i;
+ unsigned start_reg, end_reg, reg;
+ int r;
+
+ ib = p->ib->ptr;
+ ib_chunk = &p->chunks[p->chunk_ib_idx];
+ idx = pkt->idx + 1;
+ switch (pkt->opcode) {
+ case PACKET3_START_3D_CMDBUF:
+ if (p->family >= CHIP_RV770 || pkt->count) {
+ DRM_ERROR("bad START_3D\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_CONTEXT_CONTROL:
+ if (pkt->count != 1) {
+ DRM_ERROR("bad CONTEXT_CONTROL\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_INDEX_TYPE:
+ case PACKET3_NUM_INSTANCES:
+ if (pkt->count) {
+ DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_DRAW_INDEX:
+ if (pkt->count != 3) {
+ DRM_ERROR("bad DRAW_INDEX\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad DRAW_INDEX\n");
+ return -EINVAL;
+ }
+ ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+ break;
+ case PACKET3_DRAW_INDEX_AUTO:
+ if (pkt->count != 1) {
+ DRM_ERROR("bad DRAW_INDEX_AUTO\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_DRAW_INDEX_IMMD_BE:
+ case PACKET3_DRAW_INDEX_IMMD:
+ if (pkt->count < 2) {
+ DRM_ERROR("bad DRAW_INDEX_IMMD\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_WAIT_REG_MEM:
+ if (pkt->count != 5) {
+ DRM_ERROR("bad WAIT_REG_MEM\n");
+ return -EINVAL;
+ }
+ /* bit 4 is reg (0) or mem (1) */
+ if (ib_chunk->kdata[idx+0] & 0x10) {
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad WAIT_REG_MEM\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+ }
+ break;
+ case PACKET3_SURFACE_SYNC:
+ if (pkt->count != 3) {
+ DRM_ERROR("bad SURFACE_SYNC\n");
+ return -EINVAL;
+ }
+ /* 0xffffffff/0x0 is flush all cache flag */
+ if (ib_chunk->kdata[idx+1] != 0xffffffff ||
+ ib_chunk->kdata[idx+2] != 0) {
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad SURFACE_SYNC\n");
+ return -EINVAL;
+ }
+ ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+ }
+ break;
+ case PACKET3_EVENT_WRITE:
+ if (pkt->count != 2 && pkt->count != 0) {
+ DRM_ERROR("bad EVENT_WRITE\n");
+ return -EINVAL;
+ }
+ if (pkt->count) {
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad EVENT_WRITE\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+ }
+ break;
+ case PACKET3_EVENT_WRITE_EOP:
+ if (pkt->count != 4) {
+ DRM_ERROR("bad EVENT_WRITE_EOP\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad EVENT_WRITE\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+ break;
+ case PACKET3_SET_CONFIG_REG:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
+ (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
+ (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
+ DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < pkt->count; i++) {
+ reg = start_reg + (4 * i);
+ switch (reg) {
+ case CP_COHER_BASE:
+ /* use PACKET3_SURFACE_SYNC */
+ return -EINVAL;
+ default:
+ break;
+ }
+ }
+ break;
+ case PACKET3_SET_CONTEXT_REG:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
+ (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
+ (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
+ DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < pkt->count; i++) {
+ reg = start_reg + (4 * i);
+ switch (reg) {
+ case DB_DEPTH_BASE:
+ case CB_COLOR0_BASE:
+ case CB_COLOR1_BASE:
+ case CB_COLOR2_BASE:
+ case CB_COLOR3_BASE:
+ case CB_COLOR4_BASE:
+ case CB_COLOR5_BASE:
+ case CB_COLOR6_BASE:
+ case CB_COLOR7_BASE:
+ case SQ_PGM_START_FS:
+ case SQ_PGM_START_ES:
+ case SQ_PGM_START_VS:
+ case SQ_PGM_START_GS:
+ case SQ_PGM_START_PS:
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad SET_CONTEXT_REG "
+ "0x%04X\n", reg);
+ return -EINVAL;
+ }
+ ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+ break;
+ case VGT_DMA_BASE:
+ case VGT_DMA_BASE_HI:
+ /* These should be handled by DRAW_INDEX packet 3 */
+ case VGT_STRMOUT_BASE_OFFSET_0:
+ case VGT_STRMOUT_BASE_OFFSET_1:
+ case VGT_STRMOUT_BASE_OFFSET_2:
+ case VGT_STRMOUT_BASE_OFFSET_3:
+ case VGT_STRMOUT_BASE_OFFSET_HI_0:
+ case VGT_STRMOUT_BASE_OFFSET_HI_1:
+ case VGT_STRMOUT_BASE_OFFSET_HI_2:
+ case VGT_STRMOUT_BASE_OFFSET_HI_3:
+ case VGT_STRMOUT_BUFFER_BASE_0:
+ case VGT_STRMOUT_BUFFER_BASE_1:
+ case VGT_STRMOUT_BUFFER_BASE_2:
+ case VGT_STRMOUT_BUFFER_BASE_3:
+ case VGT_STRMOUT_BUFFER_OFFSET_0:
+ case VGT_STRMOUT_BUFFER_OFFSET_1:
+ case VGT_STRMOUT_BUFFER_OFFSET_2:
+ case VGT_STRMOUT_BUFFER_OFFSET_3:
+ /* These should be handled by STRMOUT_BUFFER packet 3 */
+ DRM_ERROR("bad context reg: 0x%08x\n", reg);
+ return -EINVAL;
+ default:
+ break;
+ }
+ }
+ break;
+ case PACKET3_SET_RESOURCE:
+ if (pkt->count % 7) {
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
+ (start_reg >= PACKET3_SET_RESOURCE_END) ||
+ (end_reg >= PACKET3_SET_RESOURCE_END)) {
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < (pkt->count / 7); i++) {
+ switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) {
+ case SQ_TEX_VTX_VALID_TEXTURE:
+ /* tex base */
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+ /* tex mip base */
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+ break;
+ case SQ_TEX_VTX_VALID_BUFFER:
+ /* vtx base */
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
+ ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+ break;
+ case SQ_TEX_VTX_INVALID_TEXTURE:
+ case SQ_TEX_VTX_INVALID_BUFFER:
+ default:
+ DRM_ERROR("bad SET_RESOURCE\n");
+ return -EINVAL;
+ }
+ }
+ break;
+ case PACKET3_SET_ALU_CONST:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
+ (start_reg >= PACKET3_SET_ALU_CONST_END) ||
+ (end_reg >= PACKET3_SET_ALU_CONST_END)) {
+ DRM_ERROR("bad SET_ALU_CONST\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_SET_BOOL_CONST:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
+ (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
+ (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
+ DRM_ERROR("bad SET_BOOL_CONST\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_SET_LOOP_CONST:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
+ (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
+ (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
+ DRM_ERROR("bad SET_LOOP_CONST\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_SET_CTL_CONST:
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
+ (start_reg >= PACKET3_SET_CTL_CONST_END) ||
+ (end_reg >= PACKET3_SET_CTL_CONST_END)) {
+ DRM_ERROR("bad SET_CTL_CONST\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_SET_SAMPLER:
+ if (pkt->count % 3) {
+ DRM_ERROR("bad SET_SAMPLER\n");
+ return -EINVAL;
+ }
+ start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
+ (start_reg >= PACKET3_SET_SAMPLER_END) ||
+ (end_reg >= PACKET3_SET_SAMPLER_END)) {
+ DRM_ERROR("bad SET_SAMPLER\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_SURFACE_BASE_UPDATE:
+ if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
+ DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+ return -EINVAL;
+ }
+ if (pkt->count) {
+ DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+ return -EINVAL;
+ }
+ break;
+ case PACKET3_NOP:
+ break;
+ default:
+ DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int r600_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_packet pkt;
+ int r;
+
+ do {
+ r = r600_cs_packet_parse(p, &pkt, p->idx);
+ if (r) {
+ return r;
+ }
+ p->idx += pkt.count + 2;
+ switch (pkt.type) {
+ case PACKET_TYPE0:
+ r = r600_cs_parse_packet0(p, &pkt);
+ break;
+ case PACKET_TYPE2:
+ break;
+ case PACKET_TYPE3:
+ r = r600_packet3_check(p, &pkt);
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+ return -EINVAL;
+ }
+ if (r) {
+ return r;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib->ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
+
+static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
+{
+ if (p->chunk_relocs_idx == -1) {
+ return 0;
+ }
+ p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+ if (p->relocs == NULL) {
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser: parser structure holding parsing context.
+ * @error: error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
+{
+ unsigned i;
+
+ kfree(parser->relocs);
+ for (i = 0; i < parser->nchunks; i++) {
+ kfree(parser->chunks[i].kdata);
+ }
+ kfree(parser->chunks);
+ kfree(parser->chunks_array);
+}
+
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+ unsigned family, u32 *ib, int *l)
+{
+ struct radeon_cs_parser parser;
+ struct radeon_cs_chunk *ib_chunk;
+ struct radeon_ib fake_ib;
+ int r;
+
+ /* initialize parser */
+ memset(&parser, 0, sizeof(struct radeon_cs_parser));
+ parser.filp = filp;
+ parser.rdev = NULL;
+ parser.family = family;
+ parser.ib = &fake_ib;
+ fake_ib.ptr = ib;
+ r = radeon_cs_parser_init(&parser, data);
+ if (r) {
+ DRM_ERROR("Failed to initialize parser !\n");
+ r600_cs_parser_fini(&parser, r);
+ return r;
+ }
+ r = r600_cs_parser_relocs_legacy(&parser);
+ if (r) {
+ DRM_ERROR("Failed to parse relocation !\n");
+ r600_cs_parser_fini(&parser, r);
+ return r;
+ }
+ /* Copy the packet into the IB, the parser will read from the
+ * input memory (cached) and write to the IB (which can be
+ * uncached). */
+ ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+ parser.ib->length_dw = ib_chunk->length_dw;
+ memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
+ *l = parser.ib->length_dw;
+ r = r600_cs_parse(&parser);
+ if (r) {
+ DRM_ERROR("Invalid command stream !\n");
+ r600_cs_parser_fini(&parser, r);
+ return r;
+ }
+ r600_cs_parser_fini(&parser, r);
+ return r;
+}
+
+void r600_cs_legacy_init(void)
+{
+ r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
+}
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
new file mode 100644
index 0000000..723295f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef R600D_H
+#define R600D_H
+
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define R6XX_MAX_SH_GPRS 256
+#define R6XX_MAX_TEMP_GPRS 16
+#define R6XX_MAX_SH_THREADS 256
+#define R6XX_MAX_SH_STACK_ENTRIES 4096
+#define R6XX_MAX_BACKENDS 8
+#define R6XX_MAX_BACKENDS_MASK 0xff
+#define R6XX_MAX_SIMDS 8
+#define R6XX_MAX_SIMDS_MASK 0xff
+#define R6XX_MAX_PIPES 8
+#define R6XX_MAX_PIPES_MASK 0xff
+
+/* PTE flags */
+#define PTE_VALID (1 << 0)
+#define PTE_SYSTEM (1 << 1)
+#define PTE_SNOOPED (1 << 2)
+#define PTE_READABLE (1 << 5)
+#define PTE_WRITEABLE (1 << 6)
+
+/* Registers */
+#define ARB_POP 0x2418
+#define ENABLE_TC128 (1 << 30)
+#define ARB_GDEC_RD_CNTL 0x246C
+
+#define CC_GC_SHADER_PIPE_CONFIG 0x8950
+#define CC_RB_BACKEND_DISABLE 0x98F4
+#define BACKEND_DISABLE(x) ((x) << 16)
+
+#define CB_COLOR0_BASE 0x28040
+#define CB_COLOR1_BASE 0x28044
+#define CB_COLOR2_BASE 0x28048
+#define CB_COLOR3_BASE 0x2804C
+#define CB_COLOR4_BASE 0x28050
+#define CB_COLOR5_BASE 0x28054
+#define CB_COLOR6_BASE 0x28058
+#define CB_COLOR7_BASE 0x2805C
+#define CB_COLOR7_FRAG 0x280FC
+
+#define CB_COLOR0_SIZE 0x28060
+#define CB_COLOR0_VIEW 0x28080
+#define CB_COLOR0_INFO 0x280a0
+#define CB_COLOR0_TILE 0x280c0
+#define CB_COLOR0_FRAG 0x280e0
+#define CB_COLOR0_MASK 0x28100
+
+#define CONFIG_MEMSIZE 0x5428
+#define CP_STAT 0x8680
+#define CP_COHER_BASE 0x85F8
+#define CP_DEBUG 0xC1FC
+#define R_0086D8_CP_ME_CNTL 0x86D8
+#define S_0086D8_CP_ME_HALT(x) (((x) & 1)<<28)
+#define C_0086D8_CP_ME_HALT(x) ((x) & 0xEFFFFFFF)
+#define CP_ME_RAM_DATA 0xC160
+#define CP_ME_RAM_RADDR 0xC158
+#define CP_ME_RAM_WADDR 0xC15C
+#define CP_MEQ_THRESHOLDS 0x8764
+#define MEQ_END(x) ((x) << 16)
+#define ROQ_END(x) ((x) << 24)
+#define CP_PERFMON_CNTL 0x87FC
+#define CP_PFP_UCODE_ADDR 0xC150
+#define CP_PFP_UCODE_DATA 0xC154
+#define CP_QUEUE_THRESHOLDS 0x8760
+#define ROQ_IB1_START(x) ((x) << 0)
+#define ROQ_IB2_START(x) ((x) << 8)
+#define CP_RB_BASE 0xC100
+#define CP_RB_CNTL 0xC104
+#define RB_BUFSZ(x) ((x)<<0)
+#define RB_BLKSZ(x) ((x)<<8)
+#define RB_NO_UPDATE (1<<27)
+#define RB_RPTR_WR_ENA (1<<31)
+#define BUF_SWAP_32BIT (2 << 16)
+#define CP_RB_RPTR 0x8700
+#define CP_RB_RPTR_ADDR 0xC10C
+#define CP_RB_RPTR_ADDR_HI 0xC110
+#define CP_RB_RPTR_WR 0xC108
+#define CP_RB_WPTR 0xC114
+#define CP_RB_WPTR_ADDR 0xC118
+#define CP_RB_WPTR_ADDR_HI 0xC11C
+#define CP_RB_WPTR_DELAY 0x8704
+#define CP_ROQ_IB1_STAT 0x8784
+#define CP_ROQ_IB2_STAT 0x8788
+#define CP_SEM_WAIT_TIMER 0x85BC
+
+#define DB_DEBUG 0x9830
+#define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31)
+#define DB_DEPTH_BASE 0x2800C
+#define DB_WATERMARKS 0x9838
+#define DEPTH_FREE(x) ((x) << 0)
+#define DEPTH_FLUSH(x) ((x) << 5)
+#define DEPTH_PENDING_FREE(x) ((x) << 15)
+#define DEPTH_CACHELINE_FREE(x) ((x) << 20)
+
+#define DCP_TILING_CONFIG 0x6CA0
+#define PIPE_TILING(x) ((x) << 1)
+#define BANK_TILING(x) ((x) << 4)
+#define GROUP_SIZE(x) ((x) << 6)
+#define ROW_TILING(x) ((x) << 8)
+#define BANK_SWAPS(x) ((x) << 11)
+#define SAMPLE_SPLIT(x) ((x) << 14)
+#define BACKEND_MAP(x) ((x) << 16)
+
+#define GB_TILING_CONFIG 0x98F0
+
+#define GC_USER_SHADER_PIPE_CONFIG 0x8954
+#define INACTIVE_QD_PIPES(x) ((x) << 8)
+#define INACTIVE_QD_PIPES_MASK 0x0000FF00
+#define INACTIVE_SIMDS(x) ((x) << 16)
+#define INACTIVE_SIMDS_MASK 0x00FF0000
+
+#define SQ_CONFIG 0x8c00
+# define VC_ENABLE (1 << 0)
+# define EXPORT_SRC_C (1 << 1)
+# define DX9_CONSTS (1 << 2)
+# define ALU_INST_PREFER_VECTOR (1 << 3)
+# define DX10_CLAMP (1 << 4)
+# define CLAUSE_SEQ_PRIO(x) ((x) << 8)
+# define PS_PRIO(x) ((x) << 24)
+# define VS_PRIO(x) ((x) << 26)
+# define GS_PRIO(x) ((x) << 28)
+# define ES_PRIO(x) ((x) << 30)
+#define SQ_GPR_RESOURCE_MGMT_1 0x8c04
+# define NUM_PS_GPRS(x) ((x) << 0)
+# define NUM_VS_GPRS(x) ((x) << 16)
+# define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28)
+#define SQ_GPR_RESOURCE_MGMT_2 0x8c08
+# define NUM_GS_GPRS(x) ((x) << 0)
+# define NUM_ES_GPRS(x) ((x) << 16)
+#define SQ_THREAD_RESOURCE_MGMT 0x8c0c
+# define NUM_PS_THREADS(x) ((x) << 0)
+# define NUM_VS_THREADS(x) ((x) << 8)
+# define NUM_GS_THREADS(x) ((x) << 16)
+# define NUM_ES_THREADS(x) ((x) << 24)
+#define SQ_STACK_RESOURCE_MGMT_1 0x8c10
+# define NUM_PS_STACK_ENTRIES(x) ((x) << 0)
+# define NUM_VS_STACK_ENTRIES(x) ((x) << 16)
+#define SQ_STACK_RESOURCE_MGMT_2 0x8c14
+# define NUM_GS_STACK_ENTRIES(x) ((x) << 0)
+# define NUM_ES_STACK_ENTRIES(x) ((x) << 16)
+
+#define GRBM_CNTL 0x8000
+# define GRBM_READ_TIMEOUT(x) ((x) << 0)
+#define GRBM_STATUS 0x8010
+#define CMDFIFO_AVAIL_MASK 0x0000001F
+#define GUI_ACTIVE (1<<31)
+#define GRBM_STATUS2 0x8014
+#define GRBM_SOFT_RESET 0x8020
+#define SOFT_RESET_CP (1<<0)
+
+#define HDP_HOST_PATH_CNTL 0x2C00
+#define HDP_NONSURFACE_BASE 0x2C04
+#define HDP_NONSURFACE_INFO 0x2C08
+#define HDP_NONSURFACE_SIZE 0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0
+#define HDP_TILING_CONFIG 0x2F3C
+
+#define MC_VM_AGP_TOP 0x2184
+#define MC_VM_AGP_BOT 0x2188
+#define MC_VM_AGP_BASE 0x218C
+#define MC_VM_FB_LOCATION 0x2180
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C
+#define ENABLE_L1_TLB (1 << 0)
+#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
+#define ENABLE_L1_STRICT_ORDERING (1 << 2)
+#define SYSTEM_ACCESS_MODE_MASK 0x000000C0
+#define SYSTEM_ACCESS_MODE_SHIFT 6
+#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 6)
+#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 6)
+#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 6)
+#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 6)
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 8)
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE (1 << 8)
+#define ENABLE_SEMAPHORE_MODE (1 << 10)
+#define ENABLE_WAIT_L2_QUERY (1 << 11)
+#define EFFECTIVE_L1_TLB_SIZE(x) (((x) & 7) << 12)
+#define EFFECTIVE_L1_TLB_SIZE_MASK 0x00007000
+#define EFFECTIVE_L1_TLB_SIZE_SHIFT 12
+#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15)
+#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000
+#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15
+#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0
+#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC
+#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204
+#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208
+#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C
+#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200
+#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4
+#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8
+#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210
+#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL 0x2218
+#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL 0x221C
+#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL 0x2220
+#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL 0x2214
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2190
+#define LOGICAL_PAGE_NUMBER_MASK 0x000FFFFF
+#define LOGICAL_PAGE_NUMBER_SHIFT 0
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198
+
+#define PA_CL_ENHANCE 0x8A14
+#define CLIP_VTX_REORDER_ENA (1 << 0)
+#define NUM_CLIP_SEQ(x) ((x) << 1)
+#define PA_SC_AA_CONFIG 0x28C04
+#define PA_SC_AA_SAMPLE_LOCS_2S 0x8B40
+#define PA_SC_AA_SAMPLE_LOCS_4S 0x8B44
+#define PA_SC_AA_SAMPLE_LOCS_8S_WD0 0x8B48
+#define PA_SC_AA_SAMPLE_LOCS_8S_WD1 0x8B4C
+#define S0_X(x) ((x) << 0)
+#define S0_Y(x) ((x) << 4)
+#define S1_X(x) ((x) << 8)
+#define S1_Y(x) ((x) << 12)
+#define S2_X(x) ((x) << 16)
+#define S2_Y(x) ((x) << 20)
+#define S3_X(x) ((x) << 24)
+#define S3_Y(x) ((x) << 28)
+#define S4_X(x) ((x) << 0)
+#define S4_Y(x) ((x) << 4)
+#define S5_X(x) ((x) << 8)
+#define S5_Y(x) ((x) << 12)
+#define S6_X(x) ((x) << 16)
+#define S6_Y(x) ((x) << 20)
+#define S7_X(x) ((x) << 24)
+#define S7_Y(x) ((x) << 28)
+#define PA_SC_CLIPRECT_RULE 0x2820c
+#define PA_SC_ENHANCE 0x8BF0
+#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
+#define FORCE_EOV_MAX_TILE_CNT(x) ((x) << 12)
+#define PA_SC_LINE_STIPPLE 0x28A0C
+#define PA_SC_LINE_STIPPLE_STATE 0x8B10
+#define PA_SC_MODE_CNTL 0x28A4C
+#define PA_SC_MULTI_CHIP_CNTL 0x8B20
+
+#define PA_SC_SCREEN_SCISSOR_TL 0x28030
+#define PA_SC_GENERIC_SCISSOR_TL 0x28240
+#define PA_SC_WINDOW_SCISSOR_TL 0x28204
+
+#define PCIE_PORT_INDEX 0x0038
+#define PCIE_PORT_DATA 0x003C
+
+#define RAMCFG 0x2408
+#define NOOFBANK_SHIFT 0
+#define NOOFBANK_MASK 0x00000001
+#define NOOFRANK_SHIFT 1
+#define NOOFRANK_MASK 0x00000002
+#define NOOFROWS_SHIFT 2
+#define NOOFROWS_MASK 0x0000001C
+#define NOOFCOLS_SHIFT 5
+#define NOOFCOLS_MASK 0x00000060
+#define CHANSIZE_SHIFT 7
+#define CHANSIZE_MASK 0x00000080
+#define BURSTLENGTH_SHIFT 8
+#define BURSTLENGTH_MASK 0x00000100
+#define CHANSIZE_OVERRIDE (1 << 10)
+
+#define SCRATCH_REG0 0x8500
+#define SCRATCH_REG1 0x8504
+#define SCRATCH_REG2 0x8508
+#define SCRATCH_REG3 0x850C
+#define SCRATCH_REG4 0x8510
+#define SCRATCH_REG5 0x8514
+#define SCRATCH_REG6 0x8518
+#define SCRATCH_REG7 0x851C
+#define SCRATCH_UMSK 0x8540
+#define SCRATCH_ADDR 0x8544
+
+#define SPI_CONFIG_CNTL 0x9100
+#define GPR_WRITE_PRIORITY(x) ((x) << 0)
+#define DISABLE_INTERP_1 (1 << 5)
+#define SPI_CONFIG_CNTL_1 0x913C
+#define VTX_DONE_DELAY(x) ((x) << 0)
+#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
+#define SPI_INPUT_Z 0x286D8
+#define SPI_PS_IN_CONTROL_0 0x286CC
+#define NUM_INTERP(x) ((x)<<0)
+#define POSITION_ENA (1<<8)
+#define POSITION_CENTROID (1<<9)
+#define POSITION_ADDR(x) ((x)<<10)
+#define PARAM_GEN(x) ((x)<<15)
+#define PARAM_GEN_ADDR(x) ((x)<<19)
+#define BARYC_SAMPLE_CNTL(x) ((x)<<26)
+#define PERSP_GRADIENT_ENA (1<<28)
+#define LINEAR_GRADIENT_ENA (1<<29)
+#define POSITION_SAMPLE (1<<30)
+#define BARYC_AT_SAMPLE_ENA (1<<31)
+#define SPI_PS_IN_CONTROL_1 0x286D0
+#define GEN_INDEX_PIX (1<<0)
+#define GEN_INDEX_PIX_ADDR(x) ((x)<<1)
+#define FRONT_FACE_ENA (1<<8)
+#define FRONT_FACE_CHAN(x) ((x)<<9)
+#define FRONT_FACE_ALL_BITS (1<<11)
+#define FRONT_FACE_ADDR(x) ((x)<<12)
+#define FOG_ADDR(x) ((x)<<17)
+#define FIXED_PT_POSITION_ENA (1<<24)
+#define FIXED_PT_POSITION_ADDR(x) ((x)<<25)
+
+#define SQ_MS_FIFO_SIZES 0x8CF0
+#define CACHE_FIFO_SIZE(x) ((x) << 0)
+#define FETCH_FIFO_HIWATER(x) ((x) << 8)
+#define DONE_FIFO_HIWATER(x) ((x) << 16)
+#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24)
+#define SQ_PGM_START_ES 0x28880
+#define SQ_PGM_START_FS 0x28894
+#define SQ_PGM_START_GS 0x2886C
+#define SQ_PGM_START_PS 0x28840
+#define SQ_PGM_RESOURCES_PS 0x28850
+#define SQ_PGM_EXPORTS_PS 0x28854
+#define SQ_PGM_CF_OFFSET_PS 0x288cc
+#define SQ_PGM_START_VS 0x28858
+#define SQ_PGM_RESOURCES_VS 0x28868
+#define SQ_PGM_CF_OFFSET_VS 0x288d0
+#define SQ_VTX_CONSTANT_WORD6_0 0x38018
+#define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30)
+#define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3)
+#define SQ_TEX_VTX_INVALID_TEXTURE 0x0
+#define SQ_TEX_VTX_INVALID_BUFFER 0x1
+#define SQ_TEX_VTX_VALID_TEXTURE 0x2
+#define SQ_TEX_VTX_VALID_BUFFER 0x3
+
+
+#define SX_MISC 0x28350
+#define SX_DEBUG_1 0x9054
+#define SMX_EVENT_RELEASE (1 << 0)
+#define ENABLE_NEW_SMX_ADDRESS (1 << 16)
+
+#define TA_CNTL_AUX 0x9508
+#define DISABLE_CUBE_WRAP (1 << 0)
+#define DISABLE_CUBE_ANISO (1 << 1)
+#define SYNC_GRADIENT (1 << 24)
+#define SYNC_WALKER (1 << 25)
+#define SYNC_ALIGNER (1 << 26)
+#define BILINEAR_PRECISION_6_BIT (0 << 31)
+#define BILINEAR_PRECISION_8_BIT (1 << 31)
+
+#define TC_CNTL 0x9608
+#define TC_L2_SIZE(x) ((x)<<5)
+#define L2_DISABLE_LATE_HIT (1<<9)
+
+
+#define VGT_CACHE_INVALIDATION 0x88C4
+#define CACHE_INVALIDATION(x) ((x)<<0)
+#define VC_ONLY 0
+#define TC_ONLY 1
+#define VC_AND_TC 2
+#define VGT_DMA_BASE 0x287E8
+#define VGT_DMA_BASE_HI 0x287E4
+#define VGT_ES_PER_GS 0x88CC
+#define VGT_GS_PER_ES 0x88C8
+#define VGT_GS_PER_VS 0x88E8
+#define VGT_GS_VERTEX_REUSE 0x88D4
+#define VGT_PRIMITIVE_TYPE 0x8958
+#define VGT_NUM_INSTANCES 0x8974
+#define VGT_OUT_DEALLOC_CNTL 0x28C5C
+#define DEALLOC_DIST_MASK 0x0000007F
+#define VGT_STRMOUT_BASE_OFFSET_0 0x28B10
+#define VGT_STRMOUT_BASE_OFFSET_1 0x28B14
+#define VGT_STRMOUT_BASE_OFFSET_2 0x28B18
+#define VGT_STRMOUT_BASE_OFFSET_3 0x28B1c
+#define VGT_STRMOUT_BASE_OFFSET_HI_0 0x28B44
+#define VGT_STRMOUT_BASE_OFFSET_HI_1 0x28B48
+#define VGT_STRMOUT_BASE_OFFSET_HI_2 0x28B4c
+#define VGT_STRMOUT_BASE_OFFSET_HI_3 0x28B50
+#define VGT_STRMOUT_BUFFER_BASE_0 0x28AD8
+#define VGT_STRMOUT_BUFFER_BASE_1 0x28AE8
+#define VGT_STRMOUT_BUFFER_BASE_2 0x28AF8
+#define VGT_STRMOUT_BUFFER_BASE_3 0x28B08
+#define VGT_STRMOUT_BUFFER_OFFSET_0 0x28ADC
+#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC
+#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC
+#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C
+#define VGT_STRMOUT_EN 0x28AB0
+#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58
+#define VTX_REUSE_DEPTH_MASK 0x000000FF
+#define VGT_EVENT_INITIATOR 0x28a90
+# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0)
+
+#define VM_CONTEXT0_CNTL 0x1410
+#define ENABLE_CONTEXT (1 << 0)
+#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define VM_CONTEXT0_INVALIDATION_LOW_ADDR 0x1490
+#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR 0x14B0
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x1574
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x1594
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x15B4
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1554
+#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
+#define REQUEST_TYPE(x) (((x) & 0xf) << 0)
+#define RESPONSE_TYPE_MASK 0x000000F0
+#define RESPONSE_TYPE_SHIFT 4
+#define VM_L2_CNTL 0x1400
+#define ENABLE_L2_CACHE (1 << 0)
+#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
+#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
+#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 13)
+#define VM_L2_CNTL2 0x1404
+#define INVALIDATE_ALL_L1_TLBS (1 << 0)
+#define INVALIDATE_L2_CACHE (1 << 1)
+#define VM_L2_CNTL3 0x1408
+#define BANK_SELECT_0(x) (((x) & 0x1f) << 0)
+#define BANK_SELECT_1(x) (((x) & 0x1f) << 5)
+#define L2_CACHE_UPDATE_MODE(x) (((x) & 3) << 10)
+#define VM_L2_STATUS 0x140C
+#define L2_BUSY (1 << 0)
+
+#define WAIT_UNTIL 0x8040
+#define WAIT_2D_IDLE_bit (1 << 14)
+#define WAIT_3D_IDLE_bit (1 << 15)
+#define WAIT_2D_IDLECLEAN_bit (1 << 16)
+#define WAIT_3D_IDLECLEAN_bit (1 << 17)
+
+
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ (((reg) >> 2) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_INDIRECT_BUFFER_END 0x17
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_START_3D_CMDBUF 0x24
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_DRAW_INDEX_IMMD_BE 0x29
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDEX 0x2B
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_DRAW_INDEX_IMMD 0x2E
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_INDIRECT_BUFFER_MP 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+#define PACKET3_MPEG_INDEX 0x3A
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define PACKET3_MEM_WRITE 0x3D
+#define PACKET3_INDIRECT_BUFFER 0x32
+#define PACKET3_CP_INTERRUPT 0x40
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_TC_ACTION_ENA (1 << 23)
+# define PACKET3_VC_ACTION_ENA (1 << 24)
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_ACTION_ENA (1 << 27)
+# define PACKET3_SMX_ACTION_ENA (1 << 28)
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_ONE_REG_WRITE 0x57
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_OFFSET 0x00008000
+#define PACKET3_SET_CONFIG_REG_END 0x0000ac00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_OFFSET 0x00028000
+#define PACKET3_SET_CONTEXT_REG_END 0x00029000
+#define PACKET3_SET_ALU_CONST 0x6A
+#define PACKET3_SET_ALU_CONST_OFFSET 0x00030000
+#define PACKET3_SET_ALU_CONST_END 0x00032000
+#define PACKET3_SET_BOOL_CONST 0x6B
+#define PACKET3_SET_BOOL_CONST_OFFSET 0x0003e380
+#define PACKET3_SET_BOOL_CONST_END 0x00040000
+#define PACKET3_SET_LOOP_CONST 0x6C
+#define PACKET3_SET_LOOP_CONST_OFFSET 0x0003e200
+#define PACKET3_SET_LOOP_CONST_END 0x0003e380
+#define PACKET3_SET_RESOURCE 0x6D
+#define PACKET3_SET_RESOURCE_OFFSET 0x00038000
+#define PACKET3_SET_RESOURCE_END 0x0003c000
+#define PACKET3_SET_SAMPLER 0x6E
+#define PACKET3_SET_SAMPLER_OFFSET 0x0003c000
+#define PACKET3_SET_SAMPLER_END 0x0003cff0
+#define PACKET3_SET_CTL_CONST 0x6F
+#define PACKET3_SET_CTL_CONST_OFFSET 0x0003cff0
+#define PACKET3_SET_CTL_CONST_END 0x0003e200
+#define PACKET3_SURFACE_BASE_UPDATE 0x73
+
+
+#define R_008020_GRBM_SOFT_RESET 0x8020
+#define S_008020_SOFT_RESET_CP(x) (((x) & 1) << 0)
+#define S_008020_SOFT_RESET_CB(x) (((x) & 1) << 1)
+#define S_008020_SOFT_RESET_CR(x) (((x) & 1) << 2)
+#define S_008020_SOFT_RESET_DB(x) (((x) & 1) << 3)
+#define S_008020_SOFT_RESET_PA(x) (((x) & 1) << 5)
+#define S_008020_SOFT_RESET_SC(x) (((x) & 1) << 6)
+#define S_008020_SOFT_RESET_SMX(x) (((x) & 1) << 7)
+#define S_008020_SOFT_RESET_SPI(x) (((x) & 1) << 8)
+#define S_008020_SOFT_RESET_SH(x) (((x) & 1) << 9)
+#define S_008020_SOFT_RESET_SX(x) (((x) & 1) << 10)
+#define S_008020_SOFT_RESET_TC(x) (((x) & 1) << 11)
+#define S_008020_SOFT_RESET_TA(x) (((x) & 1) << 12)
+#define S_008020_SOFT_RESET_VC(x) (((x) & 1) << 13)
+#define S_008020_SOFT_RESET_VGT(x) (((x) & 1) << 14)
+#define R_008010_GRBM_STATUS 0x8010
+#define S_008010_CMDFIFO_AVAIL(x) (((x) & 0x1F) << 0)
+#define S_008010_CP_RQ_PENDING(x) (((x) & 1) << 6)
+#define S_008010_CF_RQ_PENDING(x) (((x) & 1) << 7)
+#define S_008010_PF_RQ_PENDING(x) (((x) & 1) << 8)
+#define S_008010_GRBM_EE_BUSY(x) (((x) & 1) << 10)
+#define S_008010_VC_BUSY(x) (((x) & 1) << 11)
+#define S_008010_DB03_CLEAN(x) (((x) & 1) << 12)
+#define S_008010_CB03_CLEAN(x) (((x) & 1) << 13)
+#define S_008010_VGT_BUSY_NO_DMA(x) (((x) & 1) << 16)
+#define S_008010_VGT_BUSY(x) (((x) & 1) << 17)
+#define S_008010_TA03_BUSY(x) (((x) & 1) << 18)
+#define S_008010_TC_BUSY(x) (((x) & 1) << 19)
+#define S_008010_SX_BUSY(x) (((x) & 1) << 20)
+#define S_008010_SH_BUSY(x) (((x) & 1) << 21)
+#define S_008010_SPI03_BUSY(x) (((x) & 1) << 22)
+#define S_008010_SMX_BUSY(x) (((x) & 1) << 23)
+#define S_008010_SC_BUSY(x) (((x) & 1) << 24)
+#define S_008010_PA_BUSY(x) (((x) & 1) << 25)
+#define S_008010_DB03_BUSY(x) (((x) & 1) << 26)
+#define S_008010_CR_BUSY(x) (((x) & 1) << 27)
+#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 1) << 28)
+#define S_008010_CP_BUSY(x) (((x) & 1) << 29)
+#define S_008010_CB03_BUSY(x) (((x) & 1) << 30)
+#define S_008010_GUI_ACTIVE(x) (((x) & 1) << 31)
+#define G_008010_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x1F)
+#define G_008010_CP_RQ_PENDING(x) (((x) >> 6) & 1)
+#define G_008010_CF_RQ_PENDING(x) (((x) >> 7) & 1)
+#define G_008010_PF_RQ_PENDING(x) (((x) >> 8) & 1)
+#define G_008010_GRBM_EE_BUSY(x) (((x) >> 10) & 1)
+#define G_008010_VC_BUSY(x) (((x) >> 11) & 1)
+#define G_008010_DB03_CLEAN(x) (((x) >> 12) & 1)
+#define G_008010_CB03_CLEAN(x) (((x) >> 13) & 1)
+#define G_008010_VGT_BUSY_NO_DMA(x) (((x) >> 16) & 1)
+#define G_008010_VGT_BUSY(x) (((x) >> 17) & 1)
+#define G_008010_TA03_BUSY(x) (((x) >> 18) & 1)
+#define G_008010_TC_BUSY(x) (((x) >> 19) & 1)
+#define G_008010_SX_BUSY(x) (((x) >> 20) & 1)
+#define G_008010_SH_BUSY(x) (((x) >> 21) & 1)
+#define G_008010_SPI03_BUSY(x) (((x) >> 22) & 1)
+#define G_008010_SMX_BUSY(x) (((x) >> 23) & 1)
+#define G_008010_SC_BUSY(x) (((x) >> 24) & 1)
+#define G_008010_PA_BUSY(x) (((x) >> 25) & 1)
+#define G_008010_DB03_BUSY(x) (((x) >> 26) & 1)
+#define G_008010_CR_BUSY(x) (((x) >> 27) & 1)
+#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 1)
+#define G_008010_CP_BUSY(x) (((x) >> 29) & 1)
+#define G_008010_CB03_BUSY(x) (((x) >> 30) & 1)
+#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 1)
+#define R_008014_GRBM_STATUS2 0x8014
+#define S_008014_CR_CLEAN(x) (((x) & 1) << 0)
+#define S_008014_SMX_CLEAN(x) (((x) & 1) << 1)
+#define S_008014_SPI0_BUSY(x) (((x) & 1) << 8)
+#define S_008014_SPI1_BUSY(x) (((x) & 1) << 9)
+#define S_008014_SPI2_BUSY(x) (((x) & 1) << 10)
+#define S_008014_SPI3_BUSY(x) (((x) & 1) << 11)
+#define S_008014_TA0_BUSY(x) (((x) & 1) << 12)
+#define S_008014_TA1_BUSY(x) (((x) & 1) << 13)
+#define S_008014_TA2_BUSY(x) (((x) & 1) << 14)
+#define S_008014_TA3_BUSY(x) (((x) & 1) << 15)
+#define S_008014_DB0_BUSY(x) (((x) & 1) << 16)
+#define S_008014_DB1_BUSY(x) (((x) & 1) << 17)
+#define S_008014_DB2_BUSY(x) (((x) & 1) << 18)
+#define S_008014_DB3_BUSY(x) (((x) & 1) << 19)
+#define S_008014_CB0_BUSY(x) (((x) & 1) << 20)
+#define S_008014_CB1_BUSY(x) (((x) & 1) << 21)
+#define S_008014_CB2_BUSY(x) (((x) & 1) << 22)
+#define S_008014_CB3_BUSY(x) (((x) & 1) << 23)
+#define G_008014_CR_CLEAN(x) (((x) >> 0) & 1)
+#define G_008014_SMX_CLEAN(x) (((x) >> 1) & 1)
+#define G_008014_SPI0_BUSY(x) (((x) >> 8) & 1)
+#define G_008014_SPI1_BUSY(x) (((x) >> 9) & 1)
+#define G_008014_SPI2_BUSY(x) (((x) >> 10) & 1)
+#define G_008014_SPI3_BUSY(x) (((x) >> 11) & 1)
+#define G_008014_TA0_BUSY(x) (((x) >> 12) & 1)
+#define G_008014_TA1_BUSY(x) (((x) >> 13) & 1)
+#define G_008014_TA2_BUSY(x) (((x) >> 14) & 1)
+#define G_008014_TA3_BUSY(x) (((x) >> 15) & 1)
+#define G_008014_DB0_BUSY(x) (((x) >> 16) & 1)
+#define G_008014_DB1_BUSY(x) (((x) >> 17) & 1)
+#define G_008014_DB2_BUSY(x) (((x) >> 18) & 1)
+#define G_008014_DB3_BUSY(x) (((x) >> 19) & 1)
+#define G_008014_CB0_BUSY(x) (((x) >> 20) & 1)
+#define G_008014_CB1_BUSY(x) (((x) >> 21) & 1)
+#define G_008014_CB2_BUSY(x) (((x) >> 22) & 1)
+#define G_008014_CB3_BUSY(x) (((x) >> 23) & 1)
+#define R_000E50_SRBM_STATUS 0x0E50
+#define G_000E50_RLC_RQ_PENDING(x) (((x) >> 3) & 1)
+#define G_000E50_RCU_RQ_PENDING(x) (((x) >> 4) & 1)
+#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 1)
+#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 1)
+#define G_000E50_IO_EXTERN_SIGNAL(x) (((x) >> 7) & 1)
+#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 1)
+#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 1)
+#define G_000E50_MCDZ_BUSY(x) (((x) >> 10) & 1)
+#define G_000E50_MCDY_BUSY(x) (((x) >> 11) & 1)
+#define G_000E50_MCDX_BUSY(x) (((x) >> 12) & 1)
+#define G_000E50_MCDW_BUSY(x) (((x) >> 13) & 1)
+#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 1)
+#define G_000E50_RLC_BUSY(x) (((x) >> 15) & 1)
+#define R_000E60_SRBM_SOFT_RESET 0x0E60
+#define S_000E60_SOFT_RESET_BIF(x) (((x) & 1) << 1)
+#define S_000E60_SOFT_RESET_CG(x) (((x) & 1) << 2)
+#define S_000E60_SOFT_RESET_CMC(x) (((x) & 1) << 3)
+#define S_000E60_SOFT_RESET_CSC(x) (((x) & 1) << 4)
+#define S_000E60_SOFT_RESET_DC(x) (((x) & 1) << 5)
+#define S_000E60_SOFT_RESET_GRBM(x) (((x) & 1) << 8)
+#define S_000E60_SOFT_RESET_HDP(x) (((x) & 1) << 9)
+#define S_000E60_SOFT_RESET_IH(x) (((x) & 1) << 10)
+#define S_000E60_SOFT_RESET_MC(x) (((x) & 1) << 11)
+#define S_000E60_SOFT_RESET_RLC(x) (((x) & 1) << 13)
+#define S_000E60_SOFT_RESET_ROM(x) (((x) & 1) << 14)
+#define S_000E60_SOFT_RESET_SEM(x) (((x) & 1) << 15)
+#define S_000E60_SOFT_RESET_TSC(x) (((x) & 1) << 16)
+#define S_000E60_SOFT_RESET_VMC(x) (((x) & 1) << 17)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e47f2fc..3299733 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -50,8 +50,8 @@
#include <linux/kref.h>
#include "radeon_mode.h"
+#include "radeon_share.h"
#include "radeon_reg.h"
-#include "r300.h"
/*
* Modules parameters.
@@ -112,10 +112,11 @@ enum radeon_family {
CHIP_RV635,
CHIP_RV670,
CHIP_RS780,
+ CHIP_RS880,
CHIP_RV770,
CHIP_RV730,
CHIP_RV710,
- CHIP_RS880,
+ CHIP_RV740,
CHIP_LAST,
};
@@ -152,10 +153,21 @@ struct radeon_device;
*/
bool radeon_get_bios(struct radeon_device *rdev);
+
/*
- * Clocks
+ * Dummy page
*/
+struct radeon_dummy_page {
+ struct page *page;
+ dma_addr_t addr;
+};
+int radeon_dummy_page_init(struct radeon_device *rdev);
+void radeon_dummy_page_fini(struct radeon_device *rdev);
+
+/*
+ * Clocks
+ */
struct radeon_clock {
struct radeon_pll p1pll;
struct radeon_pll p2pll;
@@ -166,6 +178,7 @@ struct radeon_clock {
uint32_t default_sclk;
};
+
/*
* Fences.
*/
@@ -332,14 +345,18 @@ struct radeon_mc {
resource_size_t aper_size;
resource_size_t aper_base;
resource_size_t agp_base;
- unsigned gtt_location;
- unsigned gtt_size;
- unsigned vram_location;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
- unsigned mc_vram_size;
+ u64 mc_vram_size;
+ u64 gtt_location;
+ u64 gtt_size;
+ u64 gtt_start;
+ u64 gtt_end;
+ u64 vram_location;
+ u64 vram_start;
+ u64 vram_end;
unsigned vram_width;
- unsigned real_vram_size;
+ u64 real_vram_size;
int vram_mtrr;
bool vram_is_ddr;
};
@@ -411,6 +428,16 @@ struct radeon_cp {
bool ready;
};
+struct r600_blit {
+ struct radeon_object *shader_obj;
+ u64 shader_gpu_addr;
+ u32 vs_offset, ps_offset;
+ u32 state_offset;
+ u32 state_len;
+ u32 vb_used, vb_total;
+ struct radeon_ib *vb_ib;
+};
+
int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -463,6 +490,7 @@ struct radeon_cs_parser {
int chunk_relocs_idx;
struct radeon_ib *ib;
void *track;
+ unsigned family;
};
struct radeon_cs_packet {
@@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev);
*/
struct radeon_asic {
int (*init)(struct radeon_device *rdev);
+ void (*fini)(struct radeon_device *rdev);
+ int (*resume)(struct radeon_device *rdev);
+ int (*suspend)(struct radeon_device *rdev);
void (*errata)(struct radeon_device *rdev);
void (*vram_info)(struct radeon_device *rdev);
int (*gpu_reset)(struct radeon_device *rdev);
@@ -573,7 +604,11 @@ struct radeon_asic {
int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
void (*cp_fini)(struct radeon_device *rdev);
void (*cp_disable)(struct radeon_device *rdev);
+ void (*cp_commit)(struct radeon_device *rdev);
void (*ring_start)(struct radeon_device *rdev);
+ int (*ring_test)(struct radeon_device *rdev);
+ void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+ int (*ib_test)(struct radeon_device *rdev);
int (*irq_set)(struct radeon_device *rdev);
int (*irq_process)(struct radeon_device *rdev);
u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
@@ -613,6 +648,8 @@ struct r100_asic {
union radeon_asic_config {
struct r300_asic r300;
struct r100_asic r100;
+ struct r600_asic r600;
+ struct rv770_asic rv770;
};
@@ -698,12 +735,16 @@ struct radeon_device {
struct radeon_pm pm;
struct mutex cs_mutex;
struct radeon_wb wb;
+ struct radeon_dummy_page dummy_page;
bool gpu_lockup;
bool shutdown;
bool suspend;
bool need_dma32;
+ bool new_init_path;
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
- const struct firmware *fw; /* firmware */
+ const struct firmware *me_fw; /* all family ME firmware */
+ const struct firmware *pfp_fw; /* r6/700 PFP firmware */
+ struct r600_blit r600_blit;
};
int radeon_device_init(struct radeon_device *rdev,
@@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
+/* r600 blit */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
+void r600_kms_blit_copy(struct radeon_device *rdev,
+ u64 src_gpu_addr, u64 dst_gpu_addr,
+ int size_bytes);
+
static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
{
if (reg < 0x10000)
@@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
#define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
#define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
#define RREG32(reg) r100_mm_rreg(rdev, (reg))
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
@@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
/*
* Indirect registers accessor
@@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev);
/*
* RING helpers.
*/
-#define CP_PACKET0 0x00000000
-#define PACKET0_BASE_INDEX_SHIFT 0
-#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0)
-#define PACKET0_COUNT_SHIFT 16
-#define PACKET0_COUNT_MASK (0x3fff << 16)
-#define CP_PACKET1 0x40000000
-#define CP_PACKET2 0x80000000
-#define PACKET2_PAD_SHIFT 0
-#define PACKET2_PAD_MASK (0x3fffffff << 0)
-#define CP_PACKET3 0xC0000000
-#define PACKET3_IT_OPCODE_SHIFT 8
-#define PACKET3_IT_OPCODE_MASK (0xff << 8)
-#define PACKET3_COUNT_SHIFT 16
-#define PACKET3_COUNT_MASK (0x3fff << 16)
-/* PACKET3 op code */
-#define PACKET3_NOP 0x10
-#define PACKET3_3D_DRAW_VBUF 0x28
-#define PACKET3_3D_DRAW_IMMD 0x29
-#define PACKET3_3D_DRAW_INDX 0x2A
-#define PACKET3_3D_LOAD_VBPNTR 0x2F
-#define PACKET3_INDX_BUFFER 0x33
-#define PACKET3_3D_DRAW_VBUF_2 0x34
-#define PACKET3_3D_DRAW_IMMD_2 0x35
-#define PACKET3_3D_DRAW_INDX_2 0x36
-#define PACKET3_BITBLT_MULTI 0x9B
-
-#define PACKET0(reg, n) (CP_PACKET0 | \
- REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \
- REG_SET(PACKET0_COUNT, (n)))
-#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
-#define PACKET3(op, n) (CP_PACKET3 | \
- REG_SET(PACKET3_IT_OPCODE, (op)) | \
- REG_SET(PACKET3_COUNT, (n)))
-
-#define PACKET_TYPE0 0
-#define PACKET_TYPE1 1
-#define PACKET_TYPE2 2
-#define PACKET_TYPE3 3
-
-#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
-#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
-#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
-#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
-#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
-
static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
{
#if DRM_DEBUG_CODE
@@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
* ASICs macro.
*/
#define radeon_init(rdev) (rdev)->asic->init((rdev))
+#define radeon_fini(rdev) (rdev)->asic->fini((rdev))
+#define radeon_resume(rdev) (rdev)->asic->resume((rdev))
+#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
#define radeon_cs_parse(p) rdev->asic->cs_parse((p))
#define radeon_errata(rdev) (rdev)->asic->errata((rdev))
#define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev))
@@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
#define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize))
#define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev))
#define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev))
+#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev))
#define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
+#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev))
+#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib))
+#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev))
#define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
#define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
#define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c9cbd8a..e87bb91 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
void r100_cp_fini(struct radeon_device *rdev);
void r100_cp_disable(struct radeon_device *rdev);
+void r100_cp_commit(struct radeon_device *rdev);
void r100_ring_start(struct radeon_device *rdev);
int r100_irq_set(struct radeon_device *rdev);
int r100_irq_process(struct radeon_device *rdev);
@@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t offset, uint32_t obj_size);
int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
void r100_bandwidth_update(struct radeon_device *rdev);
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r100_ib_test(struct radeon_device *rdev);
+int r100_ring_test(struct radeon_device *rdev);
static struct radeon_asic r100_asic = {
.init = &r100_init,
@@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r100_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &r100_irq_set,
.irq_process = &r100_irq_process,
.get_vblank_counter = &r100_get_vblank_counter,
@@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r300_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &r100_irq_set,
.irq_process = &r100_irq_process,
.get_vblank_counter = &r100_get_vblank_counter,
@@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r300_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &r100_irq_set,
.irq_process = &r100_irq_process,
.get_vblank_counter = &r100_get_vblank_counter,
@@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r300_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &r100_irq_set,
.irq_process = &r100_irq_process,
.get_vblank_counter = &r100_get_vblank_counter,
@@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r300_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &rs600_irq_set,
.irq_process = &rs600_irq_process,
.get_vblank_counter = &rs600_get_vblank_counter,
@@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &r300_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &rs600_irq_set,
.irq_process = &rs600_irq_process,
.get_vblank_counter = &rs600_get_vblank_counter,
@@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &rv515_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &rs600_irq_set,
.irq_process = &rs600_irq_process,
.get_vblank_counter = &rs600_get_vblank_counter,
@@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = {
.cp_init = &r100_cp_init,
.cp_fini = &r100_cp_fini,
.cp_disable = &r100_cp_disable,
+ .cp_commit = &r100_cp_commit,
.ring_start = &rv515_ring_start,
+ .ring_test = &r100_ring_test,
+ .ring_ib_execute = &r100_ring_ib_execute,
+ .ib_test = &r100_ib_test,
.irq_set = &rs600_irq_set,
.irq_process = &rs600_irq_process,
.get_vblank_counter = &rs600_get_vblank_counter,
@@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = {
};
/*
- * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710
+ * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880
*/
+int r600_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
+int r600_suspend(struct radeon_device *rdev);
+int r600_resume(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_cp_commit(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+int r600_cs_parse(struct radeon_cs_parser *p);
+void r600_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ unsigned num_pages,
+ struct radeon_fence *fence);
+int r600_irq_process(struct radeon_device *rdev);
+int r600_irq_set(struct radeon_device *rdev);
+int r600_gpu_reset(struct radeon_device *rdev);
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+ uint32_t tiling_flags, uint32_t pitch,
+ uint32_t offset, uint32_t obj_size);
+int r600_clear_surface_reg(struct radeon_device *rdev, int reg);
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_copy_blit(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_pages, struct radeon_fence *fence);
+
+static struct radeon_asic r600_asic = {
+ .errata = NULL,
+ .init = &r600_init,
+ .fini = &r600_fini,
+ .suspend = &r600_suspend,
+ .resume = &r600_resume,
+ .cp_commit = &r600_cp_commit,
+ .vram_info = NULL,
+ .gpu_reset = &r600_gpu_reset,
+ .mc_init = NULL,
+ .mc_fini = NULL,
+ .wb_init = &r600_wb_init,
+ .wb_fini = &r600_wb_fini,
+ .gart_enable = NULL,
+ .gart_disable = NULL,
+ .gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+ .gart_set_page = &rs600_gart_set_page,
+ .cp_init = NULL,
+ .cp_fini = NULL,
+ .cp_disable = NULL,
+ .ring_start = NULL,
+ .ring_test = &r600_ring_test,
+ .ring_ib_execute = &r600_ring_ib_execute,
+ .ib_test = &r600_ib_test,
+ .irq_set = &r600_irq_set,
+ .irq_process = &r600_irq_process,
+ .fence_ring_emit = &r600_fence_ring_emit,
+ .cs_parse = &r600_cs_parse,
+ .copy_blit = &r600_copy_blit,
+ .copy_dma = &r600_copy_blit,
+ .copy = NULL,
+ .set_engine_clock = &radeon_atom_set_engine_clock,
+ .set_memory_clock = &radeon_atom_set_memory_clock,
+ .set_pcie_lanes = NULL,
+ .set_clock_gating = &radeon_atom_set_clock_gating,
+ .set_surface_reg = r600_set_surface_reg,
+ .clear_surface_reg = r600_clear_surface_reg,
+ .bandwidth_update = &r520_bandwidth_update,
+};
+
+/*
+ * rv770,rv730,rv710,rv740
+ */
+int rv770_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
+int rv770_suspend(struct radeon_device *rdev);
+int rv770_resume(struct radeon_device *rdev);
+int rv770_gpu_reset(struct radeon_device *rdev);
+
+static struct radeon_asic rv770_asic = {
+ .errata = NULL,
+ .init = &rv770_init,
+ .fini = &rv770_fini,
+ .suspend = &rv770_suspend,
+ .resume = &rv770_resume,
+ .cp_commit = &r600_cp_commit,
+ .vram_info = NULL,
+ .gpu_reset = &rv770_gpu_reset,
+ .mc_init = NULL,
+ .mc_fini = NULL,
+ .wb_init = &r600_wb_init,
+ .wb_fini = &r600_wb_fini,
+ .gart_enable = NULL,
+ .gart_disable = NULL,
+ .gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+ .gart_set_page = &rs600_gart_set_page,
+ .cp_init = NULL,
+ .cp_fini = NULL,
+ .cp_disable = NULL,
+ .ring_start = NULL,
+ .ring_test = &r600_ring_test,
+ .ring_ib_execute = &r600_ring_ib_execute,
+ .ib_test = &r600_ib_test,
+ .irq_set = &r600_irq_set,
+ .irq_process = &r600_irq_process,
+ .fence_ring_emit = &r600_fence_ring_emit,
+ .cs_parse = &r600_cs_parse,
+ .copy_blit = &r600_copy_blit,
+ .copy_dma = &r600_copy_blit,
+ .copy = NULL,
+ .set_engine_clock = &radeon_atom_set_engine_clock,
+ .set_memory_clock = &radeon_atom_set_memory_clock,
+ .set_pcie_lanes = NULL,
+ .set_clock_gating = &radeon_atom_set_clock_gating,
+ .set_surface_reg = r600_set_surface_reg,
+ .clear_surface_reg = r600_clear_surface_reg,
+ .bandwidth_update = &r520_bandwidth_update,
+};
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bba9b4b..a8fb392 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
&& record->
ucRecordType <=
ATOM_MAX_OBJECT_RECORD_NUMBER) {
- DRM_ERROR
- ("record type %d\n",
- record->
- ucRecordType);
switch (record->
ucRecordType) {
case ATOM_I2C_RECORD_TYPE:
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
index a37cbce..152eef1 100644
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev)
p1pll->reference_div = 12;
if (p2pll->reference_div < 2)
p2pll->reference_div = 12;
- if (spll->reference_div < 2)
- spll->reference_div =
- RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
- RADEON_M_SPLL_REF_DIV_MASK;
+ if (rdev->family < CHIP_RS600) {
+ if (spll->reference_div < 2)
+ spll->reference_div =
+ RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+ RADEON_M_SPLL_REF_DIV_MASK;
+ }
if (mpll->reference_div < 2)
mpll->reference_div = spll->reference_div;
} else {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 7693f7c..f2469c5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -37,7 +37,7 @@
/*
* Clear GPU surface registers.
*/
-static void radeon_surface_init(struct radeon_device *rdev)
+void radeon_surface_init(struct radeon_device *rdev)
{
/* FIXME: check this out */
if (rdev->family < CHIP_R600) {
@@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev)
/*
* GPU scratch registers helpers function.
*/
-static void radeon_scratch_init(struct radeon_device *rdev)
+void radeon_scratch_init(struct radeon_device *rdev)
{
int i;
@@ -156,16 +156,14 @@ int radeon_mc_setup(struct radeon_device *rdev)
tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
rdev->mc.gtt_location = tmp;
}
- DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
+ DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20));
DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
- rdev->mc.vram_location,
- rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
- if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
- DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
- DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
+ (unsigned)rdev->mc.vram_location,
+ (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1));
+ DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20));
DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
- rdev->mc.gtt_location,
- rdev->mc.gtt_location + rdev->mc.gtt_size - 1);
+ (unsigned)rdev->mc.gtt_location,
+ (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1));
return 0;
}
@@ -205,6 +203,31 @@ static bool radeon_card_posted(struct radeon_device *rdev)
}
+int radeon_dummy_page_init(struct radeon_device *rdev)
+{
+ rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
+ if (rdev->dummy_page.page == NULL)
+ return -ENOMEM;
+ rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
+ 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (!rdev->dummy_page.addr) {
+ __free_page(rdev->dummy_page.page);
+ rdev->dummy_page.page = NULL;
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void radeon_dummy_page_fini(struct radeon_device *rdev)
+{
+ if (rdev->dummy_page.page == NULL)
+ return;
+ pci_unmap_page(rdev->pdev, rdev->dummy_page.addr,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ __free_page(rdev->dummy_page.page);
+ rdev->dummy_page.page = NULL;
+}
+
/*
* Registers accessors functions.
@@ -323,9 +346,15 @@ int radeon_asic_init(struct radeon_device *rdev)
case CHIP_RV635:
case CHIP_RV670:
case CHIP_RS780:
+ case CHIP_RS880:
+ rdev->asic = &r600_asic;
+ break;
case CHIP_RV770:
case CHIP_RV730:
case CHIP_RV710:
+ case CHIP_RV740:
+ rdev->asic = &rv770_asic;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -448,7 +477,7 @@ int radeon_device_init(struct radeon_device *rdev,
struct pci_dev *pdev,
uint32_t flags)
{
- int r, ret;
+ int r, ret = 0;
int dma_bits;
DRM_INFO("radeon: Initializing kernel modesetting.\n");
@@ -487,10 +516,6 @@ int radeon_device_init(struct radeon_device *rdev,
if (r) {
return r;
}
- r = radeon_init(rdev);
- if (r) {
- return r;
- }
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
@@ -521,111 +546,118 @@ int radeon_device_init(struct radeon_device *rdev,
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base);
DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size);
- /* Setup errata flags */
- radeon_errata(rdev);
- /* Initialize scratch registers */
- radeon_scratch_init(rdev);
- /* Initialize surface registers */
- radeon_surface_init(rdev);
-
- /* TODO: disable VGA need to use VGA request */
- /* BIOS*/
- if (!radeon_get_bios(rdev)) {
- if (ASIC_IS_AVIVO(rdev))
- return -EINVAL;
- }
- if (rdev->is_atom_bios) {
- r = radeon_atombios_init(rdev);
+ rdev->new_init_path = false;
+ r = radeon_init(rdev);
+ if (r) {
+ return r;
+ }
+ if (!rdev->new_init_path) {
+ /* Setup errata flags */
+ radeon_errata(rdev);
+ /* Initialize scratch registers */
+ radeon_scratch_init(rdev);
+ /* Initialize surface registers */
+ radeon_surface_init(rdev);
+
+ /* TODO: disable VGA need to use VGA request */
+ /* BIOS*/
+ if (!radeon_get_bios(rdev)) {
+ if (ASIC_IS_AVIVO(rdev))
+ return -EINVAL;
+ }
+ if (rdev->is_atom_bios) {
+ r = radeon_atombios_init(rdev);
+ if (r) {
+ return r;
+ }
+ } else {
+ r = radeon_combios_init(rdev);
+ if (r) {
+ return r;
+ }
+ }
+ /* Reset gpu before posting otherwise ATOM will enter infinite loop */
+ if (radeon_gpu_reset(rdev)) {
+ /* FIXME: what do we want to do here ? */
+ }
+ /* check if cards are posted or not */
+ if (!radeon_card_posted(rdev) && rdev->bios) {
+ DRM_INFO("GPU not posted. posting now...\n");
+ if (rdev->is_atom_bios) {
+ atom_asic_init(rdev->mode_info.atom_context);
+ } else {
+ radeon_combios_asic_init(rdev->ddev);
+ }
+ }
+ /* Initialize clocks */
+ r = radeon_clocks_init(rdev);
if (r) {
return r;
}
- } else {
- r = radeon_combios_init(rdev);
+ /* Get vram informations */
+ radeon_vram_info(rdev);
+
+ /* Add an MTRR for the VRAM */
+ rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+ MTRR_TYPE_WRCOMB, 1);
+ DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
+ (unsigned)(rdev->mc.mc_vram_size >> 20),
+ (unsigned)(rdev->mc.aper_size >> 20));
+ DRM_INFO("RAM width %dbits %cDR\n",
+ rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
+ /* Initialize memory controller (also test AGP) */
+ r = radeon_mc_init(rdev);
if (r) {
return r;
}
- }
- /* Reset gpu before posting otherwise ATOM will enter infinite loop */
- if (radeon_gpu_reset(rdev)) {
- /* FIXME: what do we want to do here ? */
- }
- /* check if cards are posted or not */
- if (!radeon_card_posted(rdev) && rdev->bios) {
- DRM_INFO("GPU not posted. posting now...\n");
- if (rdev->is_atom_bios) {
- atom_asic_init(rdev->mode_info.atom_context);
- } else {
- radeon_combios_asic_init(rdev->ddev);
- }
- }
- /* Initialize clocks */
- r = radeon_clocks_init(rdev);
- if (r) {
- return r;
- }
- /* Get vram informations */
- radeon_vram_info(rdev);
-
- /* Add an MTRR for the VRAM */
- rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
- MTRR_TYPE_WRCOMB, 1);
- DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
- rdev->mc.real_vram_size >> 20,
- (unsigned)rdev->mc.aper_size >> 20);
- DRM_INFO("RAM width %dbits %cDR\n",
- rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
- /* Initialize memory controller (also test AGP) */
- r = radeon_mc_init(rdev);
- if (r) {
- return r;
- }
- /* Fence driver */
- r = radeon_fence_driver_init(rdev);
- if (r) {
- return r;
- }
- r = radeon_irq_kms_init(rdev);
- if (r) {
- return r;
- }
- /* Memory manager */
- r = radeon_object_init(rdev);
- if (r) {
- return r;
- }
- /* Initialize GART (initialize after TTM so we can allocate
- * memory through TTM but finalize after TTM) */
- r = radeon_gart_enable(rdev);
- if (!r) {
- r = radeon_gem_init(rdev);
- }
-
- /* 1M ring buffer */
- if (!r) {
- r = radeon_cp_init(rdev, 1024 * 1024);
- }
- if (!r) {
- r = radeon_wb_init(rdev);
+ /* Fence driver */
+ r = radeon_fence_driver_init(rdev);
if (r) {
- DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
return r;
}
- }
- if (!r) {
- r = radeon_ib_pool_init(rdev);
+ r = radeon_irq_kms_init(rdev);
if (r) {
- DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
return r;
}
- }
- if (!r) {
- r = radeon_ib_test(rdev);
+ /* Memory manager */
+ r = radeon_object_init(rdev);
if (r) {
- DRM_ERROR("radeon: failled testing IB (%d).\n", r);
return r;
}
+ /* Initialize GART (initialize after TTM so we can allocate
+ * memory through TTM but finalize after TTM) */
+ r = radeon_gart_enable(rdev);
+ if (!r) {
+ r = radeon_gem_init(rdev);
+ }
+
+ /* 1M ring buffer */
+ if (!r) {
+ r = radeon_cp_init(rdev, 1024 * 1024);
+ }
+ if (!r) {
+ r = radeon_wb_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
+ return r;
+ }
+ }
+ if (!r) {
+ r = radeon_ib_pool_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+ return r;
+ }
+ }
+ if (!r) {
+ r = radeon_ib_test(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+ return r;
+ }
+ }
+ ret = r;
}
- ret = r;
r = radeon_modeset_init(rdev);
if (r) {
return r;
@@ -651,26 +683,29 @@ void radeon_device_fini(struct radeon_device *rdev)
rdev->shutdown = true;
/* Order matter so becarefull if you rearrange anythings */
radeon_modeset_fini(rdev);
- radeon_ib_pool_fini(rdev);
- radeon_cp_fini(rdev);
- radeon_wb_fini(rdev);
- radeon_gem_fini(rdev);
- radeon_object_fini(rdev);
- /* mc_fini must be after object_fini */
- radeon_mc_fini(rdev);
+ if (!rdev->new_init_path) {
+ radeon_ib_pool_fini(rdev);
+ radeon_cp_fini(rdev);
+ radeon_wb_fini(rdev);
+ radeon_gem_fini(rdev);
+ radeon_mc_fini(rdev);
#if __OS_HAS_AGP
- radeon_agp_fini(rdev);
+ radeon_agp_fini(rdev);
#endif
- radeon_irq_kms_fini(rdev);
- radeon_fence_driver_fini(rdev);
- radeon_clocks_fini(rdev);
- if (rdev->is_atom_bios) {
- radeon_atombios_fini(rdev);
+ radeon_irq_kms_fini(rdev);
+ radeon_fence_driver_fini(rdev);
+ radeon_clocks_fini(rdev);
+ radeon_object_fini(rdev);
+ if (rdev->is_atom_bios) {
+ radeon_atombios_fini(rdev);
+ } else {
+ radeon_combios_fini(rdev);
+ }
+ kfree(rdev->bios);
+ rdev->bios = NULL;
} else {
- radeon_combios_fini(rdev);
+ radeon_fini(rdev);
}
- kfree(rdev->bios);
- rdev->bios = NULL;
iounmap(rdev->rmmio);
rdev->rmmio = NULL;
}
@@ -708,9 +743,12 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
/* wait for gpu to finish processing current batch */
radeon_fence_wait_last(rdev);
- radeon_cp_disable(rdev);
- radeon_gart_disable(rdev);
-
+ if (!rdev->new_init_path) {
+ radeon_cp_disable(rdev);
+ radeon_gart_disable(rdev);
+ } else {
+ radeon_suspend(rdev);
+ }
/* evict remaining vram memory */
radeon_object_evict_vram(rdev);
@@ -746,33 +784,37 @@ int radeon_resume_kms(struct drm_device *dev)
if (radeon_gpu_reset(rdev)) {
/* FIXME: what do we want to do here ? */
}
- /* post card */
- if (rdev->is_atom_bios) {
- atom_asic_init(rdev->mode_info.atom_context);
+ if (!rdev->new_init_path) {
+ /* post card */
+ if (rdev->is_atom_bios) {
+ atom_asic_init(rdev->mode_info.atom_context);
+ } else {
+ radeon_combios_asic_init(rdev->ddev);
+ }
+ /* Initialize clocks */
+ r = radeon_clocks_init(rdev);
+ if (r) {
+ release_console_sem();
+ return r;
+ }
+ /* Enable IRQ */
+ rdev->irq.sw_int = true;
+ radeon_irq_set(rdev);
+ /* Initialize GPU Memory Controller */
+ r = radeon_mc_init(rdev);
+ if (r) {
+ goto out;
+ }
+ r = radeon_gart_enable(rdev);
+ if (r) {
+ goto out;
+ }
+ r = radeon_cp_init(rdev, rdev->cp.ring_size);
+ if (r) {
+ goto out;
+ }
} else {
- radeon_combios_asic_init(rdev->ddev);
- }
- /* Initialize clocks */
- r = radeon_clocks_init(rdev);
- if (r) {
- release_console_sem();
- return r;
- }
- /* Enable IRQ */
- rdev->irq.sw_int = true;
- radeon_irq_set(rdev);
- /* Initialize GPU Memory Controller */
- r = radeon_mc_init(rdev);
- if (r) {
- goto out;
- }
- r = radeon_gart_enable(rdev);
- if (r) {
- goto out;
- }
- r = radeon_cp_init(rdev, rdev->cp.ring_size);
- if (r) {
- goto out;
+ radeon_resume(rdev);
}
out:
fb_set_suspend(rdev->fbdev_info, 0);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 40294a0..c7b1859 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -356,6 +356,12 @@ typedef struct drm_radeon_private {
int r700_sc_hiz_tile_fifo_size;
int r700_sc_earlyz_tile_fifo_fize;
+ struct mutex cs_mutex;
+ u32 cs_id_scnt;
+ u32 cs_id_wcnt;
+ /* r6xx/r7xx drm blit vertex buffer */
+ struct drm_buf *blit_vb;
+
/* firmware */
const struct firmware *me_fw, *pfp_fw;
} drm_radeon_private_t;
@@ -396,6 +402,9 @@ static __inline__ int radeon_check_offset(drm_radeon_private_t *dev_priv,
(off >= gart_start && off <= gart_end));
}
+/* radeon_state.c */
+extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf);
+
/* radeon_cp.c */
extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv);
extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv);
@@ -487,6 +496,22 @@ extern int r600_cp_dispatch_indirect(struct drm_device *dev,
struct drm_buf *buf, int start, int end);
extern int r600_page_table_init(struct drm_device *dev);
extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info);
+extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv);
+extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv);
+extern int r600_cp_dispatch_texture(struct drm_device *dev,
+ struct drm_file *file_priv,
+ drm_radeon_texture_t *tex,
+ drm_radeon_tex_image_t *image);
+/* r600_blit.c */
+extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv);
+extern void r600_done_blit_copy(struct drm_device *dev);
+extern void r600_blit_copy(struct drm_device *dev,
+ uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+ int size_bytes);
+extern void r600_blit_swap(struct drm_device *dev,
+ uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+ int sx, int sy, int dx, int dy,
+ int w, int h, int src_pitch, int dst_pitch, int cpp);
/* Flags for stats.boxes
*/
@@ -1114,13 +1139,71 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index);
# define RADEON_CNTL_BITBLT_MULTI 0x00009B00
# define RADEON_CNTL_SET_SCISSORS 0xC0001E00
-# define R600_IT_INDIRECT_BUFFER 0x00003200
-# define R600_IT_ME_INITIALIZE 0x00004400
+# define R600_IT_INDIRECT_BUFFER_END 0x00001700
+# define R600_IT_SET_PREDICATION 0x00002000
+# define R600_IT_REG_RMW 0x00002100
+# define R600_IT_COND_EXEC 0x00002200
+# define R600_IT_PRED_EXEC 0x00002300
+# define R600_IT_START_3D_CMDBUF 0x00002400
+# define R600_IT_DRAW_INDEX_2 0x00002700
+# define R600_IT_CONTEXT_CONTROL 0x00002800
+# define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900
+# define R600_IT_INDEX_TYPE 0x00002A00
+# define R600_IT_DRAW_INDEX 0x00002B00
+# define R600_IT_DRAW_INDEX_AUTO 0x00002D00
+# define R600_IT_DRAW_INDEX_IMMD 0x00002E00
+# define R600_IT_NUM_INSTANCES 0x00002F00
+# define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400
+# define R600_IT_INDIRECT_BUFFER_MP 0x00003800
+# define R600_IT_MEM_SEMAPHORE 0x00003900
+# define R600_IT_MPEG_INDEX 0x00003A00
+# define R600_IT_WAIT_REG_MEM 0x00003C00
+# define R600_IT_MEM_WRITE 0x00003D00
+# define R600_IT_INDIRECT_BUFFER 0x00003200
+# define R600_IT_CP_INTERRUPT 0x00004000
+# define R600_IT_SURFACE_SYNC 0x00004300
+# define R600_CB0_DEST_BASE_ENA (1 << 6)
+# define R600_TC_ACTION_ENA (1 << 23)
+# define R600_VC_ACTION_ENA (1 << 24)
+# define R600_CB_ACTION_ENA (1 << 25)
+# define R600_DB_ACTION_ENA (1 << 26)
+# define R600_SH_ACTION_ENA (1 << 27)
+# define R600_SMX_ACTION_ENA (1 << 28)
+# define R600_IT_ME_INITIALIZE 0x00004400
# define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-# define R600_IT_EVENT_WRITE 0x00004600
-# define R600_IT_SET_CONFIG_REG 0x00006800
-# define R600_SET_CONFIG_REG_OFFSET 0x00008000
-# define R600_SET_CONFIG_REG_END 0x0000ac00
+# define R600_IT_COND_WRITE 0x00004500
+# define R600_IT_EVENT_WRITE 0x00004600
+# define R600_IT_EVENT_WRITE_EOP 0x00004700
+# define R600_IT_ONE_REG_WRITE 0x00005700
+# define R600_IT_SET_CONFIG_REG 0x00006800
+# define R600_SET_CONFIG_REG_OFFSET 0x00008000
+# define R600_SET_CONFIG_REG_END 0x0000ac00
+# define R600_IT_SET_CONTEXT_REG 0x00006900
+# define R600_SET_CONTEXT_REG_OFFSET 0x00028000
+# define R600_SET_CONTEXT_REG_END 0x00029000
+# define R600_IT_SET_ALU_CONST 0x00006A00
+# define R600_SET_ALU_CONST_OFFSET 0x00030000
+# define R600_SET_ALU_CONST_END 0x00032000
+# define R600_IT_SET_BOOL_CONST 0x00006B00
+# define R600_SET_BOOL_CONST_OFFSET 0x0003e380
+# define R600_SET_BOOL_CONST_END 0x00040000
+# define R600_IT_SET_LOOP_CONST 0x00006C00
+# define R600_SET_LOOP_CONST_OFFSET 0x0003e200
+# define R600_SET_LOOP_CONST_END 0x0003e380
+# define R600_IT_SET_RESOURCE 0x00006D00
+# define R600_SET_RESOURCE_OFFSET 0x00038000
+# define R600_SET_RESOURCE_END 0x0003c000
+# define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0
+# define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1
+# define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2
+# define R600_SQ_TEX_VTX_VALID_BUFFER 0x3
+# define R600_IT_SET_SAMPLER 0x00006E00
+# define R600_SET_SAMPLER_OFFSET 0x0003c000
+# define R600_SET_SAMPLER_END 0x0003cff0
+# define R600_IT_SET_CTL_CONST 0x00006F00
+# define R600_SET_CTL_CONST_OFFSET 0x0003cff0
+# define R600_SET_CTL_CONST_END 0x0003e200
+# define R600_IT_SURFACE_BASE_UPDATE 0x00007300
#define RADEON_CP_PACKET_MASK 0xC0000000
#define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000
@@ -1598,6 +1681,52 @@ extern u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index);
#define R600_CB_COLOR7_BASE 0x2805c
#define R600_CB_COLOR7_FRAG 0x280fc
+#define R600_CB_COLOR0_SIZE 0x28060
+#define R600_CB_COLOR0_VIEW 0x28080
+#define R600_CB_COLOR0_INFO 0x280a0
+#define R600_CB_COLOR0_TILE 0x280c0
+#define R600_CB_COLOR0_FRAG 0x280e0
+#define R600_CB_COLOR0_MASK 0x28100
+
+#define AVIVO_D1MODE_VLINE_START_END 0x6538
+#define AVIVO_D2MODE_VLINE_START_END 0x6d38
+#define R600_CP_COHER_BASE 0x85f8
+#define R600_DB_DEPTH_BASE 0x2800c
+#define R600_SQ_PGM_START_FS 0x28894
+#define R600_SQ_PGM_START_ES 0x28880
+#define R600_SQ_PGM_START_VS 0x28858
+#define R600_SQ_PGM_RESOURCES_VS 0x28868
+#define R600_SQ_PGM_CF_OFFSET_VS 0x288d0
+#define R600_SQ_PGM_START_GS 0x2886c
+#define R600_SQ_PGM_START_PS 0x28840
+#define R600_SQ_PGM_RESOURCES_PS 0x28850
+#define R600_SQ_PGM_EXPORTS_PS 0x28854
+#define R600_SQ_PGM_CF_OFFSET_PS 0x288cc
+#define R600_VGT_DMA_BASE 0x287e8
+#define R600_VGT_DMA_BASE_HI 0x287e4
+#define R600_VGT_STRMOUT_BASE_OFFSET_0 0x28b10
+#define R600_VGT_STRMOUT_BASE_OFFSET_1 0x28b14
+#define R600_VGT_STRMOUT_BASE_OFFSET_2 0x28b18
+#define R600_VGT_STRMOUT_BASE_OFFSET_3 0x28b1c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0 0x28b44
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1 0x28b48
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2 0x28b4c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3 0x28b50
+#define R600_VGT_STRMOUT_BUFFER_BASE_0 0x28ad8
+#define R600_VGT_STRMOUT_BUFFER_BASE_1 0x28ae8
+#define R600_VGT_STRMOUT_BUFFER_BASE_2 0x28af8
+#define R600_VGT_STRMOUT_BUFFER_BASE_3 0x28b08
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_0 0x28adc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_1 0x28aec
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_2 0x28afc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_3 0x28b0c
+
+#define R600_VGT_PRIMITIVE_TYPE 0x8958
+
+#define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030
+#define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240
+#define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204
+
#define R600_TC_CNTL 0x9608
# define R600_TC_L2_SIZE(x) ((x) << 5)
# define R600_L2_DISABLE_LATE_HIT (1 << 9)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index b4e48dd..506dd4d 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -53,9 +53,9 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
* away
*/
WREG32(rdev->fence_drv.scratch_reg, fence->seq);
- } else {
+ } else
radeon_fence_ring_emit(rdev, fence);
- }
+
fence->emited = true;
fence->timeout = jiffies + ((2000 * HZ) / 1000);
list_del(&fence->list);
@@ -168,7 +168,47 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
return signaled;
}
-int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
+int r600_fence_wait(struct radeon_fence *fence, bool intr, bool lazy)
+{
+ struct radeon_device *rdev;
+ unsigned long cur_jiffies;
+ unsigned long timeout;
+ int ret = 0;
+
+ cur_jiffies = jiffies;
+ timeout = HZ / 100;
+
+ if (time_after(fence->timeout, cur_jiffies)) {
+ timeout = fence->timeout - cur_jiffies;
+ }
+
+ rdev = fence->rdev;
+
+ __set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+
+ while (1) {
+ if (radeon_fence_signaled(fence))
+ break;
+
+ if (time_after_eq(jiffies, timeout)) {
+ ret = -EBUSY;
+ break;
+ }
+
+ if (lazy)
+ schedule_timeout(1);
+
+ if (intr && signal_pending(current)) {
+ ret = -ERESTART;
+ break;
+ }
+ }
+ __set_current_state(TASK_RUNNING);
+ return ret;
+}
+
+
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
struct radeon_device *rdev;
unsigned long cur_jiffies;
@@ -176,7 +216,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
bool expired = false;
int r;
-
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
return 0;
@@ -185,13 +224,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
if (radeon_fence_signaled(fence)) {
return 0;
}
+
+ if (rdev->family >= CHIP_R600)
+ return r600_fence_wait(fence, intr, 0);
+
retry:
cur_jiffies = jiffies;
timeout = HZ / 100;
if (time_after(fence->timeout, cur_jiffies)) {
timeout = fence->timeout - cur_jiffies;
}
- if (interruptible) {
+
+ if (intr) {
r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
radeon_fence_signaled(fence), timeout);
if (unlikely(r == -ERESTARTSYS)) {
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index 28be2f1..21da871 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -3255,6 +3255,24 @@
#define RADEON_CP_RB_WPTR 0x0714
#define RADEON_CP_RB_RPTR_WR 0x071c
+#define RADEON_SCRATCH_UMSK 0x0770
+#define RADEON_SCRATCH_ADDR 0x0774
+
+#define R600_CP_RB_BASE 0xc100
+#define R600_CP_RB_CNTL 0xc104
+# define R600_RB_BUFSZ(x) ((x) << 0)
+# define R600_RB_BLKSZ(x) ((x) << 8)
+# define R600_RB_NO_UPDATE (1 << 27)
+# define R600_RB_RPTR_WR_ENA (1 << 31)
+#define R600_CP_RB_RPTR_WR 0xc108
+#define R600_CP_RB_RPTR_ADDR 0xc10c
+#define R600_CP_RB_RPTR_ADDR_HI 0xc110
+#define R600_CP_RB_WPTR 0xc114
+#define R600_CP_RB_WPTR_ADDR 0xc118
+#define R600_CP_RB_WPTR_ADDR_HI 0xc11c
+#define R600_CP_RB_RPTR 0x8700
+#define R600_CP_RB_WPTR_DELAY 0x8704
+
#define RADEON_CP_IB_BASE 0x0738
#define RADEON_CP_IB_BUFSZ 0x073c
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 60d1593..aa9837a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -110,7 +110,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
return;
}
list_del(&tmp->list);
- INIT_LIST_HEAD(&tmp->list);
if (tmp->fence) {
radeon_fence_unref(&tmp->fence);
}
@@ -119,19 +118,11 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
mutex_unlock(&rdev->ib_pool.mutex);
}
-static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
-{
- while ((ib->length_dw & rdev->cp.align_mask)) {
- ib->ptr[ib->length_dw++] = PACKET2(0);
- }
-}
-
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
{
int r = 0;
mutex_lock(&rdev->ib_pool.mutex);
- radeon_ib_align(rdev, ib);
if (!ib->length_dw || !rdev->cp.ready) {
/* TODO: Nothings in the ib we should report. */
mutex_unlock(&rdev->ib_pool.mutex);
@@ -145,9 +136,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
mutex_unlock(&rdev->ib_pool.mutex);
return r;
}
- radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
- radeon_ring_write(rdev, ib->gpu_addr);
- radeon_ring_write(rdev, ib->length_dw);
+ radeon_ring_ib_execute(rdev, ib);
radeon_fence_emit(rdev, ib->fence);
radeon_ring_unlock_commit(rdev);
list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs);
@@ -215,69 +204,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev)
mutex_unlock(&rdev->ib_pool.mutex);
}
-int radeon_ib_test(struct radeon_device *rdev)
-{
- struct radeon_ib *ib;
- uint32_t scratch;
- uint32_t tmp = 0;
- unsigned i;
- int r;
-
- r = radeon_scratch_get(rdev, &scratch);
- if (r) {
- DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
- r = radeon_ib_get(rdev, &ib);
- if (r) {
- return r;
- }
- ib->ptr[0] = PACKET0(scratch, 0);
- ib->ptr[1] = 0xDEADBEEF;
- ib->ptr[2] = PACKET2(0);
- ib->ptr[3] = PACKET2(0);
- ib->ptr[4] = PACKET2(0);
- ib->ptr[5] = PACKET2(0);
- ib->ptr[6] = PACKET2(0);
- ib->ptr[7] = PACKET2(0);
- ib->length_dw = 8;
- r = radeon_ib_schedule(rdev, ib);
- if (r) {
- radeon_scratch_free(rdev, scratch);
- radeon_ib_free(rdev, &ib);
- return r;
- }
- r = radeon_fence_wait(ib->fence, false);
- if (r) {
- return r;
- }
- for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- break;
- }
- DRM_UDELAY(1);
- }
- if (i < rdev->usec_timeout) {
- DRM_INFO("ib test succeeded in %u usecs\n", i);
- } else {
- DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
- scratch, tmp);
- r = -EINVAL;
- }
- radeon_scratch_free(rdev, scratch);
- radeon_ib_free(rdev, &ib);
- return r;
-}
-
/*
* Ring.
*/
void radeon_ring_free_size(struct radeon_device *rdev)
{
- rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+ if (rdev->family >= CHIP_R600)
+ rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
+ else
+ rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
/* This works because ring_size is a power of 2 */
rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4));
rdev->cp.ring_free_dw -= rdev->cp.wptr;
@@ -320,11 +256,10 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev)
count_dw_pad = (rdev->cp.align_mask + 1) -
(rdev->cp.wptr & rdev->cp.align_mask);
for (i = 0; i < count_dw_pad; i++) {
- radeon_ring_write(rdev, PACKET2(0));
+ radeon_ring_write(rdev, 2 << 30);
}
DRM_MEMORYBARRIER();
- WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
- (void)RREG32(RADEON_CP_RB_WPTR);
+ radeon_cp_commit(rdev);
mutex_unlock(&rdev->cp.mutex);
}
@@ -334,46 +269,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev)
mutex_unlock(&rdev->cp.mutex);
}
-int radeon_ring_test(struct radeon_device *rdev)
-{
- uint32_t scratch;
- uint32_t tmp = 0;
- unsigned i;
- int r;
-
- r = radeon_scratch_get(rdev, &scratch);
- if (r) {
- DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
- r = radeon_ring_lock(rdev, 2);
- if (r) {
- DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
- radeon_scratch_free(rdev, scratch);
- return r;
- }
- radeon_ring_write(rdev, PACKET0(scratch, 0));
- radeon_ring_write(rdev, 0xDEADBEEF);
- radeon_ring_unlock_commit(rdev);
- for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- break;
- }
- DRM_UDELAY(1);
- }
- if (i < rdev->usec_timeout) {
- DRM_INFO("ring test succeeded in %d usecs\n", i);
- } else {
- DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
- scratch, tmp);
- r = -EINVAL;
- }
- radeon_scratch_free(rdev, scratch);
- return r;
-}
-
int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size)
{
int r;
diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h
index 63a7735..5f9e358 100644
--- a/drivers/gpu/drm/radeon/radeon_share.h
+++ b/drivers/gpu/drm/radeon/radeon_share.h
@@ -28,12 +28,89 @@
#ifndef __RADEON_SHARE_H__
#define __RADEON_SHARE_H__
+/* Common */
+struct radeon_device;
+struct radeon_cs_parser;
+int radeon_clocks_init(struct radeon_device *rdev);
+void radeon_clocks_fini(struct radeon_device *rdev);
+void radeon_scratch_init(struct radeon_device *rdev);
+void radeon_surface_init(struct radeon_device *rdev);
+int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
+
+
+/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */
void r100_vram_init_sizes(struct radeon_device *rdev);
+
+/* R300, R350, RV350, RV380 */
+struct r300_asic {
+ const unsigned *reg_safe_bm;
+ unsigned reg_safe_bm_size;
+};
+
+
+/* RS690, RS740 */
void rs690_line_buffer_adjust(struct radeon_device *rdev,
struct drm_display_mode *mode1,
struct drm_display_mode *mode2);
+
+/* RV515 */
void rv515_bandwidth_avivo_update(struct radeon_device *rdev);
+
+/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */
+bool r600_card_posted(struct radeon_device *rdev);
+void r600_cp_stop(struct radeon_device *rdev);
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
+int r600_cp_resume(struct radeon_device *rdev);
+int r600_count_pipe_bits(uint32_t val);
+int r600_gart_clear_page(struct radeon_device *rdev, int i);
+int r600_mc_wait_for_idle(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_scratch_init(struct radeon_device *rdev);
+int r600_blit_init(struct radeon_device *rdev);
+void r600_blit_fini(struct radeon_device *rdev);
+int r600_cp_init_microcode(struct radeon_device *rdev);
+struct r600_asic {
+ unsigned max_pipes;
+ unsigned max_tile_pipes;
+ unsigned max_simds;
+ unsigned max_backends;
+ unsigned max_gprs;
+ unsigned max_threads;
+ unsigned max_stack_entries;
+ unsigned max_hw_contexts;
+ unsigned max_gs_threads;
+ unsigned sx_max_export_size;
+ unsigned sx_max_export_pos_size;
+ unsigned sx_max_export_smx_size;
+ unsigned sq_num_cf_insts;
+};
+
+/* RV770, RV7300, RV710 */
+struct rv770_asic {
+ unsigned max_pipes;
+ unsigned max_tile_pipes;
+ unsigned max_simds;
+ unsigned max_backends;
+ unsigned max_gprs;
+ unsigned max_threads;
+ unsigned max_stack_entries;
+ unsigned max_hw_contexts;
+ unsigned max_gs_threads;
+ unsigned sx_max_export_size;
+ unsigned sx_max_export_pos_size;
+ unsigned sx_max_export_smx_size;
+ unsigned sq_num_cf_insts;
+ unsigned sx_num_of_sets;
+ unsigned sc_prim_fifo_size;
+ unsigned sc_hiz_tile_fifo_size;
+ unsigned sc_earlyz_tile_fifo_fize;
+};
+
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 2882f40..aad0c6f 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -1546,7 +1546,7 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev,
} while (i < nbox);
}
-static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
+void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
struct drm_radeon_master_private *master_priv = master->driver_priv;
@@ -2213,7 +2213,10 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f
if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
- radeon_cp_dispatch_swap(dev, file_priv->master);
+ if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+ r600_cp_dispatch_swap(dev, file_priv);
+ else
+ radeon_cp_dispatch_swap(dev, file_priv->master);
sarea_priv->ctx_owner = 0;
COMMIT_RING();
@@ -2412,7 +2415,10 @@ static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file
RING_SPACE_TEST_WITH_RETURN(dev_priv);
VB_AGE_TEST_WITH_RETURN(dev_priv);
- ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
+ if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+ ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
+ else
+ ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
return ret;
}
@@ -2495,8 +2501,9 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil
radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
}
- if (indirect->discard)
+ if (indirect->discard) {
radeon_cp_discard_buffer(dev, file_priv->master, buf);
+ }
COMMIT_RING();
return 0;
@@ -3227,7 +3234,8 @@ struct drm_ioctl_desc radeon_ioctls[] = {
DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
- DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
+ DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index dc7a442..acd889c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -376,9 +376,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
radeon_move_null(bo, new_mem);
return 0;
}
- if (!rdev->cp.ready) {
+ if (!rdev->cp.ready || rdev->asic->copy == NULL) {
/* use memcpy */
- DRM_ERROR("CP is not ready use memcpy.\n");
goto memcpy;
}
@@ -495,7 +494,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
return r;
}
DRM_INFO("radeon: %uM of VRAM memory ready\n",
- rdev->mc.real_vram_size / (1024 * 1024));
+ (unsigned)rdev->mc.real_vram_size / (1024 * 1024));
r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
((rdev->mc.gtt_size) >> PAGE_SHIFT));
if (r) {
@@ -503,7 +502,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
return r;
}
DRM_INFO("radeon: %uM of GTT memory ready.\n",
- rdev->mc.gtt_size / (1024 * 1024));
+ (unsigned)(rdev->mc.gtt_size / (1024 * 1024)));
if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
}
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index b29affd..8c3ea7e 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -63,7 +63,7 @@ void rs400_gart_adjust_size(struct radeon_device *rdev)
break;
default:
DRM_ERROR("Unable to use IGP GART size %uM\n",
- rdev->mc.gtt_size >> 20);
+ (unsigned)(rdev->mc.gtt_size >> 20));
DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n");
DRM_ERROR("Forcing to 32M GART size\n");
rdev->mc.gtt_size = 32 * 1024 * 1024;
diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c
deleted file mode 100644
index 0affcff..0000000
--- a/drivers/gpu/drm/radeon/rs780.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- * Alex Deucher
- * Jerome Glisse
- */
-#include "drmP.h"
-#include "radeon_reg.h"
-#include "radeon.h"
-
-/* rs780 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
-
-/* This files gather functions specifics to:
- * rs780
- *
- * Some of these functions might be used by newer ASICs.
- */
-int rs780_mc_wait_for_idle(struct radeon_device *rdev);
-void rs780_gpu_init(struct radeon_device *rdev);
-
-
-/*
- * MC
- */
-int rs780_mc_init(struct radeon_device *rdev)
-{
- rs780_gpu_init(rdev);
- /* FIXME: implement */
-
- rs600_mc_disable_clients(rdev);
- if (rs780_mc_wait_for_idle(rdev)) {
- printk(KERN_WARNING "Failed to wait MC idle while "
- "programming pipes. Bad things might happen.\n");
- }
- return 0;
-}
-
-void rs780_mc_fini(struct radeon_device *rdev)
-{
- /* FIXME: implement */
-}
-
-
-/*
- * Global GPU functions
- */
-void rs780_errata(struct radeon_device *rdev)
-{
- rdev->pll_errata = 0;
-}
-
-int rs780_mc_wait_for_idle(struct radeon_device *rdev)
-{
- /* FIXME: implement */
- return 0;
-}
-
-void rs780_gpu_init(struct radeon_device *rdev)
-{
- /* FIXME: implement */
-}
-
-
-/*
- * VRAM info
- */
-void rs780_vram_get_type(struct radeon_device *rdev)
-{
- /* FIXME: implement */
-}
-
-void rs780_vram_info(struct radeon_device *rdev)
-{
- rs780_vram_get_type(rdev);
-
- /* FIXME: implement */
- /* Could aper size report 0 ? */
- rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
- rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 97965c4..99e397f 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -27,7 +27,7 @@
*/
#include <linux/seq_file.h>
#include "drmP.h"
-#include "rv515r.h"
+#include "rv515d.h"
#include "radeon.h"
#include "radeon_share.h"
diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515d.h
index f3cf840..a65e17e 100644
--- a/drivers/gpu/drm/radeon/rv515r.h
+++ b/drivers/gpu/drm/radeon/rv515d.h
@@ -25,10 +25,12 @@
* Alex Deucher
* Jerome Glisse
*/
-#ifndef RV515R_H
-#define RV515R_H
+#ifndef __RV515D_H__
+#define __RV515D_H__
-/* RV515 registers */
+/*
+ * RV515 registers
+ */
#define PCIE_INDEX 0x0030
#define PCIE_DATA 0x0034
#define MC_IND_INDEX 0x0070
@@ -166,5 +168,53 @@
#define MC_GLOBW_INIT_LAT_MASK 0xF0000000
+/*
+ * PM4 packet
+ */
+#define CP_PACKET0 0x00000000
+#define PACKET0_BASE_INDEX_SHIFT 0
+#define PACKET0_BASE_INDEX_MASK (0x1ffff << 0)
+#define PACKET0_COUNT_SHIFT 16
+#define PACKET0_COUNT_MASK (0x3fff << 16)
+#define CP_PACKET1 0x40000000
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+#define CP_PACKET3 0xC0000000
+#define PACKET3_IT_OPCODE_SHIFT 8
+#define PACKET3_IT_OPCODE_MASK (0xff << 8)
+#define PACKET3_COUNT_SHIFT 16
+#define PACKET3_COUNT_MASK (0x3fff << 16)
+/* PACKET3 op code */
+#define PACKET3_NOP 0x10
+#define PACKET3_3D_DRAW_VBUF 0x28
+#define PACKET3_3D_DRAW_IMMD 0x29
+#define PACKET3_3D_DRAW_INDX 0x2A
+#define PACKET3_3D_LOAD_VBPNTR 0x2F
+#define PACKET3_INDX_BUFFER 0x33
+#define PACKET3_3D_DRAW_VBUF_2 0x34
+#define PACKET3_3D_DRAW_IMMD_2 0x35
+#define PACKET3_3D_DRAW_INDX_2 0x36
+#define PACKET3_BITBLT_MULTI 0x9B
+
+#define PACKET0(reg, n) (CP_PACKET0 | \
+ REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) | \
+ REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n) (CP_PACKET3 | \
+ REG_SET(PACKET3_IT_OPCODE, (op)) | \
+ REG_SET(PACKET3_COUNT, (n)))
+
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
#endif
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 21d8ffd..57765f6 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -25,100 +25,975 @@
* Alex Deucher
* Jerome Glisse
*/
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
#include "drmP.h"
-#include "radeon_reg.h"
#include "radeon.h"
+#include "radeon_share.h"
+#include "rv770d.h"
+#include "avivod.h"
+#include "atom.h"
-/* rv770,rv730,rv710 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
-/* This files gather functions specifics to:
- * rv770,rv730,rv710
- *
- * Some of these functions might be used by newer ASICs.
- */
-int rv770_mc_wait_for_idle(struct radeon_device *rdev);
-void rv770_gpu_init(struct radeon_device *rdev);
+static void rv770_gpu_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
/*
- * MC
+ * GART
*/
-int rv770_mc_init(struct radeon_device *rdev)
+int rv770_pcie_gart_enable(struct radeon_device *rdev)
{
- uint32_t tmp;
+ u32 tmp;
+ int r, i;
- rv770_gpu_init(rdev);
+ /* Initialize common gart structure */
+ r = radeon_gart_init(rdev);
+ if (r) {
+ return r;
+ }
+ rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+ r = radeon_gart_table_vram_alloc(rdev);
+ if (r) {
+ return r;
+ }
+ for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+ r600_gart_clear_page(rdev, i);
+ /* Setup L2 cache */
+ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+ ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+ EFFECTIVE_L2_QUEUE_SIZE(7));
+ WREG32(VM_L2_CNTL2, 0);
+ WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+ /* Setup TLB control */
+ tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+ SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+ SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
+ EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+ WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+ WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+ WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+ WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+ WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(rdev->dummy_page.addr >> 12));
+ for (i = 1; i < 7; i++)
+ WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
- /* setup the gart before changing location so we can ask to
- * discard unmapped mc request
- */
- /* FIXME: disable out of gart access */
- tmp = rdev->mc.gtt_location / 4096;
- tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
- WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
- tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
- tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
- WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
- rs600_mc_disable_clients(rdev);
- if (rv770_mc_wait_for_idle(rdev)) {
- printk(KERN_WARNING "Failed to wait MC idle while "
- "programming pipes. Bad things might happen.\n");
- }
-
- tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
- tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
- tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
- WREG32(R700_MC_VM_FB_LOCATION, tmp);
- tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
- tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22);
- WREG32(R700_MC_VM_AGP_TOP, tmp);
- tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
- WREG32(R700_MC_VM_AGP_BOT, tmp);
+ r600_pcie_gart_tlb_flush(rdev);
+ rdev->gart.ready = true;
return 0;
}
-void rv770_mc_fini(struct radeon_device *rdev)
+void rv770_pcie_gart_disable(struct radeon_device *rdev)
{
- /* FIXME: implement */
+ u32 tmp;
+ int i;
+
+ /* Clear ptes*/
+ for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+ r600_gart_clear_page(rdev, i);
+ r600_pcie_gart_tlb_flush(rdev);
+ /* Disable all tables */
+ for (i = 0; i < 7; i++)
+ WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+ /* Setup L2 cache */
+ WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+ EFFECTIVE_L2_QUEUE_SIZE(7));
+ WREG32(VM_L2_CNTL2, 0);
+ WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+ /* Setup TLB control */
+ tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+ WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+ WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+ WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+ WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
}
/*
- * Global GPU functions
+ * MC
*/
-void rv770_errata(struct radeon_device *rdev)
+static void rv770_mc_resume(struct radeon_device *rdev)
{
- rdev->pll_errata = 0;
+ u32 d1vga_control, d2vga_control;
+ u32 vga_render_control, vga_hdp_control;
+ u32 d1crtc_control, d2crtc_control;
+ u32 new_d1grph_primary, new_d1grph_secondary;
+ u32 new_d2grph_primary, new_d2grph_secondary;
+ u64 old_vram_start;
+ u32 tmp;
+ int i, j;
+
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+ WREG32((0x2c14 + j), 0x00000000);
+ WREG32((0x2c18 + j), 0x00000000);
+ WREG32((0x2c1c + j), 0x00000000);
+ WREG32((0x2c20 + j), 0x00000000);
+ WREG32((0x2c24 + j), 0x00000000);
+ }
+ WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+ d1vga_control = RREG32(D1VGA_CONTROL);
+ d2vga_control = RREG32(D2VGA_CONTROL);
+ vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+ d1crtc_control = RREG32(D1CRTC_CONTROL);
+ d2crtc_control = RREG32(D2CRTC_CONTROL);
+ old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+ new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+ new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+ new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+ new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+ new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+ new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+ new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+ new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+ /* Stop all video */
+ WREG32(D1VGA_CONTROL, 0);
+ WREG32(D2VGA_CONTROL, 0);
+ WREG32(VGA_RENDER_CONTROL, 0);
+ WREG32(D1CRTC_UPDATE_LOCK, 1);
+ WREG32(D2CRTC_UPDATE_LOCK, 1);
+ WREG32(D1CRTC_CONTROL, 0);
+ WREG32(D2CRTC_CONTROL, 0);
+ WREG32(D1CRTC_UPDATE_LOCK, 0);
+ WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+ mdelay(1);
+ if (r600_mc_wait_for_idle(rdev)) {
+ printk(KERN_WARNING "[drm] MC not idle !\n");
+ }
+
+ /* Lockout access through VGA aperture*/
+ WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+ /* Update configuration */
+ WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+ WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+ WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+ tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+ tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+ WREG32(MC_VM_FB_LOCATION, tmp);
+ WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+ WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+ WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+ if (rdev->flags & RADEON_IS_AGP) {
+ WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+ WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+ WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+ } else {
+ WREG32(MC_VM_AGP_BASE, 0);
+ WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+ WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+ }
+ WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+ WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+ WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+ WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+ WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+ /* Unlock host access */
+ WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+ mdelay(1);
+ if (r600_mc_wait_for_idle(rdev)) {
+ printk(KERN_WARNING "[drm] MC not idle !\n");
+ }
+
+ /* Restore video state */
+ WREG32(D1CRTC_UPDATE_LOCK, 1);
+ WREG32(D2CRTC_UPDATE_LOCK, 1);
+ WREG32(D1CRTC_CONTROL, d1crtc_control);
+ WREG32(D2CRTC_CONTROL, d2crtc_control);
+ WREG32(D1CRTC_UPDATE_LOCK, 0);
+ WREG32(D2CRTC_UPDATE_LOCK, 0);
+ WREG32(D1VGA_CONTROL, d1vga_control);
+ WREG32(D2VGA_CONTROL, d2vga_control);
+ WREG32(VGA_RENDER_CONTROL, vga_render_control);
}
-int rv770_mc_wait_for_idle(struct radeon_device *rdev)
+
+/*
+ * CP.
+ */
+void r700_cp_stop(struct radeon_device *rdev)
{
- /* FIXME: implement */
- return 0;
+ WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
}
-void rv770_gpu_init(struct radeon_device *rdev)
+
+static int rv770_cp_load_microcode(struct radeon_device *rdev)
{
- /* FIXME: implement */
+ const __be32 *fw_data;
+ int i;
+
+ if (!rdev->me_fw || !rdev->pfp_fw)
+ return -EINVAL;
+
+ r700_cp_stop(rdev);
+ WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0));
+
+ /* Reset cp */
+ WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+ RREG32(GRBM_SOFT_RESET);
+ mdelay(15);
+ WREG32(GRBM_SOFT_RESET, 0);
+
+ fw_data = (const __be32 *)rdev->pfp_fw->data;
+ WREG32(CP_PFP_UCODE_ADDR, 0);
+ for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
+ WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
+ WREG32(CP_PFP_UCODE_ADDR, 0);
+
+ fw_data = (const __be32 *)rdev->me_fw->data;
+ WREG32(CP_ME_RAM_WADDR, 0);
+ for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
+ WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+
+ WREG32(CP_PFP_UCODE_ADDR, 0);
+ WREG32(CP_ME_RAM_WADDR, 0);
+ WREG32(CP_ME_RAM_RADDR, 0);
+ return 0;
}
/*
- * VRAM info
+ * Core functions
*/
-void rv770_vram_get_type(struct radeon_device *rdev)
+static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+ u32 num_backends,
+ u32 backend_disable_mask)
{
- /* FIXME: implement */
+ u32 backend_map = 0;
+ u32 enabled_backends_mask;
+ u32 enabled_backends_count;
+ u32 cur_pipe;
+ u32 swizzle_pipe[R7XX_MAX_PIPES];
+ u32 cur_backend;
+ u32 i;
+
+ if (num_tile_pipes > R7XX_MAX_PIPES)
+ num_tile_pipes = R7XX_MAX_PIPES;
+ if (num_tile_pipes < 1)
+ num_tile_pipes = 1;
+ if (num_backends > R7XX_MAX_BACKENDS)
+ num_backends = R7XX_MAX_BACKENDS;
+ if (num_backends < 1)
+ num_backends = 1;
+
+ enabled_backends_mask = 0;
+ enabled_backends_count = 0;
+ for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
+ if (((backend_disable_mask >> i) & 1) == 0) {
+ enabled_backends_mask |= (1 << i);
+ ++enabled_backends_count;
+ }
+ if (enabled_backends_count == num_backends)
+ break;
+ }
+
+ if (enabled_backends_count == 0) {
+ enabled_backends_mask = 1;
+ enabled_backends_count = 1;
+ }
+
+ if (enabled_backends_count != num_backends)
+ num_backends = enabled_backends_count;
+
+ memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
+ switch (num_tile_pipes) {
+ case 1:
+ swizzle_pipe[0] = 0;
+ break;
+ case 2:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 1;
+ break;
+ case 3:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 1;
+ break;
+ case 4:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 3;
+ swizzle_pipe[3] = 1;
+ break;
+ case 5:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 1;
+ swizzle_pipe[4] = 3;
+ break;
+ case 6:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 5;
+ swizzle_pipe[4] = 3;
+ swizzle_pipe[5] = 1;
+ break;
+ case 7:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 6;
+ swizzle_pipe[4] = 3;
+ swizzle_pipe[5] = 1;
+ swizzle_pipe[6] = 5;
+ break;
+ case 8:
+ swizzle_pipe[0] = 0;
+ swizzle_pipe[1] = 2;
+ swizzle_pipe[2] = 4;
+ swizzle_pipe[3] = 6;
+ swizzle_pipe[4] = 3;
+ swizzle_pipe[5] = 1;
+ swizzle_pipe[6] = 7;
+ swizzle_pipe[7] = 5;
+ break;
+ }
+
+ cur_backend = 0;
+ for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
+ while (((1 << cur_backend) & enabled_backends_mask) == 0)
+ cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+
+ backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
+
+ cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+ }
+
+ return backend_map;
}
-void rv770_vram_info(struct radeon_device *rdev)
+static void rv770_gpu_init(struct radeon_device *rdev)
{
- rv770_vram_get_type(rdev);
+ int i, j, num_qd_pipes;
+ u32 sx_debug_1;
+ u32 smx_dc_ctl0;
+ u32 num_gs_verts_per_thread;
+ u32 vgt_gs_per_es;
+ u32 gs_prim_buffer_depth = 0;
+ u32 sq_ms_fifo_sizes;
+ u32 sq_config;
+ u32 sq_thread_resource_mgmt;
+ u32 hdp_host_path_cntl;
+ u32 sq_dyn_gpr_size_simd_ab_0;
+ u32 backend_map;
+ u32 gb_tiling_config = 0;
+ u32 cc_rb_backend_disable = 0;
+ u32 cc_gc_shader_pipe_config = 0;
+ u32 mc_arb_ramcfg;
+ u32 db_debug4;
- /* FIXME: implement */
+ /* setup chip specs */
+ switch (rdev->family) {
+ case CHIP_RV770:
+ rdev->config.rv770.max_pipes = 4;
+ rdev->config.rv770.max_tile_pipes = 8;
+ rdev->config.rv770.max_simds = 10;
+ rdev->config.rv770.max_backends = 4;
+ rdev->config.rv770.max_gprs = 256;
+ rdev->config.rv770.max_threads = 248;
+ rdev->config.rv770.max_stack_entries = 512;
+ rdev->config.rv770.max_hw_contexts = 8;
+ rdev->config.rv770.max_gs_threads = 16 * 2;
+ rdev->config.rv770.sx_max_export_size = 128;
+ rdev->config.rv770.sx_max_export_pos_size = 16;
+ rdev->config.rv770.sx_max_export_smx_size = 112;
+ rdev->config.rv770.sq_num_cf_insts = 2;
+
+ rdev->config.rv770.sx_num_of_sets = 7;
+ rdev->config.rv770.sc_prim_fifo_size = 0xF9;
+ rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+ rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+ break;
+ case CHIP_RV730:
+ rdev->config.rv770.max_pipes = 2;
+ rdev->config.rv770.max_tile_pipes = 4;
+ rdev->config.rv770.max_simds = 8;
+ rdev->config.rv770.max_backends = 2;
+ rdev->config.rv770.max_gprs = 128;
+ rdev->config.rv770.max_threads = 248;
+ rdev->config.rv770.max_stack_entries = 256;
+ rdev->config.rv770.max_hw_contexts = 8;
+ rdev->config.rv770.max_gs_threads = 16 * 2;
+ rdev->config.rv770.sx_max_export_size = 256;
+ rdev->config.rv770.sx_max_export_pos_size = 32;
+ rdev->config.rv770.sx_max_export_smx_size = 224;
+ rdev->config.rv770.sq_num_cf_insts = 2;
+
+ rdev->config.rv770.sx_num_of_sets = 7;
+ rdev->config.rv770.sc_prim_fifo_size = 0xf9;
+ rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+ rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+ if (rdev->config.rv770.sx_max_export_pos_size > 16) {
+ rdev->config.rv770.sx_max_export_pos_size -= 16;
+ rdev->config.rv770.sx_max_export_smx_size += 16;
+ }
+ break;
+ case CHIP_RV710:
+ rdev->config.rv770.max_pipes = 2;
+ rdev->config.rv770.max_tile_pipes = 2;
+ rdev->config.rv770.max_simds = 2;
+ rdev->config.rv770.max_backends = 1;
+ rdev->config.rv770.max_gprs = 256;
+ rdev->config.rv770.max_threads = 192;
+ rdev->config.rv770.max_stack_entries = 256;
+ rdev->config.rv770.max_hw_contexts = 4;
+ rdev->config.rv770.max_gs_threads = 8 * 2;
+ rdev->config.rv770.sx_max_export_size = 128;
+ rdev->config.rv770.sx_max_export_pos_size = 16;
+ rdev->config.rv770.sx_max_export_smx_size = 112;
+ rdev->config.rv770.sq_num_cf_insts = 1;
+
+ rdev->config.rv770.sx_num_of_sets = 7;
+ rdev->config.rv770.sc_prim_fifo_size = 0x40;
+ rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+ rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+ break;
+ case CHIP_RV740:
+ rdev->config.rv770.max_pipes = 4;
+ rdev->config.rv770.max_tile_pipes = 4;
+ rdev->config.rv770.max_simds = 8;
+ rdev->config.rv770.max_backends = 4;
+ rdev->config.rv770.max_gprs = 256;
+ rdev->config.rv770.max_threads = 248;
+ rdev->config.rv770.max_stack_entries = 512;
+ rdev->config.rv770.max_hw_contexts = 8;
+ rdev->config.rv770.max_gs_threads = 16 * 2;
+ rdev->config.rv770.sx_max_export_size = 256;
+ rdev->config.rv770.sx_max_export_pos_size = 32;
+ rdev->config.rv770.sx_max_export_smx_size = 224;
+ rdev->config.rv770.sq_num_cf_insts = 2;
+
+ rdev->config.rv770.sx_num_of_sets = 7;
+ rdev->config.rv770.sc_prim_fifo_size = 0x100;
+ rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+ rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+
+ if (rdev->config.rv770.sx_max_export_pos_size > 16) {
+ rdev->config.rv770.sx_max_export_pos_size -= 16;
+ rdev->config.rv770.sx_max_export_smx_size += 16;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Initialize HDP */
+ j = 0;
+ for (i = 0; i < 32; i++) {
+ WREG32((0x2c14 + j), 0x00000000);
+ WREG32((0x2c18 + j), 0x00000000);
+ WREG32((0x2c1c + j), 0x00000000);
+ WREG32((0x2c20 + j), 0x00000000);
+ WREG32((0x2c24 + j), 0x00000000);
+ j += 0x18;
+ }
+
+ WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+ /* setup tiling, simd, pipe config */
+ mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
+
+ switch (rdev->config.rv770.max_tile_pipes) {
+ case 1:
+ gb_tiling_config |= PIPE_TILING(0);
+ break;
+ case 2:
+ gb_tiling_config |= PIPE_TILING(1);
+ break;
+ case 4:
+ gb_tiling_config |= PIPE_TILING(2);
+ break;
+ case 8:
+ gb_tiling_config |= PIPE_TILING(3);
+ break;
+ default:
+ break;
+ }
+
+ if (rdev->family == CHIP_RV770)
+ gb_tiling_config |= BANK_TILING(1);
+ else
+ gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_SHIFT) >> NOOFBANK_MASK);
+
+ gb_tiling_config |= GROUP_SIZE(0);
+
+ if (((mc_arb_ramcfg & NOOFROWS_MASK) & NOOFROWS_SHIFT) > 3) {
+ gb_tiling_config |= ROW_TILING(3);
+ gb_tiling_config |= SAMPLE_SPLIT(3);
+ } else {
+ gb_tiling_config |=
+ ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+ gb_tiling_config |=
+ SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+ }
+
+ gb_tiling_config |= BANK_SWAPS(1);
+
+ backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
+ rdev->config.rv770.max_backends,
+ (0xff << rdev->config.rv770.max_backends) & 0xff);
+ gb_tiling_config |= BACKEND_MAP(backend_map);
+
+ cc_gc_shader_pipe_config =
+ INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
+ cc_gc_shader_pipe_config |=
+ INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
+
+ cc_rb_backend_disable =
+ BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
+
+ WREG32(GB_TILING_CONFIG, gb_tiling_config);
+ WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+ WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+
+ WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+ WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
+ WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
+
+ WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+ WREG32(CGTS_SYS_TCC_DISABLE, 0);
+ WREG32(CGTS_TCC_DISABLE, 0);
+ WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
+ WREG32(CGTS_USER_TCC_DISABLE, 0);
+
+ num_qd_pipes =
+ R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK);
+ WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
+ WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+ /* set HW defaults for 3D engine */
+ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
+ ROQ_IB2_START(0x2b)));
+
+ WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
+
+ WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO |
+ SYNC_GRADIENT |
+ SYNC_WALKER |
+ SYNC_ALIGNER));
+
+ sx_debug_1 = RREG32(SX_DEBUG_1);
+ sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
+ WREG32(SX_DEBUG_1, sx_debug_1);
+
+ smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
+ smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);
+ smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
+ WREG32(SMX_DC_CTL0, smx_dc_ctl0);
+
+ WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
+ GS_FLUSH_CTL(4) |
+ ACK_FLUSH_CTL(3) |
+ SYNC_FLUSH_CTL));
+
+ if (rdev->family == CHIP_RV770)
+ WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f));
+ else {
+ db_debug4 = RREG32(DB_DEBUG4);
+ db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
+ WREG32(DB_DEBUG4, db_debug4);
+ }
+
+ WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
+ POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
+ SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
+
+ WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
+ SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
+ SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
+
+ WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+ WREG32(VGT_NUM_INSTANCES, 1);
+
+ WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+
+ WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
+
+ WREG32(CP_PERFMON_CNTL, 0);
+
+ sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
+ DONE_FIFO_HIWATER(0xe0) |
+ ALU_UPDATE_FIFO_HIWATER(0x8));
+ switch (rdev->family) {
+ case CHIP_RV770:
+ sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
+ break;
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ default:
+ sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
+ break;
+ }
+ WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
+
+ /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+ * should be adjusted as needed by the 2D/3D drivers. This just sets default values
+ */
+ sq_config = RREG32(SQ_CONFIG);
+ sq_config &= ~(PS_PRIO(3) |
+ VS_PRIO(3) |
+ GS_PRIO(3) |
+ ES_PRIO(3));
+ sq_config |= (DX9_CONSTS |
+ VC_ENABLE |
+ EXPORT_SRC_C |
+ PS_PRIO(0) |
+ VS_PRIO(1) |
+ GS_PRIO(2) |
+ ES_PRIO(3));
+ if (rdev->family == CHIP_RV710)
+ /* no vertex cache */
+ sq_config &= ~VC_ENABLE;
+
+ WREG32(SQ_CONFIG, sq_config);
+
+ WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+ NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+ NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
+
+ WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
+ NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
+
+ sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
+ NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
+ NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
+ if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
+ sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
+ else
+ sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);
+ WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+
+ WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+ NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+ WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+ NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+ sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+ SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
+ SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+ SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
+
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
+ WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
+
+ WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
+ FORCE_EOV_MAX_REZ_CNT(255)));
+
+ if (rdev->family == CHIP_RV710)
+ WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
+ AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+ else
+ WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
+ AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+
+ switch (rdev->family) {
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV740:
+ gs_prim_buffer_depth = 384;
+ break;
+ case CHIP_RV710:
+ gs_prim_buffer_depth = 128;
+ break;
+ default:
+ break;
+ }
+
+ num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
+ vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
+ /* Max value for this is 256 */
+ if (vgt_gs_per_es > 256)
+ vgt_gs_per_es = 256;
+
+ WREG32(VGT_ES_PER_GS, 128);
+ WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
+ WREG32(VGT_GS_PER_VS, 2);
+
+ /* more default values. 2D/3D driver should adjust as needed */
+ WREG32(VGT_GS_VERTEX_REUSE, 16);
+ WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+ WREG32(VGT_STRMOUT_EN, 0);
+ WREG32(SX_MISC, 0);
+ WREG32(PA_SC_MODE_CNTL, 0);
+ WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
+ WREG32(PA_SC_AA_CONFIG, 0);
+ WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
+ WREG32(PA_SC_LINE_STIPPLE, 0);
+ WREG32(SPI_INPUT_Z, 0);
+ WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+ WREG32(CB_COLOR7_FRAG, 0);
+
+ /* clear render buffer base addresses */
+ WREG32(CB_COLOR0_BASE, 0);
+ WREG32(CB_COLOR1_BASE, 0);
+ WREG32(CB_COLOR2_BASE, 0);
+ WREG32(CB_COLOR3_BASE, 0);
+ WREG32(CB_COLOR4_BASE, 0);
+ WREG32(CB_COLOR5_BASE, 0);
+ WREG32(CB_COLOR6_BASE, 0);
+ WREG32(CB_COLOR7_BASE, 0);
+
+ WREG32(TCP_CNTL, 0);
+
+ hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
+ WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
+
+ WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+ WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+ NUM_CLIP_SEQ(3)));
+
+}
+
+int rv770_mc_init(struct radeon_device *rdev)
+{
+ fixed20_12 a;
+ u32 tmp;
+ int r;
+
+ /* Get VRAM informations */
+ /* FIXME: Don't know how to determine vram width, need to check
+ * vram_width usage
+ */
+ rdev->mc.vram_width = 128;
+ rdev->mc.vram_is_ddr = true;
/* Could aper size report 0 ? */
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+ /* Setup GPU memory space */
+ rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+ rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+ if (rdev->flags & RADEON_IS_AGP) {
+ r = radeon_agp_init(rdev);
+ if (r)
+ return r;
+ /* gtt_size is setup by radeon_agp_init */
+ rdev->mc.gtt_location = rdev->mc.agp_base;
+ tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+ /* Try to put vram before or after AGP because we
+ * we want SYSTEM_APERTURE to cover both VRAM and
+ * AGP so that GPU can catch out of VRAM/AGP access
+ */
+ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+ /* Enought place before */
+ rdev->mc.vram_location = rdev->mc.gtt_location -
+ rdev->mc.mc_vram_size;
+ } else if (tmp > rdev->mc.mc_vram_size) {
+ /* Enought place after */
+ rdev->mc.vram_location = rdev->mc.gtt_location +
+ rdev->mc.gtt_size;
+ } else {
+ /* Try to setup VRAM then AGP might not
+ * not work on some card
+ */
+ rdev->mc.vram_location = 0x00000000UL;
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+ }
+ } else {
+ rdev->mc.vram_location = 0x00000000UL;
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+ rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+ }
+ rdev->mc.vram_start = rdev->mc.vram_location;
+ rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+ rdev->mc.gtt_start = rdev->mc.gtt_location;
+ rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+ /* FIXME: we should enforce default clock in case GPU is not in
+ * default setup
+ */
+ a.full = rfixed_const(100);
+ rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+ rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+ return 0;
+}
+int rv770_gpu_reset(struct radeon_device *rdev)
+{
+ /* FIXME: implement */
+ return 0;
+}
+
+int rv770_resume(struct radeon_device *rdev)
+{
+ int r;
+
+ rv770_mc_resume(rdev);
+ r = rv770_pcie_gart_enable(rdev);
+ if (r)
+ return r;
+ rv770_gpu_init(rdev);
+ r = radeon_ring_init(rdev, rdev->cp.ring_size);
+ if (r)
+ return r;
+ r = rv770_cp_load_microcode(rdev);
+ if (r)
+ return r;
+ r = r600_cp_resume(rdev);
+ if (r)
+ return r;
+ r = r600_wb_init(rdev);
+ if (r)
+ return r;
+ return 0;
+}
+
+int rv770_suspend(struct radeon_device *rdev)
+{
+ /* FIXME: we should wait for ring to be empty */
+ r700_cp_stop(rdev);
+ return 0;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int rv770_init(struct radeon_device *rdev)
+{
+ int r;
+
+ rdev->new_init_path = true;
+ r = radeon_dummy_page_init(rdev);
+ if (r)
+ return r;
+ /* This don't do much */
+ r = radeon_gem_init(rdev);
+ if (r)
+ return r;
+ /* Read BIOS */
+ if (!radeon_get_bios(rdev)) {
+ if (ASIC_IS_AVIVO(rdev))
+ return -EINVAL;
+ }
+ /* Must be an ATOMBIOS */
+ if (!rdev->is_atom_bios)
+ return -EINVAL;
+ r = radeon_atombios_init(rdev);
+ if (r)
+ return r;
+ /* Post card if necessary */
+ if (!r600_card_posted(rdev) && rdev->bios) {
+ DRM_INFO("GPU not posted. posting now...\n");
+ atom_asic_init(rdev->mode_info.atom_context);
+ }
+ /* Initialize scratch registers */
+ r600_scratch_init(rdev);
+ /* Initialize surface registers */
+ radeon_surface_init(rdev);
+ r = radeon_clocks_init(rdev);
+ if (r)
+ return r;
+ /* Fence driver */
+ r = radeon_fence_driver_init(rdev);
+ if (r)
+ return r;
+ r = rv770_mc_init(rdev);
+ if (r) {
+ if (rdev->flags & RADEON_IS_AGP) {
+ /* Retry with disabling AGP */
+ rv770_fini(rdev);
+ rdev->flags &= ~RADEON_IS_AGP;
+ return rv770_init(rdev);
+ }
+ return r;
+ }
+ /* Memory manager */
+ r = radeon_object_init(rdev);
+ if (r)
+ return r;
+ rdev->cp.ring_obj = NULL;
+ r600_ring_init(rdev, 1024 * 1024);
+
+ if (!rdev->me_fw || !rdev->pfp_fw) {
+ r = r600_cp_init_microcode(rdev);
+ if (r) {
+ DRM_ERROR("Failed to load firmware!\n");
+ return r;
+ }
+ }
+
+ r = rv770_resume(rdev);
+ if (r) {
+ if (rdev->flags & RADEON_IS_AGP) {
+ /* Retry with disabling AGP */
+ rv770_fini(rdev);
+ rdev->flags &= ~RADEON_IS_AGP;
+ return rv770_init(rdev);
+ }
+ return r;
+ }
+ r = r600_blit_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled blitter (%d).\n", r);
+ return r;
+ }
+ r = radeon_ib_pool_init(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+ return r;
+ }
+ r = radeon_ib_test(rdev);
+ if (r) {
+ DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+ return r;
+ }
+ return 0;
+}
+
+void rv770_fini(struct radeon_device *rdev)
+{
+ r600_blit_fini(rdev);
+ radeon_ring_fini(rdev);
+ rv770_pcie_gart_disable(rdev);
+ radeon_gart_table_vram_free(rdev);
+ radeon_gart_fini(rdev);
+ radeon_gem_fini(rdev);
+ radeon_fence_driver_fini(rdev);
+ radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+ if (rdev->flags & RADEON_IS_AGP)
+ radeon_agp_fini(rdev);
+#endif
+ radeon_object_fini(rdev);
+ if (rdev->is_atom_bios) {
+ radeon_atombios_fini(rdev);
+ } else {
+ radeon_combios_fini(rdev);
+ }
+ kfree(rdev->bios);
+ rdev->bios = NULL;
+ radeon_dummy_page_fini(rdev);
}
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
new file mode 100644
index 0000000..4b9c3d6
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef RV770_H
+#define RV770_H
+
+#define R7XX_MAX_SH_GPRS 256
+#define R7XX_MAX_TEMP_GPRS 16
+#define R7XX_MAX_SH_THREADS 256
+#define R7XX_MAX_SH_STACK_ENTRIES 4096
+#define R7XX_MAX_BACKENDS 8
+#define R7XX_MAX_BACKENDS_MASK 0xff
+#define R7XX_MAX_SIMDS 16
+#define R7XX_MAX_SIMDS_MASK 0xffff
+#define R7XX_MAX_PIPES 8
+#define R7XX_MAX_PIPES_MASK 0xff
+
+/* Registers */
+#define CB_COLOR0_BASE 0x28040
+#define CB_COLOR1_BASE 0x28044
+#define CB_COLOR2_BASE 0x28048
+#define CB_COLOR3_BASE 0x2804C
+#define CB_COLOR4_BASE 0x28050
+#define CB_COLOR5_BASE 0x28054
+#define CB_COLOR6_BASE 0x28058
+#define CB_COLOR7_BASE 0x2805C
+#define CB_COLOR7_FRAG 0x280FC
+
+#define CC_GC_SHADER_PIPE_CONFIG 0x8950
+#define CC_RB_BACKEND_DISABLE 0x98F4
+#define BACKEND_DISABLE(x) ((x) << 16)
+#define CC_SYS_RB_BACKEND_DISABLE 0x3F88
+
+#define CGTS_SYS_TCC_DISABLE 0x3F90
+#define CGTS_TCC_DISABLE 0x9148
+#define CGTS_USER_SYS_TCC_DISABLE 0x3F94
+#define CGTS_USER_TCC_DISABLE 0x914C
+
+#define CONFIG_MEMSIZE 0x5428
+
+#define CP_ME_CNTL 0x86D8
+#define CP_ME_HALT (1<<28)
+#define CP_PFP_HALT (1<<26)
+#define CP_ME_RAM_DATA 0xC160
+#define CP_ME_RAM_RADDR 0xC158
+#define CP_ME_RAM_WADDR 0xC15C
+#define CP_MEQ_THRESHOLDS 0x8764
+#define STQ_SPLIT(x) ((x) << 0)
+#define CP_PERFMON_CNTL 0x87FC
+#define CP_PFP_UCODE_ADDR 0xC150
+#define CP_PFP_UCODE_DATA 0xC154
+#define CP_QUEUE_THRESHOLDS 0x8760
+#define ROQ_IB1_START(x) ((x) << 0)
+#define ROQ_IB2_START(x) ((x) << 8)
+#define CP_RB_CNTL 0xC104
+#define RB_BUFSZ(x) ((x)<<0)
+#define RB_BLKSZ(x) ((x)<<8)
+#define RB_NO_UPDATE (1<<27)
+#define RB_RPTR_WR_ENA (1<<31)
+#define BUF_SWAP_32BIT (2 << 16)
+#define CP_RB_RPTR 0x8700
+#define CP_RB_RPTR_ADDR 0xC10C
+#define CP_RB_RPTR_ADDR_HI 0xC110
+#define CP_RB_RPTR_WR 0xC108
+#define CP_RB_WPTR 0xC114
+#define CP_RB_WPTR_ADDR 0xC118
+#define CP_RB_WPTR_ADDR_HI 0xC11C
+#define CP_RB_WPTR_DELAY 0x8704
+#define CP_SEM_WAIT_TIMER 0x85BC
+
+#define DB_DEBUG3 0x98B0
+#define DB_CLK_OFF_DELAY(x) ((x) << 11)
+#define DB_DEBUG4 0x9B8C
+#define DISABLE_TILE_COVERED_FOR_PS_ITER (1 << 6)
+
+#define DCP_TILING_CONFIG 0x6CA0
+#define PIPE_TILING(x) ((x) << 1)
+#define BANK_TILING(x) ((x) << 4)
+#define GROUP_SIZE(x) ((x) << 6)
+#define ROW_TILING(x) ((x) << 8)
+#define BANK_SWAPS(x) ((x) << 11)
+#define SAMPLE_SPLIT(x) ((x) << 14)
+#define BACKEND_MAP(x) ((x) << 16)
+
+#define GB_TILING_CONFIG 0x98F0
+
+#define GC_USER_SHADER_PIPE_CONFIG 0x8954
+#define INACTIVE_QD_PIPES(x) ((x) << 8)
+#define INACTIVE_QD_PIPES_MASK 0x0000FF00
+#define INACTIVE_SIMDS(x) ((x) << 16)
+#define INACTIVE_SIMDS_MASK 0x00FF0000
+
+#define GRBM_CNTL 0x8000
+#define GRBM_READ_TIMEOUT(x) ((x) << 0)
+#define GRBM_SOFT_RESET 0x8020
+#define SOFT_RESET_CP (1<<0)
+#define GRBM_STATUS 0x8010
+#define CMDFIFO_AVAIL_MASK 0x0000000F
+#define GUI_ACTIVE (1<<31)
+#define GRBM_STATUS2 0x8014
+
+#define HDP_HOST_PATH_CNTL 0x2C00
+#define HDP_NONSURFACE_BASE 0x2C04
+#define HDP_NONSURFACE_INFO 0x2C08
+#define HDP_NONSURFACE_SIZE 0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0
+#define HDP_TILING_CONFIG 0x2F3C
+
+#define MC_ARB_RAMCFG 0x2760
+#define NOOFBANK_SHIFT 0
+#define NOOFBANK_MASK 0x00000003
+#define NOOFRANK_SHIFT 2
+#define NOOFRANK_MASK 0x00000004
+#define NOOFROWS_SHIFT 3
+#define NOOFROWS_MASK 0x00000038
+#define NOOFCOLS_SHIFT 6
+#define NOOFCOLS_MASK 0x000000C0
+#define CHANSIZE_SHIFT 8
+#define CHANSIZE_MASK 0x00000100
+#define BURSTLENGTH_SHIFT 9
+#define BURSTLENGTH_MASK 0x00000200
+#define MC_VM_AGP_TOP 0x2028
+#define MC_VM_AGP_BOT 0x202C
+#define MC_VM_AGP_BASE 0x2030
+#define MC_VM_FB_LOCATION 0x2024
+#define MC_VM_MB_L1_TLB0_CNTL 0x2234
+#define MC_VM_MB_L1_TLB1_CNTL 0x2238
+#define MC_VM_MB_L1_TLB2_CNTL 0x223C
+#define MC_VM_MB_L1_TLB3_CNTL 0x2240
+#define ENABLE_L1_TLB (1 << 0)
+#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
+#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3)
+#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3)
+#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3)
+#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3)
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5)
+#define EFFECTIVE_L1_TLB_SIZE(x) ((x)<<15)
+#define EFFECTIVE_L1_QUEUE_SIZE(x) ((x)<<18)
+#define MC_VM_MD_L1_TLB0_CNTL 0x2654
+#define MC_VM_MD_L1_TLB1_CNTL 0x2658
+#define MC_VM_MD_L1_TLB2_CNTL 0x265C
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034
+
+#define PA_CL_ENHANCE 0x8A14
+#define CLIP_VTX_REORDER_ENA (1 << 0)
+#define NUM_CLIP_SEQ(x) ((x) << 1)
+#define PA_SC_AA_CONFIG 0x28C04
+#define PA_SC_CLIPRECT_RULE 0x2820C
+#define PA_SC_EDGERULE 0x28230
+#define PA_SC_FIFO_SIZE 0x8BCC
+#define SC_PRIM_FIFO_SIZE(x) ((x) << 0)
+#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 12)
+#define PA_SC_FORCE_EOV_MAX_CNTS 0x8B24
+#define FORCE_EOV_MAX_CLK_CNT(x) ((x)<<0)
+#define FORCE_EOV_MAX_REZ_CNT(x) ((x)<<16)
+#define PA_SC_LINE_STIPPLE 0x28A0C
+#define PA_SC_LINE_STIPPLE_STATE 0x8B10
+#define PA_SC_MODE_CNTL 0x28A4C
+#define PA_SC_MULTI_CHIP_CNTL 0x8B20
+#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 20)
+
+#define SCRATCH_REG0 0x8500
+#define SCRATCH_REG1 0x8504
+#define SCRATCH_REG2 0x8508
+#define SCRATCH_REG3 0x850C
+#define SCRATCH_REG4 0x8510
+#define SCRATCH_REG5 0x8514
+#define SCRATCH_REG6 0x8518
+#define SCRATCH_REG7 0x851C
+#define SCRATCH_UMSK 0x8540
+#define SCRATCH_ADDR 0x8544
+
+#define SMX_DC_CTL0 0xA020
+#define USE_HASH_FUNCTION (1 << 0)
+#define CACHE_DEPTH(x) ((x) << 1)
+#define FLUSH_ALL_ON_EVENT (1 << 10)
+#define STALL_ON_EVENT (1 << 11)
+#define SMX_EVENT_CTL 0xA02C
+#define ES_FLUSH_CTL(x) ((x) << 0)
+#define GS_FLUSH_CTL(x) ((x) << 3)
+#define ACK_FLUSH_CTL(x) ((x) << 6)
+#define SYNC_FLUSH_CTL (1 << 8)
+
+#define SPI_CONFIG_CNTL 0x9100
+#define GPR_WRITE_PRIORITY(x) ((x) << 0)
+#define DISABLE_INTERP_1 (1 << 5)
+#define SPI_CONFIG_CNTL_1 0x913C
+#define VTX_DONE_DELAY(x) ((x) << 0)
+#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
+#define SPI_INPUT_Z 0x286D8
+#define SPI_PS_IN_CONTROL_0 0x286CC
+#define NUM_INTERP(x) ((x)<<0)
+#define POSITION_ENA (1<<8)
+#define POSITION_CENTROID (1<<9)
+#define POSITION_ADDR(x) ((x)<<10)
+#define PARAM_GEN(x) ((x)<<15)
+#define PARAM_GEN_ADDR(x) ((x)<<19)
+#define BARYC_SAMPLE_CNTL(x) ((x)<<26)
+#define PERSP_GRADIENT_ENA (1<<28)
+#define LINEAR_GRADIENT_ENA (1<<29)
+#define POSITION_SAMPLE (1<<30)
+#define BARYC_AT_SAMPLE_ENA (1<<31)
+
+#define SQ_CONFIG 0x8C00
+#define VC_ENABLE (1 << 0)
+#define EXPORT_SRC_C (1 << 1)
+#define DX9_CONSTS (1 << 2)
+#define ALU_INST_PREFER_VECTOR (1 << 3)
+#define DX10_CLAMP (1 << 4)
+#define CLAUSE_SEQ_PRIO(x) ((x) << 8)
+#define PS_PRIO(x) ((x) << 24)
+#define VS_PRIO(x) ((x) << 26)
+#define GS_PRIO(x) ((x) << 28)
+#define SQ_DYN_GPR_SIZE_SIMD_AB_0 0x8DB0
+#define SIMDA_RING0(x) ((x)<<0)
+#define SIMDA_RING1(x) ((x)<<8)
+#define SIMDB_RING0(x) ((x)<<16)
+#define SIMDB_RING1(x) ((x)<<24)
+#define SQ_DYN_GPR_SIZE_SIMD_AB_1 0x8DB4
+#define SQ_DYN_GPR_SIZE_SIMD_AB_2 0x8DB8
+#define SQ_DYN_GPR_SIZE_SIMD_AB_3 0x8DBC
+#define SQ_DYN_GPR_SIZE_SIMD_AB_4 0x8DC0
+#define SQ_DYN_GPR_SIZE_SIMD_AB_5 0x8DC4
+#define SQ_DYN_GPR_SIZE_SIMD_AB_6 0x8DC8
+#define SQ_DYN_GPR_SIZE_SIMD_AB_7 0x8DCC
+#define ES_PRIO(x) ((x) << 30)
+#define SQ_GPR_RESOURCE_MGMT_1 0x8C04
+#define NUM_PS_GPRS(x) ((x) << 0)
+#define NUM_VS_GPRS(x) ((x) << 16)
+#define DYN_GPR_ENABLE (1 << 27)
+#define NUM_CLAUSE_TEMP_GPRS(x) ((x) << 28)
+#define SQ_GPR_RESOURCE_MGMT_2 0x8C08
+#define NUM_GS_GPRS(x) ((x) << 0)
+#define NUM_ES_GPRS(x) ((x) << 16)
+#define SQ_MS_FIFO_SIZES 0x8CF0
+#define CACHE_FIFO_SIZE(x) ((x) << 0)
+#define FETCH_FIFO_HIWATER(x) ((x) << 8)
+#define DONE_FIFO_HIWATER(x) ((x) << 16)
+#define ALU_UPDATE_FIFO_HIWATER(x) ((x) << 24)
+#define SQ_STACK_RESOURCE_MGMT_1 0x8C10
+#define NUM_PS_STACK_ENTRIES(x) ((x) << 0)
+#define NUM_VS_STACK_ENTRIES(x) ((x) << 16)
+#define SQ_STACK_RESOURCE_MGMT_2 0x8C14
+#define NUM_GS_STACK_ENTRIES(x) ((x) << 0)
+#define NUM_ES_STACK_ENTRIES(x) ((x) << 16)
+#define SQ_THREAD_RESOURCE_MGMT 0x8C0C
+#define NUM_PS_THREADS(x) ((x) << 0)
+#define NUM_VS_THREADS(x) ((x) << 8)
+#define NUM_GS_THREADS(x) ((x) << 16)
+#define NUM_ES_THREADS(x) ((x) << 24)
+
+#define SX_DEBUG_1 0x9058
+#define ENABLE_NEW_SMX_ADDRESS (1 << 16)
+#define SX_EXPORT_BUFFER_SIZES 0x900C
+#define COLOR_BUFFER_SIZE(x) ((x) << 0)
+#define POSITION_BUFFER_SIZE(x) ((x) << 8)
+#define SMX_BUFFER_SIZE(x) ((x) << 16)
+#define SX_MISC 0x28350
+
+#define TA_CNTL_AUX 0x9508
+#define DISABLE_CUBE_WRAP (1 << 0)
+#define DISABLE_CUBE_ANISO (1 << 1)
+#define SYNC_GRADIENT (1 << 24)
+#define SYNC_WALKER (1 << 25)
+#define SYNC_ALIGNER (1 << 26)
+#define BILINEAR_PRECISION_6_BIT (0 << 31)
+#define BILINEAR_PRECISION_8_BIT (1 << 31)
+
+#define TCP_CNTL 0x9610
+
+#define VGT_CACHE_INVALIDATION 0x88C4
+#define CACHE_INVALIDATION(x) ((x)<<0)
+#define VC_ONLY 0
+#define TC_ONLY 1
+#define VC_AND_TC 2
+#define AUTO_INVLD_EN(x) ((x) << 6)
+#define NO_AUTO 0
+#define ES_AUTO 1
+#define GS_AUTO 2
+#define ES_AND_GS_AUTO 3
+#define VGT_ES_PER_GS 0x88CC
+#define VGT_GS_PER_ES 0x88C8
+#define VGT_GS_PER_VS 0x88E8
+#define VGT_GS_VERTEX_REUSE 0x88D4
+#define VGT_NUM_INSTANCES 0x8974
+#define VGT_OUT_DEALLOC_CNTL 0x28C5C
+#define DEALLOC_DIST_MASK 0x0000007F
+#define VGT_STRMOUT_EN 0x28AB0
+#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58
+#define VTX_REUSE_DEPTH_MASK 0x000000FF
+
+#define VM_CONTEXT0_CNTL 0x1410
+#define ENABLE_CONTEXT (1 << 0)
+#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x1518
+#define VM_L2_CNTL 0x1400
+#define ENABLE_L2_CACHE (1 << 0)
+#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
+#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
+#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 14)
+#define VM_L2_CNTL2 0x1404
+#define INVALIDATE_ALL_L1_TLBS (1 << 0)
+#define INVALIDATE_L2_CACHE (1 << 1)
+#define VM_L2_CNTL3 0x1408
+#define BANK_SELECT(x) ((x) << 0)
+#define CACHE_UPDATE_MODE(x) ((x) << 6)
+#define VM_L2_STATUS 0x140C
+#define L2_BUSY (1 << 0)
+
+#define WAIT_UNTIL 0x8040
+
+#endif
OpenPOWER on IntegriCloud