summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/nouveau/core/engine/graph/fuc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-03 23:29:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-03 23:29:23 -0700
commit612a9aab56a93533e76e3ad91642db7033e03b69 (patch)
tree8402096973f67af941f9392f7da06cca03e0b58a /drivers/gpu/drm/nouveau/core/engine/graph/fuc
parent3a494318b14b1bc0f59d2d6ce84c505c74d82d2a (diff)
parent268d28371cd326be4dfcd7eba5917bf4b9d30c8f (diff)
downloadop-kernel-dev-612a9aab56a93533e76e3ad91642db7033e03b69.zip
op-kernel-dev-612a9aab56a93533e76e3ad91642db7033e03b69.tar.gz
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm merge (part 1) from Dave Airlie: "So first of all my tree and uapi stuff has a conflict mess, its my fault as the nouveau stuff didn't hit -next as were trying to rebase regressions out of it before we merged. Highlights: - SH mobile modesetting driver and associated helpers - some DRM core documentation - i915 modesetting rework, haswell hdmi, haswell and vlv fixes, write combined pte writing, ilk rc6 support, - nouveau: major driver rework into a hw core driver, makes features like SLI a lot saner to implement, - psb: add eDP/DP support for Cedarview - radeon: 2 layer page tables, async VM pte updates, better PLL selection for > 2 screens, better ACPI interactions The rest is general grab bag of fixes. So why part 1? well I have the exynos pull req which came in a bit late but was waiting for me to do something they shouldn't have and it looks fairly safe, and David Howells has some more header cleanups he'd like me to pull, that seem like a good idea, but I'd like to get this merge out of the way so -next dosen't get blocked." Tons of conflicts mostly due to silly include line changes, but mostly mindless. A few other small semantic conflicts too, noted from Dave's pre-merged branch. * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (447 commits) drm/nv98/crypt: fix fuc build with latest envyas drm/nouveau/devinit: fixup various issues with subdev ctor/init ordering drm/nv41/vm: fix and enable use of "real" pciegart drm/nv44/vm: fix and enable use of "real" pciegart drm/nv04/dmaobj: fixup vm target handling in preparation for nv4x pcie drm/nouveau: store supported dma mask in vmmgr drm/nvc0/ibus: initial implementation of subdev drm/nouveau/therm: add support for fan-control modes drm/nouveau/hwmon: rename pwm0* to pmw1* to follow hwmon's rules drm/nouveau/therm: calculate the pwm divisor on nv50+ drm/nouveau/fan: rewrite the fan tachometer driver to get more precision, faster drm/nouveau/therm: move thermal-related functions to the therm subdev drm/nouveau/bios: parse the pwm divisor from the perf table drm/nouveau/therm: use the EXTDEV table to detect i2c monitoring devices drm/nouveau/therm: rework thermal table parsing drm/nouveau/gpio: expose the PWM/TOGGLE parameter found in the gpio vbios table drm/nouveau: fix pm initialization order drm/nouveau/bios: check that fixed tvdac gpio data is valid before using it drm/nouveau: log channel debug/error messages from client object rather than drm client drm/nouveau: have drm debugging macros build on top of core macros ...
Diffstat (limited to 'drivers/gpu/drm/nouveau/core/engine/graph/fuc')
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc539
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h604
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc451
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h530
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc856
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h927
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc780
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h857
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc400
-rw-r--r--drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc400
10 files changed, 6344 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc
new file mode 100644
index 0000000..b86cc60
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc
@@ -0,0 +1,539 @@
+/* fuc microcode for nvc0 PGRAPH/GPC
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* To build:
+ * m4 gpcnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o gpcnvc0.fuc.h
+ */
+
+/* TODO
+ * - bracket certain functions with scratch writes, useful for debugging
+ * - watchdog timer around ctx operations
+ */
+
+.section #nvc0_grgpc_data
+include(`nvc0.fuc')
+gpc_id: .b32 0
+gpc_mmio_list_head: .b32 0
+gpc_mmio_list_tail: .b32 0
+
+tpc_count: .b32 0
+tpc_mask: .b32 0
+tpc_mmio_list_head: .b32 0
+tpc_mmio_list_tail: .b32 0
+
+cmd_queue: queue_init
+
+// chipset descriptions
+chipsets:
+.b8 0xc0 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc0_tpc_mmio_tail
+.b8 0xc1 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc1_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc1_tpc_mmio_tail
+.b8 0xc3 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
+.b8 0xc4 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
+.b8 0xc8 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc0_tpc_mmio_tail
+.b8 0xce 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvc3_tpc_mmio_tail
+.b8 0xcf 0 0 0
+.b16 #nvc0_gpc_mmio_head
+.b16 #nvc0_gpc_mmio_tail
+.b16 #nvc0_tpc_mmio_head
+.b16 #nvcf_tpc_mmio_tail
+.b8 0xd9 0 0 0
+.b16 #nvd9_gpc_mmio_head
+.b16 #nvd9_gpc_mmio_tail
+.b16 #nvd9_tpc_mmio_head
+.b16 #nvd9_tpc_mmio_tail
+.b8 0 0 0 0
+
+// GPC mmio lists
+nvc0_gpc_mmio_head:
+mmctx_data(0x000380, 1)
+mmctx_data(0x000400, 6)
+mmctx_data(0x000450, 9)
+mmctx_data(0x000600, 1)
+mmctx_data(0x000684, 1)
+mmctx_data(0x000700, 5)
+mmctx_data(0x000800, 1)
+mmctx_data(0x000808, 3)
+mmctx_data(0x000828, 1)
+mmctx_data(0x000830, 1)
+mmctx_data(0x0008d8, 1)
+mmctx_data(0x0008e0, 1)
+mmctx_data(0x0008e8, 6)
+mmctx_data(0x00091c, 1)
+mmctx_data(0x000924, 3)
+mmctx_data(0x000b00, 1)
+mmctx_data(0x000b08, 6)
+mmctx_data(0x000bb8, 1)
+mmctx_data(0x000c08, 1)
+mmctx_data(0x000c10, 8)
+mmctx_data(0x000c80, 1)
+mmctx_data(0x000c8c, 1)
+mmctx_data(0x001000, 3)
+mmctx_data(0x001014, 1)
+nvc0_gpc_mmio_tail:
+mmctx_data(0x000c6c, 1);
+nvc1_gpc_mmio_tail:
+
+nvd9_gpc_mmio_head:
+mmctx_data(0x000380, 1)
+mmctx_data(0x000400, 2)
+mmctx_data(0x00040c, 3)
+mmctx_data(0x000450, 9)
+mmctx_data(0x000600, 1)
+mmctx_data(0x000684, 1)
+mmctx_data(0x000700, 5)
+mmctx_data(0x000800, 1)
+mmctx_data(0x000808, 3)
+mmctx_data(0x000828, 1)
+mmctx_data(0x000830, 1)
+mmctx_data(0x0008d8, 1)
+mmctx_data(0x0008e0, 1)
+mmctx_data(0x0008e8, 6)
+mmctx_data(0x00091c, 1)
+mmctx_data(0x000924, 3)
+mmctx_data(0x000b00, 1)
+mmctx_data(0x000b08, 6)
+mmctx_data(0x000bb8, 1)
+mmctx_data(0x000c08, 1)
+mmctx_data(0x000c10, 8)
+mmctx_data(0x000c6c, 1)
+mmctx_data(0x000c80, 1)
+mmctx_data(0x000c8c, 1)
+mmctx_data(0x001000, 3)
+mmctx_data(0x001014, 1)
+nvd9_gpc_mmio_tail:
+
+// TPC mmio lists
+nvc0_tpc_mmio_head:
+mmctx_data(0x000018, 1)
+mmctx_data(0x00003c, 1)
+mmctx_data(0x000048, 1)
+mmctx_data(0x000064, 1)
+mmctx_data(0x000088, 1)
+mmctx_data(0x000200, 6)
+mmctx_data(0x00021c, 2)
+mmctx_data(0x000300, 6)
+mmctx_data(0x0003d0, 1)
+mmctx_data(0x0003e0, 2)
+mmctx_data(0x000400, 3)
+mmctx_data(0x000420, 1)
+mmctx_data(0x0004b0, 1)
+mmctx_data(0x0004e8, 1)
+mmctx_data(0x0004f4, 1)
+mmctx_data(0x000520, 2)
+mmctx_data(0x000604, 4)
+mmctx_data(0x000644, 20)
+mmctx_data(0x000698, 1)
+mmctx_data(0x000750, 2)
+nvc0_tpc_mmio_tail:
+mmctx_data(0x000758, 1)
+mmctx_data(0x0002c4, 1)
+mmctx_data(0x0006e0, 1)
+nvcf_tpc_mmio_tail:
+mmctx_data(0x0004bc, 1)
+nvc3_tpc_mmio_tail:
+mmctx_data(0x000544, 1)
+nvc1_tpc_mmio_tail:
+
+nvd9_tpc_mmio_head:
+mmctx_data(0x000018, 1)
+mmctx_data(0x00003c, 1)
+mmctx_data(0x000048, 1)
+mmctx_data(0x000064, 1)
+mmctx_data(0x000088, 1)
+mmctx_data(0x000200, 6)
+mmctx_data(0x00021c, 2)
+mmctx_data(0x0002c4, 1)
+mmctx_data(0x000300, 6)
+mmctx_data(0x0003d0, 1)
+mmctx_data(0x0003e0, 2)
+mmctx_data(0x000400, 3)
+mmctx_data(0x000420, 3)
+mmctx_data(0x0004b0, 1)
+mmctx_data(0x0004e8, 1)
+mmctx_data(0x0004f4, 1)
+mmctx_data(0x000520, 2)
+mmctx_data(0x000544, 1)
+mmctx_data(0x000604, 4)
+mmctx_data(0x000644, 20)
+mmctx_data(0x000698, 1)
+mmctx_data(0x0006e0, 1)
+mmctx_data(0x000750, 3)
+nvd9_tpc_mmio_tail:
+
+.section #nvc0_grgpc_code
+bra #init
+define(`include_code')
+include(`nvc0.fuc')
+
+// reports an exception to the host
+//
+// In: $r15 error code (see nvc0.fuc)
+//
+error:
+ push $r14
+ mov $r14 -0x67ec // 0x9814
+ sethi $r14 0x400000
+ call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
+ add b32 $r14 0x41c
+ mov $r15 1
+ call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
+ pop $r14
+ ret
+
+// GPC fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+// CC_SCRATCH[1]: context base
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: GPC context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // enable fifo interrupt
+ mov $r2 4
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // enable interrupts
+ bset $flags ie0
+
+ // figure out which GPC we are, and how many TPCs we have
+ mov $r1 0x608
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x000] // UNITS
+ mov $r3 1
+ and $r2 0x1f
+ shl b32 $r3 $r2
+ sub b32 $r3 1
+ st b32 D[$r0 + #tpc_count] $r2
+ st b32 D[$r0 + #tpc_mask] $r3
+ add b32 $r1 0x400
+ iord $r2 I[$r1 + 0x000] // MYINDEX
+ st b32 D[$r0 + #gpc_id] $r2
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r1 #chipsets - 12
+ init_find_chipset:
+ add b32 $r1 12
+ ld b32 $r3 D[$r1 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // initialise context base, and size tracking
+ init_context:
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
+ clear b32 $r3 // track GPC context size here
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't currently ever change
+ mov $r4 0x700
+ shl b32 $r4 6
+ shr b32 $r5 $r2 8
+ iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
+ iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
+
+ // calculate GPC mmio context size, store the chipset-specific
+ // mmio list pointers somewhere we can get at them later without
+ // re-parsing the chipset list
+ clear b32 $r14
+ clear b32 $r15
+ ld b16 $r14 D[$r1 + 4]
+ ld b16 $r15 D[$r1 + 6]
+ st b16 D[$r0 + #gpc_mmio_list_head] $r14
+ st b16 D[$r0 + #gpc_mmio_list_tail] $r15
+ call #mmctx_size
+ add b32 $r2 $r15
+ add b32 $r3 $r15
+
+ // calculate per-TPC mmio context size, store the list pointers
+ ld b16 $r14 D[$r1 + 8]
+ ld b16 $r15 D[$r1 + 10]
+ st b16 D[$r0 + #tpc_mmio_list_head] $r14
+ st b16 D[$r0 + #tpc_mmio_list_tail] $r15
+ call #mmctx_size
+ ld b32 $r14 D[$r0 + #tpc_count]
+ mulu $r14 $r15
+ add b32 $r2 $r14
+ add b32 $r3 $r14
+
+ // round up base/size to 256 byte boundary (for strand SWBASE)
+ add b32 $r4 0x1300
+ shr b32 $r3 2
+ iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
+ shr b32 $r2 8
+ shr b32 $r3 6
+ add b32 $r2 1
+ add b32 $r3 1
+ shl b32 $r2 8
+ shl b32 $r3 8
+
+ // calculate size of strand context data
+ mov b32 $r15 $r2
+ call #strand_ctx_init
+ add b32 $r3 $r15
+
+ // save context size, and tell HUB we're done
+ mov $r1 0x800
+ shl b32 $r1 6
+ iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
+ add b32 $r1 0x800
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // 0x0000-0x0003 are all context transfers
+ cmpu b32 $r14 0x04
+ bra nc #main_not_ctx_xfer
+ // fetch $flags and mask off $p1/$p2
+ mov $r1 $flags
+ mov $r2 0x0006
+ not b32 $r2
+ and $r1 $r2
+ // set $p1/$p2 according to transfer type
+ shl b32 $r14 1
+ or $r1 $r14
+ mov $flags $r1
+ // transfer context data
+ call #ctx_xfer
+ bra #main
+
+ main_not_ctx_xfer:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // ack, and wake up main()
+ ih_no_fifo:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+// Set this GPC's bit in HUB_BAR, used to signal completion of various
+// activities to the HUB fuc
+//
+hub_barrier_done:
+ mov $r15 1
+ ld b32 $r14 D[$r0 + #gpc_id]
+ shl b32 $r15 $r14
+ mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
+ sethi $r14 0x400000
+ call #nv_wr32
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x020
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0xa20
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
+ ret
+
+// Transfer GPC context data between GPU and storage area
+//
+// In: $r15 context base address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ // set context base address
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r15// MEM_BASE
+ bra not $p1 #ctx_xfer_not_load
+ call #ctx_redswitch
+ ctx_xfer_not_load:
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 2 // first
+ mov $r11 0x0000
+ sethi $r11 0x500000
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
+ ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // per-TPC mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 4 // last
+ mov $r11 0x4000
+ sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
+ ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
+ ld b32 $r15 D[$r0 + #tpc_mask]
+ mov $r14 0x800 // stride = 0x800
+ call #mmctx_xfer
+
+ // wait for strands to finish
+ call #strand_wait
+
+ // if load, or a save without a load following, do some
+ // unknown stuff that's done after finishing a block of
+ // strand commands
+ bra $p1 #ctx_xfer_post
+ bra not $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xd
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
+ call #strand_wait
+
+ // mark completion in HUB's barrier
+ ctx_xfer_done:
+ call #hub_barrier_done
+ ret
+
+.align 256
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h
new file mode 100644
index 0000000..96050dd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h
@@ -0,0 +1,604 @@
+uint32_t nvc0_grgpc_data[] = {
+/* 0x0000: gpc_id */
+ 0x00000000,
+/* 0x0004: gpc_mmio_list_head */
+ 0x00000000,
+/* 0x0008: gpc_mmio_list_tail */
+ 0x00000000,
+/* 0x000c: tpc_count */
+ 0x00000000,
+/* 0x0010: tpc_mask */
+ 0x00000000,
+/* 0x0014: tpc_mmio_list_head */
+ 0x00000000,
+/* 0x0018: tpc_mmio_list_tail */
+ 0x00000000,
+/* 0x001c: cmd_queue */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0064: chipsets */
+ 0x000000c0,
+ 0x012800c8,
+ 0x01e40194,
+ 0x000000c1,
+ 0x012c00c8,
+ 0x01f80194,
+ 0x000000c3,
+ 0x012800c8,
+ 0x01f40194,
+ 0x000000c4,
+ 0x012800c8,
+ 0x01f40194,
+ 0x000000c8,
+ 0x012800c8,
+ 0x01e40194,
+ 0x000000ce,
+ 0x012800c8,
+ 0x01f40194,
+ 0x000000cf,
+ 0x012800c8,
+ 0x01f00194,
+ 0x000000d9,
+ 0x0194012c,
+ 0x025401f8,
+ 0x00000000,
+/* 0x00c8: nvc0_gpc_mmio_head */
+ 0x00000380,
+ 0x14000400,
+ 0x20000450,
+ 0x00000600,
+ 0x00000684,
+ 0x10000700,
+ 0x00000800,
+ 0x08000808,
+ 0x00000828,
+ 0x00000830,
+ 0x000008d8,
+ 0x000008e0,
+ 0x140008e8,
+ 0x0000091c,
+ 0x08000924,
+ 0x00000b00,
+ 0x14000b08,
+ 0x00000bb8,
+ 0x00000c08,
+ 0x1c000c10,
+ 0x00000c80,
+ 0x00000c8c,
+ 0x08001000,
+ 0x00001014,
+/* 0x0128: nvc0_gpc_mmio_tail */
+ 0x00000c6c,
+/* 0x012c: nvc1_gpc_mmio_tail */
+/* 0x012c: nvd9_gpc_mmio_head */
+ 0x00000380,
+ 0x04000400,
+ 0x0800040c,
+ 0x20000450,
+ 0x00000600,
+ 0x00000684,
+ 0x10000700,
+ 0x00000800,
+ 0x08000808,
+ 0x00000828,
+ 0x00000830,
+ 0x000008d8,
+ 0x000008e0,
+ 0x140008e8,
+ 0x0000091c,
+ 0x08000924,
+ 0x00000b00,
+ 0x14000b08,
+ 0x00000bb8,
+ 0x00000c08,
+ 0x1c000c10,
+ 0x00000c6c,
+ 0x00000c80,
+ 0x00000c8c,
+ 0x08001000,
+ 0x00001014,
+/* 0x0194: nvd9_gpc_mmio_tail */
+/* 0x0194: nvc0_tpc_mmio_head */
+ 0x00000018,
+ 0x0000003c,
+ 0x00000048,
+ 0x00000064,
+ 0x00000088,
+ 0x14000200,
+ 0x0400021c,
+ 0x14000300,
+ 0x000003d0,
+ 0x040003e0,
+ 0x08000400,
+ 0x00000420,
+ 0x000004b0,
+ 0x000004e8,
+ 0x000004f4,
+ 0x04000520,
+ 0x0c000604,
+ 0x4c000644,
+ 0x00000698,
+ 0x04000750,
+/* 0x01e4: nvc0_tpc_mmio_tail */
+ 0x00000758,
+ 0x000002c4,
+ 0x000006e0,
+/* 0x01f0: nvcf_tpc_mmio_tail */
+ 0x000004bc,
+/* 0x01f4: nvc3_tpc_mmio_tail */
+ 0x00000544,
+/* 0x01f8: nvc1_tpc_mmio_tail */
+/* 0x01f8: nvd9_tpc_mmio_head */
+ 0x00000018,
+ 0x0000003c,
+ 0x00000048,
+ 0x00000064,
+ 0x00000088,
+ 0x14000200,
+ 0x0400021c,
+ 0x000002c4,
+ 0x14000300,
+ 0x000003d0,
+ 0x040003e0,
+ 0x08000400,
+ 0x08000420,
+ 0x000004b0,
+ 0x000004e8,
+ 0x000004f4,
+ 0x04000520,
+ 0x00000544,
+ 0x0c000604,
+ 0x4c000644,
+ 0x00000698,
+ 0x000006e0,
+ 0x08000750,
+};
+
+uint32_t nvc0_grgpc_code[] = {
+ 0x03060ef5,
+/* 0x0004: queue_put */
+ 0x9800d898,
+ 0x86f001d9,
+ 0x0489b808,
+ 0xf00c1bf4,
+ 0x21f502f7,
+ 0x00f802ec,
+/* 0x001c: queue_put_next */
+ 0xb60798c4,
+ 0x8dbb0384,
+ 0x0880b600,
+ 0x80008e80,
+ 0x90b6018f,
+ 0x0f94f001,
+ 0xf801d980,
+/* 0x0039: queue_get */
+ 0x0131f400,
+ 0x9800d898,
+ 0x89b801d9,
+ 0x210bf404,
+ 0xb60789c4,
+ 0x9dbb0394,
+ 0x0890b600,
+ 0x98009e98,
+ 0x80b6019f,
+ 0x0f84f001,
+ 0xf400d880,
+/* 0x0066: queue_get_done */
+ 0x00f80132,
+/* 0x0068: nv_rd32 */
+ 0x0728b7f1,
+ 0xb906b4b6,
+ 0xc9f002ec,
+ 0x00bcd01f,
+/* 0x0078: nv_rd32_wait */
+ 0xc800bccf,
+ 0x1bf41fcc,
+ 0x06a7f0fa,
+ 0x010321f5,
+ 0xf840bfcf,
+/* 0x008d: nv_wr32 */
+ 0x28b7f100,
+ 0x06b4b607,
+ 0xb980bfd0,
+ 0xc9f002ec,
+ 0x1ec9f01f,
+/* 0x00a3: nv_wr32_wait */
+ 0xcf00bcd0,
+ 0xccc800bc,
+ 0xfa1bf41f,
+/* 0x00ae: watchdog_reset */
+ 0x87f100f8,
+ 0x84b60430,
+ 0x1ff9f006,
+ 0xf8008fd0,
+/* 0x00bd: watchdog_clear */
+ 0x3087f100,
+ 0x0684b604,
+ 0xf80080d0,
+/* 0x00c9: wait_donez */
+ 0x3c87f100,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d000,
+ 0x081887f1,
+ 0xd00684b6,
+/* 0x00e2: wait_done_wait_donez */
+ 0x87f1008a,
+ 0x84b60400,
+ 0x0088cf06,
+ 0xf4888aff,
+ 0x87f1f31b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00099,
+/* 0x0103: wait_doneo */
+ 0xf100f800,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00099f0,
+ 0x87f10089,
+ 0x84b60818,
+ 0x008ad006,
+/* 0x011c: wait_done_wait_doneo */
+ 0x040087f1,
+ 0xcf0684b6,
+ 0x8aff0088,
+ 0xf30bf488,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0099f094,
+ 0xf80089d0,
+/* 0x013d: mmctx_size */
+/* 0x013f: nv_mmctx_size_loop */
+ 0x9894bd00,
+ 0x85b600e8,
+ 0x0180b61a,
+ 0xbb0284b6,
+ 0xe0b60098,
+ 0x04efb804,
+ 0xb9eb1bf4,
+ 0x00f8029f,
+/* 0x015c: mmctx_xfer */
+ 0x083c87f1,
+ 0xbd0684b6,
+ 0x0199f094,
+ 0xf10089d0,
+ 0xb6071087,
+ 0x94bd0684,
+ 0xf405bbfd,
+ 0x8bd0090b,
+ 0x0099f000,
+/* 0x0180: mmctx_base_disabled */
+ 0xf405eefd,
+ 0x8ed00c0b,
+ 0xc08fd080,
+/* 0x018f: mmctx_multi_disabled */
+ 0xb70199f0,
+ 0xc8010080,
+ 0xb4b600ab,
+ 0x0cb9f010,
+ 0xb601aec8,
+ 0xbefd11e4,
+ 0x008bd005,
+/* 0x01a8: mmctx_exec_loop */
+/* 0x01a8: mmctx_wait_free */
+ 0xf0008ecf,
+ 0x0bf41fe4,
+ 0x00ce98fa,
+ 0xd005e9fd,
+ 0xc0b6c08e,
+ 0x04cdb804,
+ 0xc8e81bf4,
+ 0x1bf402ab,
+/* 0x01c9: mmctx_fini_wait */
+ 0x008bcf18,
+ 0xb01fb4f0,
+ 0x1bf410b4,
+ 0x02a7f0f7,
+ 0xf4c921f4,
+/* 0x01de: mmctx_stop */
+ 0xabc81b0e,
+ 0x10b4b600,
+ 0xf00cb9f0,
+ 0x8bd012b9,
+/* 0x01ed: mmctx_stop_wait */
+ 0x008bcf00,
+ 0xf412bbc8,
+/* 0x01f6: mmctx_done */
+ 0x87f1fa1b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00199,
+/* 0x0207: strand_wait */
+ 0xf900f800,
+ 0x02a7f0a0,
+ 0xfcc921f4,
+/* 0x0213: strand_pre */
+ 0xf100f8a0,
+ 0xf04afc87,
+ 0x97f00283,
+ 0x0089d00c,
+ 0x020721f5,
+/* 0x0226: strand_post */
+ 0x87f100f8,
+ 0x83f04afc,
+ 0x0d97f002,
+ 0xf50089d0,
+ 0xf8020721,
+/* 0x0239: strand_set */
+ 0xfca7f100,
+ 0x02a3f04f,
+ 0x0500aba2,
+ 0xd00fc7f0,
+ 0xc7f000ac,
+ 0x00bcd00b,
+ 0x020721f5,
+ 0xf000aed0,
+ 0xbcd00ac7,
+ 0x0721f500,
+/* 0x0263: strand_ctx_init */
+ 0xf100f802,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x21f50089,
+ 0xe7f00213,
+ 0x3921f503,
+ 0xfca7f102,
+ 0x02a3f046,
+ 0x0400aba0,
+ 0xf040a0d0,
+ 0xbcd001c7,
+ 0x0721f500,
+ 0x010c9202,
+ 0xf000acd0,
+ 0xbcd002c7,
+ 0x0721f500,
+ 0x2621f502,
+ 0x8087f102,
+ 0x0684b608,
+ 0xb70089cf,
+ 0x95220080,
+/* 0x02ba: ctx_init_strand_loop */
+ 0x8ed008fe,
+ 0x408ed000,
+ 0xb6808acf,
+ 0xa0b606a5,
+ 0x00eabb01,
+ 0xb60480b6,
+ 0x1bf40192,
+ 0x08e4b6e8,
+ 0xf1f2efbc,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x00f80089,
+/* 0x02ec: error */
+ 0xe7f1e0f9,
+ 0xe3f09814,
+ 0x8d21f440,
+ 0x041ce0b7,
+ 0xf401f7f0,
+ 0xe0fc8d21,
+/* 0x0306: init */
+ 0x04bd00f8,
+ 0xf10004fe,
+ 0xf0120017,
+ 0x12d00227,
+ 0x3e17f100,
+ 0x0010fe04,
+ 0x040017f1,
+ 0xf0c010d0,
+ 0x12d00427,
+ 0x1031f400,
+ 0x060817f1,
+ 0xcf0614b6,
+ 0x37f00012,
+ 0x1f24f001,
+ 0xb60432bb,
+ 0x02800132,
+ 0x04038003,
+ 0x040010b7,
+ 0x800012cf,
+ 0x27f10002,
+ 0x24b60800,
+ 0x0022cf06,
+/* 0x035f: init_find_chipset */
+ 0xb65817f0,
+ 0x13980c10,
+ 0x0432b800,
+ 0xb00b0bf4,
+ 0x1bf40034,
+/* 0x0373: init_context */
+ 0xf100f8f1,
+ 0xb6080027,
+ 0x22cf0624,
+ 0xf134bd40,
+ 0xb6070047,
+ 0x25950644,
+ 0x0045d008,
+ 0xbd4045d0,
+ 0x58f4bde4,
+ 0x1f58021e,
+ 0x020e4003,
+ 0xf5040f40,
+ 0xbb013d21,
+ 0x3fbb002f,
+ 0x041e5800,
+ 0x40051f58,
+ 0x0f400a0e,
+ 0x3d21f50c,
+ 0x030e9801,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x0040b700,
+ 0x0235b613,
+ 0xb60043d0,
+ 0x35b60825,
+ 0x0120b606,
+ 0xb60130b6,
+ 0x34b60824,
+ 0x022fb908,
+ 0x026321f5,
+ 0xf1003fbb,
+ 0xb6080017,
+ 0x13d00614,
+ 0x0010b740,
+ 0xf024bd08,
+ 0x12d01f29,
+/* 0x0401: main */
+ 0x0031f400,
+ 0xf00028f4,
+ 0x21f41cd7,
+ 0xf401f439,
+ 0xf404e4b0,
+ 0x81fe1e18,
+ 0x0627f001,
+ 0x12fd20bd,
+ 0x01e4b604,
+ 0xfe051efd,
+ 0x21f50018,
+ 0x0ef404c3,
+/* 0x0431: main_not_ctx_xfer */
+ 0x10ef94d3,
+ 0xf501f5f0,
+ 0xf402ec21,
+/* 0x043e: ih */
+ 0x80f9c60e,
+ 0xf90188fe,
+ 0xf990f980,
+ 0xf9b0f9a0,
+ 0xf9e0f9d0,
+ 0x800acff0,
+ 0xf404abc4,
+ 0xb7f11d0b,
+ 0xd7f01900,
+ 0x40becf1c,
+ 0xf400bfcf,
+ 0xb0b70421,
+ 0xe7f00400,
+ 0x00bed001,
+/* 0x0474: ih_no_fifo */
+ 0xfc400ad0,
+ 0xfce0fcf0,
+ 0xfcb0fcd0,
+ 0xfc90fca0,
+ 0x0088fe80,
+ 0x32f480fc,
+/* 0x048f: hub_barrier_done */
+ 0xf001f800,
+ 0x0e9801f7,
+ 0x04febb00,
+ 0x9418e7f1,
+ 0xf440e3f0,
+ 0x00f88d21,
+/* 0x04a4: ctx_redswitch */
+ 0x0614e7f1,
+ 0xf006e4b6,
+ 0xefd020f7,
+ 0x08f7f000,
+/* 0x04b4: ctx_redswitch_delay */
+ 0xf401f2b6,
+ 0xf7f1fd1b,
+ 0xefd00a20,
+/* 0x04c3: ctx_xfer */
+ 0xf100f800,
+ 0xb60a0417,
+ 0x1fd00614,
+ 0x0711f400,
+ 0x04a421f5,
+/* 0x04d4: ctx_xfer_not_load */
+ 0x4afc17f1,
+ 0xf00213f0,
+ 0x12d00c27,
+ 0x0721f500,
+ 0xfc27f102,
+ 0x0223f047,
+ 0xf00020d0,
+ 0x20b6012c,
+ 0x0012d003,
+ 0xf001acf0,
+ 0xb7f002a5,
+ 0x50b3f000,
+ 0xb6000c98,
+ 0xbcbb0fc4,
+ 0x010c9800,
+ 0xf0020d98,
+ 0x21f500e7,
+ 0xacf0015c,
+ 0x04a5f001,
+ 0x4000b7f1,
+ 0x9850b3f0,
+ 0xc4b6000c,
+ 0x00bcbb0f,
+ 0x98050c98,
+ 0x0f98060d,
+ 0x00e7f104,
+ 0x5c21f508,
+ 0x0721f501,
+ 0x0601f402,
+/* 0x054b: ctx_xfer_post */
+ 0xf11412f4,
+ 0xf04afc17,
+ 0x27f00213,
+ 0x0012d00d,
+ 0x020721f5,
+/* 0x055c: ctx_xfer_done */
+ 0x048f21f5,
+ 0x000000f8,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
new file mode 100644
index 0000000..7b715fd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
@@ -0,0 +1,451 @@
+/* fuc microcode for nve0 PGRAPH/GPC
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* To build:
+ * m4 nve0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grgpc.fuc.h
+ */
+
+/* TODO
+ * - bracket certain functions with scratch writes, useful for debugging
+ * - watchdog timer around ctx operations
+ */
+
+.section #nve0_grgpc_data
+include(`nve0.fuc')
+gpc_id: .b32 0
+gpc_mmio_list_head: .b32 0
+gpc_mmio_list_tail: .b32 0
+
+tpc_count: .b32 0
+tpc_mask: .b32 0
+tpc_mmio_list_head: .b32 0
+tpc_mmio_list_tail: .b32 0
+
+cmd_queue: queue_init
+
+// chipset descriptions
+chipsets:
+.b8 0xe4 0 0 0
+.b16 #nve4_gpc_mmio_head
+.b16 #nve4_gpc_mmio_tail
+.b16 #nve4_tpc_mmio_head
+.b16 #nve4_tpc_mmio_tail
+.b8 0xe7 0 0 0
+.b16 #nve4_gpc_mmio_head
+.b16 #nve4_gpc_mmio_tail
+.b16 #nve4_tpc_mmio_head
+.b16 #nve4_tpc_mmio_tail
+.b8 0 0 0 0
+
+// GPC mmio lists
+nve4_gpc_mmio_head:
+mmctx_data(0x000380, 1)
+mmctx_data(0x000400, 2)
+mmctx_data(0x00040c, 3)
+mmctx_data(0x000450, 9)
+mmctx_data(0x000600, 1)
+mmctx_data(0x000684, 1)
+mmctx_data(0x000700, 5)
+mmctx_data(0x000800, 1)
+mmctx_data(0x000808, 3)
+mmctx_data(0x000828, 1)
+mmctx_data(0x000830, 1)
+mmctx_data(0x0008d8, 1)
+mmctx_data(0x0008e0, 1)
+mmctx_data(0x0008e8, 6)
+mmctx_data(0x00091c, 1)
+mmctx_data(0x000924, 3)
+mmctx_data(0x000b00, 1)
+mmctx_data(0x000b08, 6)
+mmctx_data(0x000bb8, 1)
+mmctx_data(0x000c08, 1)
+mmctx_data(0x000c10, 8)
+mmctx_data(0x000c40, 1)
+mmctx_data(0x000c6c, 1)
+mmctx_data(0x000c80, 1)
+mmctx_data(0x000c8c, 1)
+mmctx_data(0x001000, 3)
+mmctx_data(0x001014, 1)
+mmctx_data(0x003024, 1)
+mmctx_data(0x0030c0, 2)
+mmctx_data(0x0030e4, 1)
+mmctx_data(0x003100, 6)
+mmctx_data(0x0031d0, 1)
+mmctx_data(0x0031e0, 2)
+nve4_gpc_mmio_tail:
+
+// TPC mmio lists
+nve4_tpc_mmio_head:
+mmctx_data(0x000048, 1)
+mmctx_data(0x000064, 1)
+mmctx_data(0x000088, 1)
+mmctx_data(0x000200, 6)
+mmctx_data(0x00021c, 2)
+mmctx_data(0x000230, 1)
+mmctx_data(0x0002c4, 1)
+mmctx_data(0x000400, 3)
+mmctx_data(0x000420, 3)
+mmctx_data(0x0004e8, 1)
+mmctx_data(0x0004f4, 1)
+mmctx_data(0x000604, 4)
+mmctx_data(0x000644, 22)
+mmctx_data(0x0006ac, 2)
+mmctx_data(0x0006c8, 1)
+mmctx_data(0x000730, 8)
+mmctx_data(0x000758, 1)
+mmctx_data(0x000778, 1)
+nve4_tpc_mmio_tail:
+
+.section #nve0_grgpc_code
+bra #init
+define(`include_code')
+include(`nve0.fuc')
+
+// reports an exception to the host
+//
+// In: $r15 error code (see nve0.fuc)
+//
+error:
+ push $r14
+ mov $r14 -0x67ec // 0x9814
+ sethi $r14 0x400000
+ call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
+ add b32 $r14 0x41c
+ mov $r15 1
+ call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
+ pop $r14
+ ret
+
+// GPC fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+// CC_SCRATCH[1]: context base
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: GPC context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // enable fifo interrupt
+ mov $r2 4
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // enable interrupts
+ bset $flags ie0
+
+ // figure out which GPC we are, and how many TPCs we have
+ mov $r1 0x608
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x000] // UNITS
+ mov $r3 1
+ and $r2 0x1f
+ shl b32 $r3 $r2
+ sub b32 $r3 1
+ st b32 D[$r0 + #tpc_count] $r2
+ st b32 D[$r0 + #tpc_mask] $r3
+ add b32 $r1 0x400
+ iord $r2 I[$r1 + 0x000] // MYINDEX
+ st b32 D[$r0 + #gpc_id] $r2
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r1 #chipsets - 12
+ init_find_chipset:
+ add b32 $r1 12
+ ld b32 $r3 D[$r1 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // initialise context base, and size tracking
+ init_context:
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
+ clear b32 $r3 // track GPC context size here
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't currently ever change
+ mov $r4 0x700
+ shl b32 $r4 6
+ shr b32 $r5 $r2 8
+ iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
+ iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
+
+ // calculate GPC mmio context size, store the chipset-specific
+ // mmio list pointers somewhere we can get at them later without
+ // re-parsing the chipset list
+ clear b32 $r14
+ clear b32 $r15
+ ld b16 $r14 D[$r1 + 4]
+ ld b16 $r15 D[$r1 + 6]
+ st b16 D[$r0 + #gpc_mmio_list_head] $r14
+ st b16 D[$r0 + #gpc_mmio_list_tail] $r15
+ call #mmctx_size
+ add b32 $r2 $r15
+ add b32 $r3 $r15
+
+ // calculate per-TPC mmio context size, store the list pointers
+ ld b16 $r14 D[$r1 + 8]
+ ld b16 $r15 D[$r1 + 10]
+ st b16 D[$r0 + #tpc_mmio_list_head] $r14
+ st b16 D[$r0 + #tpc_mmio_list_tail] $r15
+ call #mmctx_size
+ ld b32 $r14 D[$r0 + #tpc_count]
+ mulu $r14 $r15
+ add b32 $r2 $r14
+ add b32 $r3 $r14
+
+ // round up base/size to 256 byte boundary (for strand SWBASE)
+ add b32 $r4 0x1300
+ shr b32 $r3 2
+ iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
+ shr b32 $r2 8
+ shr b32 $r3 6
+ add b32 $r2 1
+ add b32 $r3 1
+ shl b32 $r2 8
+ shl b32 $r3 8
+
+ // calculate size of strand context data
+ mov b32 $r15 $r2
+ call #strand_ctx_init
+ add b32 $r3 $r15
+
+ // save context size, and tell HUB we're done
+ mov $r1 0x800
+ shl b32 $r1 6
+ iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
+ add b32 $r1 0x800
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // 0x0000-0x0003 are all context transfers
+ cmpu b32 $r14 0x04
+ bra nc #main_not_ctx_xfer
+ // fetch $flags and mask off $p1/$p2
+ mov $r1 $flags
+ mov $r2 0x0006
+ not b32 $r2
+ and $r1 $r2
+ // set $p1/$p2 according to transfer type
+ shl b32 $r14 1
+ or $r1 $r14
+ mov $flags $r1
+ // transfer context data
+ call #ctx_xfer
+ bra #main
+
+ main_not_ctx_xfer:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // ack, and wake up main()
+ ih_no_fifo:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+// Set this GPC's bit in HUB_BAR, used to signal completion of various
+// activities to the HUB fuc
+//
+hub_barrier_done:
+ mov $r15 1
+ ld b32 $r14 D[$r0 + #gpc_id]
+ shl b32 $r15 $r14
+ mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
+ sethi $r14 0x400000
+ call #nv_wr32
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x020
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0xa20
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
+ ret
+
+// Transfer GPC context data between GPU and storage area
+//
+// In: $r15 context base address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ // set context base address
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r15// MEM_BASE
+ bra not $p1 #ctx_xfer_not_load
+ call #ctx_redswitch
+ ctx_xfer_not_load:
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 2 // first
+ mov $r11 0x0000
+ sethi $r11 0x500000
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
+ ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // per-TPC mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 4 // last
+ mov $r11 0x4000
+ sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
+ ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
+ ld b32 $r15 D[$r0 + #tpc_mask]
+ mov $r14 0x800 // stride = 0x800
+ call #mmctx_xfer
+
+ // wait for strands to finish
+ call #strand_wait
+
+ // if load, or a save without a load following, do some
+ // unknown stuff that's done after finishing a block of
+ // strand commands
+ bra $p1 #ctx_xfer_post
+ bra not $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xd
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
+ call #strand_wait
+
+ // mark completion in HUB's barrier
+ ctx_xfer_done:
+ call #hub_barrier_done
+ ret
+
+.align 256
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
new file mode 100644
index 0000000..26c2165
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
@@ -0,0 +1,530 @@
+uint32_t nve0_grgpc_data[] = {
+/* 0x0000: gpc_id */
+ 0x00000000,
+/* 0x0004: gpc_mmio_list_head */
+ 0x00000000,
+/* 0x0008: gpc_mmio_list_tail */
+ 0x00000000,
+/* 0x000c: tpc_count */
+ 0x00000000,
+/* 0x0010: tpc_mask */
+ 0x00000000,
+/* 0x0014: tpc_mmio_list_head */
+ 0x00000000,
+/* 0x0018: tpc_mmio_list_tail */
+ 0x00000000,
+/* 0x001c: cmd_queue */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0064: chipsets */
+ 0x000000e4,
+ 0x01040080,
+ 0x014c0104,
+ 0x000000e7,
+ 0x01040080,
+ 0x014c0104,
+ 0x00000000,
+/* 0x0080: nve4_gpc_mmio_head */
+ 0x00000380,
+ 0x04000400,
+ 0x0800040c,
+ 0x20000450,
+ 0x00000600,
+ 0x00000684,
+ 0x10000700,
+ 0x00000800,
+ 0x08000808,
+ 0x00000828,
+ 0x00000830,
+ 0x000008d8,
+ 0x000008e0,
+ 0x140008e8,
+ 0x0000091c,
+ 0x08000924,
+ 0x00000b00,
+ 0x14000b08,
+ 0x00000bb8,
+ 0x00000c08,
+ 0x1c000c10,
+ 0x00000c40,
+ 0x00000c6c,
+ 0x00000c80,
+ 0x00000c8c,
+ 0x08001000,
+ 0x00001014,
+ 0x00003024,
+ 0x040030c0,
+ 0x000030e4,
+ 0x14003100,
+ 0x000031d0,
+ 0x040031e0,
+/* 0x0104: nve4_gpc_mmio_tail */
+/* 0x0104: nve4_tpc_mmio_head */
+ 0x00000048,
+ 0x00000064,
+ 0x00000088,
+ 0x14000200,
+ 0x0400021c,
+ 0x00000230,
+ 0x000002c4,
+ 0x08000400,
+ 0x08000420,
+ 0x000004e8,
+ 0x000004f4,
+ 0x0c000604,
+ 0x54000644,
+ 0x040006ac,
+ 0x000006c8,
+ 0x1c000730,
+ 0x00000758,
+ 0x00000778,
+};
+
+uint32_t nve0_grgpc_code[] = {
+ 0x03060ef5,
+/* 0x0004: queue_put */
+ 0x9800d898,
+ 0x86f001d9,
+ 0x0489b808,
+ 0xf00c1bf4,
+ 0x21f502f7,
+ 0x00f802ec,
+/* 0x001c: queue_put_next */
+ 0xb60798c4,
+ 0x8dbb0384,
+ 0x0880b600,
+ 0x80008e80,
+ 0x90b6018f,
+ 0x0f94f001,
+ 0xf801d980,
+/* 0x0039: queue_get */
+ 0x0131f400,
+ 0x9800d898,
+ 0x89b801d9,
+ 0x210bf404,
+ 0xb60789c4,
+ 0x9dbb0394,
+ 0x0890b600,
+ 0x98009e98,
+ 0x80b6019f,
+ 0x0f84f001,
+ 0xf400d880,
+/* 0x0066: queue_get_done */
+ 0x00f80132,
+/* 0x0068: nv_rd32 */
+ 0x0728b7f1,
+ 0xb906b4b6,
+ 0xc9f002ec,
+ 0x00bcd01f,
+/* 0x0078: nv_rd32_wait */
+ 0xc800bccf,
+ 0x1bf41fcc,
+ 0x06a7f0fa,
+ 0x010321f5,
+ 0xf840bfcf,
+/* 0x008d: nv_wr32 */
+ 0x28b7f100,
+ 0x06b4b607,
+ 0xb980bfd0,
+ 0xc9f002ec,
+ 0x1ec9f01f,
+/* 0x00a3: nv_wr32_wait */
+ 0xcf00bcd0,
+ 0xccc800bc,
+ 0xfa1bf41f,
+/* 0x00ae: watchdog_reset */
+ 0x87f100f8,
+ 0x84b60430,
+ 0x1ff9f006,
+ 0xf8008fd0,
+/* 0x00bd: watchdog_clear */
+ 0x3087f100,
+ 0x0684b604,
+ 0xf80080d0,
+/* 0x00c9: wait_donez */
+ 0x3c87f100,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d000,
+ 0x081887f1,
+ 0xd00684b6,
+/* 0x00e2: wait_done_wait_donez */
+ 0x87f1008a,
+ 0x84b60400,
+ 0x0088cf06,
+ 0xf4888aff,
+ 0x87f1f31b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00099,
+/* 0x0103: wait_doneo */
+ 0xf100f800,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00099f0,
+ 0x87f10089,
+ 0x84b60818,
+ 0x008ad006,
+/* 0x011c: wait_done_wait_doneo */
+ 0x040087f1,
+ 0xcf0684b6,
+ 0x8aff0088,
+ 0xf30bf488,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0099f094,
+ 0xf80089d0,
+/* 0x013d: mmctx_size */
+/* 0x013f: nv_mmctx_size_loop */
+ 0x9894bd00,
+ 0x85b600e8,
+ 0x0180b61a,
+ 0xbb0284b6,
+ 0xe0b60098,
+ 0x04efb804,
+ 0xb9eb1bf4,
+ 0x00f8029f,
+/* 0x015c: mmctx_xfer */
+ 0x083c87f1,
+ 0xbd0684b6,
+ 0x0199f094,
+ 0xf10089d0,
+ 0xb6071087,
+ 0x94bd0684,
+ 0xf405bbfd,
+ 0x8bd0090b,
+ 0x0099f000,
+/* 0x0180: mmctx_base_disabled */
+ 0xf405eefd,
+ 0x8ed00c0b,
+ 0xc08fd080,
+/* 0x018f: mmctx_multi_disabled */
+ 0xb70199f0,
+ 0xc8010080,
+ 0xb4b600ab,
+ 0x0cb9f010,
+ 0xb601aec8,
+ 0xbefd11e4,
+ 0x008bd005,
+/* 0x01a8: mmctx_exec_loop */
+/* 0x01a8: mmctx_wait_free */
+ 0xf0008ecf,
+ 0x0bf41fe4,
+ 0x00ce98fa,
+ 0xd005e9fd,
+ 0xc0b6c08e,
+ 0x04cdb804,
+ 0xc8e81bf4,
+ 0x1bf402ab,
+/* 0x01c9: mmctx_fini_wait */
+ 0x008bcf18,
+ 0xb01fb4f0,
+ 0x1bf410b4,
+ 0x02a7f0f7,
+ 0xf4c921f4,
+/* 0x01de: mmctx_stop */
+ 0xabc81b0e,
+ 0x10b4b600,
+ 0xf00cb9f0,
+ 0x8bd012b9,
+/* 0x01ed: mmctx_stop_wait */
+ 0x008bcf00,
+ 0xf412bbc8,
+/* 0x01f6: mmctx_done */
+ 0x87f1fa1b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00199,
+/* 0x0207: strand_wait */
+ 0xf900f800,
+ 0x02a7f0a0,
+ 0xfcc921f4,
+/* 0x0213: strand_pre */
+ 0xf100f8a0,
+ 0xf04afc87,
+ 0x97f00283,
+ 0x0089d00c,
+ 0x020721f5,
+/* 0x0226: strand_post */
+ 0x87f100f8,
+ 0x83f04afc,
+ 0x0d97f002,
+ 0xf50089d0,
+ 0xf8020721,
+/* 0x0239: strand_set */
+ 0xfca7f100,
+ 0x02a3f04f,
+ 0x0500aba2,
+ 0xd00fc7f0,
+ 0xc7f000ac,
+ 0x00bcd00b,
+ 0x020721f5,
+ 0xf000aed0,
+ 0xbcd00ac7,
+ 0x0721f500,
+/* 0x0263: strand_ctx_init */
+ 0xf100f802,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x21f50089,
+ 0xe7f00213,
+ 0x3921f503,
+ 0xfca7f102,
+ 0x02a3f046,
+ 0x0400aba0,
+ 0xf040a0d0,
+ 0xbcd001c7,
+ 0x0721f500,
+ 0x010c9202,
+ 0xf000acd0,
+ 0xbcd002c7,
+ 0x0721f500,
+ 0x2621f502,
+ 0x8087f102,
+ 0x0684b608,
+ 0xb70089cf,
+ 0x95220080,
+/* 0x02ba: ctx_init_strand_loop */
+ 0x8ed008fe,
+ 0x408ed000,
+ 0xb6808acf,
+ 0xa0b606a5,
+ 0x00eabb01,
+ 0xb60480b6,
+ 0x1bf40192,
+ 0x08e4b6e8,
+ 0xf1f2efbc,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x00f80089,
+/* 0x02ec: error */
+ 0xe7f1e0f9,
+ 0xe3f09814,
+ 0x8d21f440,
+ 0x041ce0b7,
+ 0xf401f7f0,
+ 0xe0fc8d21,
+/* 0x0306: init */
+ 0x04bd00f8,
+ 0xf10004fe,
+ 0xf0120017,
+ 0x12d00227,
+ 0x3e17f100,
+ 0x0010fe04,
+ 0x040017f1,
+ 0xf0c010d0,
+ 0x12d00427,
+ 0x1031f400,
+ 0x060817f1,
+ 0xcf0614b6,
+ 0x37f00012,
+ 0x1f24f001,
+ 0xb60432bb,
+ 0x02800132,
+ 0x04038003,
+ 0x040010b7,
+ 0x800012cf,
+ 0x27f10002,
+ 0x24b60800,
+ 0x0022cf06,
+/* 0x035f: init_find_chipset */
+ 0xb65817f0,
+ 0x13980c10,
+ 0x0432b800,
+ 0xb00b0bf4,
+ 0x1bf40034,
+/* 0x0373: init_context */
+ 0xf100f8f1,
+ 0xb6080027,
+ 0x22cf0624,
+ 0xf134bd40,
+ 0xb6070047,
+ 0x25950644,
+ 0x0045d008,
+ 0xbd4045d0,
+ 0x58f4bde4,
+ 0x1f58021e,
+ 0x020e4003,
+ 0xf5040f40,
+ 0xbb013d21,
+ 0x3fbb002f,
+ 0x041e5800,
+ 0x40051f58,
+ 0x0f400a0e,
+ 0x3d21f50c,
+ 0x030e9801,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x0040b700,
+ 0x0235b613,
+ 0xb60043d0,
+ 0x35b60825,
+ 0x0120b606,
+ 0xb60130b6,
+ 0x34b60824,
+ 0x022fb908,
+ 0x026321f5,
+ 0xf1003fbb,
+ 0xb6080017,
+ 0x13d00614,
+ 0x0010b740,
+ 0xf024bd08,
+ 0x12d01f29,
+/* 0x0401: main */
+ 0x0031f400,
+ 0xf00028f4,
+ 0x21f41cd7,
+ 0xf401f439,
+ 0xf404e4b0,
+ 0x81fe1e18,
+ 0x0627f001,
+ 0x12fd20bd,
+ 0x01e4b604,
+ 0xfe051efd,
+ 0x21f50018,
+ 0x0ef404c3,
+/* 0x0431: main_not_ctx_xfer */
+ 0x10ef94d3,
+ 0xf501f5f0,
+ 0xf402ec21,
+/* 0x043e: ih */
+ 0x80f9c60e,
+ 0xf90188fe,
+ 0xf990f980,
+ 0xf9b0f9a0,
+ 0xf9e0f9d0,
+ 0x800acff0,
+ 0xf404abc4,
+ 0xb7f11d0b,
+ 0xd7f01900,
+ 0x40becf1c,
+ 0xf400bfcf,
+ 0xb0b70421,
+ 0xe7f00400,
+ 0x00bed001,
+/* 0x0474: ih_no_fifo */
+ 0xfc400ad0,
+ 0xfce0fcf0,
+ 0xfcb0fcd0,
+ 0xfc90fca0,
+ 0x0088fe80,
+ 0x32f480fc,
+/* 0x048f: hub_barrier_done */
+ 0xf001f800,
+ 0x0e9801f7,
+ 0x04febb00,
+ 0x9418e7f1,
+ 0xf440e3f0,
+ 0x00f88d21,
+/* 0x04a4: ctx_redswitch */
+ 0x0614e7f1,
+ 0xf006e4b6,
+ 0xefd020f7,
+ 0x08f7f000,
+/* 0x04b4: ctx_redswitch_delay */
+ 0xf401f2b6,
+ 0xf7f1fd1b,
+ 0xefd00a20,
+/* 0x04c3: ctx_xfer */
+ 0xf100f800,
+ 0xb60a0417,
+ 0x1fd00614,
+ 0x0711f400,
+ 0x04a421f5,
+/* 0x04d4: ctx_xfer_not_load */
+ 0x4afc17f1,
+ 0xf00213f0,
+ 0x12d00c27,
+ 0x0721f500,
+ 0xfc27f102,
+ 0x0223f047,
+ 0xf00020d0,
+ 0x20b6012c,
+ 0x0012d003,
+ 0xf001acf0,
+ 0xb7f002a5,
+ 0x50b3f000,
+ 0xb6000c98,
+ 0xbcbb0fc4,
+ 0x010c9800,
+ 0xf0020d98,
+ 0x21f500e7,
+ 0xacf0015c,
+ 0x04a5f001,
+ 0x4000b7f1,
+ 0x9850b3f0,
+ 0xc4b6000c,
+ 0x00bcbb0f,
+ 0x98050c98,
+ 0x0f98060d,
+ 0x00e7f104,
+ 0x5c21f508,
+ 0x0721f501,
+ 0x0601f402,
+/* 0x054b: ctx_xfer_post */
+ 0xf11412f4,
+ 0xf04afc17,
+ 0x27f00213,
+ 0x0012d00d,
+ 0x020721f5,
+/* 0x055c: ctx_xfer_done */
+ 0x048f21f5,
+ 0x000000f8,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
new file mode 100644
index 0000000..acfc457
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
@@ -0,0 +1,856 @@
+/* fuc microcode for nvc0 PGRAPH/HUB
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* To build:
+ * m4 hubnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o hubnvc0.fuc.h
+ */
+
+.section #nvc0_grhub_data
+include(`nvc0.fuc')
+gpc_count: .b32 0
+rop_count: .b32 0
+cmd_queue: queue_init
+hub_mmio_list_head: .b32 0
+hub_mmio_list_tail: .b32 0
+
+ctx_current: .b32 0
+
+chipsets:
+.b8 0xc0 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xc1 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc1_hub_mmio_tail
+.b8 0xc3 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xc4 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xc8 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xce 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xcf 0 0 0
+.b16 #nvc0_hub_mmio_head
+.b16 #nvc0_hub_mmio_tail
+.b8 0xd9 0 0 0
+.b16 #nvd9_hub_mmio_head
+.b16 #nvd9_hub_mmio_tail
+.b8 0 0 0 0
+
+nvc0_hub_mmio_head:
+mmctx_data(0x17e91c, 2)
+mmctx_data(0x400204, 2)
+mmctx_data(0x404004, 11)
+mmctx_data(0x404044, 1)
+mmctx_data(0x404094, 14)
+mmctx_data(0x4040d0, 7)
+mmctx_data(0x4040f8, 1)
+mmctx_data(0x404130, 3)
+mmctx_data(0x404150, 3)
+mmctx_data(0x404164, 2)
+mmctx_data(0x404174, 3)
+mmctx_data(0x404200, 8)
+mmctx_data(0x404404, 14)
+mmctx_data(0x404460, 4)
+mmctx_data(0x404480, 1)
+mmctx_data(0x404498, 1)
+mmctx_data(0x404604, 4)
+mmctx_data(0x404618, 32)
+mmctx_data(0x404698, 21)
+mmctx_data(0x4046f0, 2)
+mmctx_data(0x404700, 22)
+mmctx_data(0x405800, 1)
+mmctx_data(0x405830, 3)
+mmctx_data(0x405854, 1)
+mmctx_data(0x405870, 4)
+mmctx_data(0x405a00, 2)
+mmctx_data(0x405a18, 1)
+mmctx_data(0x406020, 1)
+mmctx_data(0x406028, 4)
+mmctx_data(0x4064a8, 2)
+mmctx_data(0x4064b4, 2)
+mmctx_data(0x407804, 1)
+mmctx_data(0x40780c, 6)
+mmctx_data(0x4078bc, 1)
+mmctx_data(0x408000, 7)
+mmctx_data(0x408064, 1)
+mmctx_data(0x408800, 3)
+mmctx_data(0x408900, 4)
+mmctx_data(0x408980, 1)
+nvc0_hub_mmio_tail:
+mmctx_data(0x4064c0, 2)
+nvc1_hub_mmio_tail:
+
+nvd9_hub_mmio_head:
+mmctx_data(0x17e91c, 2)
+mmctx_data(0x400204, 2)
+mmctx_data(0x404004, 10)
+mmctx_data(0x404044, 1)
+mmctx_data(0x404094, 14)
+mmctx_data(0x4040d0, 7)
+mmctx_data(0x4040f8, 1)
+mmctx_data(0x404130, 3)
+mmctx_data(0x404150, 3)
+mmctx_data(0x404164, 2)
+mmctx_data(0x404178, 2)
+mmctx_data(0x404200, 8)
+mmctx_data(0x404404, 14)
+mmctx_data(0x404460, 4)
+mmctx_data(0x404480, 1)
+mmctx_data(0x404498, 1)
+mmctx_data(0x404604, 4)
+mmctx_data(0x404618, 32)
+mmctx_data(0x404698, 21)
+mmctx_data(0x4046f0, 2)
+mmctx_data(0x404700, 22)
+mmctx_data(0x405800, 1)
+mmctx_data(0x405830, 3)
+mmctx_data(0x405854, 1)
+mmctx_data(0x405870, 4)
+mmctx_data(0x405a00, 2)
+mmctx_data(0x405a18, 1)
+mmctx_data(0x406020, 1)
+mmctx_data(0x406028, 4)
+mmctx_data(0x4064a8, 2)
+mmctx_data(0x4064b4, 5)
+mmctx_data(0x407804, 1)
+mmctx_data(0x40780c, 6)
+mmctx_data(0x4078bc, 1)
+mmctx_data(0x408000, 7)
+mmctx_data(0x408064, 1)
+mmctx_data(0x408800, 3)
+mmctx_data(0x408900, 4)
+mmctx_data(0x408980, 1)
+nvd9_hub_mmio_tail:
+
+.align 256
+chan_data:
+chan_mmio_count: .b32 0
+chan_mmio_address: .b32 0
+
+.align 256
+xfer_data: .b32 0
+
+.section #nvc0_grhub_code
+bra #init
+define(`include_code')
+include(`nvc0.fuc')
+
+// reports an exception to the host
+//
+// In: $r15 error code (see nvc0.fuc)
+//
+error:
+ push $r14
+ mov $r14 0x814
+ shl b32 $r14 6
+ iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
+ mov $r14 0xc1c
+ shl b32 $r14 6
+ mov $r15 1
+ iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
+ pop $r14
+ ret
+
+// HUB fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: total PGRAPH context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+ mov $xdbase $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // route HUB_CHANNEL_SWITCH to fuc interrupt 8
+ mov $r3 0x404
+ shl b32 $r3 6
+ mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
+ iowr I[$r3 + 0x000] $r2
+
+ // not sure what these are, route them because NVIDIA does, and
+ // the IRQ handler will signal the host if we ever get one.. we
+ // may find out if/why we need to handle these if so..
+ //
+ mov $r2 0x2004
+ iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
+ mov $r2 0x200b
+ iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
+ mov $r2 0x200c
+ iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
+
+ // enable all INTR_UP interrupts
+ mov $r2 0xc24
+ shl b32 $r2 6
+ not b32 $r3 $r0
+ iowr I[$r2] $r3
+
+ // enable fifo, ctxsw, 9, 10, 15 interrupts
+ mov $r2 -0x78fc // 0x8704
+ sethi $r2 0
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // fifo level triggered, rest edge
+ sub b32 $r1 0x100
+ mov $r2 4
+ iowr I[$r1] $r2
+
+ // enable interrupts
+ bset $flags ie0
+
+ // fetch enabled GPC/ROP counts
+ mov $r14 -0x69fc // 0x409604
+ sethi $r14 0x400000
+ call #nv_rd32
+ extr $r1 $r15 16:20
+ st b32 D[$r0 + #rop_count] $r1
+ and $r15 0x1f
+ st b32 D[$r0 + #gpc_count] $r15
+
+ // set BAR_REQMASK to GPC mask
+ mov $r1 1
+ shl b32 $r1 $r15
+ sub b32 $r1 1
+ mov $r2 0x40c
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1
+ iowr I[$r2 + 0x100] $r1
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r15 #chipsets - 8
+ init_find_chipset:
+ add b32 $r15 8
+ ld b32 $r3 D[$r15 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // context size calculation, reserve first 256 bytes for use by fuc
+ init_context:
+ mov $r1 256
+
+ // calculate size of mmio context data
+ ld b16 $r14 D[$r15 + 4]
+ ld b16 $r15 D[$r15 + 6]
+ sethi $r14 0
+ st b32 D[$r0 + #hub_mmio_list_head] $r14
+ st b32 D[$r0 + #hub_mmio_list_tail] $r15
+ call #mmctx_size
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't (currently) ever change
+ mov $r3 0x700
+ shl b32 $r3 6
+ shr b32 $r4 $r1 8
+ iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
+ iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
+ add b32 $r3 0x1300
+ add b32 $r1 $r15
+ shr b32 $r15 2
+ iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
+
+ // strands, base offset needs to be aligned to 256 bytes
+ shr b32 $r1 8
+ add b32 $r1 1
+ shl b32 $r1 8
+ mov b32 $r15 $r1
+ call #strand_ctx_init
+ add b32 $r1 $r15
+
+ // initialise each GPC in sequence by passing in the offset of its
+ // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
+ // has previously been uploaded by the host) running.
+ //
+ // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
+ // when it has completed, and return the size of its context data
+ // in GPCn_CC_SCRATCH[1]
+ //
+ ld b32 $r3 D[$r0 + #gpc_count]
+ mov $r4 0x2000
+ sethi $r4 0x500000
+ init_gpc:
+ // setup, and start GPC ucode running
+ add b32 $r14 $r4 0x804
+ mov b32 $r15 $r1
+ call #nv_wr32 // CC_SCRATCH[1] = ctx offset
+ add b32 $r14 $r4 0x800
+ mov b32 $r15 $r2
+ call #nv_wr32 // CC_SCRATCH[0] = chipset
+ add b32 $r14 $r4 0x10c
+ clear b32 $r15
+ call #nv_wr32
+ add b32 $r14 $r4 0x104
+ call #nv_wr32 // ENTRY
+ add b32 $r14 $r4 0x100
+ mov $r15 2 // CTRL_START_TRIGGER
+ call #nv_wr32 // CTRL
+
+ // wait for it to complete, and adjust context size
+ add b32 $r14 $r4 0x800
+ init_gpc_wait:
+ call #nv_rd32
+ xbit $r15 $r15 31
+ bra e #init_gpc_wait
+ add b32 $r14 $r4 0x804
+ call #nv_rd32
+ add b32 $r1 $r15
+
+ // next!
+ add b32 $r4 0x8000
+ sub b32 $r3 1
+ bra ne #init_gpc
+
+ // save context size, and tell host we're ready
+ mov $r2 0x800
+ shl b32 $r2 6
+ iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
+ add b32 $r2 0x800
+ clear b32 $r1
+ bset $r1 31
+ iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ // sleep until we have something to do
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // context switch, requested by GPU?
+ cmpu b32 $r14 0x4001
+ bra ne #main_not_ctx_switch
+ trace_set(T_AUTO)
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x100] // CHAN_NEXT
+ iord $r1 I[$r1 + 0x000] // CHAN_CUR
+
+ xbit $r3 $r1 31
+ bra e #chsw_no_prev
+ xbit $r3 $r2 31
+ bra e #chsw_prev_no_next
+ push $r2
+ mov b32 $r2 $r1
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bset $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE);
+ pop $r2
+ trace_set(T_LOAD);
+ bset $flags $p1
+ call #ctx_xfer
+ trace_clr(T_LOAD);
+ bra #chsw_done
+ chsw_prev_no_next:
+ push $r2
+ mov b32 $r2 $r1
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ pop $r2
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iowr I[$r1] $r2
+ bra #chsw_done
+ chsw_no_prev:
+ xbit $r3 $r2 31
+ bra e #chsw_done
+ bset $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+
+ // ack the context switch request
+ chsw_done:
+ mov $r1 0xb0c
+ shl b32 $r1 6
+ mov $r2 1
+ iowr I[$r1 + 0x000] $r2 // 0x409b0c
+ trace_clr(T_AUTO)
+ bra #main
+
+ // request to set current channel? (*not* a context switch)
+ main_not_ctx_switch:
+ cmpu b32 $r14 0x0001
+ bra ne #main_not_ctx_chan
+ mov b32 $r2 $r15
+ call #ctx_chan
+ bra #main_done
+
+ // request to store current channel context?
+ main_not_ctx_chan:
+ cmpu b32 $r14 0x0002
+ bra ne #main_not_ctx_save
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE)
+ bra #main_done
+
+ main_not_ctx_save:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+ main_done:
+ mov $r1 0x820
+ shl b32 $r1 6
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // context switch request?
+ ih_no_fifo:
+ and $r11 $r10 0x00000100
+ bra e #ih_no_ctxsw
+ // enqueue a context switch for later processing
+ mov $r13 #cmd_queue
+ mov $r14 0x4001
+ call #queue_put
+
+ // anything we didn't handle, bring it to the host's attention
+ ih_no_ctxsw:
+ mov $r11 0x104
+ not b32 $r11
+ and $r11 $r10 $r11
+ bra e #ih_no_other
+ mov $r10 0xc1c
+ shl b32 $r10 6
+ iowr I[$r10] $r11 // INTR_UP_SET
+
+ // ack, and wake up main()
+ ih_no_other:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
+ctx_4160s:
+ mov $r14 0x4160
+ sethi $r14 0x400000
+ mov $r15 1
+ call #nv_wr32
+ ctx_4160s_wait:
+ call #nv_rd32
+ xbit $r15 $r15 4
+ bra e #ctx_4160s_wait
+ ret
+
+// Without clearing again at end of xfer, some things cause PGRAPH
+// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
+// still function with it set however...
+ctx_4160c:
+ mov $r14 0x4160
+ sethi $r14 0x400000
+ clear b32 $r15
+ call #nv_wr32
+ ret
+
+// Again, not real sure
+//
+// In: $r15 value to set 0x404170 to
+//
+ctx_4170s:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ or $r15 0x10
+ call #nv_wr32
+ ret
+
+// Waits for a ctx_4170s() call to complete
+//
+ctx_4170w:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ call #nv_rd32
+ and $r15 0x10
+ bra ne #ctx_4170w
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x270
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0x770
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
+ ret
+
+// Not a clue what this is for, except that unless the value is 0x10, the
+// strand context is saved (and presumably restored) incorrectly..
+//
+// In: $r15 value to set to (0x00/0x10 are used)
+//
+ctx_86c:
+ mov $r14 0x86c
+ shl b32 $r14 6
+ iowr I[$r14] $r15 // HUB(0x86c) = val
+ mov $r14 -0x75ec
+ sethi $r14 0x400000
+ call #nv_wr32 // ROP(0xa14) = val
+ mov $r14 -0x5794
+ sethi $r14 0x410000
+ call #nv_wr32 // GPC(0x86c) = val
+ ret
+
+// ctx_load - load's a channel's ctxctl data, and selects its vm
+//
+// In: $r2 channel address
+//
+ctx_load:
+ trace_set(T_CHAN)
+
+ // switch to channel, somewhat magic in parts..
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa24
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r0 // 0x409a24
+ mov $r3 0xb00
+ shl b32 $r3 6
+ iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
+ mov $r1 0xa0c
+ shl b32 $r1 6
+ mov $r4 7
+ iowr I[$r1 + 0x000] $r2 // MEM_CHAN
+ iowr I[$r1 + 0x100] $r4 // MEM_CMD
+ ctx_chan_wait_0:
+ iord $r4 I[$r1 + 0x100]
+ and $r4 0x1f
+ bra ne #ctx_chan_wait_0
+ iowr I[$r3 + 0x000] $r2 // CHAN_CUR
+
+ // load channel header, fetch PGRAPH context pointer
+ mov $xtargets $r0
+ bclr $r2 31
+ shl b32 $r2 4
+ add b32 $r2 2
+
+ trace_set(T_LCHAN)
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_BASE
+ mov $r1 0xa20
+ shl b32 $r1 6
+ mov $r2 0x0002
+ sethi $r2 0x80000000
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
+ mov $r1 0x10 // chan + 0x0210
+ mov $r2 #xfer_data
+ sethi $r2 0x00020000 // 16 bytes
+ xdld $r1 $r2
+ xdwait
+ trace_clr(T_LCHAN)
+
+ // update current context
+ ld b32 $r1 D[$r0 + #xfer_data + 4]
+ shl b32 $r1 24
+ ld b32 $r2 D[$r0 + #xfer_data + 0]
+ shr b32 $r2 8
+ or $r1 $r2
+ st b32 D[$r0 + #ctx_current] $r1
+
+ // set transfer base to start of context, and fetch context header
+ trace_set(T_LCTXH)
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1 // MEM_BASE
+ mov $r2 1
+ mov $r1 0xa20
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdld $r0 $r1
+ xdwait
+ trace_clr(T_LCTXH)
+
+ trace_clr(T_CHAN)
+ ret
+
+// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
+// the active channel for ctxctl, but not actually transfer
+// any context data. intended for use only during initial
+// context construction.
+//
+// In: $r2 channel address
+//
+ctx_chan:
+ call #ctx_4160s
+ call #ctx_load
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
+ ctx_chan_wait:
+ iord $r2 I[$r1 + 0x000]
+ or $r2 $r2
+ bra ne #ctx_chan_wait
+ call #ctx_4160c
+ ret
+
+// Execute per-context state overrides list
+//
+// Only executed on the first load of a channel. Might want to look into
+// removing this and having the host directly modify the channel's context
+// to change this state... The nouveau DRM already builds this list as
+// it's definitely needed for NVIDIA's, so we may as well use it for now
+//
+// Input: $r1 mmio list length
+//
+ctx_mmio_exec:
+ // set transfer base to be the mmio list
+ ld b32 $r3 D[$r0 + #chan_mmio_address]
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ clear b32 $r3
+ ctx_mmio_loop:
+ // fetch next 256 bytes of mmio list if necessary
+ and $r4 $r3 0xff
+ bra ne #ctx_mmio_pull
+ mov $r5 #xfer_data
+ sethi $r5 0x00060000 // 256 bytes
+ xdld $r3 $r5
+ xdwait
+
+ // execute a single list entry
+ ctx_mmio_pull:
+ ld b32 $r14 D[$r4 + #xfer_data + 0x00]
+ ld b32 $r15 D[$r4 + #xfer_data + 0x04]
+ call #nv_wr32
+
+ // next!
+ add b32 $r3 8
+ sub b32 $r1 1
+ bra ne #ctx_mmio_loop
+
+ // set transfer base back to the current context
+ ctx_mmio_done:
+ ld b32 $r3 D[$r0 + #ctx_current]
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ // disable the mmio list now, we don't need/want to execute it again
+ st b32 D[$r0 + #chan_mmio_count] $r0
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdst $r0 $r1
+ xdwait
+ ret
+
+// Transfer HUB context data between GPU and storage area
+//
+// In: $r2 channel address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ bra not $p1 #ctx_xfer_pre
+ bra $p2 #ctx_xfer_pre_load
+ ctx_xfer_pre:
+ mov $r15 0x10
+ call #ctx_86c
+ call #ctx_4160s
+ bra not $p1 #ctx_xfer_exec
+
+ ctx_xfer_pre_load:
+ mov $r15 2
+ call #ctx_4170s
+ call #ctx_4170w
+ call #ctx_redswitch
+ clear b32 $r15
+ call #ctx_4170s
+ call #ctx_load
+
+ // fetch context pointer, and initiate xfer on all GPCs
+ ctx_xfer_exec:
+ ld b32 $r1 D[$r0 + #ctx_current]
+ mov $r2 0x414
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
+ mov $r14 -0x5b00
+ sethi $r14 0x410000
+ mov b32 $r15 $r1
+ call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
+ add b32 $r14 4
+ xbit $r15 $flags $p1
+ xbit $r2 $flags $p2
+ shl b32 $r2 1
+ or $r15 $r2
+ call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 6 // first, last
+ mov $r11 0 // base = 0
+ ld b32 $r12 D[$r0 + #hub_mmio_list_head]
+ ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // wait for GPCs to all complete
+ mov $r10 8 // DONE_BAR
+ call #wait_doneo
+
+ // wait for strand xfer to complete
+ call #strand_wait
+
+ // post-op
+ bra $p1 #ctx_xfer_post
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1] $r2 // MEM_CMD
+ ctx_xfer_post_save_wait:
+ iord $r2 I[$r1]
+ or $r2 $r2
+ bra ne #ctx_xfer_post_save_wait
+
+ bra $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r15 2
+ call #ctx_4170s
+ clear b32 $r15
+ call #ctx_86c
+ call #strand_post
+ call #ctx_4170w
+ clear b32 $r15
+ call #ctx_4170s
+
+ bra not $p1 #ctx_xfer_no_post_mmio
+ ld b32 $r1 D[$r0 + #chan_mmio_count]
+ or $r1 $r1
+ bra e #ctx_xfer_no_post_mmio
+ call #ctx_mmio_exec
+
+ ctx_xfer_no_post_mmio:
+ call #ctx_4160c
+
+ ctx_xfer_done:
+ ret
+
+.align 256
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
new file mode 100644
index 0000000..85a8d55
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
@@ -0,0 +1,927 @@
+uint32_t nvc0_grhub_data[] = {
+/* 0x0000: gpc_count */
+ 0x00000000,
+/* 0x0004: rop_count */
+ 0x00000000,
+/* 0x0008: cmd_queue */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0050: hub_mmio_list_head */
+ 0x00000000,
+/* 0x0054: hub_mmio_list_tail */
+ 0x00000000,
+/* 0x0058: ctx_current */
+ 0x00000000,
+/* 0x005c: chipsets */
+ 0x000000c0,
+ 0x013c00a0,
+ 0x000000c1,
+ 0x014000a0,
+ 0x000000c3,
+ 0x013c00a0,
+ 0x000000c4,
+ 0x013c00a0,
+ 0x000000c8,
+ 0x013c00a0,
+ 0x000000ce,
+ 0x013c00a0,
+ 0x000000cf,
+ 0x013c00a0,
+ 0x000000d9,
+ 0x01dc0140,
+ 0x00000000,
+/* 0x00a0: nvc0_hub_mmio_head */
+ 0x0417e91c,
+ 0x04400204,
+ 0x28404004,
+ 0x00404044,
+ 0x34404094,
+ 0x184040d0,
+ 0x004040f8,
+ 0x08404130,
+ 0x08404150,
+ 0x04404164,
+ 0x08404174,
+ 0x1c404200,
+ 0x34404404,
+ 0x0c404460,
+ 0x00404480,
+ 0x00404498,
+ 0x0c404604,
+ 0x7c404618,
+ 0x50404698,
+ 0x044046f0,
+ 0x54404700,
+ 0x00405800,
+ 0x08405830,
+ 0x00405854,
+ 0x0c405870,
+ 0x04405a00,
+ 0x00405a18,
+ 0x00406020,
+ 0x0c406028,
+ 0x044064a8,
+ 0x044064b4,
+ 0x00407804,
+ 0x1440780c,
+ 0x004078bc,
+ 0x18408000,
+ 0x00408064,
+ 0x08408800,
+ 0x0c408900,
+ 0x00408980,
+/* 0x013c: nvc0_hub_mmio_tail */
+ 0x044064c0,
+/* 0x0140: nvc1_hub_mmio_tail */
+/* 0x0140: nvd9_hub_mmio_head */
+ 0x0417e91c,
+ 0x04400204,
+ 0x24404004,
+ 0x00404044,
+ 0x34404094,
+ 0x184040d0,
+ 0x004040f8,
+ 0x08404130,
+ 0x08404150,
+ 0x04404164,
+ 0x04404178,
+ 0x1c404200,
+ 0x34404404,
+ 0x0c404460,
+ 0x00404480,
+ 0x00404498,
+ 0x0c404604,
+ 0x7c404618,
+ 0x50404698,
+ 0x044046f0,
+ 0x54404700,
+ 0x00405800,
+ 0x08405830,
+ 0x00405854,
+ 0x0c405870,
+ 0x04405a00,
+ 0x00405a18,
+ 0x00406020,
+ 0x0c406028,
+ 0x044064a8,
+ 0x104064b4,
+ 0x00407804,
+ 0x1440780c,
+ 0x004078bc,
+ 0x18408000,
+ 0x00408064,
+ 0x08408800,
+ 0x0c408900,
+ 0x00408980,
+/* 0x01dc: nvd9_hub_mmio_tail */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0200: chan_data */
+/* 0x0200: chan_mmio_count */
+ 0x00000000,
+/* 0x0204: chan_mmio_address */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0300: xfer_data */
+ 0x00000000,
+};
+
+uint32_t nvc0_grhub_code[] = {
+ 0x03090ef5,
+/* 0x0004: queue_put */
+ 0x9800d898,
+ 0x86f001d9,
+ 0x0489b808,
+ 0xf00c1bf4,
+ 0x21f502f7,
+ 0x00f802ec,
+/* 0x001c: queue_put_next */
+ 0xb60798c4,
+ 0x8dbb0384,
+ 0x0880b600,
+ 0x80008e80,
+ 0x90b6018f,
+ 0x0f94f001,
+ 0xf801d980,
+/* 0x0039: queue_get */
+ 0x0131f400,
+ 0x9800d898,
+ 0x89b801d9,
+ 0x210bf404,
+ 0xb60789c4,
+ 0x9dbb0394,
+ 0x0890b600,
+ 0x98009e98,
+ 0x80b6019f,
+ 0x0f84f001,
+ 0xf400d880,
+/* 0x0066: queue_get_done */
+ 0x00f80132,
+/* 0x0068: nv_rd32 */
+ 0x0728b7f1,
+ 0xb906b4b6,
+ 0xc9f002ec,
+ 0x00bcd01f,
+/* 0x0078: nv_rd32_wait */
+ 0xc800bccf,
+ 0x1bf41fcc,
+ 0x06a7f0fa,
+ 0x010321f5,
+ 0xf840bfcf,
+/* 0x008d: nv_wr32 */
+ 0x28b7f100,
+ 0x06b4b607,
+ 0xb980bfd0,
+ 0xc9f002ec,
+ 0x1ec9f01f,
+/* 0x00a3: nv_wr32_wait */
+ 0xcf00bcd0,
+ 0xccc800bc,
+ 0xfa1bf41f,
+/* 0x00ae: watchdog_reset */
+ 0x87f100f8,
+ 0x84b60430,
+ 0x1ff9f006,
+ 0xf8008fd0,
+/* 0x00bd: watchdog_clear */
+ 0x3087f100,
+ 0x0684b604,
+ 0xf80080d0,
+/* 0x00c9: wait_donez */
+ 0x3c87f100,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d000,
+ 0x081887f1,
+ 0xd00684b6,
+/* 0x00e2: wait_done_wait_donez */
+ 0x87f1008a,
+ 0x84b60400,
+ 0x0088cf06,
+ 0xf4888aff,
+ 0x87f1f31b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00099,
+/* 0x0103: wait_doneo */
+ 0xf100f800,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00099f0,
+ 0x87f10089,
+ 0x84b60818,
+ 0x008ad006,
+/* 0x011c: wait_done_wait_doneo */
+ 0x040087f1,
+ 0xcf0684b6,
+ 0x8aff0088,
+ 0xf30bf488,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0099f094,
+ 0xf80089d0,
+/* 0x013d: mmctx_size */
+/* 0x013f: nv_mmctx_size_loop */
+ 0x9894bd00,
+ 0x85b600e8,
+ 0x0180b61a,
+ 0xbb0284b6,
+ 0xe0b60098,
+ 0x04efb804,
+ 0xb9eb1bf4,
+ 0x00f8029f,
+/* 0x015c: mmctx_xfer */
+ 0x083c87f1,
+ 0xbd0684b6,
+ 0x0199f094,
+ 0xf10089d0,
+ 0xb6071087,
+ 0x94bd0684,
+ 0xf405bbfd,
+ 0x8bd0090b,
+ 0x0099f000,
+/* 0x0180: mmctx_base_disabled */
+ 0xf405eefd,
+ 0x8ed00c0b,
+ 0xc08fd080,
+/* 0x018f: mmctx_multi_disabled */
+ 0xb70199f0,
+ 0xc8010080,
+ 0xb4b600ab,
+ 0x0cb9f010,
+ 0xb601aec8,
+ 0xbefd11e4,
+ 0x008bd005,
+/* 0x01a8: mmctx_exec_loop */
+/* 0x01a8: mmctx_wait_free */
+ 0xf0008ecf,
+ 0x0bf41fe4,
+ 0x00ce98fa,
+ 0xd005e9fd,
+ 0xc0b6c08e,
+ 0x04cdb804,
+ 0xc8e81bf4,
+ 0x1bf402ab,
+/* 0x01c9: mmctx_fini_wait */
+ 0x008bcf18,
+ 0xb01fb4f0,
+ 0x1bf410b4,
+ 0x02a7f0f7,
+ 0xf4c921f4,
+/* 0x01de: mmctx_stop */
+ 0xabc81b0e,
+ 0x10b4b600,
+ 0xf00cb9f0,
+ 0x8bd012b9,
+/* 0x01ed: mmctx_stop_wait */
+ 0x008bcf00,
+ 0xf412bbc8,
+/* 0x01f6: mmctx_done */
+ 0x87f1fa1b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00199,
+/* 0x0207: strand_wait */
+ 0xf900f800,
+ 0x02a7f0a0,
+ 0xfcc921f4,
+/* 0x0213: strand_pre */
+ 0xf100f8a0,
+ 0xf04afc87,
+ 0x97f00283,
+ 0x0089d00c,
+ 0x020721f5,
+/* 0x0226: strand_post */
+ 0x87f100f8,
+ 0x83f04afc,
+ 0x0d97f002,
+ 0xf50089d0,
+ 0xf8020721,
+/* 0x0239: strand_set */
+ 0xfca7f100,
+ 0x02a3f04f,
+ 0x0500aba2,
+ 0xd00fc7f0,
+ 0xc7f000ac,
+ 0x00bcd00b,
+ 0x020721f5,
+ 0xf000aed0,
+ 0xbcd00ac7,
+ 0x0721f500,
+/* 0x0263: strand_ctx_init */
+ 0xf100f802,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x21f50089,
+ 0xe7f00213,
+ 0x3921f503,
+ 0xfca7f102,
+ 0x02a3f046,
+ 0x0400aba0,
+ 0xf040a0d0,
+ 0xbcd001c7,
+ 0x0721f500,
+ 0x010c9202,
+ 0xf000acd0,
+ 0xbcd002c7,
+ 0x0721f500,
+ 0x2621f502,
+ 0x8087f102,
+ 0x0684b608,
+ 0xb70089cf,
+ 0x95220080,
+/* 0x02ba: ctx_init_strand_loop */
+ 0x8ed008fe,
+ 0x408ed000,
+ 0xb6808acf,
+ 0xa0b606a5,
+ 0x00eabb01,
+ 0xb60480b6,
+ 0x1bf40192,
+ 0x08e4b6e8,
+ 0xf1f2efbc,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x00f80089,
+/* 0x02ec: error */
+ 0xe7f1e0f9,
+ 0xe4b60814,
+ 0x00efd006,
+ 0x0c1ce7f1,
+ 0xf006e4b6,
+ 0xefd001f7,
+ 0xf8e0fc00,
+/* 0x0309: init */
+ 0xfe04bd00,
+ 0x07fe0004,
+ 0x0017f100,
+ 0x0227f012,
+ 0xf10012d0,
+ 0xfe05b917,
+ 0x17f10010,
+ 0x10d00400,
+ 0x0437f1c0,
+ 0x0634b604,
+ 0x200327f1,
+ 0xf10032d0,
+ 0xd0200427,
+ 0x27f10132,
+ 0x32d0200b,
+ 0x0c27f102,
+ 0x0732d020,
+ 0x0c2427f1,
+ 0xb90624b6,
+ 0x23d00003,
+ 0x0427f100,
+ 0x0023f087,
+ 0xb70012d0,
+ 0xf0010012,
+ 0x12d00427,
+ 0x1031f400,
+ 0x9604e7f1,
+ 0xf440e3f0,
+ 0xf1c76821,
+ 0x01018090,
+ 0x801ff4f0,
+ 0x17f0000f,
+ 0x041fbb01,
+ 0xf10112b6,
+ 0xb6040c27,
+ 0x21d00624,
+ 0x4021d000,
+ 0x080027f1,
+ 0xcf0624b6,
+ 0xf7f00022,
+/* 0x03a9: init_find_chipset */
+ 0x08f0b654,
+ 0xb800f398,
+ 0x0bf40432,
+ 0x0034b00b,
+ 0xf8f11bf4,
+/* 0x03bd: init_context */
+ 0x0017f100,
+ 0x02fe5801,
+ 0xf003ff58,
+ 0x0e8000e3,
+ 0x150f8014,
+ 0x013d21f5,
+ 0x070037f1,
+ 0x950634b6,
+ 0x34d00814,
+ 0x4034d000,
+ 0x130030b7,
+ 0xb6001fbb,
+ 0x3fd002f5,
+ 0x0815b600,
+ 0xb60110b6,
+ 0x1fb90814,
+ 0x6321f502,
+ 0x001fbb02,
+ 0xf1000398,
+ 0xf0200047,
+/* 0x040e: init_gpc */
+ 0x4ea05043,
+ 0x1fb90804,
+ 0x8d21f402,
+ 0x08004ea0,
+ 0xf4022fb9,
+ 0x4ea08d21,
+ 0xf4bd010c,
+ 0xa08d21f4,
+ 0xf401044e,
+ 0x4ea08d21,
+ 0xf7f00100,
+ 0x8d21f402,
+ 0x08004ea0,
+/* 0x0440: init_gpc_wait */
+ 0xc86821f4,
+ 0x0bf41fff,
+ 0x044ea0fa,
+ 0x6821f408,
+ 0xb7001fbb,
+ 0xb6800040,
+ 0x1bf40132,
+ 0x0027f1b4,
+ 0x0624b608,
+ 0xb74021d0,
+ 0xbd080020,
+ 0x1f19f014,
+/* 0x0473: main */
+ 0xf40021d0,
+ 0x28f40031,
+ 0x08d7f000,
+ 0xf43921f4,
+ 0xe4b1f401,
+ 0x1bf54001,
+ 0x87f100d1,
+ 0x84b6083c,
+ 0xf094bd06,
+ 0x89d00499,
+ 0x0017f100,
+ 0x0614b60b,
+ 0xcf4012cf,
+ 0x13c80011,
+ 0x7e0bf41f,
+ 0xf41f23c8,
+ 0x20f95a0b,
+ 0xf10212b9,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00799f0,
+ 0x32f40089,
+ 0x0231f401,
+ 0x082921f5,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0799f094,
+ 0xfc0089d0,
+ 0x3c87f120,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d006,
+ 0xf50131f4,
+ 0xf1082921,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00699f0,
+ 0x0ef40089,
+/* 0x0509: chsw_prev_no_next */
+ 0xb920f931,
+ 0x32f40212,
+ 0x0232f401,
+ 0x082921f5,
+ 0x17f120fc,
+ 0x14b60b00,
+ 0x0012d006,
+/* 0x0527: chsw_no_prev */
+ 0xc8130ef4,
+ 0x0bf41f23,
+ 0x0131f40d,
+ 0xf50232f4,
+/* 0x0537: chsw_done */
+ 0xf1082921,
+ 0xb60b0c17,
+ 0x27f00614,
+ 0x0012d001,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0499f094,
+ 0xf50089d0,
+/* 0x0557: main_not_ctx_switch */
+ 0xb0ff200e,
+ 0x1bf401e4,
+ 0x02f2b90d,
+ 0x07b521f5,
+/* 0x0567: main_not_ctx_chan */
+ 0xb0420ef4,
+ 0x1bf402e4,
+ 0x3c87f12e,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d007,
+ 0xf40132f4,
+ 0x21f50232,
+ 0x87f10829,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00799,
+ 0x110ef400,
+/* 0x0598: main_not_ctx_save */
+ 0xf010ef94,
+ 0x21f501f5,
+ 0x0ef502ec,
+/* 0x05a6: main_done */
+ 0x17f1fed1,
+ 0x14b60820,
+ 0xf024bd06,
+ 0x12d01f29,
+ 0xbe0ef500,
+/* 0x05b9: ih */
+ 0xfe80f9fe,
+ 0x80f90188,
+ 0xa0f990f9,
+ 0xd0f9b0f9,
+ 0xf0f9e0f9,
+ 0xc4800acf,
+ 0x0bf404ab,
+ 0x00b7f11d,
+ 0x08d7f019,
+ 0xcf40becf,
+ 0x21f400bf,
+ 0x00b0b704,
+ 0x01e7f004,
+/* 0x05ef: ih_no_fifo */
+ 0xe400bed0,
+ 0xf40100ab,
+ 0xd7f00d0b,
+ 0x01e7f108,
+ 0x0421f440,
+/* 0x0600: ih_no_ctxsw */
+ 0x0104b7f1,
+ 0xabffb0bd,
+ 0x0d0bf4b4,
+ 0x0c1ca7f1,
+ 0xd006a4b6,
+/* 0x0616: ih_no_other */
+ 0x0ad000ab,
+ 0xfcf0fc40,
+ 0xfcd0fce0,
+ 0xfca0fcb0,
+ 0xfe80fc90,
+ 0x80fc0088,
+ 0xf80032f4,
+/* 0x0631: ctx_4160s */
+ 0x60e7f101,
+ 0x40e3f041,
+ 0xf401f7f0,
+/* 0x063e: ctx_4160s_wait */
+ 0x21f48d21,
+ 0x04ffc868,
+ 0xf8fa0bf4,
+/* 0x0649: ctx_4160c */
+ 0x60e7f100,
+ 0x40e3f041,
+ 0x21f4f4bd,
+/* 0x0657: ctx_4170s */
+ 0xf100f88d,
+ 0xf04170e7,
+ 0xf5f040e3,
+ 0x8d21f410,
+/* 0x0666: ctx_4170w */
+ 0xe7f100f8,
+ 0xe3f04170,
+ 0x6821f440,
+ 0xf410f4f0,
+ 0x00f8f31b,
+/* 0x0678: ctx_redswitch */
+ 0x0614e7f1,
+ 0xf106e4b6,
+ 0xd00270f7,
+ 0xf7f000ef,
+/* 0x0689: ctx_redswitch_delay */
+ 0x01f2b608,
+ 0xf1fd1bf4,
+ 0xd00770f7,
+ 0x00f800ef,
+/* 0x0698: ctx_86c */
+ 0x086ce7f1,
+ 0xd006e4b6,
+ 0xe7f100ef,
+ 0xe3f08a14,
+ 0x8d21f440,
+ 0xa86ce7f1,
+ 0xf441e3f0,
+ 0x00f88d21,
+/* 0x06b8: ctx_load */
+ 0x083c87f1,
+ 0xbd0684b6,
+ 0x0599f094,
+ 0xf00089d0,
+ 0x21f40ca7,
+ 0x2417f1c9,
+ 0x0614b60a,
+ 0xf10010d0,
+ 0xb60b0037,
+ 0x32d00634,
+ 0x0c17f140,
+ 0x0614b60a,
+ 0xd00747f0,
+ 0x14d00012,
+/* 0x06f1: ctx_chan_wait_0 */
+ 0x4014cf40,
+ 0xf41f44f0,
+ 0x32d0fa1b,
+ 0x000bfe00,
+ 0xb61f2af0,
+ 0x20b60424,
+ 0x3c87f102,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d008,
+ 0x0a0417f1,
+ 0xd00614b6,
+ 0x17f10012,
+ 0x14b60a20,
+ 0x0227f006,
+ 0x800023f1,
+ 0xf00012d0,
+ 0x27f11017,
+ 0x23f00300,
+ 0x0512fa02,
+ 0x87f103f8,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00899,
+ 0xc1019800,
+ 0x981814b6,
+ 0x25b6c002,
+ 0x0512fd08,
+ 0xf1160180,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00999f0,
+ 0x27f10089,
+ 0x24b60a04,
+ 0x0021d006,
+ 0xf10127f0,
+ 0xb60a2017,
+ 0x12d00614,
+ 0x0017f100,
+ 0x0613f002,
+ 0xf80501fa,
+ 0x5c87f103,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d009,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0599f094,
+ 0xf80089d0,
+/* 0x07b5: ctx_chan */
+ 0x3121f500,
+ 0xb821f506,
+ 0x0ca7f006,
+ 0xf1c921f4,
+ 0xb60a1017,
+ 0x27f00614,
+ 0x0012d005,
+/* 0x07d0: ctx_chan_wait */
+ 0xfd0012cf,
+ 0x1bf40522,
+ 0x4921f5fa,
+/* 0x07df: ctx_mmio_exec */
+ 0x9800f806,
+ 0x27f18103,
+ 0x24b60a04,
+ 0x0023d006,
+/* 0x07ee: ctx_mmio_loop */
+ 0x34c434bd,
+ 0x0f1bf4ff,
+ 0x030057f1,
+ 0xfa0653f0,
+ 0x03f80535,
+/* 0x0800: ctx_mmio_pull */
+ 0x98c04e98,
+ 0x21f4c14f,
+ 0x0830b68d,
+ 0xf40112b6,
+/* 0x0812: ctx_mmio_done */
+ 0x0398df1b,
+ 0x0023d016,
+ 0xf1800080,
+ 0xf0020017,
+ 0x01fa0613,
+ 0xf803f806,
+/* 0x0829: ctx_xfer */
+ 0x0611f400,
+/* 0x082f: ctx_xfer_pre */
+ 0xf01102f4,
+ 0x21f510f7,
+ 0x21f50698,
+ 0x11f40631,
+/* 0x083d: ctx_xfer_pre_load */
+ 0x02f7f01c,
+ 0x065721f5,
+ 0x066621f5,
+ 0x067821f5,
+ 0x21f5f4bd,
+ 0x21f50657,
+/* 0x0856: ctx_xfer_exec */
+ 0x019806b8,
+ 0x1427f116,
+ 0x0624b604,
+ 0xf10020d0,
+ 0xf0a500e7,
+ 0x1fb941e3,
+ 0x8d21f402,
+ 0xf004e0b6,
+ 0x2cf001fc,
+ 0x0124b602,
+ 0xf405f2fd,
+ 0x17f18d21,
+ 0x13f04afc,
+ 0x0c27f002,
+ 0xf50012d0,
+ 0xf1020721,
+ 0xf047fc27,
+ 0x20d00223,
+ 0x012cf000,
+ 0xd00320b6,
+ 0xacf00012,
+ 0x06a5f001,
+ 0x9800b7f0,
+ 0x0d98140c,
+ 0x00e7f015,
+ 0x015c21f5,
+ 0xf508a7f0,
+ 0xf5010321,
+ 0xf4020721,
+ 0xa7f02201,
+ 0xc921f40c,
+ 0x0a1017f1,
+ 0xf00614b6,
+ 0x12d00527,
+/* 0x08dd: ctx_xfer_post_save_wait */
+ 0x0012cf00,
+ 0xf40522fd,
+ 0x02f4fa1b,
+/* 0x08e9: ctx_xfer_post */
+ 0x02f7f032,
+ 0x065721f5,
+ 0x21f5f4bd,
+ 0x21f50698,
+ 0x21f50226,
+ 0xf4bd0666,
+ 0x065721f5,
+ 0x981011f4,
+ 0x11fd8001,
+ 0x070bf405,
+ 0x07df21f5,
+/* 0x0914: ctx_xfer_no_post_mmio */
+ 0x064921f5,
+/* 0x0918: ctx_xfer_done */
+ 0x000000f8,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
new file mode 100644
index 0000000..138eeaa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
@@ -0,0 +1,780 @@
+/* fuc microcode for nve0 PGRAPH/HUB
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* To build:
+ * m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h
+ */
+
+.section #nve0_grhub_data
+include(`nve0.fuc')
+gpc_count: .b32 0
+rop_count: .b32 0
+cmd_queue: queue_init
+hub_mmio_list_head: .b32 0
+hub_mmio_list_tail: .b32 0
+
+ctx_current: .b32 0
+
+chipsets:
+.b8 0xe4 0 0 0
+.b16 #nve4_hub_mmio_head
+.b16 #nve4_hub_mmio_tail
+.b8 0xe7 0 0 0
+.b16 #nve4_hub_mmio_head
+.b16 #nve4_hub_mmio_tail
+.b8 0 0 0 0
+
+nve4_hub_mmio_head:
+mmctx_data(0x17e91c, 2)
+mmctx_data(0x400204, 2)
+mmctx_data(0x404010, 7)
+mmctx_data(0x4040a8, 9)
+mmctx_data(0x4040d0, 7)
+mmctx_data(0x4040f8, 1)
+mmctx_data(0x404130, 3)
+mmctx_data(0x404150, 3)
+mmctx_data(0x404164, 1)
+mmctx_data(0x4041a0, 4)
+mmctx_data(0x404200, 4)
+mmctx_data(0x404404, 14)
+mmctx_data(0x404460, 4)
+mmctx_data(0x404480, 1)
+mmctx_data(0x404498, 1)
+mmctx_data(0x404604, 4)
+mmctx_data(0x404618, 4)
+mmctx_data(0x40462c, 2)
+mmctx_data(0x404640, 1)
+mmctx_data(0x404654, 1)
+mmctx_data(0x404660, 1)
+mmctx_data(0x404678, 19)
+mmctx_data(0x4046c8, 3)
+mmctx_data(0x404700, 3)
+mmctx_data(0x404718, 10)
+mmctx_data(0x404744, 2)
+mmctx_data(0x404754, 1)
+mmctx_data(0x405800, 1)
+mmctx_data(0x405830, 3)
+mmctx_data(0x405854, 1)
+mmctx_data(0x405870, 4)
+mmctx_data(0x405a00, 2)
+mmctx_data(0x405a18, 1)
+mmctx_data(0x405b00, 1)
+mmctx_data(0x405b10, 1)
+mmctx_data(0x406020, 1)
+mmctx_data(0x406028, 4)
+mmctx_data(0x4064a8, 2)
+mmctx_data(0x4064b4, 2)
+mmctx_data(0x4064c0, 12)
+mmctx_data(0x4064fc, 1)
+mmctx_data(0x407040, 1)
+mmctx_data(0x407804, 1)
+mmctx_data(0x40780c, 6)
+mmctx_data(0x4078bc, 1)
+mmctx_data(0x408000, 7)
+mmctx_data(0x408064, 1)
+mmctx_data(0x408800, 3)
+mmctx_data(0x408840, 1)
+mmctx_data(0x408900, 3)
+mmctx_data(0x408980, 1)
+nve4_hub_mmio_tail:
+
+.align 256
+chan_data:
+chan_mmio_count: .b32 0
+chan_mmio_address: .b32 0
+
+.align 256
+xfer_data: .b32 0
+
+.section #nve0_grhub_code
+bra #init
+define(`include_code')
+include(`nve0.fuc')
+
+// reports an exception to the host
+//
+// In: $r15 error code (see nve0.fuc)
+//
+error:
+ push $r14
+ mov $r14 0x814
+ shl b32 $r14 6
+ iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
+ mov $r14 0xc1c
+ shl b32 $r14 6
+ mov $r15 1
+ iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
+ pop $r14
+ ret
+
+// HUB fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: total PGRAPH context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+ mov $xdbase $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // route HUB_CHANNEL_SWITCH to fuc interrupt 8
+ mov $r3 0x404
+ shl b32 $r3 6
+ mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
+ iowr I[$r3 + 0x000] $r2
+
+ // not sure what these are, route them because NVIDIA does, and
+ // the IRQ handler will signal the host if we ever get one.. we
+ // may find out if/why we need to handle these if so..
+ //
+ mov $r2 0x2004
+ iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
+ mov $r2 0x200b
+ iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
+ mov $r2 0x200c
+ iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
+
+ // enable all INTR_UP interrupts
+ mov $r2 0xc24
+ shl b32 $r2 6
+ not b32 $r3 $r0
+ iowr I[$r2] $r3
+
+ // enable fifo, ctxsw, 9, 10, 15 interrupts
+ mov $r2 -0x78fc // 0x8704
+ sethi $r2 0
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // fifo level triggered, rest edge
+ sub b32 $r1 0x100
+ mov $r2 4
+ iowr I[$r1] $r2
+
+ // enable interrupts
+ bset $flags ie0
+
+ // fetch enabled GPC/ROP counts
+ mov $r14 -0x69fc // 0x409604
+ sethi $r14 0x400000
+ call #nv_rd32
+ extr $r1 $r15 16:20
+ st b32 D[$r0 + #rop_count] $r1
+ and $r15 0x1f
+ st b32 D[$r0 + #gpc_count] $r15
+
+ // set BAR_REQMASK to GPC mask
+ mov $r1 1
+ shl b32 $r1 $r15
+ sub b32 $r1 1
+ mov $r2 0x40c
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1
+ iowr I[$r2 + 0x100] $r1
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r15 #chipsets - 8
+ init_find_chipset:
+ add b32 $r15 8
+ ld b32 $r3 D[$r15 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // context size calculation, reserve first 256 bytes for use by fuc
+ init_context:
+ mov $r1 256
+
+ // calculate size of mmio context data
+ ld b16 $r14 D[$r15 + 4]
+ ld b16 $r15 D[$r15 + 6]
+ sethi $r14 0
+ st b32 D[$r0 + #hub_mmio_list_head] $r14
+ st b32 D[$r0 + #hub_mmio_list_tail] $r15
+ call #mmctx_size
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't (currently) ever change
+ mov $r3 0x700
+ shl b32 $r3 6
+ shr b32 $r4 $r1 8
+ iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
+ iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
+ add b32 $r3 0x1300
+ add b32 $r1 $r15
+ shr b32 $r15 2
+ iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
+
+ // strands, base offset needs to be aligned to 256 bytes
+ shr b32 $r1 8
+ add b32 $r1 1
+ shl b32 $r1 8
+ mov b32 $r15 $r1
+ call #strand_ctx_init
+ add b32 $r1 $r15
+
+ // initialise each GPC in sequence by passing in the offset of its
+ // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
+ // has previously been uploaded by the host) running.
+ //
+ // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
+ // when it has completed, and return the size of its context data
+ // in GPCn_CC_SCRATCH[1]
+ //
+ ld b32 $r3 D[$r0 + #gpc_count]
+ mov $r4 0x2000
+ sethi $r4 0x500000
+ init_gpc:
+ // setup, and start GPC ucode running
+ add b32 $r14 $r4 0x804
+ mov b32 $r15 $r1
+ call #nv_wr32 // CC_SCRATCH[1] = ctx offset
+ add b32 $r14 $r4 0x800
+ mov b32 $r15 $r2
+ call #nv_wr32 // CC_SCRATCH[0] = chipset
+ add b32 $r14 $r4 0x10c
+ clear b32 $r15
+ call #nv_wr32
+ add b32 $r14 $r4 0x104
+ call #nv_wr32 // ENTRY
+ add b32 $r14 $r4 0x100
+ mov $r15 2 // CTRL_START_TRIGGER
+ call #nv_wr32 // CTRL
+
+ // wait for it to complete, and adjust context size
+ add b32 $r14 $r4 0x800
+ init_gpc_wait:
+ call #nv_rd32
+ xbit $r15 $r15 31
+ bra e #init_gpc_wait
+ add b32 $r14 $r4 0x804
+ call #nv_rd32
+ add b32 $r1 $r15
+
+ // next!
+ add b32 $r4 0x8000
+ sub b32 $r3 1
+ bra ne #init_gpc
+
+ // save context size, and tell host we're ready
+ mov $r2 0x800
+ shl b32 $r2 6
+ iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
+ add b32 $r2 0x800
+ clear b32 $r1
+ bset $r1 31
+ iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ // sleep until we have something to do
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // context switch, requested by GPU?
+ cmpu b32 $r14 0x4001
+ bra ne #main_not_ctx_switch
+ trace_set(T_AUTO)
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x100] // CHAN_NEXT
+ iord $r1 I[$r1 + 0x000] // CHAN_CUR
+
+ xbit $r3 $r1 31
+ bra e #chsw_no_prev
+ xbit $r3 $r2 31
+ bra e #chsw_prev_no_next
+ push $r2
+ mov b32 $r2 $r1
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bset $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE);
+ pop $r2
+ trace_set(T_LOAD);
+ bset $flags $p1
+ call #ctx_xfer
+ trace_clr(T_LOAD);
+ bra #chsw_done
+ chsw_prev_no_next:
+ push $r2
+ mov b32 $r2 $r1
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ pop $r2
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iowr I[$r1] $r2
+ bra #chsw_done
+ chsw_no_prev:
+ xbit $r3 $r2 31
+ bra e #chsw_done
+ bset $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+
+ // ack the context switch request
+ chsw_done:
+ mov $r1 0xb0c
+ shl b32 $r1 6
+ mov $r2 1
+ iowr I[$r1 + 0x000] $r2 // 0x409b0c
+ trace_clr(T_AUTO)
+ bra #main
+
+ // request to set current channel? (*not* a context switch)
+ main_not_ctx_switch:
+ cmpu b32 $r14 0x0001
+ bra ne #main_not_ctx_chan
+ mov b32 $r2 $r15
+ call #ctx_chan
+ bra #main_done
+
+ // request to store current channel context?
+ main_not_ctx_chan:
+ cmpu b32 $r14 0x0002
+ bra ne #main_not_ctx_save
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE)
+ bra #main_done
+
+ main_not_ctx_save:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+ main_done:
+ mov $r1 0x820
+ shl b32 $r1 6
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // context switch request?
+ ih_no_fifo:
+ and $r11 $r10 0x00000100
+ bra e #ih_no_ctxsw
+ // enqueue a context switch for later processing
+ mov $r13 #cmd_queue
+ mov $r14 0x4001
+ call #queue_put
+
+ // anything we didn't handle, bring it to the host's attention
+ ih_no_ctxsw:
+ mov $r11 0x104
+ not b32 $r11
+ and $r11 $r10 $r11
+ bra e #ih_no_other
+ mov $r10 0xc1c
+ shl b32 $r10 6
+ iowr I[$r10] $r11 // INTR_UP_SET
+
+ // ack, and wake up main()
+ ih_no_other:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+// Again, not real sure
+//
+// In: $r15 value to set 0x404170 to
+//
+ctx_4170s:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ or $r15 0x10
+ call #nv_wr32
+ ret
+
+// Waits for a ctx_4170s() call to complete
+//
+ctx_4170w:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ call #nv_rd32
+ and $r15 0x10
+ bra ne #ctx_4170w
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x270
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0x770
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
+ ret
+
+// Not a clue what this is for, except that unless the value is 0x10, the
+// strand context is saved (and presumably restored) incorrectly..
+//
+// In: $r15 value to set to (0x00/0x10 are used)
+//
+ctx_86c:
+ mov $r14 0x86c
+ shl b32 $r14 6
+ iowr I[$r14] $r15 // HUB(0x86c) = val
+ mov $r14 -0x75ec
+ sethi $r14 0x400000
+ call #nv_wr32 // ROP(0xa14) = val
+ mov $r14 -0x5794
+ sethi $r14 0x410000
+ call #nv_wr32 // GPC(0x86c) = val
+ ret
+
+// ctx_load - load's a channel's ctxctl data, and selects its vm
+//
+// In: $r2 channel address
+//
+ctx_load:
+ trace_set(T_CHAN)
+
+ // switch to channel, somewhat magic in parts..
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa24
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r0 // 0x409a24
+ mov $r3 0xb00
+ shl b32 $r3 6
+ iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
+ mov $r1 0xa0c
+ shl b32 $r1 6
+ mov $r4 7
+ iowr I[$r1 + 0x000] $r2 // MEM_CHAN
+ iowr I[$r1 + 0x100] $r4 // MEM_CMD
+ ctx_chan_wait_0:
+ iord $r4 I[$r1 + 0x100]
+ and $r4 0x1f
+ bra ne #ctx_chan_wait_0
+ iowr I[$r3 + 0x000] $r2 // CHAN_CUR
+
+ // load channel header, fetch PGRAPH context pointer
+ mov $xtargets $r0
+ bclr $r2 31
+ shl b32 $r2 4
+ add b32 $r2 2
+
+ trace_set(T_LCHAN)
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_BASE
+ mov $r1 0xa20
+ shl b32 $r1 6
+ mov $r2 0x0002
+ sethi $r2 0x80000000
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
+ mov $r1 0x10 // chan + 0x0210
+ mov $r2 #xfer_data
+ sethi $r2 0x00020000 // 16 bytes
+ xdld $r1 $r2
+ xdwait
+ trace_clr(T_LCHAN)
+
+ // update current context
+ ld b32 $r1 D[$r0 + #xfer_data + 4]
+ shl b32 $r1 24
+ ld b32 $r2 D[$r0 + #xfer_data + 0]
+ shr b32 $r2 8
+ or $r1 $r2
+ st b32 D[$r0 + #ctx_current] $r1
+
+ // set transfer base to start of context, and fetch context header
+ trace_set(T_LCTXH)
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1 // MEM_BASE
+ mov $r2 1
+ mov $r1 0xa20
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdld $r0 $r1
+ xdwait
+ trace_clr(T_LCTXH)
+
+ trace_clr(T_CHAN)
+ ret
+
+// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
+// the active channel for ctxctl, but not actually transfer
+// any context data. intended for use only during initial
+// context construction.
+//
+// In: $r2 channel address
+//
+ctx_chan:
+ call #ctx_load
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
+ ctx_chan_wait:
+ iord $r2 I[$r1 + 0x000]
+ or $r2 $r2
+ bra ne #ctx_chan_wait
+ ret
+
+// Execute per-context state overrides list
+//
+// Only executed on the first load of a channel. Might want to look into
+// removing this and having the host directly modify the channel's context
+// to change this state... The nouveau DRM already builds this list as
+// it's definitely needed for NVIDIA's, so we may as well use it for now
+//
+// Input: $r1 mmio list length
+//
+ctx_mmio_exec:
+ // set transfer base to be the mmio list
+ ld b32 $r3 D[$r0 + #chan_mmio_address]
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ clear b32 $r3
+ ctx_mmio_loop:
+ // fetch next 256 bytes of mmio list if necessary
+ and $r4 $r3 0xff
+ bra ne #ctx_mmio_pull
+ mov $r5 #xfer_data
+ sethi $r5 0x00060000 // 256 bytes
+ xdld $r3 $r5
+ xdwait
+
+ // execute a single list entry
+ ctx_mmio_pull:
+ ld b32 $r14 D[$r4 + #xfer_data + 0x00]
+ ld b32 $r15 D[$r4 + #xfer_data + 0x04]
+ call #nv_wr32
+
+ // next!
+ add b32 $r3 8
+ sub b32 $r1 1
+ bra ne #ctx_mmio_loop
+
+ // set transfer base back to the current context
+ ctx_mmio_done:
+ ld b32 $r3 D[$r0 + #ctx_current]
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ // disable the mmio list now, we don't need/want to execute it again
+ st b32 D[$r0 + #chan_mmio_count] $r0
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdst $r0 $r1
+ xdwait
+ ret
+
+// Transfer HUB context data between GPU and storage area
+//
+// In: $r2 channel address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ bra not $p1 #ctx_xfer_pre
+ bra $p2 #ctx_xfer_pre_load
+ ctx_xfer_pre:
+ mov $r15 0x10
+ call #ctx_86c
+ bra not $p1 #ctx_xfer_exec
+
+ ctx_xfer_pre_load:
+ mov $r15 2
+ call #ctx_4170s
+ call #ctx_4170w
+ call #ctx_redswitch
+ clear b32 $r15
+ call #ctx_4170s
+ call #ctx_load
+
+ // fetch context pointer, and initiate xfer on all GPCs
+ ctx_xfer_exec:
+ ld b32 $r1 D[$r0 + #ctx_current]
+ mov $r2 0x414
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
+ mov $r14 -0x5b00
+ sethi $r14 0x410000
+ mov b32 $r15 $r1
+ call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
+ add b32 $r14 4
+ xbit $r15 $flags $p1
+ xbit $r2 $flags $p2
+ shl b32 $r2 1
+ or $r15 $r2
+ call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 6 // first, last
+ mov $r11 0 // base = 0
+ ld b32 $r12 D[$r0 + #hub_mmio_list_head]
+ ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // wait for GPCs to all complete
+ mov $r10 8 // DONE_BAR
+ call #wait_doneo
+
+ // wait for strand xfer to complete
+ call #strand_wait
+
+ // post-op
+ bra $p1 #ctx_xfer_post
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1] $r2 // MEM_CMD
+ ctx_xfer_post_save_wait:
+ iord $r2 I[$r1]
+ or $r2 $r2
+ bra ne #ctx_xfer_post_save_wait
+
+ bra $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r15 2
+ call #ctx_4170s
+ clear b32 $r15
+ call #ctx_86c
+ call #strand_post
+ call #ctx_4170w
+ clear b32 $r15
+ call #ctx_4170s
+
+ bra not $p1 #ctx_xfer_no_post_mmio
+ ld b32 $r1 D[$r0 + #chan_mmio_count]
+ or $r1 $r1
+ bra e #ctx_xfer_no_post_mmio
+ call #ctx_mmio_exec
+
+ ctx_xfer_no_post_mmio:
+
+ ctx_xfer_done:
+ ret
+
+.align 256
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
new file mode 100644
index 0000000..decf0c6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
@@ -0,0 +1,857 @@
+uint32_t nve0_grhub_data[] = {
+/* 0x0000: gpc_count */
+ 0x00000000,
+/* 0x0004: rop_count */
+ 0x00000000,
+/* 0x0008: cmd_queue */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0050: hub_mmio_list_head */
+ 0x00000000,
+/* 0x0054: hub_mmio_list_tail */
+ 0x00000000,
+/* 0x0058: ctx_current */
+ 0x00000000,
+/* 0x005c: chipsets */
+ 0x000000e4,
+ 0x013c0070,
+ 0x000000e7,
+ 0x013c0070,
+ 0x00000000,
+/* 0x0070: nve4_hub_mmio_head */
+ 0x0417e91c,
+ 0x04400204,
+ 0x18404010,
+ 0x204040a8,
+ 0x184040d0,
+ 0x004040f8,
+ 0x08404130,
+ 0x08404150,
+ 0x00404164,
+ 0x0c4041a0,
+ 0x0c404200,
+ 0x34404404,
+ 0x0c404460,
+ 0x00404480,
+ 0x00404498,
+ 0x0c404604,
+ 0x0c404618,
+ 0x0440462c,
+ 0x00404640,
+ 0x00404654,
+ 0x00404660,
+ 0x48404678,
+ 0x084046c8,
+ 0x08404700,
+ 0x24404718,
+ 0x04404744,
+ 0x00404754,
+ 0x00405800,
+ 0x08405830,
+ 0x00405854,
+ 0x0c405870,
+ 0x04405a00,
+ 0x00405a18,
+ 0x00405b00,
+ 0x00405b10,
+ 0x00406020,
+ 0x0c406028,
+ 0x044064a8,
+ 0x044064b4,
+ 0x2c4064c0,
+ 0x004064fc,
+ 0x00407040,
+ 0x00407804,
+ 0x1440780c,
+ 0x004078bc,
+ 0x18408000,
+ 0x00408064,
+ 0x08408800,
+ 0x00408840,
+ 0x08408900,
+ 0x00408980,
+/* 0x013c: nve4_hub_mmio_tail */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0200: chan_data */
+/* 0x0200: chan_mmio_count */
+ 0x00000000,
+/* 0x0204: chan_mmio_address */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+/* 0x0300: xfer_data */
+ 0x00000000,
+};
+
+uint32_t nve0_grhub_code[] = {
+ 0x03090ef5,
+/* 0x0004: queue_put */
+ 0x9800d898,
+ 0x86f001d9,
+ 0x0489b808,
+ 0xf00c1bf4,
+ 0x21f502f7,
+ 0x00f802ec,
+/* 0x001c: queue_put_next */
+ 0xb60798c4,
+ 0x8dbb0384,
+ 0x0880b600,
+ 0x80008e80,
+ 0x90b6018f,
+ 0x0f94f001,
+ 0xf801d980,
+/* 0x0039: queue_get */
+ 0x0131f400,
+ 0x9800d898,
+ 0x89b801d9,
+ 0x210bf404,
+ 0xb60789c4,
+ 0x9dbb0394,
+ 0x0890b600,
+ 0x98009e98,
+ 0x80b6019f,
+ 0x0f84f001,
+ 0xf400d880,
+/* 0x0066: queue_get_done */
+ 0x00f80132,
+/* 0x0068: nv_rd32 */
+ 0x0728b7f1,
+ 0xb906b4b6,
+ 0xc9f002ec,
+ 0x00bcd01f,
+/* 0x0078: nv_rd32_wait */
+ 0xc800bccf,
+ 0x1bf41fcc,
+ 0x06a7f0fa,
+ 0x010321f5,
+ 0xf840bfcf,
+/* 0x008d: nv_wr32 */
+ 0x28b7f100,
+ 0x06b4b607,
+ 0xb980bfd0,
+ 0xc9f002ec,
+ 0x1ec9f01f,
+/* 0x00a3: nv_wr32_wait */
+ 0xcf00bcd0,
+ 0xccc800bc,
+ 0xfa1bf41f,
+/* 0x00ae: watchdog_reset */
+ 0x87f100f8,
+ 0x84b60430,
+ 0x1ff9f006,
+ 0xf8008fd0,
+/* 0x00bd: watchdog_clear */
+ 0x3087f100,
+ 0x0684b604,
+ 0xf80080d0,
+/* 0x00c9: wait_donez */
+ 0x3c87f100,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d000,
+ 0x081887f1,
+ 0xd00684b6,
+/* 0x00e2: wait_done_wait_donez */
+ 0x87f1008a,
+ 0x84b60400,
+ 0x0088cf06,
+ 0xf4888aff,
+ 0x87f1f31b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00099,
+/* 0x0103: wait_doneo */
+ 0xf100f800,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00099f0,
+ 0x87f10089,
+ 0x84b60818,
+ 0x008ad006,
+/* 0x011c: wait_done_wait_doneo */
+ 0x040087f1,
+ 0xcf0684b6,
+ 0x8aff0088,
+ 0xf30bf488,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0099f094,
+ 0xf80089d0,
+/* 0x013d: mmctx_size */
+/* 0x013f: nv_mmctx_size_loop */
+ 0x9894bd00,
+ 0x85b600e8,
+ 0x0180b61a,
+ 0xbb0284b6,
+ 0xe0b60098,
+ 0x04efb804,
+ 0xb9eb1bf4,
+ 0x00f8029f,
+/* 0x015c: mmctx_xfer */
+ 0x083c87f1,
+ 0xbd0684b6,
+ 0x0199f094,
+ 0xf10089d0,
+ 0xb6071087,
+ 0x94bd0684,
+ 0xf405bbfd,
+ 0x8bd0090b,
+ 0x0099f000,
+/* 0x0180: mmctx_base_disabled */
+ 0xf405eefd,
+ 0x8ed00c0b,
+ 0xc08fd080,
+/* 0x018f: mmctx_multi_disabled */
+ 0xb70199f0,
+ 0xc8010080,
+ 0xb4b600ab,
+ 0x0cb9f010,
+ 0xb601aec8,
+ 0xbefd11e4,
+ 0x008bd005,
+/* 0x01a8: mmctx_exec_loop */
+/* 0x01a8: mmctx_wait_free */
+ 0xf0008ecf,
+ 0x0bf41fe4,
+ 0x00ce98fa,
+ 0xd005e9fd,
+ 0xc0b6c08e,
+ 0x04cdb804,
+ 0xc8e81bf4,
+ 0x1bf402ab,
+/* 0x01c9: mmctx_fini_wait */
+ 0x008bcf18,
+ 0xb01fb4f0,
+ 0x1bf410b4,
+ 0x02a7f0f7,
+ 0xf4c921f4,
+/* 0x01de: mmctx_stop */
+ 0xabc81b0e,
+ 0x10b4b600,
+ 0xf00cb9f0,
+ 0x8bd012b9,
+/* 0x01ed: mmctx_stop_wait */
+ 0x008bcf00,
+ 0xf412bbc8,
+/* 0x01f6: mmctx_done */
+ 0x87f1fa1b,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00199,
+/* 0x0207: strand_wait */
+ 0xf900f800,
+ 0x02a7f0a0,
+ 0xfcc921f4,
+/* 0x0213: strand_pre */
+ 0xf100f8a0,
+ 0xf04afc87,
+ 0x97f00283,
+ 0x0089d00c,
+ 0x020721f5,
+/* 0x0226: strand_post */
+ 0x87f100f8,
+ 0x83f04afc,
+ 0x0d97f002,
+ 0xf50089d0,
+ 0xf8020721,
+/* 0x0239: strand_set */
+ 0xfca7f100,
+ 0x02a3f04f,
+ 0x0500aba2,
+ 0xd00fc7f0,
+ 0xc7f000ac,
+ 0x00bcd00b,
+ 0x020721f5,
+ 0xf000aed0,
+ 0xbcd00ac7,
+ 0x0721f500,
+/* 0x0263: strand_ctx_init */
+ 0xf100f802,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x21f50089,
+ 0xe7f00213,
+ 0x3921f503,
+ 0xfca7f102,
+ 0x02a3f046,
+ 0x0400aba0,
+ 0xf040a0d0,
+ 0xbcd001c7,
+ 0x0721f500,
+ 0x010c9202,
+ 0xf000acd0,
+ 0xbcd002c7,
+ 0x0721f500,
+ 0x2621f502,
+ 0x8087f102,
+ 0x0684b608,
+ 0xb70089cf,
+ 0x95220080,
+/* 0x02ba: ctx_init_strand_loop */
+ 0x8ed008fe,
+ 0x408ed000,
+ 0xb6808acf,
+ 0xa0b606a5,
+ 0x00eabb01,
+ 0xb60480b6,
+ 0x1bf40192,
+ 0x08e4b6e8,
+ 0xf1f2efbc,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00399f0,
+ 0x00f80089,
+/* 0x02ec: error */
+ 0xe7f1e0f9,
+ 0xe4b60814,
+ 0x00efd006,
+ 0x0c1ce7f1,
+ 0xf006e4b6,
+ 0xefd001f7,
+ 0xf8e0fc00,
+/* 0x0309: init */
+ 0xfe04bd00,
+ 0x07fe0004,
+ 0x0017f100,
+ 0x0227f012,
+ 0xf10012d0,
+ 0xfe05b917,
+ 0x17f10010,
+ 0x10d00400,
+ 0x0437f1c0,
+ 0x0634b604,
+ 0x200327f1,
+ 0xf10032d0,
+ 0xd0200427,
+ 0x27f10132,
+ 0x32d0200b,
+ 0x0c27f102,
+ 0x0732d020,
+ 0x0c2427f1,
+ 0xb90624b6,
+ 0x23d00003,
+ 0x0427f100,
+ 0x0023f087,
+ 0xb70012d0,
+ 0xf0010012,
+ 0x12d00427,
+ 0x1031f400,
+ 0x9604e7f1,
+ 0xf440e3f0,
+ 0xf1c76821,
+ 0x01018090,
+ 0x801ff4f0,
+ 0x17f0000f,
+ 0x041fbb01,
+ 0xf10112b6,
+ 0xb6040c27,
+ 0x21d00624,
+ 0x4021d000,
+ 0x080027f1,
+ 0xcf0624b6,
+ 0xf7f00022,
+/* 0x03a9: init_find_chipset */
+ 0x08f0b654,
+ 0xb800f398,
+ 0x0bf40432,
+ 0x0034b00b,
+ 0xf8f11bf4,
+/* 0x03bd: init_context */
+ 0x0017f100,
+ 0x02fe5801,
+ 0xf003ff58,
+ 0x0e8000e3,
+ 0x150f8014,
+ 0x013d21f5,
+ 0x070037f1,
+ 0x950634b6,
+ 0x34d00814,
+ 0x4034d000,
+ 0x130030b7,
+ 0xb6001fbb,
+ 0x3fd002f5,
+ 0x0815b600,
+ 0xb60110b6,
+ 0x1fb90814,
+ 0x6321f502,
+ 0x001fbb02,
+ 0xf1000398,
+ 0xf0200047,
+/* 0x040e: init_gpc */
+ 0x4ea05043,
+ 0x1fb90804,
+ 0x8d21f402,
+ 0x08004ea0,
+ 0xf4022fb9,
+ 0x4ea08d21,
+ 0xf4bd010c,
+ 0xa08d21f4,
+ 0xf401044e,
+ 0x4ea08d21,
+ 0xf7f00100,
+ 0x8d21f402,
+ 0x08004ea0,
+/* 0x0440: init_gpc_wait */
+ 0xc86821f4,
+ 0x0bf41fff,
+ 0x044ea0fa,
+ 0x6821f408,
+ 0xb7001fbb,
+ 0xb6800040,
+ 0x1bf40132,
+ 0x0027f1b4,
+ 0x0624b608,
+ 0xb74021d0,
+ 0xbd080020,
+ 0x1f19f014,
+/* 0x0473: main */
+ 0xf40021d0,
+ 0x28f40031,
+ 0x08d7f000,
+ 0xf43921f4,
+ 0xe4b1f401,
+ 0x1bf54001,
+ 0x87f100d1,
+ 0x84b6083c,
+ 0xf094bd06,
+ 0x89d00499,
+ 0x0017f100,
+ 0x0614b60b,
+ 0xcf4012cf,
+ 0x13c80011,
+ 0x7e0bf41f,
+ 0xf41f23c8,
+ 0x20f95a0b,
+ 0xf10212b9,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00799f0,
+ 0x32f40089,
+ 0x0231f401,
+ 0x07fb21f5,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0799f094,
+ 0xfc0089d0,
+ 0x3c87f120,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d006,
+ 0xf50131f4,
+ 0xf107fb21,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00699f0,
+ 0x0ef40089,
+/* 0x0509: chsw_prev_no_next */
+ 0xb920f931,
+ 0x32f40212,
+ 0x0232f401,
+ 0x07fb21f5,
+ 0x17f120fc,
+ 0x14b60b00,
+ 0x0012d006,
+/* 0x0527: chsw_no_prev */
+ 0xc8130ef4,
+ 0x0bf41f23,
+ 0x0131f40d,
+ 0xf50232f4,
+/* 0x0537: chsw_done */
+ 0xf107fb21,
+ 0xb60b0c17,
+ 0x27f00614,
+ 0x0012d001,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0499f094,
+ 0xf50089d0,
+/* 0x0557: main_not_ctx_switch */
+ 0xb0ff200e,
+ 0x1bf401e4,
+ 0x02f2b90d,
+ 0x078f21f5,
+/* 0x0567: main_not_ctx_chan */
+ 0xb0420ef4,
+ 0x1bf402e4,
+ 0x3c87f12e,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d007,
+ 0xf40132f4,
+ 0x21f50232,
+ 0x87f107fb,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00799,
+ 0x110ef400,
+/* 0x0598: main_not_ctx_save */
+ 0xf010ef94,
+ 0x21f501f5,
+ 0x0ef502ec,
+/* 0x05a6: main_done */
+ 0x17f1fed1,
+ 0x14b60820,
+ 0xf024bd06,
+ 0x12d01f29,
+ 0xbe0ef500,
+/* 0x05b9: ih */
+ 0xfe80f9fe,
+ 0x80f90188,
+ 0xa0f990f9,
+ 0xd0f9b0f9,
+ 0xf0f9e0f9,
+ 0xc4800acf,
+ 0x0bf404ab,
+ 0x00b7f11d,
+ 0x08d7f019,
+ 0xcf40becf,
+ 0x21f400bf,
+ 0x00b0b704,
+ 0x01e7f004,
+/* 0x05ef: ih_no_fifo */
+ 0xe400bed0,
+ 0xf40100ab,
+ 0xd7f00d0b,
+ 0x01e7f108,
+ 0x0421f440,
+/* 0x0600: ih_no_ctxsw */
+ 0x0104b7f1,
+ 0xabffb0bd,
+ 0x0d0bf4b4,
+ 0x0c1ca7f1,
+ 0xd006a4b6,
+/* 0x0616: ih_no_other */
+ 0x0ad000ab,
+ 0xfcf0fc40,
+ 0xfcd0fce0,
+ 0xfca0fcb0,
+ 0xfe80fc90,
+ 0x80fc0088,
+ 0xf80032f4,
+/* 0x0631: ctx_4170s */
+ 0x70e7f101,
+ 0x40e3f041,
+ 0xf410f5f0,
+ 0x00f88d21,
+/* 0x0640: ctx_4170w */
+ 0x4170e7f1,
+ 0xf440e3f0,
+ 0xf4f06821,
+ 0xf31bf410,
+/* 0x0652: ctx_redswitch */
+ 0xe7f100f8,
+ 0xe4b60614,
+ 0x70f7f106,
+ 0x00efd002,
+/* 0x0663: ctx_redswitch_delay */
+ 0xb608f7f0,
+ 0x1bf401f2,
+ 0x70f7f1fd,
+ 0x00efd007,
+/* 0x0672: ctx_86c */
+ 0xe7f100f8,
+ 0xe4b6086c,
+ 0x00efd006,
+ 0x8a14e7f1,
+ 0xf440e3f0,
+ 0xe7f18d21,
+ 0xe3f0a86c,
+ 0x8d21f441,
+/* 0x0692: ctx_load */
+ 0x87f100f8,
+ 0x84b6083c,
+ 0xf094bd06,
+ 0x89d00599,
+ 0x0ca7f000,
+ 0xf1c921f4,
+ 0xb60a2417,
+ 0x10d00614,
+ 0x0037f100,
+ 0x0634b60b,
+ 0xf14032d0,
+ 0xb60a0c17,
+ 0x47f00614,
+ 0x0012d007,
+/* 0x06cb: ctx_chan_wait_0 */
+ 0xcf4014d0,
+ 0x44f04014,
+ 0xfa1bf41f,
+ 0xfe0032d0,
+ 0x2af0000b,
+ 0x0424b61f,
+ 0xf10220b6,
+ 0xb6083c87,
+ 0x94bd0684,
+ 0xd00899f0,
+ 0x17f10089,
+ 0x14b60a04,
+ 0x0012d006,
+ 0x0a2017f1,
+ 0xf00614b6,
+ 0x23f10227,
+ 0x12d08000,
+ 0x1017f000,
+ 0x030027f1,
+ 0xfa0223f0,
+ 0x03f80512,
+ 0x085c87f1,
+ 0xbd0684b6,
+ 0x0899f094,
+ 0x980089d0,
+ 0x14b6c101,
+ 0xc0029818,
+ 0xfd0825b6,
+ 0x01800512,
+ 0x3c87f116,
+ 0x0684b608,
+ 0x99f094bd,
+ 0x0089d009,
+ 0x0a0427f1,
+ 0xd00624b6,
+ 0x27f00021,
+ 0x2017f101,
+ 0x0614b60a,
+ 0xf10012d0,
+ 0xf0020017,
+ 0x01fa0613,
+ 0xf103f805,
+ 0xb6085c87,
+ 0x94bd0684,
+ 0xd00999f0,
+ 0x87f10089,
+ 0x84b6085c,
+ 0xf094bd06,
+ 0x89d00599,
+/* 0x078f: ctx_chan */
+ 0xf500f800,
+ 0xf0069221,
+ 0x21f40ca7,
+ 0x1017f1c9,
+ 0x0614b60a,
+ 0xd00527f0,
+/* 0x07a6: ctx_chan_wait */
+ 0x12cf0012,
+ 0x0522fd00,
+ 0xf8fa1bf4,
+/* 0x07b1: ctx_mmio_exec */
+ 0x81039800,
+ 0x0a0427f1,
+ 0xd00624b6,
+ 0x34bd0023,
+/* 0x07c0: ctx_mmio_loop */
+ 0xf4ff34c4,
+ 0x57f10f1b,
+ 0x53f00300,
+ 0x0535fa06,
+/* 0x07d2: ctx_mmio_pull */
+ 0x4e9803f8,
+ 0xc14f98c0,
+ 0xb68d21f4,
+ 0x12b60830,
+ 0xdf1bf401,
+/* 0x07e4: ctx_mmio_done */
+ 0xd0160398,
+ 0x00800023,
+ 0x0017f180,
+ 0x0613f002,
+ 0xf80601fa,
+/* 0x07fb: ctx_xfer */
+ 0xf400f803,
+ 0x02f40611,
+/* 0x0801: ctx_xfer_pre */
+ 0x10f7f00d,
+ 0x067221f5,
+/* 0x080b: ctx_xfer_pre_load */
+ 0xf01c11f4,
+ 0x21f502f7,
+ 0x21f50631,
+ 0x21f50640,
+ 0xf4bd0652,
+ 0x063121f5,
+ 0x069221f5,
+/* 0x0824: ctx_xfer_exec */
+ 0xf1160198,
+ 0xb6041427,
+ 0x20d00624,
+ 0x00e7f100,
+ 0x41e3f0a5,
+ 0xf4021fb9,
+ 0xe0b68d21,
+ 0x01fcf004,
+ 0xb6022cf0,
+ 0xf2fd0124,
+ 0x8d21f405,
+ 0x4afc17f1,
+ 0xf00213f0,
+ 0x12d00c27,
+ 0x0721f500,
+ 0xfc27f102,
+ 0x0223f047,
+ 0xf00020d0,
+ 0x20b6012c,
+ 0x0012d003,
+ 0xf001acf0,
+ 0xb7f006a5,
+ 0x140c9800,
+ 0xf0150d98,
+ 0x21f500e7,
+ 0xa7f0015c,
+ 0x0321f508,
+ 0x0721f501,
+ 0x2201f402,
+ 0xf40ca7f0,
+ 0x17f1c921,
+ 0x14b60a10,
+ 0x0527f006,
+/* 0x08ab: ctx_xfer_post_save_wait */
+ 0xcf0012d0,
+ 0x22fd0012,
+ 0xfa1bf405,
+/* 0x08b7: ctx_xfer_post */
+ 0xf02e02f4,
+ 0x21f502f7,
+ 0xf4bd0631,
+ 0x067221f5,
+ 0x022621f5,
+ 0x064021f5,
+ 0x21f5f4bd,
+ 0x11f40631,
+ 0x80019810,
+ 0xf40511fd,
+ 0x21f5070b,
+/* 0x08e2: ctx_xfer_no_post_mmio */
+/* 0x08e2: ctx_xfer_done */
+ 0x00f807b1,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc
new file mode 100644
index 0000000..e6b2288
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc
@@ -0,0 +1,400 @@
+/* fuc microcode util functions for nvc0 PGRAPH
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
+define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
+
+ifdef(`include_code', `
+// Error codes
+define(`E_BAD_COMMAND', 0x01)
+define(`E_CMD_OVERFLOW', 0x02)
+
+// Util macros to help with debugging ucode hangs etc
+define(`T_WAIT', 0)
+define(`T_MMCTX', 1)
+define(`T_STRWAIT', 2)
+define(`T_STRINIT', 3)
+define(`T_AUTO', 4)
+define(`T_CHAN', 5)
+define(`T_LOAD', 6)
+define(`T_SAVE', 7)
+define(`T_LCHAN', 8)
+define(`T_LCTXH', 9)
+
+define(`trace_set', `
+ mov $r8 0x83c
+ shl b32 $r8 6
+ clear b32 $r9
+ bset $r9 $1
+ iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
+')
+
+define(`trace_clr', `
+ mov $r8 0x85c
+ shl b32 $r8 6
+ clear b32 $r9
+ bset $r9 $1
+ iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
+')
+
+// queue_put - add request to queue
+//
+// In : $r13 queue pointer
+// $r14 command
+// $r15 data
+//
+queue_put:
+ // make sure we have space..
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ xor $r8 8
+ cmpu b32 $r8 $r9
+ bra ne #queue_put_next
+ mov $r15 E_CMD_OVERFLOW
+ call #error
+ ret
+
+ // store cmd/data on queue
+ queue_put_next:
+ and $r8 $r9 7
+ shl b32 $r8 3
+ add b32 $r8 $r13
+ add b32 $r8 8
+ st b32 D[$r8 + 0x0] $r14
+ st b32 D[$r8 + 0x4] $r15
+
+ // update PUT
+ add b32 $r9 1
+ and $r9 0xf
+ st b32 D[$r13 + 0x4] $r9
+ ret
+
+// queue_get - fetch request from queue
+//
+// In : $r13 queue pointer
+//
+// Out: $p1 clear on success (data available)
+// $r14 command
+// $r15 data
+//
+queue_get:
+ bset $flags $p1
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ cmpu b32 $r8 $r9
+ bra e #queue_get_done
+ // fetch first cmd/data pair
+ and $r9 $r8 7
+ shl b32 $r9 3
+ add b32 $r9 $r13
+ add b32 $r9 8
+ ld b32 $r14 D[$r9 + 0x0]
+ ld b32 $r15 D[$r9 + 0x4]
+
+ // update GET
+ add b32 $r8 1
+ and $r8 0xf
+ st b32 D[$r13 + 0x0] $r8
+ bclr $flags $p1
+queue_get_done:
+ ret
+
+// nv_rd32 - read 32-bit value from nv register
+//
+// In : $r14 register
+// Out: $r15 value
+//
+nv_rd32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_rd32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_rd32_wait
+ mov $r10 6 // DONE_MMIO_RD
+ call #wait_doneo
+ iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
+ ret
+
+// nv_wr32 - write 32-bit value to nv register
+//
+// In : $r14 register
+// $r15 value
+//
+nv_wr32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ bset $r12 30 // MMIO_CTRL_WRITE
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_wr32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_wr32_wait
+ ret
+
+// (re)set watchdog timer
+//
+// In : $r15 timeout
+//
+watchdog_reset:
+ mov $r8 0x430
+ shl b32 $r8 6
+ bset $r15 31
+ iowr I[$r8 + 0x000] $r15
+ ret
+
+// clear watchdog timer
+watchdog_clear:
+ mov $r8 0x430
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r0
+ ret
+
+// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
+//
+// In : $r10 bit to wait on
+//
+define(`wait_done', `
+$1:
+ trace_set(T_WAIT);
+ mov $r8 0x818
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit
+ wait_done_$1:
+ mov $r8 0x400
+ shl b32 $r8 6
+ iord $r8 I[$r8 + 0x000] // DONE
+ xbit $r8 $r8 $r10
+ bra $2 #wait_done_$1
+ trace_clr(T_WAIT)
+ ret
+')
+wait_done(wait_donez, ne)
+wait_done(wait_doneo, e)
+
+// mmctx_size - determine size of a mmio list transfer
+//
+// In : $r14 mmio list head
+// $r15 mmio list tail
+// Out: $r15 transfer size (in bytes)
+//
+mmctx_size:
+ clear b32 $r9
+ nv_mmctx_size_loop:
+ ld b32 $r8 D[$r14]
+ shr b32 $r8 26
+ add b32 $r8 1
+ shl b32 $r8 2
+ add b32 $r9 $r8
+ add b32 $r14 4
+ cmpu b32 $r14 $r15
+ bra ne #nv_mmctx_size_loop
+ mov b32 $r15 $r9
+ ret
+
+// mmctx_xfer - execute a list of mmio transfers
+//
+// In : $r10 flags
+// bit 0: direction (0 = save, 1 = load)
+// bit 1: set if first transfer
+// bit 2: set if last transfer
+// $r11 base
+// $r12 mmio list head
+// $r13 mmio list tail
+// $r14 multi_stride
+// $r15 multi_mask
+//
+mmctx_xfer:
+ trace_set(T_MMCTX)
+ mov $r8 0x710
+ shl b32 $r8 6
+ clear b32 $r9
+ or $r11 $r11
+ bra e #mmctx_base_disabled
+ iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
+ bset $r9 0 // BASE_EN
+ mmctx_base_disabled:
+ or $r14 $r14
+ bra e #mmctx_multi_disabled
+ iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
+ iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
+ bset $r9 1 // MULTI_EN
+ mmctx_multi_disabled:
+ add b32 $r8 0x100
+
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ xbit $r14 $r10 1
+ shl b32 $r14 17
+ or $r11 $r14 // START_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+
+ // loop over the mmio list, and send requests to the hw
+ mmctx_exec_loop:
+ // wait for space in mmctx queue
+ mmctx_wait_free:
+ iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r14 0x1f
+ bra e #mmctx_wait_free
+
+ // queue up an entry
+ ld b32 $r14 D[$r12]
+ or $r14 $r9
+ iowr I[$r8 + 0x300] $r14
+ add b32 $r12 4
+ cmpu b32 $r12 $r13
+ bra ne #mmctx_exec_loop
+
+ xbit $r11 $r10 2
+ bra ne #mmctx_stop
+ // wait for queue to empty
+ mmctx_fini_wait:
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r11 0x1f
+ cmpu b32 $r11 0x10
+ bra ne #mmctx_fini_wait
+ mov $r10 2 // DONE_MMCTX
+ call #wait_donez
+ bra #mmctx_done
+ mmctx_stop:
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ bset $r11 18 // STOP_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+ mmctx_stop_wait:
+ // wait for STOP_TRIGGER to clear
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ xbit $r11 $r11 18
+ bra ne #mmctx_stop_wait
+ mmctx_done:
+ trace_clr(T_MMCTX)
+ ret
+
+// Wait for DONE_STRAND
+//
+strand_wait:
+ push $r10
+ mov $r10 2
+ call #wait_donez
+ pop $r10
+ ret
+
+// unknown - call before issuing strand commands
+//
+strand_pre:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xc
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// unknown - call after issuing strand commands
+//
+strand_post:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xd
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// Selects strand set?!
+//
+// In: $r14 id
+//
+strand_set:
+ mov $r10 0x4ffc
+ sethi $r10 0x20000
+ sub b32 $r11 $r10 0x500
+ mov $r12 0xf
+ iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
+ mov $r12 0xb
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
+ call #strand_wait
+ iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
+ mov $r12 0xa
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
+ call #strand_wait
+ ret
+
+// Initialise strand context data
+//
+// In : $r15 context base
+// Out: $r15 context size (in bytes)
+//
+// Strandset(?) 3 hardcoded currently
+//
+strand_ctx_init:
+ trace_set(T_STRINIT)
+ call #strand_pre
+ mov $r14 3
+ call #strand_set
+ mov $r10 0x46fc
+ sethi $r10 0x20000
+ add b32 $r11 $r10 0x400
+ iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
+ mov $r12 1
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
+ call #strand_wait
+ sub b32 $r12 $r0 1
+ iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
+ mov $r12 2
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
+ call #strand_wait
+ call #strand_post
+
+ // read the size of each strand, poke the context offset of
+ // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
+ // about it later then.
+ mov $r8 0x880
+ shl b32 $r8 6
+ iord $r9 I[$r8 + 0x000] // STRANDS
+ add b32 $r8 0x2200
+ shr b32 $r14 $r15 8
+ ctx_init_strand_loop:
+ iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
+ iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
+ iord $r10 I[$r8 + 0x200] // STRAND_SIZE
+ shr b32 $r10 6
+ add b32 $r10 1
+ add b32 $r14 $r10
+ add b32 $r8 4
+ sub b32 $r9 1
+ bra ne #ctx_init_strand_loop
+
+ shl b32 $r14 8
+ sub b32 $r15 $r14 $r15
+ trace_clr(T_STRINIT)
+ ret
+')
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc
new file mode 100644
index 0000000..f16a5d5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc
@@ -0,0 +1,400 @@
+/* fuc microcode util functions for nve0 PGRAPH
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
+define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
+
+ifdef(`include_code', `
+// Error codes
+define(`E_BAD_COMMAND', 0x01)
+define(`E_CMD_OVERFLOW', 0x02)
+
+// Util macros to help with debugging ucode hangs etc
+define(`T_WAIT', 0)
+define(`T_MMCTX', 1)
+define(`T_STRWAIT', 2)
+define(`T_STRINIT', 3)
+define(`T_AUTO', 4)
+define(`T_CHAN', 5)
+define(`T_LOAD', 6)
+define(`T_SAVE', 7)
+define(`T_LCHAN', 8)
+define(`T_LCTXH', 9)
+
+define(`trace_set', `
+ mov $r8 0x83c
+ shl b32 $r8 6
+ clear b32 $r9
+ bset $r9 $1
+ iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
+')
+
+define(`trace_clr', `
+ mov $r8 0x85c
+ shl b32 $r8 6
+ clear b32 $r9
+ bset $r9 $1
+ iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
+')
+
+// queue_put - add request to queue
+//
+// In : $r13 queue pointer
+// $r14 command
+// $r15 data
+//
+queue_put:
+ // make sure we have space..
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ xor $r8 8
+ cmpu b32 $r8 $r9
+ bra ne #queue_put_next
+ mov $r15 E_CMD_OVERFLOW
+ call #error
+ ret
+
+ // store cmd/data on queue
+ queue_put_next:
+ and $r8 $r9 7
+ shl b32 $r8 3
+ add b32 $r8 $r13
+ add b32 $r8 8
+ st b32 D[$r8 + 0x0] $r14
+ st b32 D[$r8 + 0x4] $r15
+
+ // update PUT
+ add b32 $r9 1
+ and $r9 0xf
+ st b32 D[$r13 + 0x4] $r9
+ ret
+
+// queue_get - fetch request from queue
+//
+// In : $r13 queue pointer
+//
+// Out: $p1 clear on success (data available)
+// $r14 command
+// $r15 data
+//
+queue_get:
+ bset $flags $p1
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ cmpu b32 $r8 $r9
+ bra e #queue_get_done
+ // fetch first cmd/data pair
+ and $r9 $r8 7
+ shl b32 $r9 3
+ add b32 $r9 $r13
+ add b32 $r9 8
+ ld b32 $r14 D[$r9 + 0x0]
+ ld b32 $r15 D[$r9 + 0x4]
+
+ // update GET
+ add b32 $r8 1
+ and $r8 0xf
+ st b32 D[$r13 + 0x0] $r8
+ bclr $flags $p1
+queue_get_done:
+ ret
+
+// nv_rd32 - read 32-bit value from nv register
+//
+// In : $r14 register
+// Out: $r15 value
+//
+nv_rd32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_rd32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_rd32_wait
+ mov $r10 6 // DONE_MMIO_RD
+ call #wait_doneo
+ iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
+ ret
+
+// nv_wr32 - write 32-bit value to nv register
+//
+// In : $r14 register
+// $r15 value
+//
+nv_wr32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ bset $r12 30 // MMIO_CTRL_WRITE
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_wr32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_wr32_wait
+ ret
+
+// (re)set watchdog timer
+//
+// In : $r15 timeout
+//
+watchdog_reset:
+ mov $r8 0x430
+ shl b32 $r8 6
+ bset $r15 31
+ iowr I[$r8 + 0x000] $r15
+ ret
+
+// clear watchdog timer
+watchdog_clear:
+ mov $r8 0x430
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r0
+ ret
+
+// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
+//
+// In : $r10 bit to wait on
+//
+define(`wait_done', `
+$1:
+ trace_set(T_WAIT);
+ mov $r8 0x818
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit
+ wait_done_$1:
+ mov $r8 0x400
+ shl b32 $r8 6
+ iord $r8 I[$r8 + 0x000] // DONE
+ xbit $r8 $r8 $r10
+ bra $2 #wait_done_$1
+ trace_clr(T_WAIT)
+ ret
+')
+wait_done(wait_donez, ne)
+wait_done(wait_doneo, e)
+
+// mmctx_size - determine size of a mmio list transfer
+//
+// In : $r14 mmio list head
+// $r15 mmio list tail
+// Out: $r15 transfer size (in bytes)
+//
+mmctx_size:
+ clear b32 $r9
+ nv_mmctx_size_loop:
+ ld b32 $r8 D[$r14]
+ shr b32 $r8 26
+ add b32 $r8 1
+ shl b32 $r8 2
+ add b32 $r9 $r8
+ add b32 $r14 4
+ cmpu b32 $r14 $r15
+ bra ne #nv_mmctx_size_loop
+ mov b32 $r15 $r9
+ ret
+
+// mmctx_xfer - execute a list of mmio transfers
+//
+// In : $r10 flags
+// bit 0: direction (0 = save, 1 = load)
+// bit 1: set if first transfer
+// bit 2: set if last transfer
+// $r11 base
+// $r12 mmio list head
+// $r13 mmio list tail
+// $r14 multi_stride
+// $r15 multi_mask
+//
+mmctx_xfer:
+ trace_set(T_MMCTX)
+ mov $r8 0x710
+ shl b32 $r8 6
+ clear b32 $r9
+ or $r11 $r11
+ bra e #mmctx_base_disabled
+ iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
+ bset $r9 0 // BASE_EN
+ mmctx_base_disabled:
+ or $r14 $r14
+ bra e #mmctx_multi_disabled
+ iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
+ iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
+ bset $r9 1 // MULTI_EN
+ mmctx_multi_disabled:
+ add b32 $r8 0x100
+
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ xbit $r14 $r10 1
+ shl b32 $r14 17
+ or $r11 $r14 // START_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+
+ // loop over the mmio list, and send requests to the hw
+ mmctx_exec_loop:
+ // wait for space in mmctx queue
+ mmctx_wait_free:
+ iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r14 0x1f
+ bra e #mmctx_wait_free
+
+ // queue up an entry
+ ld b32 $r14 D[$r12]
+ or $r14 $r9
+ iowr I[$r8 + 0x300] $r14
+ add b32 $r12 4
+ cmpu b32 $r12 $r13
+ bra ne #mmctx_exec_loop
+
+ xbit $r11 $r10 2
+ bra ne #mmctx_stop
+ // wait for queue to empty
+ mmctx_fini_wait:
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r11 0x1f
+ cmpu b32 $r11 0x10
+ bra ne #mmctx_fini_wait
+ mov $r10 2 // DONE_MMCTX
+ call #wait_donez
+ bra #mmctx_done
+ mmctx_stop:
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ bset $r11 18 // STOP_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+ mmctx_stop_wait:
+ // wait for STOP_TRIGGER to clear
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ xbit $r11 $r11 18
+ bra ne #mmctx_stop_wait
+ mmctx_done:
+ trace_clr(T_MMCTX)
+ ret
+
+// Wait for DONE_STRAND
+//
+strand_wait:
+ push $r10
+ mov $r10 2
+ call #wait_donez
+ pop $r10
+ ret
+
+// unknown - call before issuing strand commands
+//
+strand_pre:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xc
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// unknown - call after issuing strand commands
+//
+strand_post:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xd
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// Selects strand set?!
+//
+// In: $r14 id
+//
+strand_set:
+ mov $r10 0x4ffc
+ sethi $r10 0x20000
+ sub b32 $r11 $r10 0x500
+ mov $r12 0xf
+ iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
+ mov $r12 0xb
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
+ call #strand_wait
+ iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
+ mov $r12 0xa
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
+ call #strand_wait
+ ret
+
+// Initialise strand context data
+//
+// In : $r15 context base
+// Out: $r15 context size (in bytes)
+//
+// Strandset(?) 3 hardcoded currently
+//
+strand_ctx_init:
+ trace_set(T_STRINIT)
+ call #strand_pre
+ mov $r14 3
+ call #strand_set
+ mov $r10 0x46fc
+ sethi $r10 0x20000
+ add b32 $r11 $r10 0x400
+ iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
+ mov $r12 1
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
+ call #strand_wait
+ sub b32 $r12 $r0 1
+ iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
+ mov $r12 2
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
+ call #strand_wait
+ call #strand_post
+
+ // read the size of each strand, poke the context offset of
+ // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
+ // about it later then.
+ mov $r8 0x880
+ shl b32 $r8 6
+ iord $r9 I[$r8 + 0x000] // STRANDS
+ add b32 $r8 0x2200
+ shr b32 $r14 $r15 8
+ ctx_init_strand_loop:
+ iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
+ iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
+ iord $r10 I[$r8 + 0x200] // STRAND_SIZE
+ shr b32 $r10 6
+ add b32 $r10 1
+ add b32 $r14 $r10
+ add b32 $r8 4
+ sub b32 $r9 1
+ bra ne #ctx_init_strand_loop
+
+ shl b32 $r14 8
+ sub b32 $r15 $r14 $r15
+ trace_clr(T_STRINIT)
+ ret
+')
OpenPOWER on IntegriCloud