summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/networking/dccp.txt1
-rw-r--r--Documentation/powerpc/booting-without-of.txt4
-rw-r--r--Documentation/powerpc/dts-bindings/4xx/cpm.txt52
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/arm/mach-dove/common.c4
-rw-r--r--arch/arm/mach-tegra/include/mach/sdhci.h29
-rw-r--r--arch/microblaze/Kconfig.debug4
-rw-r--r--arch/microblaze/Makefile2
-rw-r--r--arch/microblaze/configs/mmu_defconfig1
-rw-r--r--arch/microblaze/include/asm/pvr.h185
-rw-r--r--arch/microblaze/kernel/cpu/cpuinfo.c1
-rw-r--r--arch/microblaze/kernel/entry.S46
-rw-r--r--arch/microblaze/kernel/exceptions.c3
-rw-r--r--arch/microblaze/kernel/hw_exception_handler.S9
-rw-r--r--arch/microblaze/kernel/prom.c4
-rw-r--r--arch/microblaze/kernel/vmlinux.lds.S16
-rw-r--r--arch/microblaze/lib/memmove.c2
-rw-r--r--arch/microblaze/lib/muldi3.S121
-rw-r--r--arch/microblaze/lib/muldi3.c60
-rw-r--r--arch/powerpc/Kconfig16
-rw-r--r--arch/powerpc/boot/dts/canyonlands.dts31
-rw-r--r--arch/powerpc/boot/dts/kilauea.dts9
-rw-r--r--arch/powerpc/boot/dts/mpc8308_p1m.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc8308rdb.dts8
-rw-r--r--arch/powerpc/configs/40x/kilauea_defconfig5
-rw-r--r--arch/powerpc/configs/44x/canyonlands_defconfig3
-rw-r--r--arch/powerpc/include/asm/bitops.h9
-rw-r--r--arch/powerpc/include/asm/cputable.h9
-rw-r--r--arch/powerpc/include/asm/cputhreads.h15
-rw-r--r--arch/powerpc/include/asm/device.h6
-rw-r--r--arch/powerpc/include/asm/firmware.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h4
-rw-r--r--arch/powerpc/include/asm/lppaca.h5
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/mmzone.h5
-rw-r--r--arch/powerpc/include/asm/nvram.h52
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h8
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/include/asm/topology.h15
-rw-r--r--arch/powerpc/include/asm/vdso_datapage.h2
-rw-r--r--arch/powerpc/kernel/Makefile9
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/cputable.c22
-rw-r--r--arch/powerpc/kernel/crash_dump.c33
-rw-r--r--arch/powerpc/kernel/dma-iommu.c2
-rw-r--r--arch/powerpc/kernel/entry_32.S1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1
-rw-r--r--arch/powerpc/kernel/fpu.S1
-rw-r--r--arch/powerpc/kernel/head_40x.S1
-rw-r--r--arch/powerpc/kernel/head_44x.S1
-rw-r--r--arch/powerpc/kernel/head_64.S7
-rw-r--r--arch/powerpc/kernel/head_8xx.S1
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S1
-rw-r--r--arch/powerpc/kernel/iommu.c14
-rw-r--r--arch/powerpc/kernel/misc.S5
-rw-r--r--arch/powerpc/kernel/misc_32.S1
-rw-r--r--arch/powerpc/kernel/misc_64.S1
-rw-r--r--arch/powerpc/kernel/nvram_64.c481
-rw-r--r--arch/powerpc/kernel/pci_64.c3
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c7
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S1
-rw-r--r--arch/powerpc/kernel/ptrace.c22
-rw-r--r--arch/powerpc/kernel/ptrace32.c7
-rw-r--r--arch/powerpc/kernel/rtas.c3
-rw-r--r--arch/powerpc/kernel/setup_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c19
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/kernel/vector.S1
-rw-r--r--arch/powerpc/kernel/vio.c15
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/lib/hweight_64.S110
-rw-r--r--arch/powerpc/mm/hash_utils_64.c2
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c12
-rw-r--r--arch/powerpc/mm/numa.c311
-rw-r--r--arch/powerpc/mm/pgtable_32.c3
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/platforms/44x/Makefile5
-rw-r--r--arch/powerpc/platforms/Kconfig7
-rw-r--r--arch/powerpc/platforms/cell/beat_iommu.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c3
-rw-r--r--arch/powerpc/platforms/chrp/time.c4
-rw-r--r--arch/powerpc/platforms/iseries/mf.c62
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c19
-rw-r--r--arch/powerpc/platforms/powermac/setup.c9
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig10
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c1
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S1
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c49
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c12
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c205
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c326
-rw-r--r--arch/powerpc/sysdev/Makefile1
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c9
-rw-r--r--arch/powerpc/sysdev/mpc8xxx_gpio.c75
-rw-r--r--arch/powerpc/sysdev/ppc4xx_cpm.c346
-rw-r--r--arch/powerpc/sysdev/tsi108_dev.c8
-rw-r--r--arch/sparc/kernel/cpu.c2
-rw-r--r--arch/sparc/kernel/pcr.c2
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/amd_nb.h13
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/gpio.h5
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mach_traps.h12
-rw-r--r--arch/x86/include/asm/nmi.h20
-rw-r--r--arch/x86/include/asm/numa_64.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h3
-rw-r--r--arch/x86/kernel/amd_nb.c7
-rw-r--r--arch/x86/kernel/aperture_64.c44
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c3
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c28
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/entry_64.S36
-rw-r--r--arch/x86/kernel/kgdb.c7
-rw-r--r--arch/x86/kernel/reboot.c5
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/traps.c102
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/mm/amdtopology_64.c86
-rw-r--r--arch/x86/mm/numa_64.c157
-rw-r--r--arch/x86/mm/srat_64.c26
-rw-r--r--arch/x86/oprofile/nmi_int.c3
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c2
-rw-r--r--arch/x86/pci/amd_bus.c33
-rw-r--r--drivers/atm/ambassador.c19
-rw-r--r--drivers/char/hvc_vio.c2
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c2
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/mpc512x_dma.c187
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c56
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c32
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c5
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c9
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c4
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_cq.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c155
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c373
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c80
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c45
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c32
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c24
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c57
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h11
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c51
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c62
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c392
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h46
-rw-r--r--drivers/macintosh/macio_asic.c7
-rw-r--r--drivers/macintosh/therm_pm72.c30
-rw-r--r--drivers/mfd/sh_mobile_sdhi.c6
-rw-r--r--drivers/mmc/card/Kconfig1
-rw-r--r--drivers/mmc/core/Kconfig11
-rw-r--r--drivers/mmc/core/bus.c8
-rw-r--r--drivers/mmc/core/core.c206
-rw-r--r--drivers/mmc/core/core.h9
-rw-r--r--drivers/mmc/core/debugfs.c5
-rw-r--r--drivers/mmc/core/host.c206
-rw-r--r--drivers/mmc/core/host.h21
-rw-r--r--drivers/mmc/core/mmc.c91
-rw-r--r--drivers/mmc/core/mmc_ops.c101
-rw-r--r--drivers/mmc/core/mmc_ops.h1
-rw-r--r--drivers/mmc/core/sd.c16
-rw-r--r--drivers/mmc/core/sdio.c36
-rw-r--r--drivers/mmc/core/sdio_bus.c32
-rw-r--r--drivers/mmc/host/Kconfig37
-rw-r--r--drivers/mmc/host/Makefile3
-rw-r--r--drivers/mmc/host/davinci_mmc.c80
-rw-r--r--drivers/mmc/host/dw_mmc.c1796
-rw-r--r--drivers/mmc/host/dw_mmc.h168
-rw-r--r--drivers/mmc/host/mxcmmc.c53
-rw-r--r--drivers/mmc/host/sdhci-dove.c70
-rw-r--r--drivers/mmc/host/sdhci-pci.c161
-rw-r--r--drivers/mmc/host/sdhci-pltfm.c6
-rw-r--r--drivers/mmc/host/sdhci-pltfm.h2
-rw-r--r--drivers/mmc/host/sdhci-s3c.c66
-rw-r--r--drivers/mmc/host/sdhci-tegra.c257
-rw-r--r--drivers/mmc/host/sdhci.c45
-rw-r--r--drivers/mmc/host/sdhci.h3
-rw-r--r--drivers/mmc/host/tmio_mmc.c561
-rw-r--r--drivers/mmc/host/tmio_mmc.h228
-rw-r--r--drivers/net/Kconfig9
-rw-r--r--drivers/net/bfin_mac.c74
-rw-r--r--drivers/net/bfin_mac.h11
-rw-r--r--drivers/net/bnx2x/bnx2x.h1
-rw-r--r--drivers/net/bnx2x/bnx2x_dump.h988
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c22
-rw-r--r--drivers/net/bnx2x/bnx2x_init.h220
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c70
-rw-r--r--drivers/net/bnx2x/bnx2x_reg.h74
-rw-r--r--drivers/net/bnx2x/bnx2x_stats.c5
-rw-r--r--drivers/net/cxgb4vf/cxgb4vf_main.c15
-rw-r--r--drivers/net/cxgb4vf/t4vf_hw.c11
-rw-r--r--drivers/net/e1000/e1000_hw.c328
-rw-r--r--drivers/net/e1000/e1000_hw.h59
-rw-r--r--drivers/net/e1000/e1000_main.c35
-rw-r--r--drivers/net/e1000/e1000_osdep.h19
-rw-r--r--drivers/net/e1000e/82571.c77
-rw-r--r--drivers/net/e1000e/e1000.h3
-rw-r--r--drivers/net/e1000e/es2lan.c4
-rw-r--r--drivers/net/e1000e/ethtool.c54
-rw-r--r--drivers/net/e1000e/hw.h1
-rw-r--r--drivers/net/e1000e/ich8lan.c77
-rw-r--r--drivers/net/e1000e/lib.c3
-rw-r--r--drivers/net/e1000e/netdev.c53
-rw-r--r--drivers/net/e1000e/phy.c40
-rw-r--r--drivers/net/ehea/ehea.h2
-rw-r--r--drivers/net/ehea/ehea_main.c6
-rw-r--r--drivers/net/fec.c248
-rw-r--r--drivers/net/fec.h5
-rw-r--r--drivers/net/forcedeth.c34
-rw-r--r--drivers/net/hamradio/yam.c4
-rw-r--r--drivers/net/ixgbe/ixgbe.h21
-rw-r--r--drivers/net/ixgbe/ixgbe_82599.c749
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c142
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c169
-rw-r--r--drivers/net/ixgbe/ixgbe_type.h91
-rw-r--r--drivers/net/mlx4/alloc.c3
-rw-r--r--drivers/net/mlx4/en_netdev.c3
-rw-r--r--drivers/net/mlx4/fw.c4
-rw-r--r--drivers/net/pcmcia/pcnet_cs.c1
-rw-r--r--drivers/net/ppp_async.c10
-rw-r--r--drivers/net/ppp_deflate.c9
-rw-r--r--drivers/net/ppp_generic.c9
-rw-r--r--drivers/net/ppp_mppe.c7
-rw-r--r--drivers/net/ppp_synctty.c3
-rw-r--r--drivers/net/qlcnic/qlcnic.h24
-rw-r--r--drivers/net/qlcnic/qlcnic_ethtool.c2
-rw-r--r--drivers/net/qlcnic/qlcnic_init.c63
-rw-r--r--drivers/net/qlcnic/qlcnic_main.c10
-rw-r--r--drivers/net/r8169.c143
-rw-r--r--drivers/net/sky2.c143
-rw-r--r--drivers/net/sky2.h6
-rw-r--r--drivers/net/xen-netfront.c2
-rw-r--r--drivers/ps3/Makefile2
-rw-r--r--drivers/rtc/class.c13
-rw-r--r--drivers/rtc/interface.c574
-rw-r--r--drivers/rtc/rtc-cmos.c3
-rw-r--r--drivers/rtc/rtc-dev.c104
-rw-r--r--drivers/rtc/rtc-lib.c28
-rw-r--r--drivers/watchdog/hpwdt.c2
-rw-r--r--fs/9p/Kconfig5
-rw-r--r--fs/9p/Makefile1
-rw-r--r--fs/9p/acl.c4
-rw-r--r--fs/9p/v9fs.h42
-rw-r--r--fs/9p/vfs_inode.c871
-rw-r--r--fs/9p/vfs_inode_dotl.c824
-rw-r--r--fs/9p/xattr.c2
-rw-r--r--fs/ext2/dir.c19
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c25
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/balloc.c266
-rw-r--r--fs/ext3/dir.c15
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext3/ioctl.c22
-rw-r--r--fs/ext3/namei.c138
-rw-r--r--fs/ext3/resize.c65
-rw-r--r--fs/ext3/super.c64
-rw-r--r--fs/ext3/xattr.c2
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/dir.c56
-rw-r--r--fs/ext4/ext4.h93
-rw-r--r--fs/ext4/ext4_extents.h8
-rw-r--r--fs/ext4/ext4_jbd2.h2
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/file.c22
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c74
-rw-r--r--fs/ext4/mballoc.c55
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/namei.c69
-rw-r--r--fs/ext4/page-io.c7
-rw-r--r--fs/ext4/resize.c64
-rw-r--r--fs/ext4/super.c288
-rw-r--r--fs/ext4/xattr.c28
-rw-r--r--fs/jbd2/journal.c34
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/lockd/Makefile6
-rw-r--r--fs/lockd/clnt4xdr.c605
-rw-r--r--fs/lockd/clntlock.c4
-rw-r--r--fs/lockd/clntproc.c18
-rw-r--r--fs/lockd/clntxdr.c627
-rw-r--r--fs/lockd/host.c409
-rw-r--r--fs/lockd/mon.c110
-rw-r--r--fs/lockd/svc4proc.c20
-rw-r--r--fs/lockd/svclock.c34
-rw-r--r--fs/lockd/svcproc.c28
-rw-r--r--fs/lockd/xdr.c287
-rw-r--r--fs/lockd/xdr4.c255
-rw-r--r--fs/mbcache.c12
-rw-r--r--fs/nfs/callback.c83
-rw-r--r--fs/nfs/callback.h59
-rw-r--r--fs/nfs/callback_proc.c326
-rw-r--r--fs/nfs/callback_xdr.c143
-rw-r--r--fs/nfs/client.c302
-rw-r--r--fs/nfs/delegation.c362
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c72
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/inode.c3
-rw-r--r--fs/nfs/internal.h19
-rw-r--r--fs/nfs/mount_clnt.c83
-rw-r--r--fs/nfs/nfs2xdr.c1294
-rw-r--r--fs/nfs/nfs3xdr.c2817
-rw-r--r--fs/nfs/nfs4_fs.h13
-rw-r--r--fs/nfs/nfs4filelayout.c6
-rw-r--r--fs/nfs/nfs4proc.c188
-rw-r--r--fs/nfs/nfs4renewd.c11
-rw-r--r--fs/nfs/nfs4state.c293
-rw-r--r--fs/nfs/nfs4xdr.c1426
-rw-r--r--fs/nfs/pagelist.c7
-rw-r--r--fs/nfs/pnfs.c524
-rw-r--r--fs/nfs/pnfs.h76
-rw-r--r--fs/nfs/proc.c5
-rw-r--r--fs/nfs/super.c18
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfsd/nfs4callback.c690
-rw-r--r--fs/ocfs2/Kconfig2
-rw-r--r--fs/ocfs2/alloc.c77
-rw-r--r--fs/ocfs2/alloc.h4
-rw-r--r--fs/ocfs2/aops.c59
-rw-r--r--fs/ocfs2/cluster/heartbeat.c246
-rw-r--r--fs/ocfs2/cluster/netdebug.c286
-rw-r--r--fs/ocfs2/cluster/tcp.c145
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h33
-rw-r--r--fs/ocfs2/dlm/dlmast.c76
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h86
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c200
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h5
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c10
-rw-r--r--fs/ocfs2/dlm/dlmlock.c3
-rw-r--r--fs/ocfs2/dlm/dlmthread.c132
-rw-r--r--fs/ocfs2/namei.c5
-rw-r--r--fs/ocfs2/ocfs2.h5
-rw-r--r--fs/quota/dquot.c18
-rw-r--r--fs/quota/quota_tree.c9
-rw-r--r--fs/udf/Kconfig1
-rw-r--r--fs/udf/balloc.c3
-rw-r--r--fs/udf/dir.c5
-rw-r--r--fs/udf/file.c11
-rw-r--r--fs/udf/ialloc.c21
-rw-r--r--fs/udf/inode.c51
-rw-r--r--fs/udf/namei.c107
-rw-r--r--fs/udf/partition.c27
-rw-r--r--fs/udf/super.c67
-rw-r--r--fs/udf/symlink.c12
-rw-r--r--fs/udf/udf_i.h13
-rw-r--r--fs/udf/udf_sb.h22
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/xfs/linux-2.6/sv.h59
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c425
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h16
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c235
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h22
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c22
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c92
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h59
-rw-r--r--fs/xfs/quota/xfs_dquot.c1
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_alloc.c351
-rw-r--r--fs/xfs/xfs_attr_leaf.c4
-rw-r--r--fs/xfs/xfs_btree.c9
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_buf_item.h11
-rw-r--r--fs/xfs/xfs_extfree_item.c97
-rw-r--r--fs/xfs/xfs_extfree_item.h11
-rw-r--r--fs/xfs/xfs_fsops.c1
-rw-r--r--fs/xfs/xfs_iget.c79
-rw-r--r--fs/xfs/xfs_inode.c54
-rw-r--r--fs/xfs/xfs_inode.h15
-rw-r--r--fs/xfs/xfs_inode_item.c90
-rw-r--r--fs/xfs/xfs_iomap.c233
-rw-r--r--fs/xfs/xfs_iomap.h27
-rw-r--r--fs/xfs/xfs_log.c739
-rw-r--r--fs/xfs/xfs_log_cil.c17
-rw-r--r--fs/xfs/xfs_log_priv.h127
-rw-r--r--fs/xfs/xfs_log_recover.c620
-rw-r--r--fs/xfs/xfs_mount.c23
-rw-r--r--fs/xfs/xfs_mount.h14
-rw-r--r--fs/xfs/xfs_trans.c79
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c232
-rw-r--r--fs/xfs/xfs_trans_extfree.c8
-rw-r--r--fs/xfs/xfs_trans_priv.h35
-rw-r--r--fs/xfs/xfs_vnodeops.c61
-rw-r--r--include/linux/bfin_mac.h1
-rw-r--r--include/linux/dynamic_debug.h18
-rw-r--r--include/linux/etherdevice.h4
-rw-r--r--include/linux/ext3_fs.h10
-rw-r--r--include/linux/fec.h3
-rw-r--r--include/linux/if_bridge.h2
-rw-r--r--include/linux/jbd2.h20
-rw-r--r--include/linux/lockd/debug.h10
-rw-r--r--include/linux/lockd/lockd.h6
-rw-r--r--include/linux/mbcache.h11
-rw-r--r--include/linux/mfd/tmio.h5
-rw-r--r--include/linux/mmc/dw_mmc.h217
-rw-r--r--include/linux/mmc/host.h19
-rw-r--r--include/linux/mmc/mmc.h2
-rw-r--r--include/linux/mmc/sdhci.h6
-rw-r--r--include/linux/netdevice.h24
-rw-r--r--include/linux/netfilter/x_tables.h10
-rw-r--r--include/linux/nfs3.h3
-rw-r--r--include/linux/nfs4.h8
-rw-r--r--include/linux/nfs_fs_sb.h15
-rw-r--r--include/linux/nfs_xdr.h6
-rw-r--r--include/linux/pci_ids.h8
-rw-r--r--include/linux/quotaops.h5
-rw-r--r--include/linux/rtc.h51
-rw-r--r--include/linux/sunrpc/auth.h8
-rw-r--r--include/linux/sunrpc/bc_xprt.h15
-rw-r--r--include/linux/sunrpc/clnt.h4
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--include/linux/sunrpc/svc_xprt.h1
-rw-r--r--include/linux/sunrpc/xdr.h14
-rw-r--r--include/linux/tracepoint.h4
-rw-r--r--include/net/ah.h2
-rw-r--r--include/net/arp.h1
-rw-r--r--include/net/phonet/phonet.h4
-rw-r--r--include/net/sch_generic.h20
-rw-r--r--include/net/sock.h4
-rw-r--r--include/trace/define_trace.h10
-rw-r--r--include/trace/events/skb.h4
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c14
-rw-r--r--kernel/perf_event.c82
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--lib/dynamic_debug.c9
-rw-r--r--net/9p/protocol.c22
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/caif/chnl_net.c18
-rw-r--r--net/core/dev.c149
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/input.c2
-rw-r--r--net/dccp/sysctl.c4
-rw-r--r--net/ethernet/eth.c12
-rw-r--r--net/ipv4/ah4.c7
-rw-r--r--net/ipv4/arp.c29
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c45
-rw-r--r--net/ipv4/netfilter/ip_tables.c45
-rw-r--r--net/ipv6/ah6.c8
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c45
-rw-r--r--net/netfilter/nf_conntrack_netlink.c18
-rw-r--r--net/netfilter/x_tables.c3
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/phonet/af_phonet.c6
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ipt.c3
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_nat.c3
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c3
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/sch_atm.c6
-rw-r--r--net/sched/sch_cbq.c6
-rw-r--r--net/sched/sch_drr.c8
-rw-r--r--net/sched/sch_dsmark.c3
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c6
-rw-r--r--net/sched/sch_prio.c3
-rw-r--r--net/sched/sch_red.c3
-rw-r--r--net/sched/sch_sfq.c3
-rw-r--r--net/sched/sch_tbf.c3
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/sunrpc/auth.c28
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c44
-rw-r--r--net/sunrpc/bc_svc.c2
-rw-r--r--net/sunrpc/clnt.c21
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/rpcb_clnt.c147
-rw-r--r--net/sunrpc/svc.c36
-rw-r--r--net/sunrpc/svcsock.c106
-rw-r--r--net/sunrpc/xdr.c155
-rw-r--r--net/xfrm/xfrm_user.c6
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/builtin-record.c3
-rw-r--r--tools/perf/builtin-sched.c5
-rw-r--r--tools/perf/builtin-stat.c5
-rw-r--r--tools/perf/builtin-test.c116
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/util/evsel.c87
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/parse-events.c74
-rw-r--r--tools/perf/util/session.c2
521 files changed, 24142 insertions, 12710 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f3dc951..ed3708f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -403,6 +403,10 @@ and is between 256 and 4096 characters. It is defined in the file
bttv.pll= See Documentation/video4linux/bttv/Insmod-options
bttv.tuner= and Documentation/video4linux/bttv/CARDLIST
+ bulk_remove=off [PPC] This parameter disables the use of the pSeries
+ firmware feature for flushing multiple hpte entries
+ at a time.
+
c101= [NET] Moxa C101 synchronous serial card
cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection.
@@ -1490,6 +1494,10 @@ and is between 256 and 4096 characters. It is defined in the file
mtdparts= [MTD]
See drivers/mtd/cmdlinepart.c.
+ multitce=off [PPC] This parameter disables the use of the pSeries
+ firmware feature for updating multiple TCE entries
+ at a time.
+
onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration
Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index b395ca6..811872b 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -167,6 +167,7 @@ rx_ccid = 2
seq_window = 100
The initial sequence window (sec. 7.5.2) of the sender. This influences
the local ackno validity and the remote seqno validity windows (7.5.1).
+ Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
tx_qlen = 5
The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 302db5d..3272ed59 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -131,7 +131,7 @@ order to avoid the degeneration that had become the ppc32 kernel entry
point and the way a new platform should be added to the kernel. The
legacy iSeries platform breaks those rules as it predates this scheme,
but no new board support will be accepted in the main tree that
-doesn't follows them properly. In addition, since the advent of the
+doesn't follow them properly. In addition, since the advent of the
arch/powerpc merged architecture for ppc32 and ppc64, new 32-bit
platforms and 32-bit platforms which move into arch/powerpc will be
required to use these rules as well.
@@ -1025,7 +1025,7 @@ dtc source code can be found at
WARNING: This version is still in early development stage; the
resulting device-tree "blobs" have not yet been validated with the
-kernel. The current generated bloc lacks a useful reserve map (it will
+kernel. The current generated block lacks a useful reserve map (it will
be fixed to generate an empty one, it's up to the bootloader to fill
it up) among others. The error handling needs work, bugs are lurking,
etc...
diff --git a/Documentation/powerpc/dts-bindings/4xx/cpm.txt b/Documentation/powerpc/dts-bindings/4xx/cpm.txt
new file mode 100644
index 0000000..ee45980
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/4xx/cpm.txt
@@ -0,0 +1,52 @@
+PPC4xx Clock Power Management (CPM) node
+
+Required properties:
+ - compatible : compatible list, currently only "ibm,cpm"
+ - dcr-access-method : "native"
+ - dcr-reg : < DCR register range >
+
+Optional properties:
+ - er-offset : All 4xx SoCs with a CPM controller have
+ one of two different order for the CPM
+ registers. Some have the CPM registers
+ in the following order (ER,FR,SR). The
+ others have them in the following order
+ (SR,ER,FR). For the second case set
+ er-offset = <1>.
+ - unused-units : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set to turn off unused
+ devices.
+ - idle-doze : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set to turn off unused
+ devices. This is usually just CPM[CPU].
+ - standby : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set on standby and
+ restored on resume.
+ - suspend : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set on suspend (mem) and
+ restored on resume. Note, for standby
+ and suspend the corresponding bits can
+ be different or the same. Usually for
+ standby only class 2 and 3 units are set.
+ However, the interface does not care.
+ If they are the same, the additional
+ power saving will be seeing if support
+ is available to put the DDR in self
+ refresh mode and any additional power
+ saving techniques for the specific SoC.
+
+Example:
+ CPM0: cpm {
+ compatible = "ibm,cpm";
+ dcr-access-method = "native";
+ dcr-reg = <0x160 0x003>;
+ er-offset = <0>;
+ unused-units = <0x00000100>;
+ idle-doze = <0x02000000>;
+ standby = <0xfeff0000>;
+ suspend = <0xfeff791d>;
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index bb6c1ac..42f991e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1785,7 +1785,8 @@ S: Maintained
F: drivers/usb/atm/cxacru.c
CONFIGFS
-M: Joel Becker <joel.becker@oracle.com>
+M: Joel Becker <jlbec@evilplan.org>
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/configfs.git
S: Supported
F: fs/configfs/
F: include/linux/configfs.h
@@ -4549,7 +4550,7 @@ F: include/linux/oprofile.h
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
M: Mark Fasheh <mfasheh@suse.com>
-M: Joel Becker <joel.becker@oracle.com>
+M: Joel Becker <jlbec@evilplan.org>
L: ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
W: http://oss.oracle.com/projects/ocfs2/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2.git
diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index f7a1258..fe627ab 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -770,7 +770,7 @@ static struct resource dove_sdio0_resources[] = {
};
static struct platform_device dove_sdio0 = {
- .name = "sdhci-mv",
+ .name = "sdhci-dove",
.id = 0,
.dev = {
.dma_mask = &sdio_dmamask,
@@ -798,7 +798,7 @@ static struct resource dove_sdio1_resources[] = {
};
static struct platform_device dove_sdio1 = {
- .name = "sdhci-mv",
+ .name = "sdhci-dove",
.id = 1,
.dev = {
.dma_mask = &sdio_dmamask,
diff --git a/arch/arm/mach-tegra/include/mach/sdhci.h b/arch/arm/mach-tegra/include/mach/sdhci.h
new file mode 100644
index 0000000..3ad086e
--- /dev/null
+++ b/arch/arm/mach-tegra/include/mach/sdhci.h
@@ -0,0 +1,29 @@
+/*
+ * include/asm-arm/arch-tegra/include/mach/sdhci.h
+ *
+ * Copyright (C) 2009 Palm, Inc.
+ * Author: Yvonne Yip <y@palm.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ASM_ARM_ARCH_TEGRA_SDHCI_H
+#define __ASM_ARM_ARCH_TEGRA_SDHCI_H
+
+#include <linux/mmc/host.h>
+
+struct tegra_sdhci_platform_data {
+ int cd_gpio;
+ int wp_gpio;
+ int power_gpio;
+ int is_8bit;
+};
+
+#endif
diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug
index e66e25c..012e377 100644
--- a/arch/microblaze/Kconfig.debug
+++ b/arch/microblaze/Kconfig.debug
@@ -23,8 +23,4 @@ config HEART_BEAT
This option turns on/off heart beat kernel functionality.
First GPIO node is taken.
-config DEBUG_BOOTMEM
- depends on DEBUG_KERNEL
- bool "Debug BOOTMEM initialization"
-
endmenu
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 15f1f1d..6f432e6 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -17,7 +17,7 @@ export CPU_VER CPU_MAJOR CPU_MINOR CPU_REV
# The various CONFIG_XILINX cpu features options are integers 0/1/2...
# rather than bools y/n
-# Work out HW multipler support. This is icky.
+# Work out HW multipler support. This is tricky.
# 1. Spartan2 has no HW multiplers.
# 2. MicroBlaze v3.x always uses them, except in Spartan 2
# 3. All other FPGa/CPU ver combos, we can trust the CONFIG_ settings
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 8b422b1..ab8fbe7 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -66,5 +66,4 @@ CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_EARLY_PRINTK=y
-CONFIG_DEBUG_BOOTMEM=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/include/asm/pvr.h b/arch/microblaze/include/asm/pvr.h
index 37db96a..a10bec6 100644
--- a/arch/microblaze/include/asm/pvr.h
+++ b/arch/microblaze/include/asm/pvr.h
@@ -1,9 +1,9 @@
/*
* Support for the MicroBlaze PVR (Processor Version Register)
*
- * Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2009 - 2011 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2007 John Williams <john.williams@petalogix.com>
- * Copyright (C) 2007 - 2009 PetaLogix
+ * Copyright (C) 2007 - 2011 PetaLogix
*
* This file is subject to the terms and conditions of the GNU General
* Public License. See the file COPYING in the main directory of this
@@ -46,11 +46,11 @@ struct pvr_s {
#define PVR2_I_LMB_MASK 0x10000000
#define PVR2_INTERRUPT_IS_EDGE_MASK 0x08000000
#define PVR2_EDGE_IS_POSITIVE_MASK 0x04000000
-#define PVR2_D_PLB_MASK 0x02000000 /* new */
-#define PVR2_I_PLB_MASK 0x01000000 /* new */
-#define PVR2_INTERCONNECT 0x00800000 /* new */
-#define PVR2_USE_EXTEND_FSL 0x00080000 /* new */
-#define PVR2_USE_FSL_EXC 0x00040000 /* new */
+#define PVR2_D_PLB_MASK 0x02000000 /* new */
+#define PVR2_I_PLB_MASK 0x01000000 /* new */
+#define PVR2_INTERCONNECT 0x00800000 /* new */
+#define PVR2_USE_EXTEND_FSL 0x00080000 /* new */
+#define PVR2_USE_FSL_EXC 0x00040000 /* new */
#define PVR2_USE_MSR_INSTR 0x00020000
#define PVR2_USE_PCMP_INSTR 0x00010000
#define PVR2_AREA_OPTIMISED 0x00008000
@@ -59,7 +59,7 @@ struct pvr_s {
#define PVR2_USE_HW_MUL_MASK 0x00001000
#define PVR2_USE_FPU_MASK 0x00000800
#define PVR2_USE_MUL64_MASK 0x00000400
-#define PVR2_USE_FPU2_MASK 0x00000200 /* new */
+#define PVR2_USE_FPU2_MASK 0x00000200 /* new */
#define PVR2_USE_IPLBEXC 0x00000100
#define PVR2_USE_DPLBEXC 0x00000080
#define PVR2_OPCODE_0x0_ILL_MASK 0x00000040
@@ -122,96 +122,103 @@ struct pvr_s {
/* PVR access macros */
-#define PVR_IS_FULL(pvr) (pvr.pvr[0] & PVR0_PVR_FULL_MASK)
-#define PVR_USE_BARREL(pvr) (pvr.pvr[0] & PVR0_USE_BARREL_MASK)
-#define PVR_USE_DIV(pvr) (pvr.pvr[0] & PVR0_USE_DIV_MASK)
-#define PVR_USE_HW_MUL(pvr) (pvr.pvr[0] & PVR0_USE_HW_MUL_MASK)
-#define PVR_USE_FPU(pvr) (pvr.pvr[0] & PVR0_USE_FPU_MASK)
-#define PVR_USE_FPU2(pvr) (pvr.pvr[2] & PVR2_USE_FPU2_MASK)
-#define PVR_USE_ICACHE(pvr) (pvr.pvr[0] & PVR0_USE_ICACHE_MASK)
-#define PVR_USE_DCACHE(pvr) (pvr.pvr[0] & PVR0_USE_DCACHE_MASK)
-#define PVR_VERSION(pvr) ((pvr.pvr[0] & PVR0_VERSION_MASK) >> 8)
-#define PVR_USER1(pvr) (pvr.pvr[0] & PVR0_USER1_MASK)
-#define PVR_USER2(pvr) (pvr.pvr[1] & PVR1_USER2_MASK)
-
-#define PVR_D_OPB(pvr) (pvr.pvr[2] & PVR2_D_OPB_MASK)
-#define PVR_D_LMB(pvr) (pvr.pvr[2] & PVR2_D_LMB_MASK)
-#define PVR_I_OPB(pvr) (pvr.pvr[2] & PVR2_I_OPB_MASK)
-#define PVR_I_LMB(pvr) (pvr.pvr[2] & PVR2_I_LMB_MASK)
-#define PVR_INTERRUPT_IS_EDGE(pvr) \
- (pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK)
-#define PVR_EDGE_IS_POSITIVE(pvr) \
- (pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK)
-#define PVR_USE_MSR_INSTR(pvr) (pvr.pvr[2] & PVR2_USE_MSR_INSTR)
-#define PVR_USE_PCMP_INSTR(pvr) (pvr.pvr[2] & PVR2_USE_PCMP_INSTR)
-#define PVR_AREA_OPTIMISED(pvr) (pvr.pvr[2] & PVR2_AREA_OPTIMISED)
-#define PVR_USE_MUL64(pvr) (pvr.pvr[2] & PVR2_USE_MUL64_MASK)
-#define PVR_OPCODE_0x0_ILLEGAL(pvr) \
- (pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK)
-#define PVR_UNALIGNED_EXCEPTION(pvr) \
- (pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK)
-#define PVR_ILL_OPCODE_EXCEPTION(pvr) \
- (pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK)
-#define PVR_IOPB_BUS_EXCEPTION(pvr) \
- (pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK)
-#define PVR_DOPB_BUS_EXCEPTION(pvr) \
- (pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK)
-#define PVR_DIV_ZERO_EXCEPTION(pvr) \
- (pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK)
-#define PVR_FPU_EXCEPTION(pvr) (pvr.pvr[2] & PVR2_FPU_EXC_MASK)
-#define PVR_FSL_EXCEPTION(pvr) (pvr.pvr[2] & PVR2_USE_EXTEND_FSL)
-
-#define PVR_DEBUG_ENABLED(pvr) (pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK)
-#define PVR_NUMBER_OF_PC_BRK(pvr) \
- ((pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25)
-#define PVR_NUMBER_OF_RD_ADDR_BRK(pvr) \
- ((pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19)
-#define PVR_NUMBER_OF_WR_ADDR_BRK(pvr) \
- ((pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13)
-#define PVR_FSL_LINKS(pvr) ((pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7)
-
-#define PVR_ICACHE_ADDR_TAG_BITS(pvr) \
- ((pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26)
-#define PVR_ICACHE_USE_FSL(pvr) (pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK)
-#define PVR_ICACHE_ALLOW_WR(pvr) (pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK)
-#define PVR_ICACHE_LINE_LEN(pvr) \
- (1 << ((pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21))
-#define PVR_ICACHE_BYTE_SIZE(pvr) \
- (1 << ((pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16))
-
-#define PVR_DCACHE_ADDR_TAG_BITS(pvr) \
- ((pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26)
-#define PVR_DCACHE_USE_FSL(pvr) (pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK)
-#define PVR_DCACHE_ALLOW_WR(pvr) (pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK)
+#define PVR_IS_FULL(_pvr) (_pvr.pvr[0] & PVR0_PVR_FULL_MASK)
+#define PVR_USE_BARREL(_pvr) (_pvr.pvr[0] & PVR0_USE_BARREL_MASK)
+#define PVR_USE_DIV(_pvr) (_pvr.pvr[0] & PVR0_USE_DIV_MASK)
+#define PVR_USE_HW_MUL(_pvr) (_pvr.pvr[0] & PVR0_USE_HW_MUL_MASK)
+#define PVR_USE_FPU(_pvr) (_pvr.pvr[0] & PVR0_USE_FPU_MASK)
+#define PVR_USE_FPU2(_pvr) (_pvr.pvr[2] & PVR2_USE_FPU2_MASK)
+#define PVR_USE_ICACHE(_pvr) (_pvr.pvr[0] & PVR0_USE_ICACHE_MASK)
+#define PVR_USE_DCACHE(_pvr) (_pvr.pvr[0] & PVR0_USE_DCACHE_MASK)
+#define PVR_VERSION(_pvr) ((_pvr.pvr[0] & PVR0_VERSION_MASK) >> 8)
+#define PVR_USER1(_pvr) (_pvr.pvr[0] & PVR0_USER1_MASK)
+#define PVR_USER2(_pvr) (_pvr.pvr[1] & PVR1_USER2_MASK)
+
+#define PVR_D_OPB(_pvr) (_pvr.pvr[2] & PVR2_D_OPB_MASK)
+#define PVR_D_LMB(_pvr) (_pvr.pvr[2] & PVR2_D_LMB_MASK)
+#define PVR_I_OPB(_pvr) (_pvr.pvr[2] & PVR2_I_OPB_MASK)
+#define PVR_I_LMB(_pvr) (_pvr.pvr[2] & PVR2_I_LMB_MASK)
+#define PVR_INTERRUPT_IS_EDGE(_pvr) \
+ (_pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK)
+#define PVR_EDGE_IS_POSITIVE(_pvr) \
+ (_pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK)
+#define PVR_USE_MSR_INSTR(_pvr) (_pvr.pvr[2] & PVR2_USE_MSR_INSTR)
+#define PVR_USE_PCMP_INSTR(_pvr) (_pvr.pvr[2] & PVR2_USE_PCMP_INSTR)
+#define PVR_AREA_OPTIMISED(_pvr) (_pvr.pvr[2] & PVR2_AREA_OPTIMISED)
+#define PVR_USE_MUL64(_pvr) (_pvr.pvr[2] & PVR2_USE_MUL64_MASK)
+#define PVR_OPCODE_0x0_ILLEGAL(_pvr) \
+ (_pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK)
+#define PVR_UNALIGNED_EXCEPTION(_pvr) \
+ (_pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK)
+#define PVR_ILL_OPCODE_EXCEPTION(_pvr) \
+ (_pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK)
+#define PVR_IOPB_BUS_EXCEPTION(_pvr) \
+ (_pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK)
+#define PVR_DOPB_BUS_EXCEPTION(_pvr) \
+ (_pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK)
+#define PVR_DIV_ZERO_EXCEPTION(_pvr) \
+ (_pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK)
+#define PVR_FPU_EXCEPTION(_pvr) (_pvr.pvr[2] & PVR2_FPU_EXC_MASK)
+#define PVR_FSL_EXCEPTION(_pvr) (_pvr.pvr[2] & PVR2_USE_EXTEND_FSL)
+
+#define PVR_DEBUG_ENABLED(_pvr) (_pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK)
+#define PVR_NUMBER_OF_PC_BRK(_pvr) \
+ ((_pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25)
+#define PVR_NUMBER_OF_RD_ADDR_BRK(_pvr) \
+ ((_pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19)
+#define PVR_NUMBER_OF_WR_ADDR_BRK(_pvr) \
+ ((_pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13)
+#define PVR_FSL_LINKS(_pvr) ((_pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7)
+
+#define PVR_ICACHE_ADDR_TAG_BITS(_pvr) \
+ ((_pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26)
+#define PVR_ICACHE_USE_FSL(_pvr) \
+ (_pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK)
+#define PVR_ICACHE_ALLOW_WR(_pvr) \
+ (_pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK)
+#define PVR_ICACHE_LINE_LEN(_pvr) \
+ (1 << ((_pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21))
+#define PVR_ICACHE_BYTE_SIZE(_pvr) \
+ (1 << ((_pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16))
+
+#define PVR_DCACHE_ADDR_TAG_BITS(_pvr) \
+ ((_pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26)
+#define PVR_DCACHE_USE_FSL(_pvr) (_pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK)
+#define PVR_DCACHE_ALLOW_WR(_pvr) \
+ (_pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK)
/* FIXME two shifts on one line needs any comment */
-#define PVR_DCACHE_LINE_LEN(pvr) \
- (1 << ((pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21))
-#define PVR_DCACHE_BYTE_SIZE(pvr) \
- (1 << ((pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16))
+#define PVR_DCACHE_LINE_LEN(_pvr) \
+ (1 << ((_pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21))
+#define PVR_DCACHE_BYTE_SIZE(_pvr) \
+ (1 << ((_pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16))
-#define PVR_DCACHE_USE_WRITEBACK(pvr) \
- ((pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14)
+#define PVR_DCACHE_USE_WRITEBACK(_pvr) \
+ ((_pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14)
-#define PVR_ICACHE_BASEADDR(pvr) (pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK)
-#define PVR_ICACHE_HIGHADDR(pvr) (pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK)
+#define PVR_ICACHE_BASEADDR(_pvr) \
+ (_pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK)
+#define PVR_ICACHE_HIGHADDR(_pvr) \
+ (_pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK)
+#define PVR_DCACHE_BASEADDR(_pvr) \
+ (_pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK)
+#define PVR_DCACHE_HIGHADDR(_pvr) \
+ (_pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK)
-#define PVR_DCACHE_BASEADDR(pvr) (pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK)
-#define PVR_DCACHE_HIGHADDR(pvr) (pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK)
+#define PVR_TARGET_FAMILY(_pvr) \
+ ((_pvr.pvr[10] & PVR10_TARGET_FAMILY_MASK) >> 24)
-#define PVR_TARGET_FAMILY(pvr) ((pvr.pvr[10] & PVR10_TARGET_FAMILY_MASK) >> 24)
-
-#define PVR_MSR_RESET_VALUE(pvr) \
- (pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK)
+#define PVR_MSR_RESET_VALUE(_pvr) \
+ (_pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK)
/* mmu */
-#define PVR_USE_MMU(pvr) ((pvr.pvr[11] & PVR11_USE_MMU) >> 30)
-#define PVR_MMU_ITLB_SIZE(pvr) (pvr.pvr[11] & PVR11_MMU_ITLB_SIZE)
-#define PVR_MMU_DTLB_SIZE(pvr) (pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
-#define PVR_MMU_TLB_ACCESS(pvr) (pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
-#define PVR_MMU_ZONES(pvr) (pvr.pvr[11] & PVR11_MMU_ZONES)
+#define PVR_USE_MMU(_pvr) ((_pvr.pvr[11] & PVR11_USE_MMU) >> 30)
+#define PVR_MMU_ITLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_ITLB_SIZE)
+#define PVR_MMU_DTLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
+#define PVR_MMU_TLB_ACCESS(_pvr) (_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
+#define PVR_MMU_ZONES(_pvr) (_pvr.pvr[11] & PVR11_MMU_ZONES)
/* endian */
-#define PVR_ENDIAN(pvr) (pvr.pvr[0] & PVR0_ENDI)
+#define PVR_ENDIAN(_pvr) (_pvr.pvr[0] & PVR0_ENDI)
int cpu_has_pvr(void);
void get_pvr(struct pvr_s *pvr);
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index 87c79fa..2c309fc 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -32,6 +32,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
{"7.30.a", 0x10},
{"7.30.b", 0x11},
{"8.00.a", 0x12},
+ {"8.00.b", 0x13},
{NULL, 0},
};
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 819238b..41c30cd 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -287,25 +287,44 @@
* are masked. This is nice, means we don't have to CLI before state save
*/
C_ENTRY(_user_exception):
- addi r14, r14, 4 /* return address is 4 byte after call */
swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+ addi r14, r14, 4 /* return address is 4 byte after call */
+
+ mfs r1, rmsr
+ nop
+ andi r1, r1, MSR_UMS
+ bnei r1, 1f
+
+/* Kernel-mode state save - kernel execve */
+ lwi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+ tophys(r1,r1);
+
+ addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */
+ SAVE_REGS
+ swi r1, r1, PTO + PT_MODE; /* pt_regs -> kernel mode */
+ brid 2f;
+ nop; /* Fill delay slot */
+
+/* User-mode state save. */
+1:
lwi r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
tophys(r1,r1);
lwi r1, r1, TS_THREAD_INFO; /* get stack from task_struct */
- /* MS these three instructions can be added to one */
- /* addik r1, r1, THREAD_SIZE; */
- /* tophys(r1,r1); */
- /* addik r1, r1, -STATE_SAVE_SIZE; */
- addik r1, r1, THREAD_SIZE + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START - STATE_SAVE_SIZE;
+/* calculate kernel stack pointer from task struct 8k */
+ addik r1, r1, THREAD_SIZE;
+ tophys(r1,r1);
+
+ addik r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */
SAVE_REGS
swi r0, r1, PTO + PT_R3
swi r0, r1, PTO + PT_R4
+ swi r0, r1, PTO + PT_MODE; /* Was in user-mode. */
lwi r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
swi r11, r1, PTO+PT_R1; /* Store user SP. */
clear_ums;
- lwi CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+2: lwi CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
/* Save away the syscall number. */
swi r12, r1, PTO+PT_R0;
tovirt(r1,r1)
@@ -375,6 +394,9 @@ C_ENTRY(ret_from_trap):
swi r3, r1, PTO + PT_R3
swi r4, r1, PTO + PT_R4
+ lwi r11, r1, PTO + PT_MODE;
+/* See if returning to kernel mode, if so, skip resched &c. */
+ bnei r11, 2f;
/* We're returning to user mode, so check for various conditions that
* trigger rescheduling. */
/* FIXME: Restructure all these flag checks. */
@@ -417,6 +439,16 @@ C_ENTRY(ret_from_trap):
RESTORE_REGS;
addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */
lwi r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
+ bri 6f;
+
+/* Return to kernel state. */
+2: set_bip; /* Ints masked for state restore */
+ VM_OFF;
+ tophys(r1,r1);
+ RESTORE_REGS;
+ addik r1, r1, STATE_SAVE_SIZE /* Clean up stack space. */
+ tovirt(r1,r1);
+6:
TRAP_return: /* Make global symbol for debugging */
rtbd r14, 0; /* Instructions to return from an IRQ */
nop;
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 478f294..a7fa6ae7 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -25,6 +25,7 @@
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <asm/current.h>
+#include <asm/cacheflush.h>
#define MICROBLAZE_ILL_OPCODE_EXCEPTION 0x02
#define MICROBLAZE_IBUS_EXCEPTION 0x03
@@ -52,6 +53,8 @@ void die(const char *str, struct pt_regs *fp, long err)
void sw_exception(struct pt_regs *regs)
{
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->r16);
+ flush_dcache_range(regs->r16, regs->r16 + 0x4);
+ flush_icache_range(regs->r16, regs->r16 + 0x4);
}
void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 7811954..25f6e07 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -945,11 +945,20 @@ store3: sbi r3, r4, 2;
store4: sbi r3, r4, 3; /* Delay slot */
ex_shw_vm:
/* Store the lower half-word, byte-by-byte into destination address */
+#ifdef __MICROBLAZEEL__
+ lbui r3, r5, 0;
+store5: sbi r3, r4, 0;
+ lbui r3, r5, 1;
+ brid ret_from_exc;
+store6: sbi r3, r4, 1; /* Delay slot */
+#else
lbui r3, r5, 2;
store5: sbi r3, r4, 0;
lbui r3, r5, 3;
brid ret_from_exc;
store6: sbi r3, r4, 1; /* Delay slot */
+#endif
+
ex_sw_end_vm: /* Exception handling of store word, ends. */
/* We have to prevent cases that get/put_user macros get unaligned pointer
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index a105301..c881393 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -61,14 +61,12 @@ static int __init early_init_dt_scan_serial(unsigned long node,
char *p;
int *addr;
- pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+ pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname);
/* find all serial nodes */
if (strncmp(uname, "serial", 6) != 0)
return 0;
- early_init_dt_check_for_initrd(node);
-
/* find compatible node with uartlite */
p = of_get_flat_dt_prop(node, "compatible", &l);
if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) &&
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index 96a88c3..3451bde 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -123,20 +123,10 @@ SECTIONS {
__init_end_before_initramfs = .;
- .init.ramfs ALIGN(PAGE_SIZE) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- __initramfs_start = .;
- *(.init.ramfs)
- __initramfs_end = .;
- . = ALIGN(4);
- LONG(0);
-/*
- * FIXME this can break initramfs for MMU.
- * Pad init.ramfs up to page boundary,
- * so that __init_end == __bss_start. This will make image.elf
- * consistent with the image.bin
- */
- /* . = ALIGN(PAGE_SIZE); */
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+ INIT_RAM_FS
}
+
__init_end = .;
.bss ALIGN (PAGE_SIZE) : AT(ADDR(.bss) - LOAD_OFFSET) {
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index 123e361..810fd68 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -182,7 +182,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold | ((value & 0xFF000000)>> 24);
- buf_hold = (value & 0xFFFFFF) << 8;;
+ buf_hold = (value & 0xFFFFFF) << 8;
}
#endif
/* Realign the source */
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
deleted file mode 100644
index ceeaa8c..0000000
--- a/arch/microblaze/lib/muldi3.S
+++ /dev/null
@@ -1,121 +0,0 @@
-#include <linux/linkage.h>
-
-/*
- * Multiply operation for 64 bit integers, for devices with hard multiply
- * Input : Operand1[H] in Reg r5
- * Operand1[L] in Reg r6
- * Operand2[H] in Reg r7
- * Operand2[L] in Reg r8
- * Output: Result[H] in Reg r3
- * Result[L] in Reg r4
- *
- * Explaination:
- *
- * Both the input numbers are divided into 16 bit number as follows
- * op1 = A B C D
- * op2 = E F G H
- * result = D * H
- * + (C * H + D * G) << 16
- * + (B * H + C * G + D * F) << 32
- * + (A * H + B * G + C * F + D * E) << 48
- *
- * Only 64 bits of the output are considered
- */
-
- .text
- .globl __muldi3
- .type __muldi3, @function
- .ent __muldi3
-
-__muldi3:
- addi r1, r1, -40
-
-/* Save the input operands on the caller's stack */
- swi r5, r1, 44
- swi r6, r1, 48
- swi r7, r1, 52
- swi r8, r1, 56
-
-/* Store all the callee saved registers */
- sw r20, r1, r0
- swi r21, r1, 4
- swi r22, r1, 8
- swi r23, r1, 12
- swi r24, r1, 16
- swi r25, r1, 20
- swi r26, r1, 24
- swi r27, r1, 28
-
-/* Load all the 16 bit values for A thru H */
- lhui r20, r1, 44 /* A */
- lhui r21, r1, 46 /* B */
- lhui r22, r1, 48 /* C */
- lhui r23, r1, 50 /* D */
- lhui r24, r1, 52 /* E */
- lhui r25, r1, 54 /* F */
- lhui r26, r1, 56 /* G */
- lhui r27, r1, 58 /* H */
-
-/* D * H ==> LSB of the result on stack ==> Store1 */
- mul r9, r23, r27
- swi r9, r1, 36 /* Pos2 and Pos3 */
-
-/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
-/* Store the carry generated in position 2 for Pos 3 */
- lhui r11, r1, 36 /* Pos2 */
- mul r9, r22, r27 /* C * H */
- mul r10, r23, r26 /* D * G */
- add r9, r9, r10
- addc r12, r0, r0
- add r9, r9, r11
- addc r12, r12, r0 /* Store the Carry */
- shi r9, r1, 36 /* Store Pos2 */
- swi r9, r1, 32
- lhui r11, r1, 32
- shi r11, r1, 34 /* Store Pos1 */
-
-/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
- mul r9, r21, r27 /* B * H */
- mul r10, r22, r26 /* C * G */
- mul r7, r23, r25 /* D * F */
- add r9, r9, r11
- add r9, r9, r10
- add r9, r9, r7
- swi r9, r1, 32 /* Pos0 and Pos1 */
-
-/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
- lhui r11, r1, 32 /* Pos0 */
- mul r9, r20, r27 /* A * H */
- mul r10, r21, r26 /* B * G */
- mul r7, r22, r25 /* C * F */
- mul r8, r23, r24 /* D * E */
- add r9, r9, r11
- add r9, r9, r10
- add r9, r9, r7
- add r9, r9, r8
- sext16 r9, r9 /* Sign extend the MSB */
- shi r9, r1, 32
-
-/* Move results to r3 and r4 */
- lhui r3, r1, 32
- add r3, r3, r12
- shi r3, r1, 32
- lwi r3, r1, 32 /* Hi Part */
- lwi r4, r1, 36 /* Lo Part */
-
-/* Restore Callee saved registers */
- lw r20, r1, r0
- lwi r21, r1, 4
- lwi r22, r1, 8
- lwi r23, r1, 12
- lwi r24, r1, 16
- lwi r25, r1, 20
- lwi r26, r1, 24
- lwi r27, r1, 28
-
-/* Restore Frame and return */
- rtsd r15, 8
- addi r1, r1, 40
-
-.size __muldi3, . - __muldi3
-.end __muldi3
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c
new file mode 100644
index 0000000..d4860e1
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.c
@@ -0,0 +1,60 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+#define DWtype long long
+#define UWtype unsigned long
+#define UHWtype unsigned short
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ \
+ __ul = __ll_lowpart(u); \
+ __uh = __ll_highpart(u); \
+ __vl = __ll_lowpart(v); \
+ __vh = __ll_highpart(v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart(__x0); /* this can't give carry */\
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos */ \
+ \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+ } while (0)
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) ({ \
+ DWunion __w; \
+ umul_ppmm(__w.s.high, __w.s.low, u, v); \
+ __w.ll; \
+ })
+#endif
+
+DWtype __muldi3(DWtype u, DWtype v)
+{
+ const DWunion uu = {.ll = u};
+ const DWunion vv = {.ll = v};
+ DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)};
+
+ w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high
+ + (UWtype) uu.s.high * (UWtype) vv.s.low);
+
+ return w.ll;
+}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 48fb479..959f38c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -20,6 +20,9 @@ config WORD_SIZE
config ARCH_PHYS_ADDR_T_64BIT
def_bool PPC64 || PHYS_64BIT
+config ARCH_DMA_ADDR_T_64BIT
+ def_bool ARCH_PHYS_ADDR_T_64BIT
+
config MMU
bool
default y
@@ -209,7 +212,7 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
def_bool y
depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
- PPC_85xx || PPC_86xx || PPC_PSERIES
+ PPC_85xx || PPC_86xx || PPC_PSERIES || 44x || 40x
config PPC_DCR_NATIVE
bool
@@ -595,13 +598,11 @@ config EXTRA_TARGETS
If unsure, leave blank
-if !44x || BROKEN
config ARCH_WANTS_FREEZER_CONTROL
def_bool y
depends on ADB_PMU
source kernel/power/Kconfig
-endif
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
@@ -682,6 +683,15 @@ config FSL_PMC
Freescale MPC85xx/MPC86xx power management controller support
(suspend/resume). For MPC83xx see platforms/83xx/suspend.c
+config PPC4xx_CPM
+ bool
+ default y
+ depends on SUSPEND && (44x || 40x)
+ help
+ PPC4xx Clock Power Management (CPM) support (suspend/resume).
+ It also enables support for two different idle states (idle-wait
+ and idle-doze).
+
config 4xx_SOC
bool
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index a303703..5b27a4b 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -105,6 +105,15 @@
dcr-reg = <0x00c 0x002>;
};
+ CPM0: cpm {
+ compatible = "ibm,cpm";
+ dcr-access-method = "native";
+ dcr-reg = <0x160 0x003>;
+ unused-units = <0x00000100>;
+ idle-doze = <0x02000000>;
+ standby = <0xfeff791d>;
+ };
+
L2C0: l2c {
compatible = "ibm,l2-cache-460ex", "ibm,l2-cache";
dcr-reg = <0x020 0x008 /* Internal SRAM DCR's */
@@ -270,28 +279,6 @@
interrupts = <0x1 0x4>;
};
- UART2: serial@ef600500 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600500 0x00000008>;
- virtual-reg = <0xef600500>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>; /* Filled in by U-Boot */
- interrupt-parent = <&UIC1>;
- interrupts = <28 0x4>;
- };
-
- UART3: serial@ef600600 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600600 0x00000008>;
- virtual-reg = <0xef600600>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>; /* Filled in by U-Boot */
- interrupt-parent = <&UIC1>;
- interrupts = <29 0x4>;
- };
-
IIC0: i2c@ef600700 {
compatible = "ibm,iic-460ex", "ibm,iic";
reg = <0xef600700 0x00000014>;
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index 083e68e..89edb16 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -82,6 +82,15 @@
interrupt-parent = <&UIC0>;
};
+ CPM0: cpm {
+ compatible = "ibm,cpm";
+ dcr-access-method = "native";
+ dcr-reg = <0x0b0 0x003>;
+ unused-units = <0x00000000>;
+ idle-doze = <0x02000000>;
+ standby = <0xe3e74800>;
+ };
+
plb {
compatible = "ibm,plb-405ex", "ibm,plb4";
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 05a76cc..697b3f6 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -297,6 +297,14 @@
interrupt-parent = < &ipic >;
};
+ dma@2c000 {
+ compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+ reg = <0x2c000 0x1800>;
+ interrupts = <3 0x8
+ 94 0x8>;
+ interrupt-parent = < &ipic >;
+ };
+
};
pci0: pcie@e0009000 {
diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts
index a97eb2d..d3db02f 100644
--- a/arch/powerpc/boot/dts/mpc8308rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8308rdb.dts
@@ -265,6 +265,14 @@
interrupt-parent = < &ipic >;
};
+ dma@2c000 {
+ compatible = "fsl,mpc8308-dma", "fsl,mpc5121-dma";
+ reg = <0x2c000 0x1800>;
+ interrupts = <3 0x8
+ 94 0x8>;
+ interrupt-parent = < &ipic >;
+ };
+
};
pci0: pcie@e0009000 {
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index 4e19ee7..34b8c1a 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -12,6 +12,8 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_KILAUEA=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
# CONFIG_WALNUT is not set
CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
@@ -42,6 +44,9 @@ CONFIG_MTD_PHYSMAP_OF=y
CONFIG_MTD_NAND=y
CONFIG_MTD_NAND_NDFC=y
CONFIG_PROC_DEVICETREE=y
+CONFIG_PM=y
+CONFIG_SUSPEND=y
+CONFIG_PPC4xx_CPM=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
# CONFIG_MISC_DEVICES is not set
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index 45c64d8..17e4dd9 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -42,6 +42,9 @@ CONFIG_MTD_PHYSMAP_OF=y
CONFIG_MTD_NAND=y
CONFIG_MTD_NAND_NDFC=y
CONFIG_PROC_DEVICETREE=y
+CONFIG_PM=y
+CONFIG_SUSPEND=y
+CONFIG_PPC4xx_CPM=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
# CONFIG_MISC_DEVICES is not set
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 30964ae..8a7e9314 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -267,7 +267,16 @@ static __inline__ int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif /* __powerpc64__ */
+#ifdef CONFIG_PPC64
+unsigned int __arch_hweight8(unsigned int w);
+unsigned int __arch_hweight16(unsigned int w);
+unsigned int __arch_hweight32(unsigned int w);
+unsigned long __arch_hweight64(__u64 w);
+#include <asm-generic/bitops/const_hweight.h>
+#else
#include <asm-generic/bitops/hweight.h>
+#endif
+
#include <asm-generic/bitops/find.h>
/* Little-endian versions */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index f3a1fdd..f0a211d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -199,6 +199,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
+#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000)
+#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000)
#ifndef __ASSEMBLY__
@@ -403,21 +405,22 @@ extern const char *powerpc_base_platform;
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
- CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
+ CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index a8e1844..f71bb4c 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -61,22 +61,25 @@ static inline cpumask_t cpu_online_cores_map(void)
return cpu_thread_mask_to_cores(cpu_online_map);
}
-static inline int cpu_thread_to_core(int cpu)
-{
- return cpu >> threads_shift;
-}
+#ifdef CONFIG_SMP
+int cpu_core_index_of_thread(int cpu);
+int cpu_first_thread_of_core(int core);
+#else
+static inline int cpu_core_index_of_thread(int cpu) { return cpu; }
+static inline int cpu_first_thread_of_core(int core) { return core; }
+#endif
static inline int cpu_thread_in_core(int cpu)
{
return cpu & (threads_per_core - 1);
}
-static inline int cpu_first_thread_in_core(int cpu)
+static inline int cpu_first_thread_sibling(int cpu)
{
return cpu & ~(threads_per_core - 1);
}
-static inline int cpu_last_thread_in_core(int cpu)
+static inline int cpu_last_thread_sibling(int cpu)
{
return cpu | (threads_per_core - 1);
}
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index a3954e4..16d25c0 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -9,6 +9,12 @@
struct dma_map_ops;
struct device_node;
+/*
+ * Arch extensions to struct device.
+ *
+ * When adding fields, consider macio_add_one_device in
+ * drivers/macintosh/macio_asic.c
+ */
struct dev_archdata {
/* DMA operations on that device */
struct dma_map_ops *dma_ops;
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 20778a4..4ef662e 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -46,6 +46,7 @@
#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000)
#ifndef __ASSEMBLY__
@@ -59,7 +60,7 @@ enum {
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
- FW_FEATURE_CMO,
+ FW_FEATURE_CMO | FW_FEATURE_VPHN,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index de03ca5..ec089ac 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -232,7 +232,9 @@
#define H_GET_EM_PARMS 0x2B8
#define H_SET_MPP 0x2D0
#define H_GET_MPP 0x2D4
-#define MAX_HCALL_OPCODE H_GET_MPP
+#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
+#define H_BEST_ENERGY 0x2F4
+#define MAX_HCALL_OPCODE H_BEST_ENERGY
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 7f5e0fe..380d48b 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -62,7 +62,10 @@ struct lppaca {
volatile u32 dyn_pir; // Dynamic ProcIdReg value x20-x23
u32 dsei_data; // DSEI data x24-x27
u64 sprg3; // SPRG3 value x28-x2F
- u8 reserved3[80]; // Reserved x30-x7F
+ u8 reserved3[40]; // Reserved x30-x57
+ volatile u8 vphn_assoc_counts[8]; // Virtual processor home node
+ // associativity change counters x58-x5F
+ u8 reserved4[32]; // Reserved x60-x7F
//=============================================================================
// CACHE_LINE_2 0x0080 - 0x00FF Contains local read-write data
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index d045b01..8433d36 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -27,9 +27,7 @@ struct iommu_table;
struct rtc_time;
struct file;
struct pci_controller;
-#ifdef CONFIG_KEXEC
struct kimage;
-#endif
#ifdef CONFIG_SMP
struct smp_ops_t {
@@ -72,7 +70,7 @@ struct machdep_calls {
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
- /* special for kexec, to be called in real mode, linar mapping is
+ /* special for kexec, to be called in real mode, linear mapping is
* destroyed as well */
void (*hpte_clear_all)(void);
@@ -324,8 +322,6 @@ extern sys_ctrler_t sys_ctrler;
#endif /* CONFIG_PPC_PMAC */
-extern void setup_pci_ptrs(void);
-
#ifdef CONFIG_SMP
/* Poor default implementations */
extern void __devinit smp_generic_give_timebase(void);
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index aac87cb..fd3fd58 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -33,6 +33,9 @@ extern int numa_cpu_lookup_table[];
extern cpumask_var_t node_to_cpumask_map[];
#ifdef CONFIG_MEMORY_HOTPLUG
extern unsigned long max_pfn;
+u64 memory_hotplug_max(void);
+#else
+#define memory_hotplug_max() memblock_end_of_DRAM()
#endif
/*
@@ -42,6 +45,8 @@ extern unsigned long max_pfn;
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn)
+#else
+#define memory_hotplug_max() memblock_end_of_DRAM()
#endif /* CONFIG_NEED_MULTIPLE_NODES */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index 850b72f..92efe67 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -10,31 +10,7 @@
#ifndef _ASM_POWERPC_NVRAM_H
#define _ASM_POWERPC_NVRAM_H
-#include <linux/errno.h>
-
-#define NVRW_CNT 0x20
-#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */
-#define NVRAM_BLOCK_LEN 16
-#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN)
-#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN)
-
-#define NVRAM_AS0 0x74
-#define NVRAM_AS1 0x75
-#define NVRAM_DATA 0x77
-
-
-/* RTC Offsets */
-
-#define MOTO_RTC_SECONDS 0x1FF9
-#define MOTO_RTC_MINUTES 0x1FFA
-#define MOTO_RTC_HOURS 0x1FFB
-#define MOTO_RTC_DAY_OF_WEEK 0x1FFC
-#define MOTO_RTC_DAY_OF_MONTH 0x1FFD
-#define MOTO_RTC_MONTH 0x1FFE
-#define MOTO_RTC_YEAR 0x1FFF
-#define MOTO_RTC_CONTROLA 0x1FF8
-#define MOTO_RTC_CONTROLB 0x1FF9
-
+/* Signatures for nvram partitions */
#define NVRAM_SIG_SP 0x02 /* support processor */
#define NVRAM_SIG_OF 0x50 /* open firmware config */
#define NVRAM_SIG_FW 0x51 /* general firmware */
@@ -49,32 +25,19 @@
#define NVRAM_SIG_OS 0xa0 /* OS defined */
#define NVRAM_SIG_PANIC 0xa1 /* Apple OSX "panic" */
-/* If change this size, then change the size of NVNAME_LEN */
-struct nvram_header {
- unsigned char signature;
- unsigned char checksum;
- unsigned short length;
- char name[12];
-};
-
#ifdef __KERNEL__
+#include <linux/errno.h>
#include <linux/list.h>
-struct nvram_partition {
- struct list_head partition;
- struct nvram_header header;
- unsigned int index;
-};
-
-
+#ifdef CONFIG_PPC_PSERIES
extern int nvram_write_error_log(char * buff, int length,
unsigned int err_type, unsigned int err_seq);
extern int nvram_read_error_log(char * buff, int length,
unsigned int * err_type, unsigned int *err_seq);
extern int nvram_clear_error_log(void);
-
extern int pSeries_nvram_init(void);
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_MMIO_NVRAM
extern int mmio_nvram_init(void);
@@ -85,6 +48,13 @@ static inline int mmio_nvram_init(void)
}
#endif
+extern int __init nvram_scan_partitions(void);
+extern loff_t nvram_create_partition(const char *name, int sig,
+ int req_size, int min_size);
+extern int nvram_remove_partition(const char *name, int sig);
+extern int nvram_get_partition_size(loff_t data_index);
+extern loff_t nvram_find_partition(const char *name, int sig, int *out_size);
+
#endif /* __KERNEL__ */
/* PowerMac specific nvram stuffs */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 43adc8b..1255569 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -36,6 +36,8 @@
#define PPC_INST_NOP 0x60000000
#define PPC_INST_POPCNTB 0x7c0000f4
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
+#define PPC_INST_POPCNTD 0x7c0003f4
+#define PPC_INST_POPCNTW 0x7c0002f4
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
@@ -88,6 +90,12 @@
__PPC_RB(b) | __PPC_EH(eh))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
__PPC_RB(b))
+#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
+ __PPC_RA(a) | __PPC_RS(s))
+#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
+ __PPC_RA(a) | __PPC_RS(s))
+#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_INST_POPCNTW | \
+ __PPC_RA(a) | __PPC_RS(s))
#define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI)
#define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI)
#define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 4c14187..de1967a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -122,7 +122,6 @@ extern struct task_struct *last_task_used_spe;
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
#endif
-#ifdef __KERNEL__
#ifdef __powerpc64__
#define STACK_TOP_USER64 TASK_SIZE_USER64
@@ -139,7 +138,6 @@ extern struct task_struct *last_task_used_spe;
#define STACK_TOP_MAX STACK_TOP
#endif /* __powerpc64__ */
-#endif /* __KERNEL__ */
typedef struct {
unsigned long seg;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index afe4aaa..7ef0d90 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -106,9 +106,22 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
int nid)
{
}
-
#endif /* CONFIG_NUMA */
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+extern int start_topology_update(void);
+extern int stop_topology_update(void);
+#else
+static inline int start_topology_update(void)
+{
+ return 0;
+}
+static inline int stop_topology_update(void)
+{
+ return 0;
+}
+#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
+
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 08679c5..25e3922 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -116,9 +116,7 @@ struct vdso_data {
#endif /* CONFIG_PPC64 */
-#ifdef __KERNEL__
extern struct vdso_data *vdso_data;
-#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 36c30f3..3bb2a3e 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -29,8 +29,10 @@ endif
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
- signal.o sysfs.o cacheinfo.o
-obj-y += vdso32/
+ signal.o sysfs.o cacheinfo.o time.o \
+ prom.o traps.o setup-common.o \
+ udbg.o misc.o io.o dma.o \
+ misc_$(CONFIG_WORD_SIZE).o vdso32/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
@@ -80,9 +82,6 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
extra-y += vmlinux.lds
-obj-y += time.o prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o \
- misc_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bd0df2e..23e6a93 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -209,7 +209,6 @@ int main(void)
DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
/* Interrupt register frame */
- DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 96a908f..be5ab18 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -457,16 +457,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
.oprofile_cpu_type = "ppc64/power7",
.oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
- .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
- .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
- POWER6_MMCRA_OTHER,
.platform = "power7",
},
+ { /* Power7+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004A0000,
+ .cpu_name = "POWER7+ (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .mmu_features = MMU_FTR_HPTE_TABLE |
+ MMU_FTR_TLBIE_206,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power7+",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 8e05c16..0a2af50 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -19,6 +19,7 @@
#include <asm/prom.h>
#include <asm/firmware.h>
#include <asm/uaccess.h>
+#include <asm/rtas.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -141,3 +142,35 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
return csize;
}
+
+#ifdef CONFIG_PPC_RTAS
+/*
+ * The crashkernel region will almost always overlap the RTAS region, so
+ * we have to be careful when shrinking the crashkernel region.
+ */
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ const u32 *basep, *sizep;
+ unsigned int rtas_start = 0, rtas_end = 0;
+
+ basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+ sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+
+ if (basep && sizep) {
+ rtas_start = *basep;
+ rtas_end = *basep + *sizep;
+ }
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ /* Does this page overlap with the RTAS region? */
+ if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start))
+ continue;
+
+ ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
+ init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
+ free_page((unsigned long)__va(addr));
+ totalram_pages++;
+ }
+}
+#endif
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 6e54a0f..e755415 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -19,7 +19,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
{
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
- dma_handle, device_to_mask(dev), flag,
+ dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ed4aeb9..c22dc1e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -31,6 +31,7 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
+#include <asm/ptrace.h>
#undef SHOW_SYSCALLS
#undef SHOW_SYSCALLS_TASK
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9f8b01d..8a81799 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -13,6 +13,7 @@
*/
#include <asm/exception-64s.h>
+#include <asm/ptrace.h>
/*
* We layout physical memory as follows:
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index e86c040..de36955 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
#ifdef CONFIG_VSX
#define REST_32FPVSRS(n,c,base) \
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 8278e8b..9dd21a8 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -40,6 +40,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
/* As with the other PowerPC ports, it is expected that when code
* execution begins here, the following registers contain valid, yet
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 562305b..cbb3436 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -37,6 +37,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
#include <asm/synch.h>
#include "head_booke.h"
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f0dd577..782f23d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -38,6 +38,7 @@
#include <asm/page_64.h>
#include <asm/irqflags.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/ptrace.h>
/* The physical memory is layed out such that the secondary processor
* spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -96,7 +97,7 @@ __secondary_hold_acknowledge:
.llong hvReleaseData-KERNELBASE
#endif /* CONFIG_PPC_ISERIES */
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
* at the loaded address instead of the linked address. This
* is used by kexec-tools to keep the the kdump kernel in the
@@ -384,12 +385,10 @@ _STATIC(__after_prom_start)
/* process relocations for the final address of the kernel */
lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
sldi r25,r25,32
-#ifdef CONFIG_CRASH_DUMP
lwz r7,__run_at_load-_stext(r26)
- cmplwi cr0,r7,1 /* kdump kernel ? - stay where we are */
+ cmplwi cr0,r7,1 /* flagged to stay where we are ? */
bne 1f
add r25,r25,r26
-#endif
1: mr r3,r25
bl .relocate
#endif
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 1f1a04b..1cbf64e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -29,6 +29,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
/* Macro to make the code more readable. */
#ifdef CONFIG_8xx_CPU6
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 529b817..3e02710 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -41,6 +41,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/ptrace.h>
#include "head_booke.h"
/* As with the other PowerPC ports, it is expected that when code
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d583917..961bb03 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -311,8 +311,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Handle failure */
if (unlikely(entry == DMA_ERROR_CODE)) {
if (printk_ratelimit())
- printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
- " npages %lx\n", tbl, vaddr, npages);
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %lx npages %lu\n", tbl, vaddr,
+ npages);
goto failure;
}
@@ -579,9 +580,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
attrs);
if (dma_handle == DMA_ERROR_CODE) {
if (printk_ratelimit()) {
- printk(KERN_INFO "iommu_alloc failed, "
- "tbl %p vaddr %p npages %d\n",
- tbl, vaddr, npages);
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %p npages %d\n", tbl, vaddr,
+ npages);
}
} else
dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
@@ -627,7 +628,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
* the tce tables.
*/
if (order >= IOMAP_MAX_ORDER) {
- printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+ dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
+ size);
return NULL;
}
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 2d29752..b69463e 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -122,8 +122,3 @@ _GLOBAL(longjmp)
mtlr r0
mr r3,r4
blr
-
-_GLOBAL(__setup_cpu_power7)
-_GLOBAL(__restore_cpu_power7)
- /* place holder */
- blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index a7a570d..094bd98 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -30,6 +30,7 @@
#include <asm/processor.h>
#include <asm/kexec.h>
#include <asm/bug.h>
+#include <asm/ptrace.h>
.text
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index e514490..206a321 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -25,6 +25,7 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/kexec.h>
+#include <asm/ptrace.h>
.text
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 9cf197f..bb12b32 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -34,15 +34,26 @@
#undef DEBUG_NVRAM
-static struct nvram_partition * nvram_part;
-static long nvram_error_log_index = -1;
-static long nvram_error_log_size = 0;
+#define NVRAM_HEADER_LEN sizeof(struct nvram_header)
+#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN
+
+/* If change this size, then change the size of NVNAME_LEN */
+struct nvram_header {
+ unsigned char signature;
+ unsigned char checksum;
+ unsigned short length;
+ /* Terminating null required only for names < 12 chars. */
+ char name[12];
+};
-struct err_log_info {
- int error_type;
- unsigned int seq_num;
+struct nvram_partition {
+ struct list_head partition;
+ struct nvram_header header;
+ unsigned int index;
};
+static LIST_HEAD(nvram_partitions);
+
static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
{
int size;
@@ -186,14 +197,12 @@ static struct miscdevice nvram_dev = {
#ifdef DEBUG_NVRAM
static void __init nvram_print_partitions(char * label)
{
- struct list_head * p;
struct nvram_partition * tmp_part;
printk(KERN_WARNING "--------%s---------\n", label);
printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
- list_for_each(p, &nvram_part->partition) {
- tmp_part = list_entry(p, struct nvram_partition, partition);
- printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%s\n",
+ list_for_each_entry(tmp_part, &nvram_partitions, partition) {
+ printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n",
tmp_part->index, tmp_part->header.signature,
tmp_part->header.checksum, tmp_part->header.length,
tmp_part->header.name);
@@ -228,95 +237,113 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
return c_sum;
}
-static int __init nvram_remove_os_partition(void)
+/**
+ * nvram_remove_partition - Remove one or more partitions in nvram
+ * @name: name of the partition to remove, or NULL for a
+ * signature only match
+ * @sig: signature of the partition(s) to remove
+ */
+
+int __init nvram_remove_partition(const char *name, int sig)
{
- struct list_head *i;
- struct list_head *j;
- struct nvram_partition * part;
- struct nvram_partition * cur_part;
+ struct nvram_partition *part, *prev, *tmp;
int rc;
- list_for_each(i, &nvram_part->partition) {
- part = list_entry(i, struct nvram_partition, partition);
- if (part->header.signature != NVRAM_SIG_OS)
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (part->header.signature != sig)
continue;
-
- /* Make os partition a free partition */
+ if (name && strncmp(name, part->header.name, 12))
+ continue;
+
+ /* Make partition a free partition */
part->header.signature = NVRAM_SIG_FREE;
- sprintf(part->header.name, "wwwwwwwwwwww");
+ strncpy(part->header.name, "wwwwwwwwwwww", 12);
part->header.checksum = nvram_checksum(&part->header);
-
- /* Merge contiguous free partitions backwards */
- list_for_each_prev(j, &part->partition) {
- cur_part = list_entry(j, struct nvram_partition, partition);
- if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
- break;
- }
-
- part->header.length += cur_part->header.length;
- part->header.checksum = nvram_checksum(&part->header);
- part->index = cur_part->index;
-
- list_del(&cur_part->partition);
- kfree(cur_part);
- j = &part->partition; /* fixup our loop */
- }
-
- /* Merge contiguous free partitions forwards */
- list_for_each(j, &part->partition) {
- cur_part = list_entry(j, struct nvram_partition, partition);
- if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
- break;
- }
-
- part->header.length += cur_part->header.length;
- part->header.checksum = nvram_checksum(&part->header);
-
- list_del(&cur_part->partition);
- kfree(cur_part);
- j = &part->partition; /* fixup our loop */
- }
-
rc = nvram_write_header(part);
if (rc <= 0) {
- printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
return rc;
}
+ }
+ /* Merge contiguous ones */
+ prev = NULL;
+ list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE) {
+ prev = NULL;
+ continue;
+ }
+ if (prev) {
+ prev->header.length += part->header.length;
+ prev->header.checksum = nvram_checksum(&part->header);
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+ list_del(&part->partition);
+ kfree(part);
+ } else
+ prev = part;
}
return 0;
}
-/* nvram_create_os_partition
+/**
+ * nvram_create_partition - Create a partition in nvram
+ * @name: name of the partition to create
+ * @sig: signature of the partition to create
+ * @req_size: size of data to allocate in bytes
+ * @min_size: minimum acceptable size (0 means req_size)
*
- * Create a OS linux partition to buffer error logs.
- * Will create a partition starting at the first free
- * space found if space has enough room.
+ * Returns a negative error code or a positive nvram index
+ * of the beginning of the data area of the newly created
+ * partition. If you provided a min_size smaller than req_size
+ * you need to query for the actual size yourself after the
+ * call using nvram_partition_get_size().
*/
-static int __init nvram_create_os_partition(void)
+loff_t __init nvram_create_partition(const char *name, int sig,
+ int req_size, int min_size)
{
struct nvram_partition *part;
struct nvram_partition *new_part;
struct nvram_partition *free_part = NULL;
- int seq_init[2] = { 0, 0 };
+ static char nv_init_vals[16];
loff_t tmp_index;
long size = 0;
int rc;
-
+
+ /* Convert sizes from bytes to blocks */
+ req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+
+ /* If no minimum size specified, make it the same as the
+ * requested size
+ */
+ if (min_size == 0)
+ min_size = req_size;
+ if (min_size > req_size)
+ return -EINVAL;
+
+ /* Now add one block to each for the header */
+ req_size += 1;
+ min_size += 1;
+
/* Find a free partition that will give us the maximum needed size
If can't find one that will give us the minimum size needed */
- list_for_each_entry(part, &nvram_part->partition, partition) {
+ list_for_each_entry(part, &nvram_partitions, partition) {
if (part->header.signature != NVRAM_SIG_FREE)
continue;
- if (part->header.length >= NVRAM_MAX_REQ) {
- size = NVRAM_MAX_REQ;
+ if (part->header.length >= req_size) {
+ size = req_size;
free_part = part;
break;
}
- if (!size && part->header.length >= NVRAM_MIN_REQ) {
- size = NVRAM_MIN_REQ;
+ if (part->header.length > size &&
+ part->header.length >= min_size) {
+ size = part->header.length;
free_part = part;
}
}
@@ -326,136 +353,95 @@ static int __init nvram_create_os_partition(void)
/* Create our OS partition */
new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
if (!new_part) {
- printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
+ pr_err("nvram_create_os_partition: kmalloc failed\n");
return -ENOMEM;
}
new_part->index = free_part->index;
- new_part->header.signature = NVRAM_SIG_OS;
+ new_part->header.signature = sig;
new_part->header.length = size;
- strcpy(new_part->header.name, "ppc64,linux");
+ strncpy(new_part->header.name, name, 12);
new_part->header.checksum = nvram_checksum(&new_part->header);
rc = nvram_write_header(new_part);
if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
- "failed (%d)\n", rc);
- return rc;
- }
-
- /* make sure and initialize to zero the sequence number and the error
- type logged */
- tmp_index = new_part->index + NVRAM_HEADER_LEN;
- rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write "
+ pr_err("nvram_create_os_partition: nvram_write_header "
"failed (%d)\n", rc);
return rc;
}
-
- nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
- nvram_error_log_size = ((part->header.length - 1) *
- NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
-
list_add_tail(&new_part->partition, &free_part->partition);
- if (free_part->header.length <= size) {
+ /* Adjust or remove the partition we stole the space from */
+ if (free_part->header.length > size) {
+ free_part->index += size * NVRAM_BLOCK_LEN;
+ free_part->header.length -= size;
+ free_part->header.checksum = nvram_checksum(&free_part->header);
+ rc = nvram_write_header(free_part);
+ if (rc <= 0) {
+ pr_err("nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+ } else {
list_del(&free_part->partition);
kfree(free_part);
- return 0;
}
- /* Adjust the partition we stole the space from */
- free_part->index += size * NVRAM_BLOCK_LEN;
- free_part->header.length -= size;
- free_part->header.checksum = nvram_checksum(&free_part->header);
-
- rc = nvram_write_header(free_part);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
- "failed (%d)\n", rc);
- return rc;
+ /* Clear the new partition */
+ for (tmp_index = new_part->index + NVRAM_HEADER_LEN;
+ tmp_index < ((size - 1) * NVRAM_BLOCK_LEN);
+ tmp_index += NVRAM_BLOCK_LEN) {
+ rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index);
+ if (rc <= 0) {
+ pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
}
-
- return 0;
+
+ return new_part->index + NVRAM_HEADER_LEN;
}
-
-/* nvram_setup_partition
- *
- * This will setup the partition we need for buffering the
- * error logs and cleanup partitions if needed.
- *
- * The general strategy is the following:
- * 1.) If there is ppc64,linux partition large enough then use it.
- * 2.) If there is not a ppc64,linux partition large enough, search
- * for a free partition that is large enough.
- * 3.) If there is not a free partition large enough remove
- * _all_ OS partitions and consolidate the space.
- * 4.) Will first try getting a chunk that will satisfy the maximum
- * error log size (NVRAM_MAX_REQ).
- * 5.) If the max chunk cannot be allocated then try finding a chunk
- * that will satisfy the minum needed (NVRAM_MIN_REQ).
+/**
+ * nvram_get_partition_size - Get the data size of an nvram partition
+ * @data_index: This is the offset of the start of the data of
+ * the partition. The same value that is returned by
+ * nvram_create_partition().
*/
-static int __init nvram_setup_partition(void)
+int nvram_get_partition_size(loff_t data_index)
{
- struct list_head * p;
- struct nvram_partition * part;
- int rc;
-
- /* For now, we don't do any of this on pmac, until I
- * have figured out if it's worth killing some unused stuffs
- * in our nvram, as Apple defined partitions use pretty much
- * all of the space
- */
- if (machine_is(powermac))
- return -ENOSPC;
-
- /* see if we have an OS partition that meets our needs.
- will try getting the max we need. If not we'll delete
- partitions and try again. */
- list_for_each(p, &nvram_part->partition) {
- part = list_entry(p, struct nvram_partition, partition);
- if (part->header.signature != NVRAM_SIG_OS)
- continue;
+ struct nvram_partition *part;
+
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (part->index + NVRAM_HEADER_LEN == data_index)
+ return (part->header.length - 1) * NVRAM_BLOCK_LEN;
+ }
+ return -1;
+}
- if (strcmp(part->header.name, "ppc64,linux"))
- continue;
- if (part->header.length >= NVRAM_MIN_REQ) {
- /* found our partition */
- nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
- nvram_error_log_size = ((part->header.length - 1) *
- NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
- return 0;
+/**
+ * nvram_find_partition - Find an nvram partition by signature and name
+ * @name: Name of the partition or NULL for any name
+ * @sig: Signature to test against
+ * @out_size: if non-NULL, returns the size of the data part of the partition
+ */
+loff_t nvram_find_partition(const char *name, int sig, int *out_size)
+{
+ struct nvram_partition *p;
+
+ list_for_each_entry(p, &nvram_partitions, partition) {
+ if (p->header.signature == sig &&
+ (!name || !strncmp(p->header.name, name, 12))) {
+ if (out_size)
+ *out_size = (p->header.length - 1) *
+ NVRAM_BLOCK_LEN;
+ return p->index + NVRAM_HEADER_LEN;
}
}
-
- /* try creating a partition with the free space we have */
- rc = nvram_create_os_partition();
- if (!rc) {
- return 0;
- }
-
- /* need to free up some space */
- rc = nvram_remove_os_partition();
- if (rc) {
- return rc;
- }
-
- /* create a partition in this new space */
- rc = nvram_create_os_partition();
- if (rc) {
- printk(KERN_ERR "nvram_create_os_partition: Could not find a "
- "NVRAM partition large enough\n");
- return rc;
- }
-
return 0;
}
-
-static int __init nvram_scan_partitions(void)
+int __init nvram_scan_partitions(void)
{
loff_t cur_index = 0;
struct nvram_header phead;
@@ -465,7 +451,7 @@ static int __init nvram_scan_partitions(void)
int total_size;
int err;
- if (ppc_md.nvram_size == NULL)
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
return -ENODEV;
total_size = ppc_md.nvram_size();
@@ -512,12 +498,16 @@ static int __init nvram_scan_partitions(void)
memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
tmp_part->index = cur_index;
- list_add_tail(&tmp_part->partition, &nvram_part->partition);
+ list_add_tail(&tmp_part->partition, &nvram_partitions);
cur_index += phead.length * NVRAM_BLOCK_LEN;
}
err = 0;
+#ifdef DEBUG_NVRAM
+ nvram_print_partitions("NVRAM Partitions");
+#endif
+
out:
kfree(header);
return err;
@@ -525,9 +515,10 @@ static int __init nvram_scan_partitions(void)
static int __init nvram_init(void)
{
- int error;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
return -ENODEV;
@@ -537,29 +528,6 @@ static int __init nvram_init(void)
return rc;
}
- /* initialize our anchor for the nvram partition list */
- nvram_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
- if (!nvram_part) {
- printk(KERN_ERR "nvram_init: Failed kmalloc\n");
- return -ENOMEM;
- }
- INIT_LIST_HEAD(&nvram_part->partition);
-
- /* Get all the NVRAM partitions */
- error = nvram_scan_partitions();
- if (error) {
- printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
- return error;
- }
-
- if(nvram_setup_partition())
- printk(KERN_WARNING "nvram_init: Could not find nvram partition"
- " for nvram buffered error logging.\n");
-
-#ifdef DEBUG_NVRAM
- nvram_print_partitions("NVRAM Partitions");
-#endif
-
return rc;
}
@@ -568,135 +536,6 @@ void __exit nvram_cleanup(void)
misc_deregister( &nvram_dev );
}
-
-#ifdef CONFIG_PPC_PSERIES
-
-/* nvram_write_error_log
- *
- * We need to buffer the error logs into nvram to ensure that we have
- * the failure information to decode. If we have a severe error there
- * is no way to guarantee that the OS or the machine is in a state to
- * get back to user land and write the error to disk. For example if
- * the SCSI device driver causes a Machine Check by writing to a bad
- * IO address, there is no way of guaranteeing that the device driver
- * is in any state that is would also be able to write the error data
- * captured to disk, thus we buffer it in NVRAM for analysis on the
- * next boot.
- *
- * In NVRAM the partition containing the error log buffer will looks like:
- * Header (in bytes):
- * +-----------+----------+--------+------------+------------------+
- * | signature | checksum | length | name | data |
- * |0 |1 |2 3|4 15|16 length-1|
- * +-----------+----------+--------+------------+------------------+
- *
- * The 'data' section would look like (in bytes):
- * +--------------+------------+-----------------------------------+
- * | event_logged | sequence # | error log |
- * |0 3|4 7|8 nvram_error_log_size-1|
- * +--------------+------------+-----------------------------------+
- *
- * event_logged: 0 if event has not been logged to syslog, 1 if it has
- * sequence #: The unique sequence # for each event. (until it wraps)
- * error log: The error log from event_scan
- */
-int nvram_write_error_log(char * buff, int length,
- unsigned int err_type, unsigned int error_log_cnt)
-{
- int rc;
- loff_t tmp_index;
- struct err_log_info info;
-
- if (nvram_error_log_index == -1) {
- return -ESPIPE;
- }
-
- if (length > nvram_error_log_size) {
- length = nvram_error_log_size;
- }
-
- info.error_type = err_type;
- info.seq_num = error_log_cnt;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- rc = ppc_md.nvram_write(buff, length, &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-/* nvram_read_error_log
- *
- * Reads nvram for error log for at most 'length'
- */
-int nvram_read_error_log(char * buff, int length,
- unsigned int * err_type, unsigned int * error_log_cnt)
-{
- int rc;
- loff_t tmp_index;
- struct err_log_info info;
-
- if (nvram_error_log_index == -1)
- return -1;
-
- if (length > nvram_error_log_size)
- length = nvram_error_log_size;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
- return rc;
- }
-
- rc = ppc_md.nvram_read(buff, length, &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
- return rc;
- }
-
- *error_log_cnt = info.seq_num;
- *err_type = info.error_type;
-
- return 0;
-}
-
-/* This doesn't actually zero anything, but it sets the event_logged
- * word to tell that this event is safely in syslog.
- */
-int nvram_clear_error_log(void)
-{
- loff_t tmp_index;
- int clear_word = ERR_FLAG_ALREADY_LOGGED;
- int rc;
-
- if (nvram_error_log_index == -1)
- return -1;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-#endif /* CONFIG_PPC_PSERIES */
-
module_init(nvram_init);
module_exit(nvram_cleanup);
MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index d43fc65..8515776 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -193,8 +193,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus)
hose->io_resource.start += io_virt_offset;
hose->io_resource.end += io_virt_offset;
- pr_debug(" hose->io_resource=0x%016llx...0x%016llx\n",
- hose->io_resource.start, hose->io_resource.end);
+ pr_debug(" hose->io_resource=%pR\n", &hose->io_resource);
return 0;
}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ab3e392..ef3ef56 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -186,3 +186,10 @@ EXPORT_SYMBOL(__mtdcr);
EXPORT_SYMBOL(__mfdcr);
#endif
EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+#endif
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 5113bd2..e83ba3f 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -11,6 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
/*
* Grab the register values as they are now.
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index a9b3296..9065369 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1316,6 +1316,10 @@ static int set_dac_range(struct task_struct *child,
static long ppc_set_hwdebug(struct task_struct *child,
struct ppc_hw_breakpoint *bp_info)
{
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dabr;
+#endif
+
if (bp_info->version != 1)
return -ENOTSUPP;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -1353,11 +1357,10 @@ static long ppc_set_hwdebug(struct task_struct *child,
/*
* We only support one data breakpoint
*/
- if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) ||
- ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) ||
- (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) ||
- (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) ||
- (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
return -EINVAL;
if (child->thread.dabr)
@@ -1366,7 +1369,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
- child->thread.dabr = (unsigned long)bp_info->addr;
+ dabr = (unsigned long)bp_info->addr & ~7UL;
+ dabr |= DABR_TRANSLATION;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dabr |= DABR_DATA_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dabr |= DABR_DATA_WRITE;
+
+ child->thread.dabr = dabr;
return 1;
#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 8a6daf4..69c4be9 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -280,7 +280,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
/* We only support one DABR and no IABRS at the moment */
if (addr > 0)
break;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ ret = put_user(child->thread.dac1, (u32 __user *)data);
+#else
ret = put_user(child->thread.dabr, (u32 __user *)data);
+#endif
break;
}
@@ -312,6 +316,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
case PTRACE_SET_DEBUGREG:
case PTRACE_SYSCALL:
case PTRACE_CONT:
+ case PPC_PTRACE_GETHWDBGINFO:
+ case PPC_PTRACE_SETHWDEBUG:
+ case PPC_PTRACE_DELHWDEBUG:
ret = arch_ptrace(child, request, addr, data);
break;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8fe8bc6..2097f2b 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -41,6 +41,7 @@
#include <asm/atomic.h>
#include <asm/time.h>
#include <asm/mmu.h>
+#include <asm/topology.h>
struct rtas_t rtas = {
.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -713,6 +714,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
int cpu;
slb_set_size(SLB_MIN_SIZE);
+ stop_topology_update();
printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
@@ -728,6 +730,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
rc = atomic_read(&data->error);
atomic_set(&data->error, rc);
+ start_topology_update();
if (wake_when_done) {
atomic_set(&data->done, 1);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ce6f61c..5a0401f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -437,8 +437,8 @@ static void __init irqstack_early_init(void)
unsigned int i;
/*
- * interrupt stacks must be under 256MB, we cannot afford to take
- * SLB misses on them.
+ * Interrupt stacks must be in the first segment since we
+ * cannot afford to take SLB misses on them.
*/
for_each_possible_cpu(i) {
softirq_ctx[i] = (struct thread_info *)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 68034bb..9813605 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -466,7 +466,20 @@ out:
return id;
}
-/* Must be called when no change can occur to cpu_present_mask,
+/* Helper routines for cpu to core mapping */
+int cpu_core_index_of_thread(int cpu)
+{
+ return cpu >> threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
+
+int cpu_first_thread_of_core(int core)
+{
+ return core << threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
+
+/* Must be called when no change can occur to cpu_present_map,
* i.e. during cpu online or offline.
*/
static struct device_node *cpu_to_l2cache(int cpu)
@@ -514,7 +527,7 @@ int __devinit start_secondary(void *unused)
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
/* Update sibling maps */
- base = cpu_first_thread_in_core(cpu);
+ base = cpu_first_thread_sibling(cpu);
for (i = 0; i < threads_per_core; i++) {
if (cpu_is_offline(base + i))
continue;
@@ -600,7 +613,7 @@ int __cpu_disable(void)
return err;
/* Update sibling maps */
- base = cpu_first_thread_in_core(cpu);
+ base = cpu_first_thread_sibling(cpu);
for (i = 0; i < threads_per_core; i++) {
cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0104069..09e4dea 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(rtc_lock);
static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
-static unsigned long boot_tb __read_mostly;
+static u64 boot_tb __read_mostly;
extern struct timezone sys_tz;
static long timezone_offset;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index fe46048..9de6f39 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -5,6 +5,7 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
/*
* load_up_altivec(unused, unused, tsk)
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 441d2a7..1b695fd 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -600,6 +600,11 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
vio_cmo_dealloc(viodev, alloc_size);
}
+static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return dma_iommu_ops.dma_supported(dev, mask);
+}
+
struct dma_map_ops vio_dma_mapping_ops = {
.alloc_coherent = vio_dma_iommu_alloc_coherent,
.free_coherent = vio_dma_iommu_free_coherent,
@@ -607,6 +612,7 @@ struct dma_map_ops vio_dma_mapping_ops = {
.unmap_sg = vio_dma_iommu_unmap_sg,
.map_page = vio_dma_iommu_map_page,
.unmap_page = vio_dma_iommu_unmap_page,
+ .dma_supported = vio_dma_iommu_dma_supported,
};
@@ -858,8 +864,7 @@ static void vio_cmo_bus_remove(struct vio_dev *viodev)
static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
{
- vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
- viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
+ set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
}
/**
@@ -1244,7 +1249,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_set_dma_ops(viodev);
else
- viodev->dev.archdata.dma_ops = &dma_iommu_ops;
+ set_dma_ops(&viodev->dev, &dma_iommu_ops);
set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
set_dev_node(&viodev->dev, of_node_to_nid(of_node));
@@ -1252,6 +1257,10 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
viodev->dev.parent = &vio_bus_device.dev;
viodev->dev.bus = &vio_bus_type;
viodev->dev.release = vio_dev_release;
+ /* needed to ensure proper operation of coherent allocations
+ * later, in case driver doesn't set it explicitly */
+ dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
+ dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
/* register with generic device framework */
if (device_register(&viodev->dev)) {
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 889f2bc..166a6a0 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
memcpy_64.o usercopy_64.o mem_64.o string.o \
- checksum_wrappers_64.o
+ checksum_wrappers_64.o hweight_64.o
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
new file mode 100644
index 0000000..fda2786
--- /dev/null
+++ b/arch/powerpc/lib/hweight_64.S
@@ -0,0 +1,110 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+/* Note: This code relies on -mminimal-toc */
+
+_GLOBAL(__arch_hweight8)
+BEGIN_FTR_SECTION
+ b .__sw_hweight8
+ nop
+ nop
+FTR_SECTION_ELSE
+ PPC_POPCNTB(r3,r3)
+ clrldi r3,r3,64-8
+ blr
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight16)
+BEGIN_FTR_SECTION
+ b .__sw_hweight16
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(50)
+ PPC_POPCNTB(r3,r3)
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(50)
+ clrlwi r3,r3,16
+ PPC_POPCNTW(r3,r3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight32)
+BEGIN_FTR_SECTION
+ b .__sw_hweight32
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(51)
+ PPC_POPCNTB(r3,r3)
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(51)
+ PPC_POPCNTW(r3,r3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight64)
+BEGIN_FTR_SECTION
+ b .__sw_hweight64
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(52)
+ PPC_POPCNTB(r3,r3)
+ srdi r4,r3,32
+ add r3,r4,r3
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(52)
+ PPC_POPCNTD(r3,r3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5e95844..a5991fa 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1070,7 +1070,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
{
unsigned long vsid;
- void *pgdir;
+ pgd_t *pgdir;
pte_t *ptep;
unsigned long flags;
int rc, ssize, local = 0;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 5ce9984..c0aab52 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -111,8 +111,8 @@ static unsigned int steal_context_smp(unsigned int id)
* a core map instead but this will do for now.
*/
for_each_cpu(cpu, mm_cpumask(mm)) {
- for (i = cpu_first_thread_in_core(cpu);
- i <= cpu_last_thread_in_core(cpu); i++)
+ for (i = cpu_first_thread_sibling(cpu);
+ i <= cpu_last_thread_sibling(cpu); i++)
__set_bit(id, stale_map[i]);
cpu = i - 1;
}
@@ -264,14 +264,14 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
*/
if (test_bit(id, stale_map[cpu])) {
pr_hardcont(" | stale flush %d [%d..%d]",
- id, cpu_first_thread_in_core(cpu),
- cpu_last_thread_in_core(cpu));
+ id, cpu_first_thread_sibling(cpu),
+ cpu_last_thread_sibling(cpu));
local_flush_tlb_mm(next);
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
- for (i = cpu_first_thread_in_core(cpu);
- i <= cpu_last_thread_in_core(cpu); i++) {
+ for (i = cpu_first_thread_sibling(cpu);
+ i <= cpu_last_thread_sibling(cpu); i++) {
__clear_bit(id, stale_map[i]);
}
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 74505b2..bf5cb91 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -20,10 +20,15 @@
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
static int numa_enabled = 1;
@@ -163,7 +168,7 @@ static void __init get_node_active_region(unsigned long start_pfn,
work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}
-static void __cpuinit map_cpu_to_node(int cpu, int node)
+static void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -173,7 +178,7 @@ static void __cpuinit map_cpu_to_node(int cpu, int node)
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
static void unmap_cpu_from_node(unsigned long cpu)
{
int node = numa_cpu_lookup_table[cpu];
@@ -187,7 +192,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
cpu, node);
}
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
@@ -246,32 +251,41 @@ static void initialize_distance_lookup_table(int nid,
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
-static int of_node_to_nid_single(struct device_node *device)
+static int associativity_to_nid(const unsigned int *associativity)
{
int nid = -1;
- const unsigned int *tmp;
if (min_common_depth == -1)
goto out;
- tmp = of_get_associativity(device);
- if (!tmp)
- goto out;
-
- if (tmp[0] >= min_common_depth)
- nid = tmp[min_common_depth];
+ if (associativity[0] >= min_common_depth)
+ nid = associativity[min_common_depth];
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = -1;
- if (nid > 0 && tmp[0] >= distance_ref_points_depth)
- initialize_distance_lookup_table(nid, tmp);
+ if (nid > 0 && associativity[0] >= distance_ref_points_depth)
+ initialize_distance_lookup_table(nid, associativity);
out:
return nid;
}
+/* Returns the nid associated with the given device tree node,
+ * or -1 if not found.
+ */
+static int of_node_to_nid_single(struct device_node *device)
+{
+ int nid = -1;
+ const unsigned int *tmp;
+
+ tmp = of_get_associativity(device);
+ if (tmp)
+ nid = associativity_to_nid(tmp);
+ return nid;
+}
+
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
@@ -1247,4 +1261,275 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
return nid;
}
+static u64 hot_add_drconf_memory_max(void)
+{
+ struct device_node *memory = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
+ const u32 *dm = 0;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
+ of_node_put(memory);
+ }
+ return lmb_size * drconf_cell_cnt;
+}
+
+/*
+ * memory_hotplug_max - return max address of memory that may be added
+ *
+ * This is currently only used on systems that support drconfig memory
+ * hotplug.
+ */
+u64 memory_hotplug_max(void)
+{
+ return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
+}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/* Vrtual Processor Home Node (VPHN) support */
+#ifdef CONFIG_PPC_SPLPAR
+#define VPHN_NR_CHANGE_CTRS (8)
+static u8 vphn_cpu_change_counts[NR_CPUS][VPHN_NR_CHANGE_CTRS];
+static cpumask_t cpu_associativity_changes_mask;
+static int vphn_enabled;
+static void set_topology_timer(void);
+
+/*
+ * Store the current values of the associativity change counters in the
+ * hypervisor.
+ */
+static void setup_cpu_associativity_change_counters(void)
+{
+ int cpu = 0;
+
+ for_each_possible_cpu(cpu) {
+ int i = 0;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
+ counts[i] = hypervisor_counts[i];
+ }
+ }
+}
+
+/*
+ * The hypervisor maintains a set of 8 associativity change counters in
+ * the VPA of each cpu that correspond to the associativity levels in the
+ * ibm,associativity-reference-points property. When an associativity
+ * level changes, the corresponding counter is incremented.
+ *
+ * Set a bit in cpu_associativity_changes_mask for each cpu whose home
+ * node associativity levels have changed.
+ *
+ * Returns the number of cpus with unhandled associativity changes.
+ */
+static int update_cpu_associativity_changes_mask(void)
+{
+ int cpu = 0, nr_cpus = 0;
+ cpumask_t *changes = &cpu_associativity_changes_mask;
+
+ cpumask_clear(changes);
+
+ for_each_possible_cpu(cpu) {
+ int i, changed = 0;
+ u8 *counts = vphn_cpu_change_counts[cpu];
+ volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+
+ for (i = 0; i < VPHN_NR_CHANGE_CTRS; i++) {
+ if (hypervisor_counts[i] > counts[i]) {
+ counts[i] = hypervisor_counts[i];
+ changed = 1;
+ }
+ }
+ if (changed) {
+ cpumask_set_cpu(cpu, changes);
+ nr_cpus++;
+ }
+ }
+
+ return nr_cpus;
+}
+
+/* 6 64-bit registers unpacked into 12 32-bit associativity values */
+#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32))
+
+/*
+ * Convert the associativity domain numbers returned from the hypervisor
+ * to the sequence they would appear in the ibm,associativity property.
+ */
+static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
+{
+ int i = 0;
+ int nr_assoc_doms = 0;
+ const u16 *field = (const u16*) packed;
+
+#define VPHN_FIELD_UNUSED (0xffff)
+#define VPHN_FIELD_MSB (0x8000)
+#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
+
+ for (i = 0; i < VPHN_ASSOC_BUFSIZE; i++) {
+ if (*field == VPHN_FIELD_UNUSED) {
+ /* All significant fields processed, and remaining
+ * fields contain the reserved value of all 1's.
+ * Just store them.
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ }
+ else if (*field & VPHN_FIELD_MSB) {
+ /* Data is in the lower 15 bits of this field */
+ unpacked[i] = *field & VPHN_FIELD_MASK;
+ field++;
+ nr_assoc_doms++;
+ }
+ else {
+ /* Data is in the lower 15 bits of this field
+ * concatenated with the next 16 bit field
+ */
+ unpacked[i] = *((u32*)field);
+ field += 2;
+ nr_assoc_doms++;
+ }
+ }
+
+ return nr_assoc_doms;
+}
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ */
+static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
+{
+ long rc = 0;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ u64 flags = 1;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
+
+static long vphn_get_associativity(unsigned long cpu,
+ unsigned int *associativity)
+{
+ long rc = 0;
+
+ rc = hcall_vphn(cpu, associativity);
+
+ switch (rc) {
+ case H_FUNCTION:
+ printk(KERN_INFO
+ "VPHN is not supported. Disabling polling...\n");
+ stop_topology_update();
+ break;
+ case H_HARDWARE:
+ printk(KERN_ERR
+ "hcall_vphn() experienced a hardware fault "
+ "preventing VPHN. Disabling polling...\n");
+ stop_topology_update();
+ }
+
+ return rc;
+}
+
+/*
+ * Update the node maps and sysfs entries for each cpu whose home node
+ * has changed.
+ */
+int arch_update_cpu_topology(void)
+{
+ int cpu = 0, nid = 0, old_nid = 0;
+ unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ struct sys_device *sysdev = NULL;
+
+ for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+ vphn_get_associativity(cpu, associativity);
+ nid = associativity_to_nid(associativity);
+
+ if (nid < 0 || !node_online(nid))
+ nid = first_online_node;
+
+ old_nid = numa_cpu_lookup_table[cpu];
+
+ /* Disable hotplug while we update the cpu
+ * masks and sysfs.
+ */
+ get_online_cpus();
+ unregister_cpu_under_node(cpu, old_nid);
+ unmap_cpu_from_node(cpu);
+ map_cpu_to_node(cpu, nid);
+ register_cpu_under_node(cpu, nid);
+ put_online_cpus();
+
+ sysdev = get_cpu_sysdev(cpu);
+ if (sysdev)
+ kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+ }
+
+ return 1;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ rebuild_sched_domains();
+}
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+ if (!vphn_enabled)
+ return;
+ if (update_cpu_associativity_changes_mask() > 0)
+ topology_schedule_update();
+ set_topology_timer();
+}
+static struct timer_list topology_timer =
+ TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+
+static void set_topology_timer(void)
+{
+ topology_timer.data = 0;
+ topology_timer.expires = jiffies + 60 * HZ;
+ add_timer(&topology_timer);
+}
+
+/*
+ * Start polling for VPHN associativity changes.
+ */
+int start_topology_update(void)
+{
+ int rc = 0;
+
+ if (firmware_has_feature(FW_FEATURE_VPHN)) {
+ vphn_enabled = 1;
+ setup_cpu_associativity_change_counters();
+ init_timer_deferrable(&topology_timer);
+ set_topology_timer();
+ rc = 1;
+ }
+
+ return rc;
+}
+__initcall(start_topology_update);
+
+/*
+ * Disable polling for VPHN associativity changes.
+ */
+int stop_topology_update(void)
+{
+ vphn_enabled = 0;
+ return del_timer_sync(&topology_timer);
+}
+#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a87ead0..8dc41c0 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -78,7 +78,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
/* pgdir take page or two with 4K pages and a page fraction otherwise */
#ifndef CONFIG_PPC_4K_PAGES
- ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
+ ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
#else
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
PGDIR_ORDER - PAGE_SHIFT);
@@ -230,6 +230,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (area == 0)
return NULL;
+ area->phys_addr = p;
v = (unsigned long) area->addr;
} else {
v = (ioremap_bot -= size);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 21d6dfa..88927a0 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -223,6 +223,8 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
caller);
if (area == NULL)
return NULL;
+
+ area->phys_addr = paligned;
ret = __ioremap_at(paligned, area->addr, size, flags);
if (!ret)
vunmap(area->addr);
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 82ff326..c04d16d 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -1,4 +1,7 @@
-obj-$(CONFIG_44x) := misc_44x.o idle.o
+obj-$(CONFIG_44x) += misc_44x.o
+ifneq ($(CONFIG_PPC4xx_CPM),y)
+obj-$(CONFIG_44x) += idle.o
+endif
obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
obj-$(CONFIG_EBONY) += ebony.o
obj-$(CONFIG_SAM440EP) += sam440ep.o
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 956154f..2057682 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -313,13 +313,14 @@ config OF_RTC
source "arch/powerpc/sysdev/bestcomm/Kconfig"
config MPC8xxx_GPIO
- bool "MPC8xxx GPIO support"
- depends on PPC_MPC831x || PPC_MPC834x || PPC_MPC837x || FSL_SOC_BOOKE || PPC_86xx
+ bool "MPC512x/MPC8xxx GPIO support"
+ depends on PPC_MPC512x || PPC_MPC831x || PPC_MPC834x || PPC_MPC837x || \
+ FSL_SOC_BOOKE || PPC_86xx
select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
help
Say Y here if you're going to use hardware that connects to the
- MPC831x/834x/837x/8572/8610 GPIOs.
+ MPC512x/831x/834x/837x/8572/8610 GPIOs.
config SIMPLE_GPIO
bool "Support for simple, memory-mapped GPIO controllers"
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
index beec405..3ce6855 100644
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
@@ -76,7 +76,7 @@ static void __init celleb_init_direct_mapping(void)
static void celleb_dma_dev_setup(struct device *dev)
{
- dev->archdata.dma_ops = get_pci_dma_ops();
+ set_dma_ops(dev, &dma_direct_ops);
set_dma_offset(dev, celleb_dma_direct_offset);
}
@@ -106,7 +106,6 @@ static struct notifier_block celleb_of_bus_notifier = {
static int __init celleb_init_iommu(void)
{
celleb_init_direct_mapping();
- set_pci_dma_ops(&dma_direct_ops);
ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
index a101abf..3b894f5 100644
--- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -36,10 +36,9 @@ static int spu_alloc_lscsa_std(struct spu_state *csa)
struct spu_lscsa *lscsa;
unsigned char *p;
- lscsa = vmalloc(sizeof(struct spu_lscsa));
+ lscsa = vzalloc(sizeof(struct spu_lscsa));
if (!lscsa)
return -ENOMEM;
- memset(lscsa, 0, sizeof(struct spu_lscsa));
csa->lscsa = lscsa;
/* Set LS pages reserved to allow for user-space mapping. */
diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
index 054dfe5..f803f4b 100644
--- a/arch/powerpc/platforms/chrp/time.c
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -29,6 +29,10 @@
extern spinlock_t rtc_lock;
+#define NVRAM_AS0 0x74
+#define NVRAM_AS1 0x75
+#define NVRAM_DATA 0x77
+
static int nvram_as1 = NVRAM_AS1;
static int nvram_as0 = NVRAM_AS0;
static int nvram_data = NVRAM_DATA;
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index 42d0a88..b5e026b 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -1045,71 +1045,9 @@ static const struct file_operations mf_side_proc_fops = {
.write = mf_side_proc_write,
};
-#if 0
-static void mf_getSrcHistory(char *buffer, int size)
-{
- struct IplTypeReturnStuff return_stuff;
- struct pending_event *ev = new_pending_event();
- int rc = 0;
- char *pages[4];
-
- pages[0] = kmalloc(4096, GFP_ATOMIC);
- pages[1] = kmalloc(4096, GFP_ATOMIC);
- pages[2] = kmalloc(4096, GFP_ATOMIC);
- pages[3] = kmalloc(4096, GFP_ATOMIC);
- if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL)
- || (pages[2] == NULL) || (pages[3] == NULL))
- return -ENOMEM;
-
- return_stuff.xType = 0;
- return_stuff.xRc = 0;
- return_stuff.xDone = 0;
- ev->event.hp_lp_event.xSubtype = 6;
- ev->event.hp_lp_event.x.xSubtypeData =
- subtype_data('M', 'F', 'V', 'I');
- ev->event.data.vsp_cmd.xEvent = &return_stuff;
- ev->event.data.vsp_cmd.cmd = 4;
- ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex();
- ev->event.data.vsp_cmd.result_code = 0xFF;
- ev->event.data.vsp_cmd.reserved = 0;
- ev->event.data.vsp_cmd.sub_data.page[0] = iseries_hv_addr(pages[0]);
- ev->event.data.vsp_cmd.sub_data.page[1] = iseries_hv_addr(pages[1]);
- ev->event.data.vsp_cmd.sub_data.page[2] = iseries_hv_addr(pages[2]);
- ev->event.data.vsp_cmd.sub_data.page[3] = iseries_hv_addr(pages[3]);
- mb();
- if (signal_event(ev) != 0)
- return;
-
- while (return_stuff.xDone != 1)
- udelay(10);
- if (return_stuff.xRc == 0)
- memcpy(buffer, pages[0], size);
- kfree(pages[0]);
- kfree(pages[1]);
- kfree(pages[2]);
- kfree(pages[3]);
-}
-#endif
-
static int mf_src_proc_show(struct seq_file *m, void *v)
{
-#if 0
- int len;
-
- mf_getSrcHistory(page, count);
- len = count;
- len -= off;
- if (len < count) {
- *eof = 1;
- if (len <= 0)
- return 0;
- } else
- len = count;
- *start = page + off;
- return len;
-#else
return 0;
-#endif
}
static int mf_src_proc_open(struct inode *inode, struct file *file)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 1f9fb2c..14943ef 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -156,20 +156,12 @@ static void iommu_table_iobmap_setup(void)
static void pci_dma_bus_setup_pasemi(struct pci_bus *bus)
{
- struct device_node *dn;
-
pr_debug("pci_dma_bus_setup, bus %p, bus->self %p\n", bus, bus->self);
if (!iommu_table_iobmap_inited) {
iommu_table_iobmap_inited = 1;
iommu_table_iobmap_setup();
}
-
- dn = pci_bus_to_OF_node(bus);
-
- if (dn)
- PCI_DN(dn)->iommu_table = &iommu_table_iobmap;
-
}
@@ -192,9 +184,6 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
set_iommu_table_base(&dev->dev, &iommu_table_iobmap);
}
-static void pci_dma_bus_setup_null(struct pci_bus *b) { }
-static void pci_dma_dev_setup_null(struct pci_dev *d) { }
-
int __init iob_init(struct device_node *dn)
{
unsigned long tmp;
@@ -251,14 +240,8 @@ void __init iommu_init_early_pasemi(void)
iommu_off = of_chosen &&
of_get_property(of_chosen, "linux,iommu-off", NULL);
#endif
- if (iommu_off) {
- /* Direct I/O, IOMMU off */
- ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_null;
- ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_null;
- set_pci_dma_ops(&dma_direct_ops);
-
+ if (iommu_off)
return;
- }
iob_init(NULL);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 9deb274..d5aceb7 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -506,6 +506,15 @@ static int __init pmac_declare_of_platform_devices(void)
of_platform_device_create(np, "smu", NULL);
of_node_put(np);
}
+ np = of_find_node_by_type(NULL, "fcu");
+ if (np == NULL) {
+ /* Some machines have strangely broken device-tree */
+ np = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e");
+ }
+ if (np) {
+ of_platform_device_create(np, "temperature", NULL);
+ of_node_put(np);
+ }
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 3139814..5d1b743 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,6 +33,16 @@ config PSERIES_MSI
depends on PCI_MSI && EEH
default y
+config PSERIES_ENERGY
+ tristate "pSeries energy management capabilities driver"
+ depends on PPC_PSERIES
+ default y
+ help
+ Provides interface to platform energy management capabilities
+ on supported PSERIES platforms.
+ Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+ and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
config SCANLOG
tristate "Scanlog dump interface"
depends on RTAS_PROC && PPC_PSERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 59eb8bd..fc52378 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PSERIES_MSI) += msi.o
+obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 0a14d8c..0b0eff0 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -55,6 +55,7 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = {
{FW_FEATURE_XDABR, "hcall-xdabr"},
{FW_FEATURE_MULTITCE, "hcall-multi-tce"},
{FW_FEATURE_SPLPAR, "hcall-splpar"},
+ {FW_FEATURE_VPHN, "hcall-vphn"},
};
/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 48d2057..fd05fde 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -11,6 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
#define STK_PARM(i) (48 + ((i)-3)*8)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a77bcae..edea60b 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -140,7 +140,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU(u64 *, tce_page);
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -323,14 +323,13 @@ static void iommu_table_setparms(struct pci_controller *phb,
static void iommu_table_setparms_lpar(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl,
- const void *dma_window,
- int bussubno)
+ const void *dma_window)
{
unsigned long offset, size;
- tbl->it_busno = bussubno;
of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
+ tbl->it_busno = phb->bus->number;
tbl->it_base = 0;
tbl->it_blocksize = 16;
tbl->it_type = TCE_PCI;
@@ -450,14 +449,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->iommu_table) {
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
ppci->phb->node);
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window,
- bus->number);
+ iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
pr_debug(" created table: %p\n", ppci->iommu_table);
}
-
- if (pdn != dn)
- PCI_DN(dn)->iommu_table = ppci->iommu_table;
}
@@ -533,21 +528,11 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
}
pr_debug(" parent is %s\n", pdn->full_name);
- /* Check for parent == NULL so we don't try to setup the empty EADS
- * slots on POWER4 machines.
- */
- if (dma_window == NULL || pdn->parent == NULL) {
- pr_debug(" no dma window for device, linking to parent\n");
- set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table);
- return;
- }
-
pci = PCI_DN(pdn);
if (!pci->iommu_table) {
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
pci->phb->node);
- iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window,
- pci->phb->bus->number);
+ iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
pr_debug(" created table: %p\n", pci->iommu_table);
} else {
@@ -571,8 +556,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
switch (action) {
case PSERIES_RECONFIG_REMOVE:
- if (pci && pci->iommu_table &&
- of_get_property(np, "ibm,dma-window", NULL))
+ if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
break;
default:
@@ -589,13 +573,8 @@ static struct notifier_block iommu_reconfig_nb = {
/* These are called very early. */
void iommu_init_early_pSeries(void)
{
- if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) {
- /* Direct I/O, IOMMU off */
- ppc_md.pci_dma_dev_setup = NULL;
- ppc_md.pci_dma_bus_setup = NULL;
- set_pci_dma_ops(&dma_direct_ops);
+ if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
return;
- }
if (firmware_has_feature(FW_FEATURE_LPAR)) {
if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
@@ -622,3 +601,17 @@ void iommu_init_early_pSeries(void)
set_pci_dma_ops(&dma_iommu_ops);
}
+static int __init disable_multitce(char *str)
+{
+ if (strcmp(str, "off") == 0 &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
+ ppc_md.tce_build = tce_build_pSeriesLP;
+ ppc_md.tce_free = tce_free_pSeriesLP;
+ powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+ }
+ return 1;
+}
+
+__setup("multitce=", disable_multitce);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f129040..5d3ea9f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -627,6 +627,18 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
+static int __init disable_bulk_remove(char *str)
+{
+ if (strcmp(str, "off") == 0 &&
+ firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+ printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
+ powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+ }
+ return 1;
+}
+
+__setup("bulk_remove=", disable_bulk_remove);
+
void __init hpte_init_lpar(void)
{
ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index bc3c7f2..7e828ba 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -22,11 +22,25 @@
#include <asm/prom.h>
#include <asm/machdep.h>
+/* Max bytes to read/write in one go */
+#define NVRW_CNT 0x20
+
static unsigned int nvram_size;
static int nvram_fetch, nvram_store;
static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
static DEFINE_SPINLOCK(nvram_lock);
+static long nvram_error_log_index = -1;
+static long nvram_error_log_size = 0;
+
+struct err_log_info {
+ int error_type;
+ unsigned int seq_num;
+};
+#define NVRAM_MAX_REQ 2079
+#define NVRAM_MIN_REQ 1055
+
+#define NVRAM_LOG_PART_NAME "ibm,rtas-log"
static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
{
@@ -119,6 +133,197 @@ static ssize_t pSeries_nvram_get_size(void)
return nvram_size ? nvram_size : -ENODEV;
}
+
+/* nvram_write_error_log
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode. If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk. For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that is would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will looks like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name | data |
+ * |0 |1 |2 3|4 15|16 length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log |
+ * |0 3|4 7|8 nvram_error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_error_log(char * buff, int length,
+ unsigned int err_type, unsigned int error_log_cnt)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1) {
+ return -ESPIPE;
+ }
+
+ if (length > nvram_error_log_size) {
+ length = nvram_error_log_size;
+ }
+
+ info.error_type = err_type;
+ info.seq_num = error_log_cnt;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_write(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char * buff, int length,
+ unsigned int * err_type, unsigned int * error_log_cnt)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ if (length > nvram_error_log_size)
+ length = nvram_error_log_size;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_read(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ *error_log_cnt = info.seq_num;
+ *err_type = info.error_type;
+
+ return 0;
+}
+
+/* This doesn't actually zero anything, but it sets the event_logged
+ * word to tell that this event is safely in syslog.
+ */
+int nvram_clear_error_log(void)
+{
+ loff_t tmp_index;
+ int clear_word = ERR_FLAG_ALREADY_LOGGED;
+ int rc;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* pseries_nvram_init_log_partition
+ *
+ * This will setup the partition we need for buffering the
+ * error logs and cleanup partitions if needed.
+ *
+ * The general strategy is the following:
+ * 1.) If there is log partition large enough then use it.
+ * 2.) If there is none large enough, search
+ * for a free partition that is large enough.
+ * 3.) If there is not a free partition large enough remove
+ * _all_ OS partitions and consolidate the space.
+ * 4.) Will first try getting a chunk that will satisfy the maximum
+ * error log size (NVRAM_MAX_REQ).
+ * 5.) If the max chunk cannot be allocated then try finding a chunk
+ * that will satisfy the minum needed (NVRAM_MIN_REQ).
+ */
+static int __init pseries_nvram_init_log_partition(void)
+{
+ loff_t p;
+ int size;
+
+ /* Scan nvram for partitions */
+ nvram_scan_partitions();
+
+ /* Lookg for ours */
+ p = nvram_find_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS, &size);
+
+ /* Found one but too small, remove it */
+ if (p && size < NVRAM_MIN_REQ) {
+ pr_info("nvram: Found too small "NVRAM_LOG_PART_NAME" partition"
+ ",removing it...");
+ nvram_remove_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS);
+ p = 0;
+ }
+
+ /* Create one if we didn't find */
+ if (!p) {
+ p = nvram_create_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS,
+ NVRAM_MAX_REQ, NVRAM_MIN_REQ);
+ /* No room for it, try to get rid of any OS partition
+ * and try again
+ */
+ if (p == -ENOSPC) {
+ pr_info("nvram: No room to create "NVRAM_LOG_PART_NAME
+ " partition, deleting all OS partitions...");
+ nvram_remove_partition(NULL, NVRAM_SIG_OS);
+ p = nvram_create_partition(NVRAM_LOG_PART_NAME,
+ NVRAM_SIG_OS, NVRAM_MAX_REQ,
+ NVRAM_MIN_REQ);
+ }
+ }
+
+ if (p <= 0) {
+ pr_err("nvram: Failed to find or create "NVRAM_LOG_PART_NAME
+ " partition, err %d\n", (int)p);
+ return 0;
+ }
+
+ nvram_error_log_index = p;
+ nvram_error_log_size = nvram_get_partition_size(p) -
+ sizeof(struct err_log_info);
+
+ return 0;
+}
+machine_arch_initcall(pseries, pseries_nvram_init_log_partition);
+
int __init pSeries_nvram_init(void)
{
struct device_node *nvram;
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
new file mode 100644
index 0000000..c8b3c69
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -0,0 +1,326 @@
+/*
+ * POWER platform energy management driver
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This pseries platform device driver provides access to
+ * platform energy management capabilities.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_energy"
+
+/* Driver flags */
+
+static int sysfs_entries;
+
+/* Helper routines */
+
+/*
+ * Routine to detect firmware support for hcall
+ * return 1 if H_BEST_ENERGY is supported
+ * else return 0
+ */
+
+static int check_for_h_best_energy(void)
+{
+ struct device_node *rtas = NULL;
+ const char *hypertas, *s;
+ int length;
+ int rc = 0;
+
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return 0;
+
+ hypertas = of_get_property(rtas, "ibm,hypertas-functions", &length);
+ if (!hypertas) {
+ of_node_put(rtas);
+ return 0;
+ }
+
+ /* hypertas will have list of strings with hcall names */
+ for (s = hypertas; s < hypertas + length; s += strlen(s) + 1) {
+ if (!strncmp("hcall-best-energy-1", s, 19)) {
+ rc = 1; /* Found the string */
+ break;
+ }
+ }
+ of_node_put(rtas);
+ return rc;
+}
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+static u32 cpu_to_drc_index(int cpu)
+{
+ struct device_node *dn = NULL;
+ const int *indexes;
+ int i;
+ int rc = 1;
+ u32 ret = 0;
+
+ dn = of_find_node_by_path("/cpus");
+ if (dn == NULL)
+ goto err;
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /* Convert logical cpu number to core number */
+ i = cpu_core_index_of_thread(cpu);
+ /*
+ * The first element indexes[0] is the number of drc_indexes
+ * returned in the list. Hence i+1 will get the drc_index
+ * corresponding to core number i.
+ */
+ WARN_ON(i > indexes[0]);
+ ret = indexes[i + 1];
+ rc = 0;
+
+err_of_node_put:
+ of_node_put(dn);
+err:
+ if (rc)
+ printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu);
+ return ret;
+}
+
+static int drc_index_to_cpu(u32 drc_index)
+{
+ struct device_node *dn = NULL;
+ const int *indexes;
+ int i, cpu = 0;
+ int rc = 1;
+
+ dn = of_find_node_by_path("/cpus");
+ if (dn == NULL)
+ goto err;
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /*
+ * First element in the array is the number of drc_indexes
+ * returned. Search through the list to find the matching
+ * drc_index and get the core number
+ */
+ for (i = 0; i < indexes[0]; i++) {
+ if (indexes[i + 1] == drc_index)
+ break;
+ }
+ /* Convert core number to logical cpu number */
+ cpu = cpu_first_thread_of_core(i);
+ rc = 0;
+
+err_of_node_put:
+ of_node_put(dn);
+err:
+ if (rc)
+ printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
+ return cpu;
+}
+
+/*
+ * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
+ * preferred logical cpus to activate or deactivate for optimized
+ * energy consumption.
+ */
+
+#define FLAGS_MODE1 0x004E200000080E01
+#define FLAGS_MODE2 0x004E200000080401
+#define FLAGS_ACTIVATE 0x100
+
+static ssize_t get_best_energy_list(char *page, int activate)
+{
+ int rc, cnt, i, cpu;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long flags = 0;
+ u32 *buf_page;
+ char *s = page;
+
+ buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
+ if (!buf_page)
+ return -ENOMEM;
+
+ flags = FLAGS_MODE1;
+ if (activate)
+ flags |= FLAGS_ACTIVATE;
+
+ rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
+ 0, 0, 0, 0, 0, 0);
+ if (rc != H_SUCCESS) {
+ free_page((unsigned long) buf_page);
+ return -EINVAL;
+ }
+
+ cnt = retbuf[0];
+ for (i = 0; i < cnt; i++) {
+ cpu = drc_index_to_cpu(buf_page[2*i+1]);
+ if ((cpu_online(cpu) && !activate) ||
+ (!cpu_online(cpu) && activate))
+ s += sprintf(s, "%d,", cpu);
+ }
+ if (s > page) { /* Something to show */
+ s--; /* Suppress last comma */
+ s += sprintf(s, "\n");
+ }
+
+ free_page((unsigned long) buf_page);
+ return s-page;
+}
+
+static ssize_t get_best_energy_data(struct sys_device *dev,
+ char *page, int activate)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long flags = 0;
+
+ flags = FLAGS_MODE2;
+ if (activate)
+ flags |= FLAGS_ACTIVATE;
+
+ rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
+ cpu_to_drc_index(dev->id),
+ 0, 0, 0, 0, 0, 0, 0);
+
+ if (rc != H_SUCCESS)
+ return -EINVAL;
+
+ return sprintf(page, "%lu\n", retbuf[1] >> 32);
+}
+
+/* Wrapper functions */
+
+static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 1);
+}
+
+static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 0);
+}
+
+static ssize_t percpu_activate_hint_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 1);
+}
+
+static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 0);
+}
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/system/cpu/pseries_activate_hint_list
+ * /sys/devices/system/cpu/pseries_deactivate_hint_list
+ * Comma separated list of cpus to activate or deactivate
+ * /sys/devices/system/cpu/cpuN/pseries_activate_hint
+ * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
+ * Per-cpu value of the hint
+ */
+
+struct sysdev_class_attribute attr_cpu_activate_hint_list =
+ _SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
+ cpu_activate_hint_list_show, NULL);
+
+struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
+ _SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
+ cpu_deactivate_hint_list_show, NULL);
+
+struct sysdev_attribute attr_percpu_activate_hint =
+ _SYSDEV_ATTR(pseries_activate_hint, 0444,
+ percpu_activate_hint_show, NULL);
+
+struct sysdev_attribute attr_percpu_deactivate_hint =
+ _SYSDEV_ATTR(pseries_deactivate_hint, 0444,
+ percpu_deactivate_hint_show, NULL);
+
+static int __init pseries_energy_init(void)
+{
+ int cpu, err;
+ struct sys_device *cpu_sys_dev;
+
+ if (!check_for_h_best_energy()) {
+ printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
+ return 0;
+ }
+ /* Create the sysfs files */
+ err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_activate_hint_list.attr);
+ if (!err)
+ err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_deactivate_hint_list.attr);
+
+ if (err)
+ return err;
+ for_each_possible_cpu(cpu) {
+ cpu_sys_dev = get_cpu_sysdev(cpu);
+ err = sysfs_create_file(&cpu_sys_dev->kobj,
+ &attr_percpu_activate_hint.attr);
+ if (err)
+ break;
+ err = sysfs_create_file(&cpu_sys_dev->kobj,
+ &attr_percpu_deactivate_hint.attr);
+ if (err)
+ break;
+ }
+
+ if (err)
+ return err;
+
+ sysfs_entries = 1; /* Removed entries on cleanup */
+ return 0;
+
+}
+
+static void __exit pseries_energy_cleanup(void)
+{
+ int cpu;
+ struct sys_device *cpu_sys_dev;
+
+ if (!sysfs_entries)
+ return;
+
+ /* Remove the sysfs files */
+ sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_activate_hint_list.attr);
+
+ sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_deactivate_hint_list.attr);
+
+ for_each_possible_cpu(cpu) {
+ cpu_sys_dev = get_cpu_sysdev(cpu);
+ sysfs_remove_file(&cpu_sys_dev->kobj,
+ &attr_percpu_activate_hint.attr);
+ sysfs_remove_file(&cpu_sys_dev->kobj,
+ &attr_percpu_deactivate_hint.attr);
+ }
+}
+
+module_init(pseries_energy_init);
+module_exit(pseries_energy_cleanup);
+MODULE_DESCRIPTION("Driver for pSeries platform energy management");
+MODULE_AUTHOR("Vaidyanathan Srinivasan");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 0bef9da..9c29734 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o
ifeq ($(CONFIG_PCI),y)
obj-$(CONFIG_4xx) += ppc4xx_pci.o
endif
+obj-$(CONFIG_PPC4xx_CPM) += ppc4xx_cpm.o
obj-$(CONFIG_PPC4xx_GPIO) += ppc4xx_gpio.o
obj-$(CONFIG_CPM) += cpm_common.o
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 17cf15e..8e9e06a7c 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -312,17 +312,10 @@ static void pci_dma_dev_setup_dart(struct pci_dev *dev)
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
- struct device_node *dn;
-
if (!iommu_table_dart_inited) {
iommu_table_dart_inited = 1;
iommu_table_dart_setup();
}
-
- dn = pci_bus_to_OF_node(bus);
-
- if (dn)
- PCI_DN(dn)->iommu_table = &iommu_table_dart;
}
static bool dart_device_on_pcie(struct device *dev)
@@ -373,7 +366,7 @@ void __init iommu_init_early_dart(void)
if (dn == NULL) {
dn = of_find_compatible_node(NULL, "dart", "u4-dart");
if (dn == NULL)
- goto bail;
+ return; /* use default direct_dma_ops */
dart_is_u4 = 1;
}
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index c0ea05e..c48cd81 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -1,5 +1,5 @@
/*
- * GPIOs on MPC8349/8572/8610 and compatible
+ * GPIOs on MPC512x/8349/8572/8610 and compatible
*
* Copyright (C) 2008 Peter Korsgaard <jacmet@sunsite.dk>
*
@@ -26,6 +26,7 @@
#define GPIO_IER 0x0c
#define GPIO_IMR 0x10
#define GPIO_ICR 0x14
+#define GPIO_ICR2 0x18
struct mpc8xxx_gpio_chip {
struct of_mm_gpio_chip mm_gc;
@@ -37,6 +38,7 @@ struct mpc8xxx_gpio_chip {
*/
u32 data;
struct irq_host *irq;
+ void *of_dev_id_data;
};
static inline u32 mpc8xxx_gpio2mask(unsigned int gpio)
@@ -215,6 +217,51 @@ static int mpc8xxx_irq_set_type(unsigned int virq, unsigned int flow_type)
return 0;
}
+static int mpc512x_irq_set_type(unsigned int virq, unsigned int flow_type)
+{
+ struct mpc8xxx_gpio_chip *mpc8xxx_gc = get_irq_chip_data(virq);
+ struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
+ unsigned long gpio = virq_to_hw(virq);
+ void __iomem *reg;
+ unsigned int shift;
+ unsigned long flags;
+
+ if (gpio < 16) {
+ reg = mm->regs + GPIO_ICR;
+ shift = (15 - gpio) * 2;
+ } else {
+ reg = mm->regs + GPIO_ICR2;
+ shift = (15 - (gpio % 16)) * 2;
+ }
+
+ switch (flow_type) {
+ case IRQ_TYPE_EDGE_FALLING:
+ case IRQ_TYPE_LEVEL_LOW:
+ spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
+ clrsetbits_be32(reg, 3 << shift, 2 << shift);
+ spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
+ break;
+
+ case IRQ_TYPE_EDGE_RISING:
+ case IRQ_TYPE_LEVEL_HIGH:
+ spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
+ clrsetbits_be32(reg, 3 << shift, 1 << shift);
+ spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
+ break;
+
+ case IRQ_TYPE_EDGE_BOTH:
+ spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
+ clrbits32(reg, 3 << shift);
+ spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct irq_chip mpc8xxx_irq_chip = {
.name = "mpc8xxx-gpio",
.unmask = mpc8xxx_irq_unmask,
@@ -226,6 +273,11 @@ static struct irq_chip mpc8xxx_irq_chip = {
static int mpc8xxx_gpio_irq_map(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
+ struct mpc8xxx_gpio_chip *mpc8xxx_gc = h->host_data;
+
+ if (mpc8xxx_gc->of_dev_id_data)
+ mpc8xxx_irq_chip.set_type = mpc8xxx_gc->of_dev_id_data;
+
set_irq_chip_data(virq, h->host_data);
set_irq_chip_and_handler(virq, &mpc8xxx_irq_chip, handle_level_irq);
set_irq_type(virq, IRQ_TYPE_NONE);
@@ -253,11 +305,20 @@ static struct irq_host_ops mpc8xxx_gpio_irq_ops = {
.xlate = mpc8xxx_gpio_irq_xlate,
};
+static struct of_device_id mpc8xxx_gpio_ids[] __initdata = {
+ { .compatible = "fsl,mpc8349-gpio", },
+ { .compatible = "fsl,mpc8572-gpio", },
+ { .compatible = "fsl,mpc8610-gpio", },
+ { .compatible = "fsl,mpc5121-gpio", .data = mpc512x_irq_set_type, },
+ {}
+};
+
static void __init mpc8xxx_add_controller(struct device_node *np)
{
struct mpc8xxx_gpio_chip *mpc8xxx_gc;
struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
+ const struct of_device_id *id;
unsigned hwirq;
int ret;
@@ -297,6 +358,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
if (!mpc8xxx_gc->irq)
goto skip_irq;
+ id = of_match_node(mpc8xxx_gpio_ids, np);
+ if (id)
+ mpc8xxx_gc->of_dev_id_data = id->data;
+
mpc8xxx_gc->irq->host_data = mpc8xxx_gc;
/* ack and mask all irqs */
@@ -321,13 +386,7 @@ static int __init mpc8xxx_add_gpiochips(void)
{
struct device_node *np;
- for_each_compatible_node(np, NULL, "fsl,mpc8349-gpio")
- mpc8xxx_add_controller(np);
-
- for_each_compatible_node(np, NULL, "fsl,mpc8572-gpio")
- mpc8xxx_add_controller(np);
-
- for_each_compatible_node(np, NULL, "fsl,mpc8610-gpio")
+ for_each_matching_node(np, mpc8xxx_gpio_ids)
mpc8xxx_add_controller(np);
for_each_compatible_node(np, NULL, "fsl,qoriq-gpio")
diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c
new file mode 100644
index 0000000..73b86cc
--- /dev/null
+++ b/arch/powerpc/sysdev/ppc4xx_cpm.c
@@ -0,0 +1,346 @@
+/*
+ * PowerPC 4xx Clock and Power Management
+ *
+ * Copyright (C) 2010, Applied Micro Circuits Corporation
+ * Victor Gallardo (vgallardo@apm.com)
+ *
+ * Based on arch/powerpc/platforms/44x/idle.c:
+ * Jerone Young <jyoung5@us.ibm.com>
+ * Copyright 2008 IBM Corp.
+ *
+ * Based on arch/powerpc/sysdev/fsl_pmc.c:
+ * Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Copyright 2009 MontaVista Software, Inc.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+#include <linux/suspend.h>
+#include <asm/dcr.h>
+#include <asm/dcr-native.h>
+#include <asm/machdep.h>
+
+#define CPM_ER 0
+#define CPM_FR 1
+#define CPM_SR 2
+
+#define CPM_IDLE_WAIT 0
+#define CPM_IDLE_DOZE 1
+
+struct cpm {
+ dcr_host_t dcr_host;
+ unsigned int dcr_offset[3];
+ unsigned int powersave_off;
+ unsigned int unused;
+ unsigned int idle_doze;
+ unsigned int standby;
+ unsigned int suspend;
+};
+
+static struct cpm cpm;
+
+struct cpm_idle_mode {
+ unsigned int enabled;
+ const char *name;
+};
+
+static struct cpm_idle_mode idle_mode[] = {
+ [CPM_IDLE_WAIT] = { 1, "wait" }, /* default */
+ [CPM_IDLE_DOZE] = { 0, "doze" },
+};
+
+static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask)
+{
+ unsigned int value;
+
+ /* CPM controller supports 3 different types of sleep interface
+ * known as class 1, 2 and 3. For class 1 units, they are
+ * unconditionally put to sleep when the corresponding CPM bit is
+ * set. For class 2 and 3 units this is not case; if they can be
+ * put to to sleep, they will. Here we do not verify, we just
+ * set them and expect them to eventually go off when they can.
+ */
+ value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]);
+ dcr_write(cpm.dcr_host, cpm.dcr_offset[cpm_reg], value | mask);
+
+ /* return old state, to restore later if needed */
+ return value;
+}
+
+static void cpm_idle_wait(void)
+{
+ unsigned long msr_save;
+
+ /* save off initial state */
+ msr_save = mfmsr();
+ /* sync required when CPM0_ER[CPU] is set */
+ mb();
+ /* set wait state MSR */
+ mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE);
+ isync();
+ /* return to initial state */
+ mtmsr(msr_save);
+ isync();
+}
+
+static void cpm_idle_sleep(unsigned int mask)
+{
+ unsigned int er_save;
+
+ /* update CPM_ER state */
+ er_save = cpm_set(CPM_ER, mask);
+
+ /* go to wait state so that CPM0_ER[CPU] can take effect */
+ cpm_idle_wait();
+
+ /* restore CPM_ER state */
+ dcr_write(cpm.dcr_host, cpm.dcr_offset[CPM_ER], er_save);
+}
+
+static void cpm_idle_doze(void)
+{
+ cpm_idle_sleep(cpm.idle_doze);
+}
+
+static void cpm_idle_config(int mode)
+{
+ int i;
+
+ if (idle_mode[mode].enabled)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(idle_mode); i++)
+ idle_mode[i].enabled = 0;
+
+ idle_mode[mode].enabled = 1;
+}
+
+static ssize_t cpm_idle_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ char *s = buf;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+ if (idle_mode[i].enabled)
+ s += sprintf(s, "[%s] ", idle_mode[i].name);
+ else
+ s += sprintf(s, "%s ", idle_mode[i].name);
+ }
+
+ *(s-1) = '\n'; /* convert the last space to a newline */
+
+ return s - buf;
+}
+
+static ssize_t cpm_idle_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ int i;
+ char *p;
+ int len;
+
+ p = memchr(buf, '\n', n);
+ len = p ? p - buf : n;
+
+ for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+ if (strncmp(buf, idle_mode[i].name, len) == 0) {
+ cpm_idle_config(i);
+ return n;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static struct kobj_attribute cpm_idle_attr =
+ __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store);
+
+static void cpm_idle_config_sysfs(void)
+{
+ struct sys_device *sys_dev;
+ unsigned long ret;
+
+ sys_dev = get_cpu_sysdev(0);
+
+ ret = sysfs_create_file(&sys_dev->kobj,
+ &cpm_idle_attr.attr);
+ if (ret)
+ printk(KERN_WARNING
+ "cpm: failed to create idle sysfs entry\n");
+}
+
+static void cpm_idle(void)
+{
+ if (idle_mode[CPM_IDLE_DOZE].enabled)
+ cpm_idle_doze();
+ else
+ cpm_idle_wait();
+}
+
+static int cpm_suspend_valid(suspend_state_t state)
+{
+ switch (state) {
+ case PM_SUSPEND_STANDBY:
+ return !!cpm.standby;
+ case PM_SUSPEND_MEM:
+ return !!cpm.suspend;
+ default:
+ return 0;
+ }
+}
+
+static void cpm_suspend_standby(unsigned int mask)
+{
+ unsigned long tcr_save;
+
+ /* disable decrement interrupt */
+ tcr_save = mfspr(SPRN_TCR);
+ mtspr(SPRN_TCR, tcr_save & ~TCR_DIE);
+
+ /* go to sleep state */
+ cpm_idle_sleep(mask);
+
+ /* restore decrement interrupt */
+ mtspr(SPRN_TCR, tcr_save);
+}
+
+static int cpm_suspend_enter(suspend_state_t state)
+{
+ switch (state) {
+ case PM_SUSPEND_STANDBY:
+ cpm_suspend_standby(cpm.standby);
+ break;
+ case PM_SUSPEND_MEM:
+ cpm_suspend_standby(cpm.suspend);
+ break;
+ }
+
+ return 0;
+}
+
+static struct platform_suspend_ops cpm_suspend_ops = {
+ .valid = cpm_suspend_valid,
+ .enter = cpm_suspend_enter,
+};
+
+static int cpm_get_uint_property(struct device_node *np,
+ const char *name)
+{
+ int len;
+ const unsigned int *prop = of_get_property(np, name, &len);
+
+ if (prop == NULL || len < sizeof(u32))
+ return 0;
+
+ return *prop;
+}
+
+static int __init cpm_init(void)
+{
+ struct device_node *np;
+ int dcr_base, dcr_len;
+ int ret = 0;
+
+ if (!cpm.powersave_off) {
+ cpm_idle_config(CPM_IDLE_WAIT);
+ ppc_md.power_save = &cpm_idle;
+ }
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,cpm");
+ if (!np) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+
+ if (dcr_base == 0 || dcr_len == 0) {
+ printk(KERN_ERR "cpm: could not parse dcr property for %s\n",
+ np->full_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
+
+ if (!DCR_MAP_OK(cpm.dcr_host)) {
+ printk(KERN_ERR "cpm: failed to map dcr property for %s\n",
+ np->full_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* All 4xx SoCs with a CPM controller have one of two
+ * different order for the CPM registers. Some have the
+ * CPM registers in the following order (ER,FR,SR). The
+ * others have them in the following order (SR,ER,FR).
+ */
+
+ if (cpm_get_uint_property(np, "er-offset") == 0) {
+ cpm.dcr_offset[CPM_ER] = 0;
+ cpm.dcr_offset[CPM_FR] = 1;
+ cpm.dcr_offset[CPM_SR] = 2;
+ } else {
+ cpm.dcr_offset[CPM_ER] = 1;
+ cpm.dcr_offset[CPM_FR] = 2;
+ cpm.dcr_offset[CPM_SR] = 0;
+ }
+
+ /* Now let's see what IPs to turn off for the following modes */
+
+ cpm.unused = cpm_get_uint_property(np, "unused-units");
+ cpm.idle_doze = cpm_get_uint_property(np, "idle-doze");
+ cpm.standby = cpm_get_uint_property(np, "standby");
+ cpm.suspend = cpm_get_uint_property(np, "suspend");
+
+ /* If some IPs are unused let's turn them off now */
+
+ if (cpm.unused) {
+ cpm_set(CPM_ER, cpm.unused);
+ cpm_set(CPM_FR, cpm.unused);
+ }
+
+ /* Now let's export interfaces */
+
+ if (!cpm.powersave_off && cpm.idle_doze)
+ cpm_idle_config_sysfs();
+
+ if (cpm.standby || cpm.suspend)
+ suspend_set_ops(&cpm_suspend_ops);
+out:
+ if (np)
+ of_node_put(np);
+ return ret;
+}
+
+late_initcall(cpm_init);
+
+static int __init cpm_powersave_off(char *arg)
+{
+ cpm.powersave_off = 1;
+ return 0;
+}
+__setup("powersave=off", cpm_powersave_off);
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index c2d675b..ee05680 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -84,8 +84,8 @@ static int __init tsi108_eth_of_init(void)
memset(&tsi_eth_data, 0, sizeof(tsi_eth_data));
ret = of_address_to_resource(np, 0, &r[0]);
- DBG("%s: name:start->end = %s:0x%lx-> 0x%lx\n",
- __func__,r[0].name, r[0].start, r[0].end);
+ DBG("%s: name:start->end = %s:%pR\n",
+ __func__, r[0].name, &r[0]);
if (ret)
goto err;
@@ -93,8 +93,8 @@ static int __init tsi108_eth_of_init(void)
r[1].start = irq_of_parse_and_map(np, 0);
r[1].end = irq_of_parse_and_map(np, 0);
r[1].flags = IORESOURCE_IRQ;
- DBG("%s: name:start->end = %s:0x%lx-> 0x%lx\n",
- __func__,r[1].name, r[1].start, r[1].end);
+ DBG("%s: name:start->end = %s:%pR\n",
+ __func__, r[1].name, &r[1]);
tsi_eth_dev =
platform_device_register_simple("tsi-ethernet", i++, &r[0],
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index e447938..0dc714f 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -375,5 +375,5 @@ static int __init cpu_type_probe(void)
return 0;
}
-arch_initcall(cpu_type_probe);
+early_initcall(cpu_type_probe);
#endif
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index b87873c..ae96cf5 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -168,4 +168,4 @@ out_unregister:
return err;
}
-arch_initcall(pcr_arch_init);
+early_initcall(pcr_arch_init);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 55d106b..211ca3f 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -185,17 +185,16 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
-extern int acpi_get_nodes(struct bootnode *physnodes);
+extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+ unsigned long end);
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+
+#ifdef CONFIG_NUMA_EMU
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
int num_nodes);
-#else
-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes)
-{
-}
#endif
+#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 6aee50d..64dc82e 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,16 +3,27 @@
#include <linux/pci.h>
+struct amd_nb_bus_dev_range {
+ u8 bus;
+ u8 dev_base;
+ u8 dev_limit;
+};
+
extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
-extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
+#ifdef CONFIG_NUMA_EMU
+extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
+extern void amd_get_nodes(struct bootnode *nodes);
+#endif
+
struct amd_northbridge {
struct pci_dev *misc;
};
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0141b23..4729b2b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -116,11 +116,11 @@ enum fixed_addresses {
#endif
FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
- __end_of_permanent_fixed_addresses,
-
#ifdef CONFIG_X86_MRST
FIX_LNW_VRTC,
#endif
+ __end_of_permanent_fixed_addresses,
+
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h
index 49dbfdf..91d915a 100644
--- a/arch/x86/include/asm/gpio.h
+++ b/arch/x86/include/asm/gpio.h
@@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio)
return __gpio_cansleep(gpio);
}
-/*
- * Not implemented, yet.
- */
static inline int gpio_to_irq(unsigned int gpio)
{
- return -ENOSYS;
+ return __gpio_to_irq(gpio);
}
static inline int irq_to_gpio(unsigned int irq)
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f23eb25..ca242d3 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
DIE_TRAP,
DIE_GPF,
DIE_CALL,
- DIE_NMI_IPI,
DIE_PAGE_FAULT,
DIE_NMIUNKNOWN,
};
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index f792060..72a8b52 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
#include <asm/mc146818rtc.h>
+#define NMI_REASON_PORT 0x61
+
+#define NMI_REASON_SERR 0x80
+#define NMI_REASON_IOCHK 0x40
+#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_SERR 0x04
+#define NMI_REASON_CLEAR_IOCHK 0x08
+#define NMI_REASON_CLEAR_MASK 0x0f
+
static inline unsigned char get_nmi_reason(void)
{
- return inb(0x61);
+ return inb(NMI_REASON_PORT);
}
static inline void reassert_nmi(void)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c4021b9..c76f5b9 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
+/*
+ * Define some priorities for the nmi notifier call chain.
+ *
+ * Create a local nmi bit that has a higher priority than
+ * external nmis, because the local ones are more frequent.
+ *
+ * Also setup some default high/normal/low settings for
+ * subsystems to registers with. Using 4 bits to seperate
+ * the priorities. This can go alot higher if needed be.
+ */
+
+#define NMI_LOCAL_SHIFT 16 /* randomly picked */
+#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT)
+#define NMI_HIGH_PRIOR (1ULL << 8)
+#define NMI_NORMAL_PRIOR (1ULL << 4)
+#define NMI_LOW_PRIOR (1ULL << 0)
+#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
+#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
+#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+
void stop_nmi(void);
void restart_nmi(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070..5ae8728 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -38,7 +38,7 @@ extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)64 << 20)
+#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
#endif /* CONFIG_NUMA_EMU */
#else
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 295e2ff..e2f6a99 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -20,6 +20,9 @@
#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
#define ARCH_P4_MAX_CCCR (18)
+#define ARCH_P4_CNTRVAL_BITS (40)
+#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index affacb5..0a99f71 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,13 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
+ { 0x00, 0x18, 0x20 },
+ { 0xff, 0x00, 0x20 },
+ { 0xfe, 0x00, 0x20 },
+ { }
+};
+
struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index dcd7c83..5955a78 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -39,18 +39,6 @@ int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
-struct bus_dev_range {
- int bus;
- int dev_base;
- int dev_limit;
-};
-
-static struct bus_dev_range bus_dev_ranges[] __initdata = {
- { 0x00, 0x18, 0x20},
- { 0xff, 0x00, 0x20},
- { 0xfe, 0x00, 0x20}
-};
-
static struct resource gart_resource = {
.name = "GART",
.flags = IORESOURCE_MEM,
@@ -294,13 +282,13 @@ void __init early_gart_iommu_check(void)
search_agp_bridge(&agp_aper_order, &valid_agp);
fix = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -349,13 +337,13 @@ void __init early_gart_iommu_check(void)
return;
/* disable them all at first */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -390,14 +378,14 @@ int __init gart_iommu_hole_init(void)
fix = 0;
node = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
u32 ctl;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -505,7 +493,7 @@ out:
}
/* Fix up the north bridges */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus, dev_base, dev_limit;
/*
@@ -514,9 +502,9 @@ out:
*/
u32 ctl = DISTLBWALKPRB | aper_order << 1;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a51345b..06c196d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void)
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
clockevent_delta2ns(0xF, &lapic_clockevent);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 72ec29e..79fd43c 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
default:
@@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
static __read_mostly struct notifier_block backtrace_notifier = {
.notifier_call = arch_trigger_all_cpu_backtrace_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static int __init register_trigger_all_cpu_backtrace(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ecca5f4..bd16b58 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
static __cpuinit void set_x2apic_extra_bits(int pnode)
{
- __this_cpu_write(x2apic_extra_bits, (pnode << 6));
+ __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
}
/*
@@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
*/
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{
- if (reason != DIE_NMI_IPI)
+ if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK;
if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7..a779719 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+ if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
static struct notifier_block mce_raise_nb = {
.notifier_call = mce_raise_notify,
- .priority = 1000,
+ .priority = NMI_LOCAL_NORMAL_PRIOR,
};
/* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0492101..9d977a2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1267,7 +1267,6 @@ perf_event_nmi_handler(struct notifier_block *self,
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
case DIE_NMIUNKNOWN:
this_nmi = percpu_read(irq_stat.__nmi_count);
@@ -1317,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static struct event_constraint unconstrained;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b9..e56b9bf 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
- int overflow = 0;
- u32 low, high;
+ u64 v;
- rdmsr(hwc->config_base + hwc->idx, low, high);
-
- /* we need to check high bit for unflagged overflows */
- if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
- overflow = 1;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
- ((u64)low) & ~P4_CCCR_OVF);
+ /* an official way for overflow indication */
+ rdmsrl(hwc->config_base + hwc->idx, v);
+ if (v & P4_CCCR_OVF) {
+ wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ return 1;
}
- return overflow;
+ /* it might be unflagged overflow */
+ rdmsrl(hwc->event_base + hwc->idx, v);
+ if (!(v & ARCH_P4_CNTRVAL_MASK))
+ return 1;
+
+ return 0;
}
static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
*/
.num_counters = ARCH_P4_MAX_CCCR,
.apic = 1,
- .cntval_bits = 40,
- .cntval_mask = (1ULL << 40) - 1,
- .max_period = (1ULL << 39) - 1,
+ .cntval_bits = ARCH_P4_CNTRVAL_BITS,
+ .cntval_mask = ARCH_P4_CNTRVAL_MASK,
+ .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8474c99..d6fb146 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
*/
void dump_stack(void)
{
- unsigned long bp = 0;
unsigned long stack;
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417..d3b895f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
ENTRY(save_args)
XCPT_FRAME
cld
- movq_cfi rdi, RDI+16-ARGOFFSET
- movq_cfi rsi, RSI+16-ARGOFFSET
- movq_cfi rdx, RDX+16-ARGOFFSET
- movq_cfi rcx, RCX+16-ARGOFFSET
- movq_cfi rax, RAX+16-ARGOFFSET
- movq_cfi r8, R8+16-ARGOFFSET
- movq_cfi r9, R9+16-ARGOFFSET
- movq_cfi r10, R10+16-ARGOFFSET
- movq_cfi r11, R11+16-ARGOFFSET
-
- leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+ /*
+ * start from rbp in pt_regs and jump over
+ * return address.
+ */
+ movq_cfi rdi, RDI+8-RBP
+ movq_cfi rsi, RSI+8-RBP
+ movq_cfi rdx, RDX+8-RBP
+ movq_cfi rcx, RCX+8-RBP
+ movq_cfi rax, RAX+8-RBP
+ movq_cfi r8, R8+8-RBP
+ movq_cfi r9, R9+8-RBP
+ movq_cfi r10, R10+8-RBP
+ movq_cfi r11, R11+8-RBP
+
+ leaq -RBP+8(%rsp),%rdi /* arg1 for handler */
movq_cfi rbp, 8 /* push %rbp */
leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- subq $ORIG_RAX-ARGOFFSET+8, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
+ /* reserve pt_regs for scratch regs and rbp */
+ subq $ORIG_RAX-RBP, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
call save_args
PARTIAL_FRAME 0
call \func
@@ -808,9 +813,14 @@ ret_from_intr:
TRACE_IRQS_OFF
decl PER_CPU_VAR(irq_count)
leaveq
+
CFI_RESTORE rbp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
+
+ /* we did not save rbx, restore only from ARGOFFSET */
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b65..a413000 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMI_IPI:
- /* Just ignore, we will handle the roundup on DIE_NMI. */
- return NOTIFY_DONE;
-
case DIE_NMIUNKNOWN:
if (was_in_debug_nmi[raw_smp_processor_id()]) {
was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
/*
* Lowest-prio notifier priority, we want to be notified last:
*/
- .priority = -INT_MAX,
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
/**
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d..fc7aae1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
#include <asm/pci_x86.h>
#include <asm/virtext.h>
#include <asm/cpu.h>
+#include <asm/nmi.h>
#ifdef CONFIG_X86_32
# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
{
int cpu;
- if (val != DIE_NMI_IPI)
+ if (val != DIE_NMI)
return NOTIFY_OK;
cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
static struct notifier_block crash_nmi_nb = {
.notifier_call = crash_nmi_callback,
+ /* we want to be the first one called */
+ .priority = NMI_LOCAL_HIGH_PRIOR+1,
};
/* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c7149c9..763df77 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
*/
static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
-void cpu_hotplug_driver_lock()
+void cpu_hotplug_driver_lock(void)
{
mutex_lock(&x86_cpu_hotplug_driver_mutex);
}
-void cpu_hotplug_driver_unlock()
+void cpu_hotplug_driver_unlock(void)
{
mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c76aaca..b9b6716 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors);
static int ignore_nmis;
int unknown_nmi_panic;
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
static inline void conditional_sti(struct pt_regs *regs)
{
@@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str)
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
-
- printk(KERN_EMERG
- "You have some hardware problem, likely on the PCI bus.\n");
+ pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+ /*
+ * On some machines, PCI SERR line is used to report memory
+ * errors. EDAC makes use of it.
+ */
#if defined(CONFIG_EDAC)
if (edac_handler_set()) {
edac_atomic_assert_error();
@@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
+ /* Clear and disable the PCI SERR error line. */
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ pr_emerg(
+ "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
show_registers(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
i = 20000;
while (--i) {
@@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
udelay(100);
}
- reason &= ~8;
- outb(reason, 0x61);
+ reason &= ~NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
return;
}
#endif
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+ pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
- int cpu;
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
+ /*
+ * CPU-specific NMI must be processed before non-CPU-specific
+ * NMI, otherwise we may lose it, because the CPU-specific
+ * NMI can not be detected/processed on other CPUs.
+ */
+ if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+ raw_spin_lock(&nmi_reason_lock);
+ reason = get_nmi_reason();
-#ifdef CONFIG_X86_LOCAL_APIC
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ if (reason & NMI_REASON_MASK) {
+ if (reason & NMI_REASON_SERR)
+ pci_serr_error(reason, regs);
+ else if (reason & NMI_REASON_IOCHK)
+ io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+ /*
+ * Reassert NMI in case it became active
+ * meanwhile as it's edge-triggered:
+ */
+ reassert_nmi();
#endif
- unknown_nmi_error(reason, regs);
-
+ raw_spin_unlock(&nmi_reason_lock);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
+ raw_spin_unlock(&nmi_reason_lock);
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered:
- */
- reassert_nmi();
-#endif
+ unknown_nmi_error(reason, regs);
}
dotraplinkage notrace __kprobes void
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 03d2ea8..823f79a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -965,7 +965,7 @@ out:
static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0)
+ if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
return 0;
if (tsc_clocksource_reliable)
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 08a0069..f21962c 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -27,6 +27,7 @@
#include <asm/amd_nb.h>
static struct bootnode __initdata nodes[8];
+static unsigned char __initdata nodeids[8];
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void)
@@ -68,19 +69,6 @@ static __init void early_get_boot_cpu_id(void)
#endif
}
-int __init amd_get_nodes(struct bootnode *physnodes)
-{
- int i;
- int ret = 0;
-
- for_each_node_mask(i, nodes_parsed) {
- physnodes[ret].start = nodes[i].start;
- physnodes[ret].end = nodes[i].end;
- ret++;
- }
- return ret;
-}
-
int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long start = PFN_PHYS(start_pfn);
@@ -113,7 +101,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
- nodeid = limit & 7;
+ nodeids[i] = nodeid = limit & 7;
if ((base & 3) == 0) {
if (i < numnodes)
pr_info("Skipping disabled node %d\n", i);
@@ -193,6 +181,76 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
return 0;
}
+#ifdef CONFIG_NUMA_EMU
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
+void __init amd_get_nodes(struct bootnode *physnodes)
+{
+ int i;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[i].start = nodes[i].start;
+ physnodes[i].end = nodes[i].end;
+ }
+}
+
+static int __init find_node_by_addr(unsigned long addr)
+{
+ int ret = NUMA_NO_NODE;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ if (addr >= nodes[i].start && addr < nodes[i].end) {
+ ret = i;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
+ * setup to represent the physical topology but reflect the emulated
+ * environment. For each emulated node, the real node which it appears on is
+ * found and a fake pxm to nid mapping is created which mirrors the actual
+ * locality. node_distance() then represents the correct distances between
+ * emulated nodes by using the fake acpi mappings to pxms.
+ */
+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
+{
+ unsigned int bits;
+ unsigned int cores;
+ unsigned int apicid_base = 0;
+ int i;
+
+ bits = boot_cpu_data.x86_coreid_bits;
+ cores = 1 << bits;
+ early_get_boot_cpu_id();
+ if (boot_cpu_physical_apicid > 0)
+ apicid_base = boot_cpu_physical_apicid;
+
+ for (i = 0; i < nr_nodes; i++) {
+ int index;
+ int nid;
+ int j;
+
+ nid = find_node_by_addr(nodes[i].start);
+ if (nid == NUMA_NO_NODE)
+ continue;
+
+ index = nodeids[nid] << bits;
+ if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
+ for (j = apicid_base; j < cores + apicid_base; j++)
+ fake_apicid_to_node[index + j] = i;
+#ifdef CONFIG_ACPI_NUMA
+ __acpi_map_pxm_to_node(nid, i);
+#endif
+ }
+ memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+}
+#endif /* CONFIG_NUMA_EMU */
+
int __init amd_scan_nodes(void)
{
unsigned int bits;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7762a51..1e72102 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -260,30 +260,30 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
static char *cmdline __initdata;
static int __init setup_physnodes(unsigned long start, unsigned long end,
int acpi, int amd)
{
- int nr_nodes = 0;
int ret = 0;
int i;
+ memset(physnodes, 0, sizeof(physnodes));
#ifdef CONFIG_ACPI_NUMA
if (acpi)
- nr_nodes = acpi_get_nodes(physnodes);
+ acpi_get_nodes(physnodes, start, end);
#endif
#ifdef CONFIG_AMD_NUMA
if (amd)
- nr_nodes = amd_get_nodes(physnodes);
+ amd_get_nodes(physnodes);
#endif
/*
* Basic sanity checking on the physical node map: there may be errors
* if the SRAT or AMD code incorrectly reported the topology or the mem=
* kernel parameter is used.
*/
- for (i = 0; i < nr_nodes; i++) {
+ for (i = 0; i < MAX_NUMNODES; i++) {
if (physnodes[i].start == physnodes[i].end)
continue;
if (physnodes[i].start > end) {
@@ -298,17 +298,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
physnodes[i].start = start;
if (physnodes[i].end > end)
physnodes[i].end = end;
- }
-
- /*
- * Remove all nodes that have no memory or were truncated because of the
- * limited address range.
- */
- for (i = 0; i < nr_nodes; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- physnodes[ret].start = physnodes[i].start;
- physnodes[ret].end = physnodes[i].end;
ret++;
}
@@ -324,6 +313,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
return ret;
}
+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
+{
+ int i;
+
+ BUG_ON(acpi && amd);
+#ifdef CONFIG_ACPI_NUMA
+ if (acpi)
+ acpi_fake_nodes(nodes, nr_nodes);
+#endif
+#ifdef CONFIG_AMD_NUMA
+ if (amd)
+ amd_fake_nodes(nodes, nr_nodes);
+#endif
+ if (!acpi && !amd)
+ for (i = 0; i < nr_cpu_ids; i++)
+ numa_set_node(i, 0);
+}
+
/*
* Setups up nid to range from addr to addr + size. If the end
* boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +359,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
* to max_addr. The return value is the number of nodes allocated.
*/
-static int __init split_nodes_interleave(u64 addr, u64 max_addr,
- int nr_phys_nodes, int nr_nodes)
+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
{
nodemask_t physnode_mask = NODE_MASK_NONE;
u64 size;
@@ -384,7 +390,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
return -1;
}
- for (i = 0; i < nr_phys_nodes; i++)
+ for (i = 0; i < MAX_NUMNODES; i++)
if (physnodes[i].start != physnodes[i].end)
node_set(i, physnode_mask);
@@ -553,11 +559,9 @@ static int __init numa_emulation(unsigned long start_pfn,
{
u64 addr = start_pfn << PAGE_SHIFT;
u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_phys_nodes;
int num_nodes;
int i;
- num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
/*
* If the numa=fake command-line contains a 'M' or 'G', it represents
* the fixed node size. Otherwise, if it is just a single number N,
@@ -572,7 +576,7 @@ static int __init numa_emulation(unsigned long start_pfn,
unsigned long n;
n = simple_strtoul(cmdline, NULL, 0);
- num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
+ num_nodes = split_nodes_interleave(addr, max_addr, n);
}
if (num_nodes < 0)
@@ -595,7 +599,8 @@ static int __init numa_emulation(unsigned long start_pfn,
nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
- acpi_fake_nodes(nodes, num_nodes);
+ setup_physnodes(addr, max_addr, acpi, amd);
+ fake_physnodes(acpi, amd, num_nodes);
numa_init_array();
return 0;
}
@@ -610,8 +615,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU
+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+ acpi, amd);
if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
return;
+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+ acpi, amd);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
@@ -767,6 +776,7 @@ void __cpuinit numa_clear_node(int cpu)
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+#ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -776,34 +786,115 @@ void __cpuinit numa_remove_cpu(int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
+#else
+void __cpuinit numa_add_cpu(int cpu)
+{
+ unsigned long addr;
+ u16 apicid;
+ int physnid;
+ int nid = NUMA_NO_NODE;
+
+ apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+ if (apicid != BAD_APICID)
+ nid = apicid_to_node[apicid];
+ if (nid == NUMA_NO_NODE)
+ nid = early_cpu_to_node(cpu);
+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+ /*
+ * Use the starting address of the emulated node to find which physical
+ * node it is allocated on.
+ */
+ addr = node_start_pfn(nid) << PAGE_SHIFT;
+ for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
+ if (addr >= physnodes[physnid].start &&
+ addr < physnodes[physnid].end)
+ break;
+
+ /*
+ * Map the cpu to each emulated node that is allocated on the physical
+ * node of the cpu's apic id.
+ */
+ for_each_online_node(nid) {
+ addr = node_start_pfn(nid) << PAGE_SHIFT;
+ if (addr >= physnodes[physnid].start &&
+ addr < physnodes[physnid].end)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+ }
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ int i;
+
+ for_each_online_node(i)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#endif /* !CONFIG_NUMA_EMU */
#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
/*
* --------- debug versions of the numa functions ---------
*/
+#ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
- int node = early_cpu_to_node(cpu);
struct cpumask *mask;
- char buf[64];
- mask = node_to_cpumask_map[node];
- if (mask == NULL) {
- printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
return;
- }
if (enable)
cpumask_set_cpu(cpu, mask);
else
cpumask_clear_cpu(cpu, mask);
+}
+#else
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ int i;
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+ for_each_online_node(i) {
+ unsigned long addr;
+
+ addr = node_start_pfn(i) << PAGE_SHIFT;
+ if (addr < physnodes[node].start ||
+ addr >= physnodes[node].end)
+ continue;
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+ }
}
+#endif /* CONFIG_NUMA_EMU */
void __cpuinit numa_add_cpu(int cpu)
{
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 171a0aa..603d285 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -349,18 +349,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
void __init acpi_numa_arch_fixup(void) {}
-int __init acpi_get_nodes(struct bootnode *physnodes)
+#ifdef CONFIG_NUMA_EMU
+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+ unsigned long end)
{
int i;
- int ret = 0;
for_each_node_mask(i, nodes_parsed) {
- physnodes[ret].start = nodes[i].start;
- physnodes[ret].end = nodes[i].end;
- ret++;
+ cutoff_node(i, start, end);
+ physnodes[i].start = nodes[i].start;
+ physnodes[i].end = nodes[i].end;
}
- return ret;
}
+#endif /* CONFIG_NUMA_EMU */
/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
@@ -505,8 +506,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
int i, j;
- printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
- "topology.\n");
for (i = 0; i < num_nodes; i++) {
int nid, pxm;
@@ -526,6 +525,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
fake_apicid_to_node[j] == NUMA_NO_NODE)
fake_apicid_to_node[j] = i;
}
+
+ /*
+ * If there are apicid-to-node mappings for physical nodes that do not
+ * have a corresponding emulated node, it should default to a guaranteed
+ * value.
+ */
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ if (apicid_to_node[i] != NUMA_NO_NODE &&
+ fake_apicid_to_node[i] == NUMA_NO_NODE)
+ fake_apicid_to_node[i] = 0;
+
for (i = 0; i < num_nodes; i++)
__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f24a853..e2b7b0c 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self,
switch (val) {
case DIE_NMI:
- case DIE_NMI_IPI:
if (ctr_running)
model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
else if (!nmi_enabled)
@@ -361,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
static struct notifier_block profile_exceptions_nb = {
.notifier_call = profile_exceptions_notify,
.next = NULL,
- .priority = 2
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index 0636dd9..720bf5a 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
static struct notifier_block profile_timer_exceptions_nb = {
.notifier_call = profile_timer_exceptions_notify,
.next = NULL,
- .priority = 0
+ .priority = NMI_LOW_PRIOR,
};
static int timer_start(void)
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index fc1e8fe..e27dffb 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -4,6 +4,7 @@
#include <linux/cpu.h>
#include <linux/range.h>
+#include <asm/amd_nb.h>
#include <asm/pci_x86.h>
#include <asm/pci-direct.h>
@@ -378,6 +379,34 @@ static struct notifier_block __cpuinitdata amd_cpu_notifier = {
.notifier_call = amd_cpu_notify,
};
+static void __init pci_enable_pci_io_ecs(void)
+{
+#ifdef CONFIG_AMD_NB
+ unsigned int i, n;
+
+ for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) {
+ u8 bus = amd_nb_bus_dev_ranges[i].bus;
+ u8 slot = amd_nb_bus_dev_ranges[i].dev_base;
+ u8 limit = amd_nb_bus_dev_ranges[i].dev_limit;
+
+ for (; slot < limit; ++slot) {
+ u32 val = read_pci_config(bus, slot, 3, 0);
+
+ if (!early_is_amd_nb(val))
+ continue;
+
+ val = read_pci_config(bus, slot, 3, 0x8c);
+ if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) {
+ val |= ENABLE_CF8_EXT_CFG >> 32;
+ write_pci_config(bus, slot, 3, 0x8c, val);
+ }
+ ++n;
+ }
+ }
+ pr_info("Extended Config Space enabled on %u nodes\n", n);
+#endif
+}
+
static int __init pci_io_ecs_init(void)
{
int cpu;
@@ -386,6 +415,10 @@ static int __init pci_io_ecs_init(void)
if (boot_cpu_data.x86 < 0x10)
return 0;
+ /* Try the PCI method first. */
+ if (early_pci_allowed())
+ pci_enable_pci_io_ecs();
+
register_cpu_notifier(&amd_cpu_notifier);
for_each_online_cpu(cpu)
amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index ffe9b65..9f47e86 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -1926,8 +1926,9 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
const struct firmware *fw;
unsigned long start_address;
const struct ihex_binrec *rec;
+ const char *errmsg = 0;
int res;
-
+
res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
if (res) {
PRINTK (KERN_ERR, "Cannot load microcode data");
@@ -1937,8 +1938,8 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
/* First record contains just the start address */
rec = (const struct ihex_binrec *)fw->data;
if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
- PRINTK (KERN_ERR, "Bad microcode data (no start record)");
- return -EINVAL;
+ errmsg = "no start record";
+ goto fail;
}
start_address = be32_to_cpup((__be32 *)rec->data);
@@ -1950,12 +1951,12 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
be16_to_cpu(rec->len));
if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
- PRINTK (KERN_ERR, "Bad microcode data (record too long)");
- return -EINVAL;
+ errmsg = "record too long";
+ goto fail;
}
if (be16_to_cpu(rec->len) & 3) {
- PRINTK (KERN_ERR, "Bad microcode data (odd number of bytes)");
- return -EINVAL;
+ errmsg = "odd number of bytes";
+ goto fail;
}
res = loader_write(lb, dev, rec);
if (res)
@@ -1970,6 +1971,10 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
res = loader_start(lb, dev, start_address);
return res;
+fail:
+ release_firmware(fw);
+ PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
+ return -EINVAL;
}
/********** give adapter parameters **********/
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 27370e9..5e2f52b 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -39,7 +39,7 @@
#include "hvc_console.h"
-char hvc_driver_name[] = "hvc_console";
+static const char hvc_driver_name[] = "hvc_console";
static struct vio_device_id hvc_driver_table[] __devinitdata = {
{"serial", "hvterm1"},
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f4d334f..320668f 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1081,7 +1081,7 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data)
{
struct die_args *args = data;
- if (val != DIE_NMI)
+ if (val != DIE_NMIUNKNOWN)
return NOTIFY_OK;
/* Hack, if it's a memory or I/O error, ignore it. */
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6ee2359..ef13873 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -109,7 +109,7 @@ config FSL_DMA
config MPC512X_DMA
tristate "Freescale MPC512x built-in DMA engine support"
- depends on PPC_MPC512x
+ depends on PPC_MPC512x || PPC_MPC831x
select DMA_ENGINE
---help---
Enable support for the Freescale MPC512x built-in DMA engine.
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 4e9cbf3..59c2701 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -1,6 +1,7 @@
/*
* Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
* Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
*
* Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
* (defines, structures and comments) was taken from MPC5121 DMA driver
@@ -70,6 +71,8 @@
#define MPC_DMA_DMAES_SBE (1 << 1)
#define MPC_DMA_DMAES_DBE (1 << 0)
+#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 << 6)
+
#define MPC_DMA_TSIZE_1 0x00
#define MPC_DMA_TSIZE_2 0x01
#define MPC_DMA_TSIZE_4 0x02
@@ -104,7 +107,10 @@ struct __attribute__ ((__packed__)) mpc_dma_regs {
/* 0x30 */
u32 dmahrsh; /* DMA hw request status high(ch63~32) */
u32 dmahrsl; /* DMA hardware request status low(ch31~0) */
- u32 dmaihsa; /* DMA interrupt high select AXE(ch63~32) */
+ union {
+ u32 dmaihsa; /* DMA interrupt high select AXE(ch63~32) */
+ u32 dmagpor; /* (General purpose register on MPC8308) */
+ };
u32 dmailsa; /* DMA interrupt low select AXE(ch31~0) */
/* 0x40 ~ 0xff */
u32 reserve0[48]; /* Reserved */
@@ -195,7 +201,9 @@ struct mpc_dma {
struct mpc_dma_regs __iomem *regs;
struct mpc_dma_tcd __iomem *tcd;
int irq;
+ int irq2;
uint error_status;
+ int is_mpc8308;
/* Lock for error_status field in this structure */
spinlock_t error_status_lock;
@@ -252,11 +260,13 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan)
prev = mdesc;
}
- prev->tcd->start = 0;
prev->tcd->int_maj = 1;
/* Send first descriptor in chain into hardware */
memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd));
+
+ if (first != prev)
+ mdma->tcd[cid].e_sg = 1;
out_8(&mdma->regs->dmassrt, cid);
}
@@ -274,6 +284,9 @@ static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off)
spin_lock(&mchan->lock);
+ out_8(&mdma->regs->dmacint, ch + off);
+ out_8(&mdma->regs->dmacerr, ch + off);
+
/* Check error status */
if (es & (1 << ch))
list_for_each_entry(mdesc, &mchan->active, node)
@@ -302,36 +315,68 @@ static irqreturn_t mpc_dma_irq(int irq, void *data)
spin_unlock(&mdma->error_status_lock);
/* Handle interrupt on each channel */
- mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
+ if (mdma->dma.chancnt > 32) {
+ mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
in_be32(&mdma->regs->dmaerrh), 32);
+ }
mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl),
in_be32(&mdma->regs->dmaerrl), 0);
- /* Ack interrupt on all channels */
- out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
-
/* Schedule tasklet */
tasklet_schedule(&mdma->tasklet);
return IRQ_HANDLED;
}
-/* DMA Tasklet */
-static void mpc_dma_tasklet(unsigned long data)
+/* proccess completed descriptors */
+static void mpc_dma_process_completed(struct mpc_dma *mdma)
{
- struct mpc_dma *mdma = (void *)data;
dma_cookie_t last_cookie = 0;
struct mpc_dma_chan *mchan;
struct mpc_dma_desc *mdesc;
struct dma_async_tx_descriptor *desc;
unsigned long flags;
LIST_HEAD(list);
- uint es;
int i;
+ for (i = 0; i < mdma->dma.chancnt; i++) {
+ mchan = &mdma->channels[i];
+
+ /* Get all completed descriptors */
+ spin_lock_irqsave(&mchan->lock, flags);
+ if (!list_empty(&mchan->completed))
+ list_splice_tail_init(&mchan->completed, &list);
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ if (list_empty(&list))
+ continue;
+
+ /* Execute callbacks and run dependencies */
+ list_for_each_entry(mdesc, &list, node) {
+ desc = &mdesc->desc;
+
+ if (desc->callback)
+ desc->callback(desc->callback_param);
+
+ last_cookie = desc->cookie;
+ dma_run_dependencies(desc);
+ }
+
+ /* Free descriptors */
+ spin_lock_irqsave(&mchan->lock, flags);
+ list_splice_tail_init(&list, &mchan->free);
+ mchan->completed_cookie = last_cookie;
+ spin_unlock_irqrestore(&mchan->lock, flags);
+ }
+}
+
+/* DMA Tasklet */
+static void mpc_dma_tasklet(unsigned long data)
+{
+ struct mpc_dma *mdma = (void *)data;
+ unsigned long flags;
+ uint es;
+
spin_lock_irqsave(&mdma->error_status_lock, flags);
es = mdma->error_status;
mdma->error_status = 0;
@@ -370,35 +415,7 @@ static void mpc_dma_tasklet(unsigned long data)
dev_err(mdma->dma.dev, "- Destination Bus Error\n");
}
- for (i = 0; i < mdma->dma.chancnt; i++) {
- mchan = &mdma->channels[i];
-
- /* Get all completed descriptors */
- spin_lock_irqsave(&mchan->lock, flags);
- if (!list_empty(&mchan->completed))
- list_splice_tail_init(&mchan->completed, &list);
- spin_unlock_irqrestore(&mchan->lock, flags);
-
- if (list_empty(&list))
- continue;
-
- /* Execute callbacks and run dependencies */
- list_for_each_entry(mdesc, &list, node) {
- desc = &mdesc->desc;
-
- if (desc->callback)
- desc->callback(desc->callback_param);
-
- last_cookie = desc->cookie;
- dma_run_dependencies(desc);
- }
-
- /* Free descriptors */
- spin_lock_irqsave(&mchan->lock, flags);
- list_splice_tail_init(&list, &mchan->free);
- mchan->completed_cookie = last_cookie;
- spin_unlock_irqrestore(&mchan->lock, flags);
- }
+ mpc_dma_process_completed(mdma);
}
/* Submit descriptor to hardware */
@@ -563,6 +580,7 @@ static struct dma_async_tx_descriptor *
mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
size_t len, unsigned long flags)
{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
struct mpc_dma_desc *mdesc = NULL;
struct mpc_dma_tcd *tcd;
@@ -577,8 +595,11 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
}
spin_unlock_irqrestore(&mchan->lock, iflags);
- if (!mdesc)
+ if (!mdesc) {
+ /* try to free completed descriptors */
+ mpc_dma_process_completed(mdma);
return NULL;
+ }
mdesc->error = 0;
tcd = mdesc->tcd;
@@ -591,7 +612,8 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
tcd->dsize = MPC_DMA_TSIZE_32;
tcd->soff = 32;
tcd->doff = 32;
- } else if (IS_ALIGNED(src | dst | len, 16)) {
+ } else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) {
+ /* MPC8308 doesn't support 16 byte transfers */
tcd->ssize = MPC_DMA_TSIZE_16;
tcd->dsize = MPC_DMA_TSIZE_16;
tcd->soff = 16;
@@ -651,6 +673,15 @@ static int __devinit mpc_dma_probe(struct platform_device *op,
return -EINVAL;
}
+ if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
+ mdma->is_mpc8308 = 1;
+ mdma->irq2 = irq_of_parse_and_map(dn, 1);
+ if (mdma->irq2 == NO_IRQ) {
+ dev_err(dev, "Error mapping IRQ!\n");
+ return -EINVAL;
+ }
+ }
+
retval = of_address_to_resource(dn, 0, &res);
if (retval) {
dev_err(dev, "Error parsing memory region!\n");
@@ -681,11 +712,23 @@ static int __devinit mpc_dma_probe(struct platform_device *op,
return -EINVAL;
}
+ if (mdma->is_mpc8308) {
+ retval = devm_request_irq(dev, mdma->irq2, &mpc_dma_irq, 0,
+ DRV_NAME, mdma);
+ if (retval) {
+ dev_err(dev, "Error requesting IRQ2!\n");
+ return -EINVAL;
+ }
+ }
+
spin_lock_init(&mdma->error_status_lock);
dma = &mdma->dma;
dma->dev = dev;
- dma->chancnt = MPC_DMA_CHANNELS;
+ if (!mdma->is_mpc8308)
+ dma->chancnt = MPC_DMA_CHANNELS;
+ else
+ dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */
dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
dma->device_free_chan_resources = mpc_dma_free_chan_resources;
dma->device_issue_pending = mpc_dma_issue_pending;
@@ -721,26 +764,40 @@ static int __devinit mpc_dma_probe(struct platform_device *op,
* - Round-robin group arbitration,
* - Round-robin channel arbitration.
*/
- out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
- MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
-
- /* Disable hardware DMA requests */
- out_be32(&mdma->regs->dmaerqh, 0);
- out_be32(&mdma->regs->dmaerql, 0);
-
- /* Disable error interrupts */
- out_be32(&mdma->regs->dmaeeih, 0);
- out_be32(&mdma->regs->dmaeeil, 0);
-
- /* Clear interrupts status */
- out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
- out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
-
- /* Route interrupts to IPIC */
- out_be32(&mdma->regs->dmaihsa, 0);
- out_be32(&mdma->regs->dmailsa, 0);
+ if (!mdma->is_mpc8308) {
+ out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
+ MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
+
+ /* Disable hardware DMA requests */
+ out_be32(&mdma->regs->dmaerqh, 0);
+ out_be32(&mdma->regs->dmaerql, 0);
+
+ /* Disable error interrupts */
+ out_be32(&mdma->regs->dmaeeih, 0);
+ out_be32(&mdma->regs->dmaeeil, 0);
+
+ /* Clear interrupts status */
+ out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+ /* Route interrupts to IPIC */
+ out_be32(&mdma->regs->dmaihsa, 0);
+ out_be32(&mdma->regs->dmailsa, 0);
+ } else {
+ /* MPC8308 has 16 channels and lacks some registers */
+ out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+ /* enable snooping */
+ out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+ /* Disable error interrupts */
+ out_be32(&mdma->regs->dmaeeil, 0);
+
+ /* Clear interrupts status */
+ out_be32(&mdma->regs->dmaintl, 0xFFFF);
+ out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+ }
/* Register DMA engine */
dev_set_drvdata(dev, mdma);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 09dda0b..c3f5aca 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -189,6 +189,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
+#ifdef notyet
int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
struct rdma_cq_setup setup;
@@ -200,6 +201,7 @@ int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
setup.ovfl_mode = 1;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
+#endif
static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
{
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index a237d49..c5406da 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -335,8 +335,6 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_post_zb_read(struct iwch_qp *qhp);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
-int iwch_quiesce_qps(struct iwch_cq *chp);
-int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 0993137..1b4cd09 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1149,59 +1149,3 @@ out:
PDBG("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
-
-static int quiesce_qp(struct iwch_qp *qhp)
-{
- spin_lock_irq(&qhp->lock);
- iwch_quiesce_tid(qhp->ep);
- qhp->flags |= QP_QUIESCED;
- spin_unlock_irq(&qhp->lock);
- return 0;
-}
-
-static int resume_qp(struct iwch_qp *qhp)
-{
- spin_lock_irq(&qhp->lock);
- iwch_resume_tid(qhp->ep);
- qhp->flags &= ~QP_QUIESCED;
- spin_unlock_irq(&qhp->lock);
- return 0;
-}
-
-int iwch_quiesce_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
- quiesce_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
- quiesce_qp(qhp);
- }
- return 0;
-}
-
-int iwch_resume_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
- resume_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
- resume_qp(qhp);
- }
- return 0;
-}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 16032cd..cc600c2 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -760,7 +760,6 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
-int c4iw_post_zb_read(struct c4iw_qp *qhp);
int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 057cb25..2080090 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -892,36 +892,6 @@ static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
}
}
-int c4iw_post_zb_read(struct c4iw_qp *qhp)
-{
- union t4_wr *wqe;
- struct sk_buff *skb;
- u8 len16;
-
- PDBG("%s enter\n", __func__);
- skb = alloc_skb(40, GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
- return -ENOMEM;
- }
- set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
-
- wqe = (union t4_wr *)skb_put(skb, sizeof wqe->read);
- memset(wqe, 0, sizeof wqe->read);
- wqe->read.r2 = cpu_to_be64(0);
- wqe->read.stag_sink = cpu_to_be32(1);
- wqe->read.to_sink_hi = cpu_to_be32(0);
- wqe->read.to_sink_lo = cpu_to_be32(1);
- wqe->read.stag_src = cpu_to_be32(1);
- wqe->read.plen = cpu_to_be32(0);
- wqe->read.to_src_hi = cpu_to_be32(0);
- wqe->read.to_src_lo = cpu_to_be32(1);
- len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
- init_wr_hdr(wqe, 0, FW_RI_RDMA_READ_WR, FW_RI_COMPLETION_FLAG, len16);
-
- return c4iw_ofld_send(&qhp->rhp->rdev, skb);
-}
-
static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
gfp_t gfp)
{
@@ -1029,7 +999,6 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
wqe->cookie = (unsigned long) &ep->com.wr_wait;
wqe->u.fini.type = FW_RI_TYPE_FINI;
- c4iw_init_wr_wait(&ep->com.wr_wait);
ret = c4iw_ofld_send(&rhp->rdev, skb);
if (ret)
goto out;
@@ -1125,7 +1094,6 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
if (qhp->attr.mpa_attr.initiator)
build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
- c4iw_init_wr_wait(&qhp->ep->com.wr_wait);
ret = c4iw_ofld_send(&rhp->rdev, skb);
if (ret)
goto out;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 765f0fc..b33f045 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -530,9 +530,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
for (j = 0; j < 6; j++) {
if (!pdev->resource[j].start)
continue;
- ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
- j, (unsigned long long)pdev->resource[j].start,
- (unsigned long long)pdev->resource[j].end,
+ ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
+ j, &pdev->resource[j],
(unsigned long long)pci_resource_len(pdev, j));
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5a219a2..e8df155 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -397,10 +397,14 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cq->resize_buf = NULL;
cq->resize_umem = NULL;
} else {
+ struct mlx4_ib_cq_buf tmp_buf;
+ int tmp_cqe = 0;
+
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
- mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ tmp_buf = cq->buf;
+ tmp_cqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
@@ -408,6 +412,9 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cq->resize_buf = NULL;
}
spin_unlock_irq(&cq->lock);
+
+ if (tmp_cqe)
+ mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe);
}
goto out;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index c9a8dd6..57ffa50 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -211,6 +211,8 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 5648659..03a59534 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -171,6 +171,8 @@ static void forward_trap(struct mthca_dev *dev,
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 3892e2c..5a4c364 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -908,8 +908,8 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
nesvnic->nic_index &&
mc_index < max_pft_entries_avaiable) {
nes_debug(NES_DBG_NIC_RX,
- "mc_index=%d skipping nic_index=%d,\
- used for=%d \n", mc_index,
+ "mc_index=%d skipping nic_index=%d, "
+ "used for=%d \n", mc_index,
nesvnic->nic_index,
nesadapter->pft_mcast_map[mc_index]);
mc_index++;
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 64c9e7d..73225ee 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -766,7 +766,7 @@ struct qib_devdata {
void (*f_sdma_hw_start_up)(struct qib_pportdata *);
void (*f_sdma_init_early)(struct qib_pportdata *);
void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32);
- void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32);
+ void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32);
u32 (*f_hdrqempty)(struct qib_ctxtdata *);
u64 (*f_portcntr)(struct qib_pportdata *, u32);
u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **,
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index a86cbf8..5246aa4 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -100,7 +100,8 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
- (cq->notify == IB_CQ_SOLICITED && solicited)) {
+ (cq->notify == IB_CQ_SOLICITED &&
+ (solicited || entry->status != IB_WC_SUCCESS))) {
cq->notify = IB_CQ_NONE;
cq->triggered++;
/*
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 9cd1936..23e584f 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -71,6 +71,11 @@ MODULE_DESCRIPTION("QLogic IB driver");
*/
#define QIB_PIO_MAXIBHDR 128
+/*
+ * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt.
+ */
+#define QIB_MAX_PKT_RECV 64
+
struct qlogic_ib_stats qib_stats;
const char *qib_get_unit_name(int unit)
@@ -284,14 +289,147 @@ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
* Returns 1 if error was a CRC, else 0.
* Needed for some chip's synthesized error counters.
*/
-static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt,
- u32 eflags, u32 l, u32 etail, __le32 *rhf_addr,
- struct qib_message_header *hdr)
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+ u32 ctxt, u32 eflags, u32 l, u32 etail,
+ __le32 *rhf_addr, struct qib_message_header *rhdr)
{
u32 ret = 0;
if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
ret = 1;
+ else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+ /* For TIDERR and RC QPs premptively schedule a NAK */
+ struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+ struct qib_other_headers *ohdr = NULL;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct qib_qp *qp = NULL;
+ u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+ u16 lid = be16_to_cpu(hdr->lrh[1]);
+ int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ u32 qp_num;
+ u32 opcode;
+ u32 psn;
+ int diff;
+ unsigned long flags;
+
+ /* Sanity check packet */
+ if (tlen < 24)
+ goto drop;
+
+ if (lid < QIB_MULTICAST_LID_BASE) {
+ lid &= ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid != ppd->lid))
+ goto drop;
+ }
+
+ /* Check for GRH */
+ if (lnh == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == QIB_LRH_GRH) {
+ u32 vtf;
+
+ ohdr = &hdr->u.l.oth;
+ if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else
+ goto drop;
+
+ /* Get opcode and PSN from packet */
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ opcode >>= 24;
+ psn = be32_to_cpu(ohdr->bth[2]);
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ if (qp_num != QIB_MULTICAST_QPN) {
+ int ruc_res;
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+
+ /*
+ * Handle only RC QPs - for other QP types drop error
+ * packet.
+ */
+ spin_lock(&qp->r_lock);
+
+ /* Check for valid receive state. */
+ if (!(ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto unlock;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ruc_res =
+ qib_ruc_check_hdr(
+ ibp, hdr,
+ lnh == QIB_LRH_GRH,
+ qp,
+ be32_to_cpu(ohdr->bth[0]));
+ if (ruc_res) {
+ spin_unlock_irqrestore(&qp->s_lock,
+ flags);
+ goto unlock;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Only deal with RDMA Writes for now */
+ if (opcode <
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+ diff = qib_cmp24(psn, qp->r_psn);
+ if (!qp->r_nak_state && diff >= 0) {
+ ibp->n_rc_seqnak++;
+ qp->r_nak_state =
+ IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Wait to send the sequence
+ * NAK until all packets
+ * in the receive queue have
+ * been processed.
+ * Otherwise, we end up
+ * propagating congestion.
+ */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |=
+ QIB_R_RSP_NAK;
+ atomic_inc(
+ &qp->refcount);
+ list_add_tail(
+ &qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ } /* Out of sequence NAK */
+ } /* QP Request NAKs */
+ break;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ case IB_QPT_UC:
+ default:
+ /* For now don't handle any other QP types */
+ break;
+ }
+
+unlock:
+ spin_unlock(&qp->r_lock);
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for us to finish.
+ */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } /* Unicast QP */
+ } /* Valid packet with TIDErr */
+
+drop:
return ret;
}
@@ -335,7 +473,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
}
- for (last = 0, i = 1; !last && i <= 64; i += !last) {
+ for (last = 0, i = 1; !last; i += !last) {
hdr = dd->f_get_msgheader(dd, rhf_addr);
eflags = qib_hdrget_err_flags(rhf_addr);
etype = qib_hdrget_rcv_type(rhf_addr);
@@ -371,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
* packets; only qibhdrerr should be set.
*/
if (unlikely(eflags))
- crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l,
+ crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
etail, rhf_addr, hdr);
else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
qib_ib_rcv(rcd, hdr, ebuf, tlen);
@@ -384,6 +522,9 @@ move_along:
l += rsize;
if (l >= maxcnt)
l = 0;
+ if (i == QIB_MAX_PKT_RECV)
+ last = 1;
+
rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
if (dd->flags & QIB_NODMA_RTAIL) {
u32 seq = qib_hdrget_seq(rhf_addr);
@@ -402,7 +543,7 @@ move_along:
*/
lval = l;
if (!last && !(i & 0xf)) {
- dd->f_update_usrhead(rcd, lval, updegr, etail);
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
updegr = 0;
}
}
@@ -444,7 +585,7 @@ bail:
* if no packets were processed.
*/
lval = (u64)rcd->head | dd->rhdrhead_intr_off;
- dd->f_update_usrhead(rcd, lval, updegr, etail);
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
return crcs;
}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 79d9971..75bfad1 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1379,17 +1379,17 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
/* find device (with ACTIVE ports) with fewest ctxts in use */
for (ndev = 0; ndev < devmax; ndev++) {
struct qib_devdata *dd = qib_lookup(ndev);
- unsigned cused = 0, cfree = 0;
+ unsigned cused = 0, cfree = 0, pusable = 0;
if (!dd)
continue;
if (port && port <= dd->num_pports &&
usable(dd->pport + port - 1))
- dusable = 1;
+ pusable = 1;
else
for (i = 0; i < dd->num_pports; i++)
if (usable(dd->pport + i))
- dusable++;
- if (!dusable)
+ pusable++;
+ if (!pusable)
continue;
for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
ctxt++)
@@ -1397,7 +1397,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
cused++;
else
cfree++;
- if (cfree && cused < inuse) {
+ if (pusable && cfree && cused < inuse) {
udd = dd;
inuse = cused;
}
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a5e29db..774dea8 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2074,7 +2074,7 @@ static void qib_6120_config_ctxts(struct qib_devdata *dd)
}
static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
if (updegr)
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 6fd8d74..127a0d5 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2297,7 +2297,7 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
nchipctxts = qib_read_kreg32(dd, kr_portcnt);
dd->cspec->numctxts = nchipctxts;
if (qib_n_krcv_queues > 1) {
- dd->qpn_mask = 0x3f;
+ dd->qpn_mask = 0x3e;
dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
if (dd->first_user_ctxt > nchipctxts)
dd->first_user_ctxt = nchipctxts;
@@ -2703,7 +2703,7 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
}
static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
if (updegr)
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 584d443..dbbb0e8 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -71,6 +71,9 @@ static void qib_7322_mini_pcs_reset(struct qib_pportdata *);
static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32);
static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
+static void serdes_7322_los_enable(struct qib_pportdata *, int);
+static int serdes_7322_init_old(struct qib_pportdata *);
+static int serdes_7322_init_new(struct qib_pportdata *);
#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
@@ -111,6 +114,21 @@ static ushort qib_singleport;
module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
+/*
+ * Receive header queue sizes
+ */
+static unsigned qib_rcvhdrcnt;
+module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrcnt, "receive header count");
+
+static unsigned qib_rcvhdrsize;
+module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words");
+
+static unsigned qib_rcvhdrentsize;
+module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words");
+
#define MAX_ATTEN_LEN 64 /* plenty for any real system */
/* for read back, default index is ~5m copper cable */
static char txselect_list[MAX_ATTEN_LEN] = "10";
@@ -544,6 +562,7 @@ static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */
+#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
#define H1_FORCE_VAL 8
@@ -604,6 +623,7 @@ struct qib_chippport_specific {
u8 ibmalfusesnap;
struct qib_qsfp_data qsfp_data;
char epmsgbuf[192]; /* for port error interrupt msg buffer */
+ u8 bounced;
};
static struct {
@@ -1677,6 +1697,8 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
(ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) {
force_h1(ppd);
ppd->cpspec->qdr_reforce = 1;
+ if (!ppd->dd->cspec->r1)
+ serdes_7322_los_enable(ppd, 0);
} else if (ppd->cpspec->qdr_reforce &&
(ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) &&
(ibclt == IB_7322_LT_STATE_CFGENH ||
@@ -1692,18 +1714,37 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
ibclt <= IB_7322_LT_STATE_SLEEPQUIET)))
adj_tx_serdes(ppd);
- if (!ppd->cpspec->qdr_dfe_on && ibclt != IB_7322_LT_STATE_LINKUP &&
- ibclt <= IB_7322_LT_STATE_SLEEPQUIET) {
- ppd->cpspec->qdr_dfe_on = 1;
- ppd->cpspec->qdr_dfe_time = 0;
- /* On link down, reenable QDR adaptation */
- qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
- ppd->dd->cspec->r1 ?
- QDR_STATIC_ADAPT_DOWN_R1 :
- QDR_STATIC_ADAPT_DOWN);
+ if (ibclt != IB_7322_LT_STATE_LINKUP) {
+ u8 ltstate = qib_7322_phys_portstate(ibcst);
+ u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0,
+ LinkTrainingState);
+ if (!ppd->dd->cspec->r1 &&
+ pibclt == IB_7322_LT_STATE_LINKUP &&
+ ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE)
+ /* If the link went down (but no into recovery,
+ * turn LOS back on */
+ serdes_7322_los_enable(ppd, 1);
+ if (!ppd->cpspec->qdr_dfe_on &&
+ ibclt <= IB_7322_LT_STATE_SLEEPQUIET) {
+ ppd->cpspec->qdr_dfe_on = 1;
+ ppd->cpspec->qdr_dfe_time = 0;
+ /* On link down, reenable QDR adaptation */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 :
+ QDR_STATIC_ADAPT_DOWN);
+ printk(KERN_INFO QIB_DRV_NAME
+ " IB%u:%u re-enabled QDR adaptation "
+ "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt);
+ }
}
}
+static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32);
+
/*
* This is per-pport error handling.
* will likely get it's own MSIx interrupt (one for each port,
@@ -1840,7 +1881,23 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
IB_PHYSPORTSTATE_DISABLED)
qib_set_ib_7322_lstate(ppd, 0,
QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
- else
+ else {
+ u32 lstate;
+ /*
+ * We need the current logical link state before
+ * lflags are set in handle_e_ibstatuschanged.
+ */
+ lstate = qib_7322_iblink_state(ibcs);
+
+ if (IS_QMH(dd) && !ppd->cpspec->bounced &&
+ ltstate == IB_PHYSPORTSTATE_LINKUP &&
+ (lstate >= IB_PORT_INIT &&
+ lstate <= IB_PORT_ACTIVE)) {
+ ppd->cpspec->bounced = 1;
+ qib_7322_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
+ }
+
/*
* Since going into a recovery state causes the link
* state to go down and since recovery is transitory,
@@ -1854,6 +1911,7 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT &&
ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE)
qib_handle_e_ibstatuschanged(ppd, ibcs);
+ }
}
if (*msg && iserr)
qib_dev_porterr(dd, ppd->port, "%s error\n", msg);
@@ -2785,7 +2843,6 @@ static irqreturn_t qib_7322intr(int irq, void *data)
ctxtrbits &= ~rmask;
if (dd->rcd[i]) {
qib_kreceive(dd->rcd[i], NULL, &npkts);
- adjust_rcv_timeout(dd->rcd[i], npkts);
}
}
rmask <<= 1;
@@ -2835,7 +2892,6 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
(1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt);
qib_kreceive(rcd, NULL, &npkts);
- adjust_rcv_timeout(rcd, npkts);
return IRQ_HANDLED;
}
@@ -3157,6 +3213,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
case BOARD_QME7342:
n = "InfiniPath_QME7342";
break;
+ case 8:
+ n = "InfiniPath_QME7362";
+ dd->flags |= QIB_HAS_QSFP;
+ break;
case 15:
n = "InfiniPath_QLE7342_TEST";
dd->flags |= QIB_HAS_QSFP;
@@ -3475,11 +3535,6 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
dd->cspec->numctxts = nchipctxts;
if (qib_n_krcv_queues > 1 && dd->num_pports) {
- /*
- * Set the mask for which bits from the QPN are used
- * to select a context number.
- */
- dd->qpn_mask = 0x3f;
dd->first_user_ctxt = NUM_IB_PORTS +
(qib_n_krcv_queues - 1) * dd->num_pports;
if (dd->first_user_ctxt > nchipctxts)
@@ -3530,8 +3585,11 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
/* kr_rcvegrcnt changes based on the number of contexts enabled */
dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
- dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt,
- dd->num_pports > 1 ? 1024U : 2048U);
+ if (qib_rcvhdrcnt)
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt);
+ else
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt,
+ dd->num_pports > 1 ? 1024U : 2048U);
}
static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which)
@@ -4002,8 +4060,14 @@ static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t)
}
static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd,
- u32 updegr, u32 egrhd)
+ u32 updegr, u32 egrhd, u32 npkts)
{
+ /*
+ * Need to write timeout register before updating rcvhdrhead to ensure
+ * that the timer is enabled on reception of a packet.
+ */
+ if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT)
+ adjust_rcv_timeout(rcd, npkts);
qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
if (updegr)
@@ -5522,7 +5586,7 @@ static void qsfp_7322_event(struct work_struct *work)
u64 now = get_jiffies_64();
if (time_after64(now, pwrup))
break;
- msleep(1);
+ msleep(20);
}
ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
/*
@@ -5579,6 +5643,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
u32 pidx, unit, port, deflt, h1;
unsigned long val;
int any = 0, seth1;
+ int txdds_size;
str = txselect_list;
@@ -5587,6 +5652,10 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
for (pidx = 0; pidx < dd->num_pports; ++pidx)
dd->pport[pidx].cpspec->no_eep = deflt;
+ txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ;
+ if (IS_QME(dd) || IS_QMH(dd))
+ txdds_size += TXDDS_MFG_SZ;
+
while (*nxt && nxt[1]) {
str = ++nxt;
unit = simple_strtoul(str, &nxt, 0);
@@ -5609,7 +5678,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
;
continue;
}
- if (val >= TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)
+ if (val >= txdds_size)
continue;
seth1 = 0;
h1 = 0; /* gcc thinks it might be used uninitted */
@@ -5661,10 +5730,11 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
return -ENOSPC;
}
val = simple_strtoul(str, &n, 0);
- if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) {
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
printk(KERN_INFO QIB_DRV_NAME
"txselect_values must start with a number < %d\n",
- TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
+ TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
return -EINVAL;
}
strcpy(txselect_list, str);
@@ -5810,7 +5880,8 @@ static void write_7322_initregs(struct qib_devdata *dd)
unsigned n, regno;
unsigned long flags;
- if (!dd->qpn_mask || !dd->pport[pidx].link_speed_supported)
+ if (dd->n_krcv_queues < 2 ||
+ !dd->pport[pidx].link_speed_supported)
continue;
ppd = &dd->pport[pidx];
@@ -6097,8 +6168,10 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
ppd++;
}
- dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
- dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
+ dd->rcvhdrentsize = qib_rcvhdrentsize ?
+ qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE;
+ dd->rcvhdrsize = qib_rcvhdrsize ?
+ qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE;
dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
/* we always allocate at least 2048 bytes for eager buffers */
@@ -6495,7 +6568,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
/* make sure we see an updated copy next time around */
sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
sleeps++;
- msleep(1);
+ msleep(20);
}
switch (which) {
@@ -6993,6 +7066,12 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
{ 0, 1, 0, 12 }, /* QMH7342 backplane settings */
};
+static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 0, 0, 0 }, /* QME7342 mfg settings */
+ { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */
+};
+
static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds,
unsigned atten)
{
@@ -7066,6 +7145,16 @@ static void find_best_ent(struct qib_pportdata *ppd,
*sdr_dds = &txdds_extra_sdr[idx];
*ddr_dds = &txdds_extra_ddr[idx];
*qdr_dds = &txdds_extra_qdr[idx];
+ } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) &&
+ ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
+ idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
+ printk(KERN_INFO QIB_DRV_NAME
+ " IB%u:%u use idx %u into txdds_mfg\n",
+ ppd->dd->unit, ppd->port, idx);
+ *sdr_dds = &txdds_extra_mfg[idx];
+ *ddr_dds = &txdds_extra_mfg[idx];
+ *qdr_dds = &txdds_extra_mfg[idx];
} else {
/* this shouldn't happen, it's range checked */
*sdr_dds = txdds_sdr + qib_long_atten;
@@ -7210,9 +7299,30 @@ static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data,
}
}
+static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
+{
+ u64 data = qib_read_kreg_port(ppd, krp_serdesctrl);
+ printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS %s\n",
+ ppd->dd->unit, ppd->port, (enable ? "on" : "off"));
+ if (enable)
+ data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ else
+ data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ qib_write_kreg_port(ppd, krp_serdesctrl, data);
+}
+
static int serdes_7322_init(struct qib_pportdata *ppd)
{
- u64 data;
+ int ret = 0;
+ if (ppd->dd->cspec->r1)
+ ret = serdes_7322_init_old(ppd);
+ else
+ ret = serdes_7322_init_new(ppd);
+ return ret;
+}
+
+static int serdes_7322_init_old(struct qib_pportdata *ppd)
+{
u32 le_val;
/*
@@ -7270,11 +7380,7 @@ static int serdes_7322_init(struct qib_pportdata *ppd)
ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
- data = qib_read_kreg_port(ppd, krp_serdesctrl);
- /* Turn off IB latency mode */
- data &= ~SYM_MASK(IBSerdesCtrl_0, IB_LAT_MODE);
- qib_write_kreg_port(ppd, krp_serdesctrl, data |
- SYM_MASK(IBSerdesCtrl_0, RXLOSEN));
+ serdes_7322_los_enable(ppd, 1);
/* rxbistena; set 0 to avoid effects of it switch later */
ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15);
@@ -7314,6 +7420,205 @@ static int serdes_7322_init(struct qib_pportdata *ppd)
return 0;
}
+static int serdes_7322_init_new(struct qib_pportdata *ppd)
+{
+ u64 tstart;
+ u32 le_val, rxcaldone;
+ int chan, chan_done = (1 << SERDES_CHANS) - 1;
+
+ /*
+ * Initialize the Tx DDS tables. Also done every QSFP event,
+ * for adapters with QSFP
+ */
+ init_txdds_table(ppd, 0);
+
+ /* Clear cmode-override, may be set from older driver */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
+
+ /* ensure no tx overrides from earlier driver loads */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
+ /* START OF LSI SUGGESTED SERDES BRINGUP */
+ /* Reset - Calibration Setup */
+ /* Stop DFE adaptaion */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1));
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5));
+ /* Disable autoadapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15));
+ /* Disable LE2 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6));
+ /* Disable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ /* Disable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12));
+ /* Disable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3));
+ /* Disable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4));
+ /* Disable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13));
+ /* Disable RX Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ /* Disable RX Offset Calibration */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4));
+ /* Select BB CDR */
+ ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15));
+ /* CDR Step Size */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8));
+ /* Enable phase Calibration */
+ ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5));
+ /* DFE Bandwidth [2:14-12] */
+ ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12));
+ /* DFE Config (4 taps only) */
+ ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0));
+ /* Gain Loop Bandwidth */
+ if (!ppd->dd->cspec->r1) {
+ ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12));
+ ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8));
+ } else {
+ ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11));
+ }
+ /* Baseline Wander Correction Gain [13:4-0] (leave as default) */
+ /* Baseline Wander Correction Gain [3:7-5] (leave as default) */
+ /* Data Rate Select [5:7-6] (leave as default) */
+ /* RX Parralel Word Width [3:10-8] (leave as default) */
+
+ /* RX REST */
+ /* Single- or Multi-channel reset */
+ /* RX Analog reset */
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13));
+ msleep(20);
+ /* RX Analog reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14));
+ msleep(20);
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13));
+ msleep(20);
+
+ /* setup LoS params; these are subsystem, so chan == 5 */
+ /* LoS filter threshold_count on, ch 0-3, set to 8 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4));
+
+ /* LoS filter threshold_count off, ch 0-3, set to 4 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8));
+
+ /* LoS filter select enabled */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15);
+
+ /* LoS target data: SDR=4, DDR=2, QDR=1 */
+ ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */
+ ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
+ ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
+
+ /* Turn on LOS on initial SERDES init */
+ serdes_7322_los_enable(ppd, 1);
+ /* FLoop LOS gate: PPM filter enabled */
+ ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10);
+
+ /* RX LATCH CALIBRATION */
+ /* Enable Eyefinder Phase Calibration latch */
+ ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0));
+ /* Enable RX Offset Calibration latch */
+ ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4));
+ msleep(20);
+ /* Start Calibration */
+ ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10));
+ tstart = get_jiffies_64();
+ while (chan_done &&
+ !time_after64(tstart, tstart + msecs_to_jiffies(500))) {
+ msleep(20);
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 &&
+ (~chan_done & (1 << chan)) == 0)
+ chan_done &= ~(1 << chan);
+ }
+ }
+ if (chan_done) {
+ printk(KERN_INFO QIB_DRV_NAME
+ " Serdes %d calibration not done after .5 sec: 0x%x\n",
+ IBSD(ppd->hw_pidx), chan_done);
+ } else {
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(10, 10)) == 0)
+ printk(KERN_INFO QIB_DRV_NAME
+ " Serdes %d chan %d calibration "
+ "failed\n", IBSD(ppd->hw_pidx), chan);
+ }
+ }
+
+ /* Turn off Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ msleep(20);
+
+ /* BRING RX UP */
+ /* Set LE2 value (May be overridden in qsfp_7322_event) */
+ le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7));
+ /* Set LE2 Loop bandwidth */
+ ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5));
+ /* Enable LE2 */
+ ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6));
+ msleep(20);
+ /* Enable H0 only */
+ ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1));
+ /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */
+ le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac;
+ ibsd_wr_allchans(ppd, 21, le_val, 0xfffe);
+ /* Enable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ msleep(20);
+ /* Set Frequency Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5));
+ /* Enable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4));
+ /* Set Timing Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9));
+ /* Enable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3));
+ msleep(50);
+ /* Enable DFE
+ * Set receive adaptation mode. SDR and DDR adaptation are
+ * always on, and QDR is initially enabled; later disabled.
+ */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN);
+ ppd->cpspec->qdr_dfe_on = 1;
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5));
+ /* Disable auto adapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15));
+ msleep(20);
+ /* Enable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12));
+ /* Enable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13));
+ /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */
+ ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11));
+ /* VGA output common mode */
+ ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2));
+
+ return 0;
+}
+
/* start adjust QMH serdes parameters */
static void set_man_code(struct qib_pportdata *ppd, int chan, int code)
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index f3b5039..7896afb 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -92,9 +92,11 @@ unsigned long *qib_cpulist;
/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
- if (!qib_cfgctxts)
+ if (!qib_cfgctxts) {
dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
- else if (qib_cfgctxts < dd->num_pports)
+ if (dd->cfgctxts > dd->ctxtcnt)
+ dd->cfgctxts = dd->ctxtcnt;
+ } else if (qib_cfgctxts < dd->num_pports)
dd->cfgctxts = dd->ctxtcnt;
else if (qib_cfgctxts <= dd->ctxtcnt)
dd->cfgctxts = qib_cfgctxts;
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 54a40828..a693c56 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -131,7 +131,8 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs)
/* start a 75msec timer to clear symbol errors */
mod_timer(&ppd->symerr_clear_timer,
msecs_to_jiffies(75));
- } else if (ltstate == IB_PHYSPORTSTATE_LINKUP) {
+ } else if (ltstate == IB_PHYSPORTSTATE_LINKUP &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
/* active, but not active defered */
qib_hol_up(ppd); /* useful only for 6120 now */
*ppd->statusp |=
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 4b80eb1..8fd19a4 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -136,7 +136,6 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- int ret = 0;
unsigned long flags;
/*
@@ -152,6 +151,8 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
if (!dev->dma_mr)
goto bail;
atomic_inc(&dev->dma_mr->refcount);
+ spin_unlock_irqrestore(&rkt->lock, flags);
+
isge->mr = dev->dma_mr;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
@@ -170,19 +171,34 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
+ atomic_inc(&mr->refcount);
+ spin_unlock_irqrestore(&rkt->lock, flags);
off += mr->offset;
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= QIB_SEGSZ) {
- m++;
- n = 0;
+ if (mr->page_shift) {
+ /*
+ page sizes are uniform power of 2 so no loop is necessary
+ entries_spanned_by_off is the number of times the loop below
+ would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
}
}
- atomic_inc(&mr->refcount);
isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off;
@@ -190,10 +206,10 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->m = m;
isge->n = n;
ok:
- ret = 1;
+ return 1;
bail:
spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
+ return 0;
}
/**
@@ -214,7 +230,6 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- int ret = 0;
unsigned long flags;
/*
@@ -231,6 +246,8 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
if (!dev->dma_mr)
goto bail;
atomic_inc(&dev->dma_mr->refcount);
+ spin_unlock_irqrestore(&rkt->lock, flags);
+
sge->mr = dev->dma_mr;
sge->vaddr = (void *) vaddr;
sge->length = len;
@@ -248,19 +265,34 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
+ atomic_inc(&mr->refcount);
+ spin_unlock_irqrestore(&rkt->lock, flags);
off += mr->offset;
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= QIB_SEGSZ) {
- m++;
- n = 0;
+ if (mr->page_shift) {
+ /*
+ page sizes are uniform power of 2 so no loop is necessary
+ entries_spanned_by_off is the number of times the loop below
+ would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
}
}
- atomic_inc(&mr->refcount);
sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off;
@@ -268,10 +300,10 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
sge->m = m;
sge->n = n;
ok:
- ret = 1;
+ return 1;
bail:
spin_unlock_irqrestore(&rkt->lock, flags);
- return ret;
+ return 0;
}
/*
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 94b0d1f..5ad224e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -668,8 +668,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lid = be16_to_cpu(pip->lid);
/* Must be a valid unicast LID address. */
if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
- goto err;
- if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
if (ppd->lid != lid)
qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT);
if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7))
@@ -683,8 +683,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
msl = pip->neighbormtu_mastersmsl & 0xF;
/* Must be a valid unicast LID address. */
if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
- goto err;
- if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
spin_lock_irqsave(&ibp->lock, flags);
if (ibp->sm_ah) {
if (smlid != ibp->sm_lid)
@@ -707,8 +707,9 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (lwe == 0xFF)
lwe = ppd->link_width_supported;
else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
- goto err;
- set_link_width_enabled(ppd, lwe);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lwe != ppd->link_width_enabled)
+ set_link_width_enabled(ppd, lwe);
}
lse = pip->linkspeedactive_enabled & 0xF;
@@ -721,8 +722,9 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (lse == 15)
lse = ppd->link_speed_supported;
else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
- goto err;
- set_link_speed_enabled(ppd, lse);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lse != ppd->link_speed_enabled)
+ set_link_speed_enabled(ppd, lse);
}
/* Set link down default state. */
@@ -738,7 +740,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
IB_LINKINITCMD_POLL);
break;
default:
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
}
ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
@@ -748,15 +750,17 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
if (mtu == -1)
- goto err;
- qib_set_mtu(ppd, mtu);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ qib_set_mtu(ppd, mtu);
/* Set operational VLs */
vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF;
if (vls) {
if (vls > ppd->vls_supported)
- goto err;
- (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
}
if (pip->mkey_violations == 0)
@@ -770,10 +774,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
ore = pip->localphyerrors_overrunerrors;
if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
if (set_overrunthreshold(ppd, (ore & 0xF)))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -792,7 +796,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
state = pip->linkspeed_portstate & 0xF;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
/*
* Only state changes of DOWN, ARM, and ACTIVE are valid
@@ -812,8 +816,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lstate = QIB_IB_LINKDOWN;
else if (lstate == 3)
lstate = QIB_IB_LINKDOWN_DISABLE;
- else
- goto err;
+ else {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ break;
+ }
spin_lock_irqsave(&ppd->lflags_lock, flags);
ppd->lflags &= ~QIBL_LINKV;
spin_unlock_irqrestore(&ppd->lflags_lock, flags);
@@ -835,8 +841,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
break;
default:
- /* XXX We have already partially updated our state! */
- goto err;
+ smp->status |= IB_SMP_INVALID_FIELD;
}
ret = subn_get_portinfo(smp, ibdev, port);
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 5f95f0f..08944e2 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
/* Fast memory region */
struct qib_fmr {
struct ib_fmr ibfmr;
- u8 page_shift;
struct qib_mregion mr; /* must be last */
};
@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
goto bail;
}
mr->mr.mapsz = m;
+ mr->mr.page_shift = 0;
mr->mr.max_segs = count;
/*
@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
+ if (is_power_of_2(umem->page_size))
+ mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
- fmr->page_shift = fmr_attr->page_shift;
+ fmr->mr.page_shift = fmr_attr->page_shift;
atomic_set(&fmr->mr.refcount, 0);
ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
- ps = 1 << fmr->page_shift;
+ ps = 1 << fmr->mr.page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 6c39851..e16751f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -48,13 +48,12 @@ static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
struct qpn_map *map, unsigned off,
- unsigned r)
+ unsigned n)
{
if (qpt->mask) {
off++;
- if ((off & qpt->mask) >> 1 != r)
- off = ((off & qpt->mask) ?
- (off | qpt->mask) + 1 : off) | (r << 1);
+ if (((off & qpt->mask) >> 1) >= n)
+ off = (off | qpt->mask) + 2;
} else
off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
return off;
@@ -123,7 +122,6 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
u32 ret;
- int r;
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -139,15 +137,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
goto bail;
}
- r = smp_processor_id();
- if (r >= dd->n_krcv_queues)
- r %= dd->n_krcv_queues;
- qpn = qpt->last + 1;
+ qpn = qpt->last + 2;
if (qpn >= QPN_MAX)
qpn = 2;
- if (qpt->mask && ((qpn & qpt->mask) >> 1) != r)
- qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) |
- (r << 1);
+ if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
+ qpn = (qpn | qpt->mask) + 2;
offset = qpn & BITS_PER_PAGE_MASK;
map = &qpt->map[qpn / BITS_PER_PAGE];
max_scan = qpt->nmaps - !offset;
@@ -163,7 +157,8 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
ret = qpn;
goto bail;
}
- offset = find_next_offset(qpt, map, offset, r);
+ offset = find_next_offset(qpt, map, offset,
+ dd->n_krcv_queues);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
@@ -183,13 +178,13 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
if (qpt->nmaps == QPNMAP_ENTRIES)
break;
map = &qpt->map[qpt->nmaps++];
- offset = qpt->mask ? (r << 1) : 0;
+ offset = 0;
} else if (map < &qpt->map[qpt->nmaps]) {
++map;
- offset = qpt->mask ? (r << 1) : 0;
+ offset = 0;
} else {
map = &qpt->map[0];
- offset = qpt->mask ? (r << 1) : 2;
+ offset = 2;
}
qpn = mk_qpn(qpt, map, offset);
}
@@ -468,6 +463,10 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
del_timer(&qp->s_timer);
}
+
+ if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
+ qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
+
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
@@ -1061,7 +1060,6 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
}
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
- qp->processor_id = smp_processor_id();
qib_reset_qp(qp, init_attr->qp_type);
break;
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 955fb71..8245237 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1407,6 +1407,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
struct qib_ctxtdata *rcd)
{
struct qib_swqe *wqe;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
enum ib_wc_status status;
unsigned long flags;
int diff;
@@ -1414,6 +1415,29 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
u32 aeth;
u64 val;
+ if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
+ /*
+ * If ACK'd PSN on SDMA busy list try to make progress to
+ * reclaim SDMA credits.
+ */
+ if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
+ (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
+
+ /*
+ * If send tasklet not running attempt to progress
+ * SDMA queue.
+ */
+ if (!(qp->s_flags & QIB_S_BUSY)) {
+ /* Acquire SDMA Lock */
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /* Invoke sdma make progress */
+ qib_sdma_make_progress(ppd);
+ /* Release SDMA Lock */
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->s_lock, flags);
/* Ignore invalid responses. */
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index e1b3da2..4a51fd1 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -445,13 +445,14 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
- /* Get the number of bytes the message was padded by. */
+ /*
+ * Get the number of bytes the message was padded by
+ * and drop incomplete packets.
+ */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- if (unlikely(tlen < (hdrsize + pad + 4))) {
- /* Drop incomplete packets. */
- ibp->n_pkt_drops++;
- goto bail;
- }
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+
tlen -= hdrsize + pad + 4;
/*
@@ -460,10 +461,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
*/
if (qp->ibqp.qp_num) {
if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
- hdr->lrh[3] == IB_LID_PERMISSIVE)) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ hdr->lrh[3] == IB_LID_PERMISSIVE))
+ goto drop;
if (qp->ibqp.qp_num > 1) {
u16 pkey1, pkey2;
@@ -476,7 +475,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
- goto bail;
+ return;
}
}
if (unlikely(qkey != qp->qkey)) {
@@ -484,30 +483,24 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
src_qp, qp->ibqp.qp_num,
hdr->lrh[3], hdr->lrh[1]);
- goto bail;
+ return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen != 256 ||
- (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+ goto drop;
} else {
struct ib_smp *smp;
/* Drop invalid MAD packets (see 13.5.3.1). */
- if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)
+ goto drop;
smp = (struct ib_smp *) data;
if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE) &&
- smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ goto drop;
}
/*
@@ -519,14 +512,12 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
- hdrsize += sizeof(u32);
+ tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
- } else {
- ibp->n_pkt_drops++;
- goto bail;
- }
+ } else
+ goto drop;
/*
* A GRH is expected to preceed the data even if not
@@ -556,8 +547,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
/* Silently drop packets which are too big. */
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_flags |= QIB_R_REUSE_SGE;
- ibp->n_pkt_drops++;
- return;
+ goto drop;
}
if (has_grh) {
qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
@@ -594,5 +584,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(IB_BTH_SOLICITED)) != 0);
-bail:;
+ return;
+
+drop:
+ ibp->n_pkt_drops++;
}
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 4c19e06..66208bc 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -382,6 +382,7 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
kmem_cache_free(pq->pkt_slab, pkt);
}
+ INIT_LIST_HEAD(list);
}
/*
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index bd57c12..63b22a9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -301,6 +301,7 @@ struct qib_mregion {
int access_flags;
u32 max_segs; /* number of qib_segs in all the arrays */
u32 mapsz; /* size of the map array */
+ u8 page_shift; /* 0 - non unform/non powerof2 sizes */
atomic_t refcount;
struct qib_segarray *map[0]; /* the segments */
};
@@ -435,7 +436,6 @@ struct qib_qp {
spinlock_t r_lock; /* used for APM */
spinlock_t s_lock;
atomic_t s_dma_busy;
- unsigned processor_id; /* Processor ID QP is bound to */
u32 s_flags;
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
@@ -813,13 +813,8 @@ extern struct workqueue_struct *qib_cq_wq;
*/
static inline void qib_schedule_send(struct qib_qp *qp)
{
- if (qib_send_ok(qp)) {
- if (qp->processor_id == smp_processor_id())
- queue_work(qib_wq, &qp->s_work);
- else
- queue_work_on(qp->processor_id,
- qib_wq, &qp->s_work);
- }
+ if (qib_send_ok(qp))
+ queue_work(qib_wq, &qp->s_work);
}
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 9d9a9dc..55855ee 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,7 +1,6 @@
config INFINIBAND_IPOIB
tristate "IP-over-InfiniBand"
depends on NETDEVICES && INET && (IPV6 || IPV6=n)
- select INET_LRO
---help---
Support for the IP-over-InfiniBand protocol (IPoIB). This
transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 753a983..ab97f92 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -50,7 +50,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
-#include <linux/inet_lro.h>
+#include <linux/sched.h>
/* constants */
@@ -100,9 +100,6 @@ enum {
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
- IPOIB_MAX_LRO_DESCRIPTORS = 8,
- IPOIB_LRO_MAX_AGGR = 64,
-
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
};
@@ -262,11 +259,6 @@ struct ipoib_ethtool_st {
u16 max_coalesced_frames;
};
-struct ipoib_lro {
- struct net_lro_mgr lro_mgr;
- struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
-};
-
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
@@ -352,8 +344,6 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
-
- struct ipoib_lro lro;
};
struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index bb10041..c1c49f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1480,6 +1480,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+ priv->dev->features |= NETIF_F_GRO;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
dev->features |= NETIF_F_TSO;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 1a1657c..19f7f52 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -106,63 +106,12 @@ static int ipoib_set_coalesce(struct net_device *dev,
return 0;
}
-static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
- "LRO aggregated", "LRO flushed",
- "LRO avg aggr", "LRO no desc"
-};
-
-static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
-{
- switch (stringset) {
- case ETH_SS_STATS:
- memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys));
- break;
- }
-}
-
-static int ipoib_get_sset_count(struct net_device *dev, int sset)
-{
- switch (sset) {
- case ETH_SS_STATS:
- return ARRAY_SIZE(ipoib_stats_keys);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static void ipoib_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, uint64_t *data)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int index = 0;
-
- /* Get LRO statistics */
- data[index++] = priv->lro.lro_mgr.stats.aggregated;
- data[index++] = priv->lro.lro_mgr.stats.flushed;
- if (priv->lro.lro_mgr.stats.flushed)
- data[index++] = priv->lro.lro_mgr.stats.aggregated /
- priv->lro.lro_mgr.stats.flushed;
- else
- data[index++] = 0;
- data[index++] = priv->lro.lro_mgr.stats.no_desc;
-}
-
-static int ipoib_set_flags(struct net_device *dev, u32 flags)
-{
- return ethtool_op_set_flags(dev, flags, ETH_FLAG_LRO);
-}
-
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_drvinfo = ipoib_get_drvinfo,
.get_rx_csum = ipoib_get_rx_csum,
.set_tso = ipoib_set_tso,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
- .get_flags = ethtool_op_get_flags,
- .set_flags = ipoib_set_flags,
- .get_strings = ipoib_get_strings,
- .get_sset_count = ipoib_get_sset_count,
- .get_ethtool_stats = ipoib_get_ethtool_stats,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index dfa7190..806d029 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -295,10 +295,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (dev->features & NETIF_F_LRO)
- lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
- else
- netif_receive_skb(skb);
+ napi_gro_receive(&priv->napi, skb);
repost:
if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -450,9 +447,6 @@ poll_more:
}
if (done < budget) {
- if (dev->features & NETIF_F_LRO)
- lro_flush_all(&priv->lro.lro_mgr);
-
napi_complete(napi);
if (unlikely(ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9ff7bc7..7a07a72 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -60,15 +60,6 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
-static int lro;
-module_param(lro, bool, 0444);
-MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
-
-static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
-module_param(lro_max_aggr, int, 0644);
-MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
- "(default = 64)");
-
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;
@@ -976,54 +967,6 @@ static const struct header_ops ipoib_header_ops = {
.create = ipoib_hard_header,
};
-static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
- void **tcph, u64 *hdr_flags, void *priv)
-{
- unsigned int ip_len;
- struct iphdr *iph;
-
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
- return -1;
-
- /*
- * In the future we may add an else clause that verifies the
- * checksum and allows devices which do not calculate checksum
- * to use LRO.
- */
- if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
- return -1;
-
- /* Check for non-TCP packet */
- skb_reset_network_header(skb);
- iph = ip_hdr(skb);
- if (iph->protocol != IPPROTO_TCP)
- return -1;
-
- ip_len = ip_hdrlen(skb);
- skb_set_transport_header(skb, ip_len);
- *tcph = tcp_hdr(skb);
-
- /* check if IP header and TCP header are complete */
- if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
- return -1;
-
- *hdr_flags = LRO_IPV4 | LRO_TCP;
- *iphdr = iph;
-
- return 0;
-}
-
-static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
-{
- priv->lro.lro_mgr.max_aggr = lro_max_aggr;
- priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
- priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
- priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
- priv->lro.lro_mgr.features = LRO_F_NAPI;
- priv->lro.lro_mgr.dev = priv->dev;
- priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
-}
-
static const struct net_device_ops ipoib_netdev_ops = {
.ndo_open = ipoib_open,
.ndo_stop = ipoib_stop,
@@ -1067,8 +1010,6 @@ static void ipoib_setup(struct net_device *dev)
priv->dev = dev;
- ipoib_lro_setup(priv);
-
spin_lock_init(&priv->lock);
mutex_init(&priv->vlan_mutex);
@@ -1218,8 +1159,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
}
- if (lro)
- priv->dev->features |= NETIF_F_LRO;
+ priv->dev->features |= NETIF_F_GRO;
if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->features |= NETIF_F_TSO;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1e1e347..4b62105 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -441,18 +441,28 @@ static void srp_disconnect_target(struct srp_target_port *target)
wait_for_completion(&target->done);
}
+static bool srp_change_state(struct srp_target_port *target,
+ enum srp_target_state old,
+ enum srp_target_state new)
+{
+ bool changed = false;
+
+ spin_lock_irq(&target->lock);
+ if (target->state == old) {
+ target->state = new;
+ changed = true;
+ }
+ spin_unlock_irq(&target->lock);
+ return changed;
+}
+
static void srp_remove_work(struct work_struct *work)
{
struct srp_target_port *target =
container_of(work, struct srp_target_port, work);
- spin_lock_irq(target->scsi_host->host_lock);
- if (target->state != SRP_TARGET_DEAD) {
- spin_unlock_irq(target->scsi_host->host_lock);
+ if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
return;
- }
- target->state = SRP_TARGET_REMOVED;
- spin_unlock_irq(target->scsi_host->host_lock);
spin_lock(&target->srp_host->target_lock);
list_del(&target->list);
@@ -539,33 +549,34 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
scsi_sg_count(scmnd), scmnd->sc_data_direction);
}
-static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
+static void srp_remove_req(struct srp_target_port *target,
+ struct srp_request *req, s32 req_lim_delta)
{
+ unsigned long flags;
+
srp_unmap_data(req->scmnd, target, req);
- list_move_tail(&req->list, &target->free_reqs);
+ spin_lock_irqsave(&target->lock, flags);
+ target->req_lim += req_lim_delta;
+ req->scmnd = NULL;
+ list_add_tail(&req->list, &target->free_reqs);
+ spin_unlock_irqrestore(&target->lock, flags);
}
static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
{
req->scmnd->result = DID_RESET << 16;
req->scmnd->scsi_done(req->scmnd);
- srp_remove_req(target, req);
+ srp_remove_req(target, req, 0);
}
static int srp_reconnect_target(struct srp_target_port *target)
{
struct ib_qp_attr qp_attr;
- struct srp_request *req, *tmp;
struct ib_wc wc;
- int ret;
+ int i, ret;
- spin_lock_irq(target->scsi_host->host_lock);
- if (target->state != SRP_TARGET_LIVE) {
- spin_unlock_irq(target->scsi_host->host_lock);
+ if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
return -EAGAIN;
- }
- target->state = SRP_TARGET_CONNECTING;
- spin_unlock_irq(target->scsi_host->host_lock);
srp_disconnect_target(target);
/*
@@ -590,27 +601,23 @@ static int srp_reconnect_target(struct srp_target_port *target)
while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
; /* nothing */
- spin_lock_irq(target->scsi_host->host_lock);
- list_for_each_entry_safe(req, tmp, &target->req_queue, list)
- srp_reset_req(target, req);
- spin_unlock_irq(target->scsi_host->host_lock);
+ for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+ struct srp_request *req = &target->req_ring[i];
+ if (req->scmnd)
+ srp_reset_req(target, req);
+ }
- target->rx_head = 0;
- target->tx_head = 0;
- target->tx_tail = 0;
+ INIT_LIST_HEAD(&target->free_tx);
+ for (i = 0; i < SRP_SQ_SIZE; ++i)
+ list_add(&target->tx_ring[i]->list, &target->free_tx);
target->qp_in_error = 0;
ret = srp_connect_target(target);
if (ret)
goto err;
- spin_lock_irq(target->scsi_host->host_lock);
- if (target->state == SRP_TARGET_CONNECTING) {
- ret = 0;
- target->state = SRP_TARGET_LIVE;
- } else
+ if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
ret = -EAGAIN;
- spin_unlock_irq(target->scsi_host->host_lock);
return ret;
@@ -620,17 +627,20 @@ err:
/*
* We couldn't reconnect, so kill our target port off.
- * However, we have to defer the real removal because we might
- * be in the context of the SCSI error handler now, which
- * would deadlock if we call scsi_remove_host().
+ * However, we have to defer the real removal because we
+ * are in the context of the SCSI error handler now, which
+ * will deadlock if we call scsi_remove_host().
+ *
+ * Schedule our work inside the lock to avoid a race with
+ * the flush_scheduled_work() in srp_remove_one().
*/
- spin_lock_irq(target->scsi_host->host_lock);
+ spin_lock_irq(&target->lock);
if (target->state == SRP_TARGET_CONNECTING) {
target->state = SRP_TARGET_DEAD;
INIT_WORK(&target->work, srp_remove_work);
schedule_work(&target->work);
}
- spin_unlock_irq(target->scsi_host->host_lock);
+ spin_unlock_irq(&target->lock);
return ret;
}
@@ -758,7 +768,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
struct srp_direct_buf *buf = (void *) cmd->add_data;
buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
- buf->key = cpu_to_be32(dev->mr->rkey);
+ buf->key = cpu_to_be32(target->rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
} else if (srp_map_fmr(target, scat, count, req,
(void *) cmd->add_data)) {
@@ -783,7 +793,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
buf->desc_list[i].va =
cpu_to_be64(ib_sg_dma_address(ibdev, sg));
buf->desc_list[i].key =
- cpu_to_be32(dev->mr->rkey);
+ cpu_to_be32(target->rkey);
buf->desc_list[i].len = cpu_to_be32(dma_len);
datalen += dma_len;
}
@@ -796,7 +806,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
buf->table_desc.va =
cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf);
buf->table_desc.key =
- cpu_to_be32(target->srp_host->srp_dev->mr->rkey);
+ cpu_to_be32(target->rkey);
buf->table_desc.len =
cpu_to_be32(count * sizeof (struct srp_direct_buf));
@@ -812,9 +822,23 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
}
/*
- * Must be called with target->scsi_host->host_lock held to protect
- * req_lim and tx_head. Lock cannot be dropped between call here and
- * call to __srp_post_send().
+ * Return an IU and possible credit to the free pool
+ */
+static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu,
+ enum srp_iu_type iu_type)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&target->lock, flags);
+ list_add(&iu->list, &target->free_tx);
+ if (iu_type != SRP_IU_RSP)
+ ++target->req_lim;
+ spin_unlock_irqrestore(&target->lock, flags);
+}
+
+/*
+ * Must be called with target->lock held to protect req_lim and free_tx.
+ * If IU is not sent, it must be returned using srp_put_tx_iu().
*
* Note:
* An upper limit for the number of allocated information units for each
@@ -833,83 +857,59 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
srp_send_completion(target->send_cq, target);
- if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
+ if (list_empty(&target->free_tx))
return NULL;
/* Initiator responses to target requests do not consume credits */
- if (target->req_lim <= rsv && iu_type != SRP_IU_RSP) {
- ++target->zero_req_lim;
- return NULL;
+ if (iu_type != SRP_IU_RSP) {
+ if (target->req_lim <= rsv) {
+ ++target->zero_req_lim;
+ return NULL;
+ }
+
+ --target->req_lim;
}
- iu = target->tx_ring[target->tx_head & SRP_SQ_MASK];
- iu->type = iu_type;
+ iu = list_first_entry(&target->free_tx, struct srp_iu, list);
+ list_del(&iu->list);
return iu;
}
-/*
- * Must be called with target->scsi_host->host_lock held to protect
- * req_lim and tx_head.
- */
-static int __srp_post_send(struct srp_target_port *target,
- struct srp_iu *iu, int len)
+static int srp_post_send(struct srp_target_port *target,
+ struct srp_iu *iu, int len)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
- int ret = 0;
list.addr = iu->dma;
list.length = len;
- list.lkey = target->srp_host->srp_dev->mr->lkey;
+ list.lkey = target->lkey;
wr.next = NULL;
- wr.wr_id = target->tx_head & SRP_SQ_MASK;
+ wr.wr_id = (uintptr_t) iu;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
- ret = ib_post_send(target->qp, &wr, &bad_wr);
-
- if (!ret) {
- ++target->tx_head;
- if (iu->type != SRP_IU_RSP)
- --target->req_lim;
- }
-
- return ret;
+ return ib_post_send(target->qp, &wr, &bad_wr);
}
-static int srp_post_recv(struct srp_target_port *target)
+static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu)
{
- unsigned long flags;
- struct srp_iu *iu;
- struct ib_sge list;
struct ib_recv_wr wr, *bad_wr;
- unsigned int next;
- int ret;
-
- spin_lock_irqsave(target->scsi_host->host_lock, flags);
-
- next = target->rx_head & SRP_RQ_MASK;
- wr.wr_id = next;
- iu = target->rx_ring[next];
+ struct ib_sge list;
list.addr = iu->dma;
list.length = iu->size;
- list.lkey = target->srp_host->srp_dev->mr->lkey;
+ list.lkey = target->lkey;
wr.next = NULL;
+ wr.wr_id = (uintptr_t) iu;
wr.sg_list = &list;
wr.num_sge = 1;
- ret = ib_post_recv(target->qp, &wr, &bad_wr);
- if (!ret)
- ++target->rx_head;
-
- spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
-
- return ret;
+ return ib_post_recv(target->qp, &wr, &bad_wr);
}
static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
@@ -917,23 +917,18 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
struct srp_request *req;
struct scsi_cmnd *scmnd;
unsigned long flags;
- s32 delta;
-
- delta = (s32) be32_to_cpu(rsp->req_lim_delta);
-
- spin_lock_irqsave(target->scsi_host->host_lock, flags);
-
- target->req_lim += delta;
-
- req = &target->req_ring[rsp->tag & ~SRP_TAG_TSK_MGMT];
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
- if (be32_to_cpu(rsp->resp_data_len) < 4)
- req->tsk_status = -1;
- else
- req->tsk_status = rsp->data[3];
- complete(&req->done);
+ spin_lock_irqsave(&target->lock, flags);
+ target->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ spin_unlock_irqrestore(&target->lock, flags);
+
+ target->tsk_mgmt_status = -1;
+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
+ target->tsk_mgmt_status = rsp->data[3];
+ complete(&target->tsk_mgmt_done);
} else {
+ req = &target->req_ring[rsp->tag];
scmnd = req->scmnd;
if (!scmnd)
shost_printk(KERN_ERR, target->scsi_host,
@@ -953,49 +948,42 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
- if (!req->tsk_mgmt) {
- scmnd->host_scribble = (void *) -1L;
- scmnd->scsi_done(scmnd);
-
- srp_remove_req(target, req);
- } else
- req->cmd_done = 1;
+ srp_remove_req(target, req, be32_to_cpu(rsp->req_lim_delta));
+ scmnd->host_scribble = NULL;
+ scmnd->scsi_done(scmnd);
}
-
- spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
}
static int srp_response_common(struct srp_target_port *target, s32 req_delta,
void *rsp, int len)
{
- struct ib_device *dev;
+ struct ib_device *dev = target->srp_host->srp_dev->dev;
unsigned long flags;
struct srp_iu *iu;
- int err = 1;
+ int err;
- dev = target->srp_host->srp_dev->dev;
-
- spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ spin_lock_irqsave(&target->lock, flags);
target->req_lim += req_delta;
-
iu = __srp_get_tx_iu(target, SRP_IU_RSP);
+ spin_unlock_irqrestore(&target->lock, flags);
+
if (!iu) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"no IU available to send response\n");
- goto out;
+ return 1;
}
ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
memcpy(iu->buf, rsp, len);
ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
- err = __srp_post_send(target, iu, len);
- if (err)
+ err = srp_post_send(target, iu, len);
+ if (err) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"unable to post response: %d\n", err);
+ srp_put_tx_iu(target, iu, SRP_IU_RSP);
+ }
-out:
- spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
return err;
}
@@ -1032,14 +1020,11 @@ static void srp_process_aer_req(struct srp_target_port *target,
static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
{
- struct ib_device *dev;
- struct srp_iu *iu;
+ struct ib_device *dev = target->srp_host->srp_dev->dev;
+ struct srp_iu *iu = (struct srp_iu *) wc->wr_id;
int res;
u8 opcode;
- iu = target->rx_ring[wc->wr_id];
-
- dev = target->srp_host->srp_dev->dev;
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,
DMA_FROM_DEVICE);
@@ -1080,7 +1065,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len,
DMA_FROM_DEVICE);
- res = srp_post_recv(target);
+ res = srp_post_recv(target, iu);
if (res != 0)
shost_printk(KERN_ERR, target->scsi_host,
PFX "Recv failed with error code %d\n", res);
@@ -1109,6 +1094,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
{
struct srp_target_port *target = target_ptr;
struct ib_wc wc;
+ struct srp_iu *iu;
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (wc.status) {
@@ -1119,18 +1105,19 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
break;
}
- ++target->tx_tail;
+ iu = (struct srp_iu *) wc.wr_id;
+ list_add(&iu->list, &target->free_tx);
}
}
-static int srp_queuecommand_lck(struct scsi_cmnd *scmnd,
- void (*done)(struct scsi_cmnd *))
+static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
- struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_target_port *target = host_to_target(shost);
struct srp_request *req;
struct srp_iu *iu;
struct srp_cmd *cmd;
struct ib_device *dev;
+ unsigned long flags;
int len;
if (target->state == SRP_TARGET_CONNECTING)
@@ -1139,11 +1126,19 @@ static int srp_queuecommand_lck(struct scsi_cmnd *scmnd,
if (target->state == SRP_TARGET_DEAD ||
target->state == SRP_TARGET_REMOVED) {
scmnd->result = DID_BAD_TARGET << 16;
- done(scmnd);
+ scmnd->scsi_done(scmnd);
return 0;
}
+ spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
+ if (iu) {
+ req = list_first_entry(&target->free_reqs, struct srp_request,
+ list);
+ list_del(&req->list);
+ }
+ spin_unlock_irqrestore(&target->lock, flags);
+
if (!iu)
goto err;
@@ -1151,11 +1146,8 @@ static int srp_queuecommand_lck(struct scsi_cmnd *scmnd,
ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len,
DMA_TO_DEVICE);
- req = list_first_entry(&target->free_reqs, struct srp_request, list);
-
- scmnd->scsi_done = done;
scmnd->result = 0;
- scmnd->host_scribble = (void *) (long) req->index;
+ scmnd->host_scribble = (void *) req;
cmd = iu->buf;
memset(cmd, 0, sizeof *cmd);
@@ -1167,37 +1159,38 @@ static int srp_queuecommand_lck(struct scsi_cmnd *scmnd,
req->scmnd = scmnd;
req->cmd = iu;
- req->cmd_done = 0;
- req->tsk_mgmt = NULL;
len = srp_map_data(scmnd, target, req);
if (len < 0) {
shost_printk(KERN_ERR, target->scsi_host,
PFX "Failed to map data\n");
- goto err;
+ goto err_iu;
}
ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
DMA_TO_DEVICE);
- if (__srp_post_send(target, iu, len)) {
+ if (srp_post_send(target, iu, len)) {
shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
goto err_unmap;
}
- list_move_tail(&req->list, &target->req_queue);
-
return 0;
err_unmap:
srp_unmap_data(scmnd, target, req);
+err_iu:
+ srp_put_tx_iu(target, iu, SRP_IU_CMD);
+
+ spin_lock_irqsave(&target->lock, flags);
+ list_add(&req->list, &target->free_reqs);
+ spin_unlock_irqrestore(&target->lock, flags);
+
err:
return SCSI_MLQUEUE_HOST_BUSY;
}
-static DEF_SCSI_QCMD(srp_queuecommand)
-
static int srp_alloc_iu_bufs(struct srp_target_port *target)
{
int i;
@@ -1216,6 +1209,8 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
GFP_KERNEL, DMA_TO_DEVICE);
if (!target->tx_ring[i])
goto err;
+
+ list_add(&target->tx_ring[i]->list, &target->free_tx);
}
return 0;
@@ -1377,7 +1372,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
break;
for (i = 0; i < SRP_RQ_SIZE; i++) {
- target->status = srp_post_recv(target);
+ struct srp_iu *iu = target->rx_ring[i];
+ target->status = srp_post_recv(target, iu);
if (target->status)
break;
}
@@ -1442,25 +1438,24 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
}
static int srp_send_tsk_mgmt(struct srp_target_port *target,
- struct srp_request *req, u8 func)
+ u64 req_tag, unsigned int lun, u8 func)
{
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
- spin_lock_irq(target->scsi_host->host_lock);
-
if (target->state == SRP_TARGET_DEAD ||
- target->state == SRP_TARGET_REMOVED) {
- req->scmnd->result = DID_BAD_TARGET << 16;
- goto out;
- }
+ target->state == SRP_TARGET_REMOVED)
+ return -1;
- init_completion(&req->done);
+ init_completion(&target->tsk_mgmt_done);
+ spin_lock_irq(&target->lock);
iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
+ spin_unlock_irq(&target->lock);
+
if (!iu)
- goto out;
+ return -1;
ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
@@ -1468,70 +1463,46 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
tsk_mgmt->opcode = SRP_TSK_MGMT;
- tsk_mgmt->lun = cpu_to_be64((u64) req->scmnd->device->lun << 48);
- tsk_mgmt->tag = req->index | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->lun = cpu_to_be64((u64) lun << 48);
+ tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func;
- tsk_mgmt->task_tag = req->index;
+ tsk_mgmt->task_tag = req_tag;
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
- if (__srp_post_send(target, iu, sizeof *tsk_mgmt))
- goto out;
-
- req->tsk_mgmt = iu;
-
- spin_unlock_irq(target->scsi_host->host_lock);
-
- if (!wait_for_completion_timeout(&req->done,
- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
+ if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
+ srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
return -1;
+ }
- return 0;
-
-out:
- spin_unlock_irq(target->scsi_host->host_lock);
- return -1;
-}
-
-static int srp_find_req(struct srp_target_port *target,
- struct scsi_cmnd *scmnd,
- struct srp_request **req)
-{
- if (scmnd->host_scribble == (void *) -1L)
+ if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
return -1;
- *req = &target->req_ring[(long) scmnd->host_scribble];
-
return 0;
}
static int srp_abort(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- struct srp_request *req;
+ struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
int ret = SUCCESS;
shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
- if (target->qp_in_error)
+ if (!req || target->qp_in_error)
return FAILED;
- if (srp_find_req(target, scmnd, &req))
+ if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
+ SRP_TSK_ABORT_TASK))
return FAILED;
- if (srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK))
- return FAILED;
-
- spin_lock_irq(target->scsi_host->host_lock);
- if (req->cmd_done) {
- srp_remove_req(target, req);
- scmnd->scsi_done(scmnd);
- } else if (!req->tsk_status) {
- srp_remove_req(target, req);
- scmnd->result = DID_ABORT << 16;
- } else
- ret = FAILED;
-
- spin_unlock_irq(target->scsi_host->host_lock);
+ if (req->scmnd) {
+ if (!target->tsk_mgmt_status) {
+ srp_remove_req(target, req, 0);
+ scmnd->result = DID_ABORT << 16;
+ } else
+ ret = FAILED;
+ }
return ret;
}
@@ -1539,26 +1510,23 @@ static int srp_abort(struct scsi_cmnd *scmnd)
static int srp_reset_device(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- struct srp_request *req, *tmp;
+ int i;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
if (target->qp_in_error)
return FAILED;
- if (srp_find_req(target, scmnd, &req))
+ if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun,
+ SRP_TSK_LUN_RESET))
return FAILED;
- if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET))
+ if (target->tsk_mgmt_status)
return FAILED;
- if (req->tsk_status)
- return FAILED;
-
- spin_lock_irq(target->scsi_host->host_lock);
- list_for_each_entry_safe(req, tmp, &target->req_queue, list)
- if (req->scmnd->device == scmnd->device)
+ for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+ struct srp_request *req = &target->req_ring[i];
+ if (req->scmnd && req->scmnd->device == scmnd->device)
srp_reset_req(target, req);
-
- spin_unlock_irq(target->scsi_host->host_lock);
+ }
return SUCCESS;
}
@@ -1987,9 +1955,12 @@ static ssize_t srp_create_target(struct device *dev,
target->io_class = SRP_REV16A_IB_IO_CLASS;
target->scsi_host = target_host;
target->srp_host = host;
+ target->lkey = host->srp_dev->mr->lkey;
+ target->rkey = host->srp_dev->mr->rkey;
+ spin_lock_init(&target->lock);
+ INIT_LIST_HEAD(&target->free_tx);
INIT_LIST_HEAD(&target->free_reqs);
- INIT_LIST_HEAD(&target->req_queue);
for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
target->req_ring[i].index = i;
list_add_tail(&target->req_ring[i].list, &target->free_reqs);
@@ -2217,9 +2188,9 @@ static void srp_remove_one(struct ib_device *device)
*/
spin_lock(&host->target_lock);
list_for_each_entry(target, &host->target_list, list) {
- spin_lock_irq(target->scsi_host->host_lock);
+ spin_lock_irq(&target->lock);
target->state = SRP_TARGET_REMOVED;
- spin_unlock_irq(target->scsi_host->host_lock);
+ spin_unlock_irq(&target->lock);
}
spin_unlock(&host->target_lock);
@@ -2258,8 +2229,7 @@ static int __init srp_init_module(void)
{
int ret;
- BUILD_BUG_ON_NOT_POWER_OF_2(SRP_SQ_SIZE);
- BUILD_BUG_ON_NOT_POWER_OF_2(SRP_RQ_SIZE);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
if (srp_sg_tablesize > 255) {
printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n");
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index ed0dce9..9dc6fc3 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -59,16 +59,15 @@ enum {
SRP_RQ_SHIFT = 6,
SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
- SRP_RQ_MASK = SRP_RQ_SIZE - 1,
SRP_SQ_SIZE = SRP_RQ_SIZE,
- SRP_SQ_MASK = SRP_SQ_SIZE - 1,
SRP_RSP_SQ_SIZE = 1,
SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
SRP_TSK_MGMT_SQ_SIZE = 1,
SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,
- SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1),
+ SRP_TAG_NO_REQ = ~0U,
+ SRP_TAG_TSK_MGMT = 1U << 31,
SRP_FMR_SIZE = 256,
SRP_FMR_POOL_SIZE = 1024,
@@ -113,15 +112,29 @@ struct srp_request {
struct list_head list;
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
- struct srp_iu *tsk_mgmt;
struct ib_pool_fmr *fmr;
- struct completion done;
short index;
- u8 cmd_done;
- u8 tsk_status;
};
struct srp_target_port {
+ /* These are RW in the hot path, and commonly used together */
+ struct list_head free_tx;
+ struct list_head free_reqs;
+ spinlock_t lock;
+ s32 req_lim;
+
+ /* These are read-only in the hot path */
+ struct ib_cq *send_cq ____cacheline_aligned_in_smp;
+ struct ib_cq *recv_cq;
+ struct ib_qp *qp;
+ u32 lkey;
+ u32 rkey;
+ enum srp_target_state state;
+
+ /* Everything above this point is used in the hot path of
+ * command processing. Try to keep them packed into cachelines.
+ */
+
__be64 id_ext;
__be64 ioc_guid;
__be64 service_id;
@@ -138,24 +151,13 @@ struct srp_target_port {
int path_query_id;
struct ib_cm_id *cm_id;
- struct ib_cq *recv_cq;
- struct ib_cq *send_cq;
- struct ib_qp *qp;
int max_ti_iu_len;
- s32 req_lim;
int zero_req_lim;
- unsigned rx_head;
- struct srp_iu *rx_ring[SRP_RQ_SIZE];
-
- unsigned tx_head;
- unsigned tx_tail;
struct srp_iu *tx_ring[SRP_SQ_SIZE];
-
- struct list_head free_reqs;
- struct list_head req_queue;
+ struct srp_iu *rx_ring[SRP_RQ_SIZE];
struct srp_request req_ring[SRP_CMD_SQ_SIZE];
struct work_struct work;
@@ -163,16 +165,18 @@ struct srp_target_port {
struct list_head list;
struct completion done;
int status;
- enum srp_target_state state;
int qp_in_error;
+
+ struct completion tsk_mgmt_done;
+ u8 tsk_mgmt_status;
};
struct srp_iu {
+ struct list_head list;
u64 dma;
void *buf;
size_t size;
enum dma_data_direction direction;
- enum srp_iu_type type;
};
#endif /* IB_SRP_H */
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index b6e7ddc..4daf9e5 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -387,11 +387,10 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
/* Set the DMA ops to the ones from the PCI device, this could be
* fishy if we didn't know that on PowerMac it's always direct ops
* or iommu ops that will work fine
+ *
+ * To get all the fields, copy all archdata
*/
- dev->ofdev.dev.archdata.dma_ops =
- chip->lbus.pdev->dev.archdata.dma_ops;
- dev->ofdev.dev.archdata.dma_data =
- chip->lbus.pdev->dev.archdata.dma_data;
+ dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
#endif /* CONFIG_PCI */
#ifdef DEBUG
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 4454927..2e041fd 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -2213,6 +2213,9 @@ static void fcu_lookup_fans(struct device_node *fcu_node)
static int fcu_of_probe(struct platform_device* dev, const struct of_device_id *match)
{
state = state_detached;
+ of_dev = dev;
+
+ dev_info(&dev->dev, "PowerMac G5 Thermal control driver %s\n", VERSION);
/* Lookup the fans in the device tree */
fcu_lookup_fans(dev->dev.of_node);
@@ -2235,6 +2238,7 @@ static const struct of_device_id fcu_match[] =
},
{},
};
+MODULE_DEVICE_TABLE(of, fcu_match);
static struct of_platform_driver fcu_of_platform_driver =
{
@@ -2252,8 +2256,6 @@ static struct of_platform_driver fcu_of_platform_driver =
*/
static int __init therm_pm72_init(void)
{
- struct device_node *np;
-
rackmac = of_machine_is_compatible("RackMac3,1");
if (!of_machine_is_compatible("PowerMac7,2") &&
@@ -2261,34 +2263,12 @@ static int __init therm_pm72_init(void)
!rackmac)
return -ENODEV;
- printk(KERN_INFO "PowerMac G5 Thermal control driver %s\n", VERSION);
-
- np = of_find_node_by_type(NULL, "fcu");
- if (np == NULL) {
- /* Some machines have strangely broken device-tree */
- np = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e");
- if (np == NULL) {
- printk(KERN_ERR "Can't find FCU in device-tree !\n");
- return -ENODEV;
- }
- }
- of_dev = of_platform_device_create(np, "temperature", NULL);
- if (of_dev == NULL) {
- printk(KERN_ERR "Can't register FCU platform device !\n");
- return -ENODEV;
- }
-
- of_register_platform_driver(&fcu_of_platform_driver);
-
- return 0;
+ return of_register_platform_driver(&fcu_of_platform_driver);
}
static void __exit therm_pm72_exit(void)
{
of_unregister_platform_driver(&fcu_of_platform_driver);
-
- if (of_dev)
- of_device_unregister(of_dev);
}
module_init(therm_pm72_init);
diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c
index f1714f9..0a7df44 100644
--- a/drivers/mfd/sh_mobile_sdhi.c
+++ b/drivers/mfd/sh_mobile_sdhi.c
@@ -131,11 +131,17 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
*/
mmc_data->flags |= TMIO_MMC_BLKSZ_2BYTES;
+ /*
+ * All SDHI blocks support SDIO IRQ signalling.
+ */
+ mmc_data->flags |= TMIO_MMC_SDIO_IRQ;
+
if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) {
priv->param_tx.slave_id = p->dma_slave_tx;
priv->param_rx.slave_id = p->dma_slave_rx;
priv->dma_priv.chan_priv_tx = &priv->param_tx;
priv->dma_priv.chan_priv_rx = &priv->param_rx;
+ priv->dma_priv.alignment_shift = 1; /* 2-byte alignment */
mmc_data->dma = &priv->dma_priv;
}
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 57e4416..2a876c4 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -16,6 +16,7 @@ config MMC_BLOCK
config MMC_BLOCK_MINORS
int "Number of minors per block device"
+ depends on MMC_BLOCK
range 4 256
default 8
help
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index bb22ffd..ef10387 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -16,3 +16,14 @@ config MMC_UNSAFE_RESUME
This option sets a default which can be overridden by the
module parameter "removable=0" or "removable=1".
+
+config MMC_CLKGATE
+ bool "MMC host clock gating (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ This will attempt to aggressively gate the clock to the MMC card.
+ This is done to save power due to gating off the logic and bus
+ noise when the MMC card is not in use. Your host driver has to
+ support handling this in order for it to be of any use.
+
+ If unsure, say N.
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index af8dc6a..63667a8 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -303,14 +303,14 @@ int mmc_add_card(struct mmc_card *card)
type, card->rca);
}
- ret = device_add(&card->dev);
- if (ret)
- return ret;
-
#ifdef CONFIG_DEBUG_FS
mmc_add_card_debugfs(card);
#endif
+ ret = device_add(&card->dev);
+ if (ret)
+ return ret;
+
mmc_card_set_present(card);
return 0;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index a3a780f..6625c05 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -22,6 +22,7 @@
#include <linux/scatterlist.h>
#include <linux/log2.h>
#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -130,6 +131,8 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
if (mrq->done)
mrq->done(mrq);
+
+ mmc_host_clk_gate(host);
}
}
@@ -190,6 +193,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
mrq->stop->mrq = mrq;
}
}
+ mmc_host_clk_ungate(host);
host->ops->request(host, mrq);
}
@@ -295,8 +299,9 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
unsigned int timeout_us, limit_us;
timeout_us = data->timeout_ns / 1000;
- timeout_us += data->timeout_clks * 1000 /
- (card->host->ios.clock / 1000);
+ if (mmc_host_clk_rate(card->host))
+ timeout_us += data->timeout_clks * 1000 /
+ (mmc_host_clk_rate(card->host) / 1000);
if (data->flags & MMC_DATA_WRITE)
/*
@@ -614,6 +619,8 @@ static inline void mmc_set_ios(struct mmc_host *host)
ios->power_mode, ios->chip_select, ios->vdd,
ios->bus_width, ios->timing);
+ if (ios->clock > 0)
+ mmc_set_ungated(host);
host->ops->set_ios(host, ios);
}
@@ -641,6 +648,61 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz)
mmc_set_ios(host);
}
+#ifdef CONFIG_MMC_CLKGATE
+/*
+ * This gates the clock by setting it to 0 Hz.
+ */
+void mmc_gate_clock(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->clk_lock, flags);
+ host->clk_old = host->ios.clock;
+ host->ios.clock = 0;
+ host->clk_gated = true;
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ mmc_set_ios(host);
+}
+
+/*
+ * This restores the clock from gating by using the cached
+ * clock value.
+ */
+void mmc_ungate_clock(struct mmc_host *host)
+{
+ /*
+ * We should previously have gated the clock, so the clock shall
+ * be 0 here! The clock may however be 0 during initialization,
+ * when some request operations are performed before setting
+ * the frequency. When ungate is requested in that situation
+ * we just ignore the call.
+ */
+ if (host->clk_old) {
+ BUG_ON(host->ios.clock);
+ /* This call will also set host->clk_gated to false */
+ mmc_set_clock(host, host->clk_old);
+ }
+}
+
+void mmc_set_ungated(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ /*
+ * We've been given a new frequency while the clock is gated,
+ * so make sure we regard this as ungating it.
+ */
+ spin_lock_irqsave(&host->clk_lock, flags);
+ host->clk_gated = false;
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+}
+
+#else
+void mmc_set_ungated(struct mmc_host *host)
+{
+}
+#endif
+
/*
* Change the bus mode (open drain/push-pull) of a host.
*/
@@ -1424,35 +1486,57 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
}
EXPORT_SYMBOL(mmc_set_blocklen);
+static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
+{
+ host->f_init = freq;
+
+#ifdef CONFIG_MMC_DEBUG
+ pr_info("%s: %s: trying to init card at %u Hz\n",
+ mmc_hostname(host), __func__, host->f_init);
+#endif
+ mmc_power_up(host);
+ sdio_reset(host);
+ mmc_go_idle(host);
+
+ mmc_send_if_cond(host, host->ocr_avail);
+
+ /* Order's important: probe SDIO, then SD, then MMC */
+ if (!mmc_attach_sdio(host))
+ return 0;
+ if (!mmc_attach_sd(host))
+ return 0;
+ if (!mmc_attach_mmc(host))
+ return 0;
+
+ mmc_power_off(host);
+ return -EIO;
+}
+
void mmc_rescan(struct work_struct *work)
{
+ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
struct mmc_host *host =
container_of(work, struct mmc_host, detect.work);
- u32 ocr;
- int err;
- unsigned long flags;
int i;
- const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
-
- spin_lock_irqsave(&host->lock, flags);
- if (host->rescan_disable) {
- spin_unlock_irqrestore(&host->lock, flags);
+ if (host->rescan_disable)
return;
- }
-
- spin_unlock_irqrestore(&host->lock, flags);
-
mmc_bus_get(host);
- /* if there is a card registered, check whether it is still present */
- if ((host->bus_ops != NULL) && host->bus_ops->detect && !host->bus_dead)
+ /*
+ * if there is a _removable_ card registered, check whether it is
+ * still present
+ */
+ if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
+ && mmc_card_is_removable(host))
host->bus_ops->detect(host);
+ /*
+ * Let mmc_bus_put() free the bus/bus_ops if we've found that
+ * the card is no longer present.
+ */
mmc_bus_put(host);
-
-
mmc_bus_get(host);
/* if there still is a card present, stop here */
@@ -1461,8 +1545,6 @@ void mmc_rescan(struct work_struct *work)
goto out;
}
- /* detect a newly inserted card */
-
/*
* Only we can add a new handler, so it's safe to
* release the lock here.
@@ -1472,72 +1554,16 @@ void mmc_rescan(struct work_struct *work)
if (host->ops->get_cd && host->ops->get_cd(host) == 0)
goto out;
+ mmc_claim_host(host);
for (i = 0; i < ARRAY_SIZE(freqs); i++) {
- mmc_claim_host(host);
-
- if (freqs[i] >= host->f_min)
- host->f_init = freqs[i];
- else if (!i || freqs[i-1] > host->f_min)
- host->f_init = host->f_min;
- else {
- mmc_release_host(host);
- goto out;
- }
-#ifdef CONFIG_MMC_DEBUG
- pr_info("%s: %s: trying to init card at %u Hz\n",
- mmc_hostname(host), __func__, host->f_init);
-#endif
- mmc_power_up(host);
- sdio_reset(host);
- mmc_go_idle(host);
-
- mmc_send_if_cond(host, host->ocr_avail);
-
- /*
- * First we search for SDIO...
- */
- err = mmc_send_io_op_cond(host, 0, &ocr);
- if (!err) {
- if (mmc_attach_sdio(host, ocr)) {
- mmc_claim_host(host);
- /*
- * Try SDMEM (but not MMC) even if SDIO
- * is broken.
- */
- if (mmc_send_app_op_cond(host, 0, &ocr))
- goto out_fail;
-
- if (mmc_attach_sd(host, ocr))
- mmc_power_off(host);
- }
- goto out;
- }
-
- /*
- * ...then normal SD...
- */
- err = mmc_send_app_op_cond(host, 0, &ocr);
- if (!err) {
- if (mmc_attach_sd(host, ocr))
- mmc_power_off(host);
- goto out;
- }
-
- /*
- * ...and finally MMC.
- */
- err = mmc_send_op_cond(host, 0, &ocr);
- if (!err) {
- if (mmc_attach_mmc(host, ocr))
- mmc_power_off(host);
- goto out;
- }
-
-out_fail:
- mmc_release_host(host);
- mmc_power_off(host);
+ if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
+ break;
+ if (freqs[i] < host->f_min)
+ break;
}
-out:
+ mmc_release_host(host);
+
+ out:
if (host->caps & MMC_CAP_NEEDS_POLL)
mmc_schedule_delayed_work(&host->detect, HZ);
}
@@ -1721,6 +1747,18 @@ int mmc_resume_host(struct mmc_host *host)
if (!(host->pm_flags & MMC_PM_KEEP_POWER)) {
mmc_power_up(host);
mmc_select_voltage(host, host->ocr);
+ /*
+ * Tell runtime PM core we just powered up the card,
+ * since it still believes the card is powered off.
+ * Note that currently runtime PM is only enabled
+ * for SDIO cards that are MMC_CAP_POWER_OFF_CARD
+ */
+ if (mmc_card_sdio(host->card) &&
+ (host->caps & MMC_CAP_POWER_OFF_CARD)) {
+ pm_runtime_disable(&host->card->dev);
+ pm_runtime_set_active(&host->card->dev);
+ pm_runtime_enable(&host->card->dev);
+ }
}
BUG_ON(!host->bus_ops->resume);
err = host->bus_ops->resume(host);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 77240cd..ca1fdde 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -33,6 +33,9 @@ void mmc_init_erase(struct mmc_card *card);
void mmc_set_chip_select(struct mmc_host *host, int mode);
void mmc_set_clock(struct mmc_host *host, unsigned int hz);
+void mmc_gate_clock(struct mmc_host *host);
+void mmc_ungate_clock(struct mmc_host *host);
+void mmc_set_ungated(struct mmc_host *host);
void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode);
void mmc_set_bus_width(struct mmc_host *host, unsigned int width);
void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
@@ -54,9 +57,9 @@ void mmc_rescan(struct work_struct *work);
void mmc_start_host(struct mmc_host *host);
void mmc_stop_host(struct mmc_host *host);
-int mmc_attach_mmc(struct mmc_host *host, u32 ocr);
-int mmc_attach_sd(struct mmc_host *host, u32 ocr);
-int mmc_attach_sdio(struct mmc_host *host, u32 ocr);
+int mmc_attach_mmc(struct mmc_host *host);
+int mmc_attach_sd(struct mmc_host *host);
+int mmc_attach_sdio(struct mmc_host *host);
/* Module parameters */
extern int use_spi_crc;
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index eed1405..998797e 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -183,6 +183,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
&mmc_clock_fops))
goto err_node;
+#ifdef CONFIG_MMC_CLKGATE
+ if (!debugfs_create_u32("clk_delay", (S_IRUSR | S_IWUSR),
+ root, &host->clk_delay))
+ goto err_node;
+#endif
return;
err_node:
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 10b8af2..b3ac6c5 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Russell King, All Rights Reserved.
* Copyright (C) 2007-2008 Pierre Ossman
+ * Copyright (C) 2010 Linus Walleij
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -20,6 +21,7 @@
#include <linux/suspend.h>
#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
#include "core.h"
#include "host.h"
@@ -50,6 +52,205 @@ void mmc_unregister_host_class(void)
static DEFINE_IDR(mmc_host_idr);
static DEFINE_SPINLOCK(mmc_host_lock);
+#ifdef CONFIG_MMC_CLKGATE
+
+/*
+ * Enabling clock gating will make the core call out to the host
+ * once up and once down when it performs a request or card operation
+ * intermingled in any fashion. The driver will see this through
+ * set_ios() operations with ios.clock field set to 0 to gate (disable)
+ * the block clock, and to the old frequency to enable it again.
+ */
+static void mmc_host_clk_gate_delayed(struct mmc_host *host)
+{
+ unsigned long tick_ns;
+ unsigned long freq = host->ios.clock;
+ unsigned long flags;
+
+ if (!freq) {
+ pr_debug("%s: frequency set to 0 in disable function, "
+ "this means the clock is already disabled.\n",
+ mmc_hostname(host));
+ return;
+ }
+ /*
+ * New requests may have appeared while we were scheduling,
+ * then there is no reason to delay the check before
+ * clk_disable().
+ */
+ spin_lock_irqsave(&host->clk_lock, flags);
+
+ /*
+ * Delay n bus cycles (at least 8 from MMC spec) before attempting
+ * to disable the MCI block clock. The reference count may have
+ * gone up again after this delay due to rescheduling!
+ */
+ if (!host->clk_requests) {
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ tick_ns = DIV_ROUND_UP(1000000000, freq);
+ ndelay(host->clk_delay * tick_ns);
+ } else {
+ /* New users appeared while waiting for this work */
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ return;
+ }
+ mutex_lock(&host->clk_gate_mutex);
+ spin_lock_irqsave(&host->clk_lock, flags);
+ if (!host->clk_requests) {
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ /* This will set host->ios.clock to 0 */
+ mmc_gate_clock(host);
+ spin_lock_irqsave(&host->clk_lock, flags);
+ pr_debug("%s: gated MCI clock\n", mmc_hostname(host));
+ }
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ mutex_unlock(&host->clk_gate_mutex);
+}
+
+/*
+ * Internal work. Work to disable the clock at some later point.
+ */
+static void mmc_host_clk_gate_work(struct work_struct *work)
+{
+ struct mmc_host *host = container_of(work, struct mmc_host,
+ clk_gate_work);
+
+ mmc_host_clk_gate_delayed(host);
+}
+
+/**
+ * mmc_host_clk_ungate - ungate hardware MCI clocks
+ * @host: host to ungate.
+ *
+ * Makes sure the host ios.clock is restored to a non-zero value
+ * past this call. Increase clock reference count and ungate clock
+ * if we're the first user.
+ */
+void mmc_host_clk_ungate(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ mutex_lock(&host->clk_gate_mutex);
+ spin_lock_irqsave(&host->clk_lock, flags);
+ if (host->clk_gated) {
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ mmc_ungate_clock(host);
+ spin_lock_irqsave(&host->clk_lock, flags);
+ pr_debug("%s: ungated MCI clock\n", mmc_hostname(host));
+ }
+ host->clk_requests++;
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ mutex_unlock(&host->clk_gate_mutex);
+}
+
+/**
+ * mmc_host_may_gate_card - check if this card may be gated
+ * @card: card to check.
+ */
+static bool mmc_host_may_gate_card(struct mmc_card *card)
+{
+ /* If there is no card we may gate it */
+ if (!card)
+ return true;
+ /*
+ * Don't gate SDIO cards! These need to be clocked at all times
+ * since they may be independent systems generating interrupts
+ * and other events. The clock requests counter from the core will
+ * go down to zero since the core does not need it, but we will not
+ * gate the clock, because there is somebody out there that may still
+ * be using it.
+ */
+ if (mmc_card_sdio(card))
+ return false;
+
+ return true;
+}
+
+/**
+ * mmc_host_clk_gate - gate off hardware MCI clocks
+ * @host: host to gate.
+ *
+ * Calls the host driver with ios.clock set to zero as often as possible
+ * in order to gate off hardware MCI clocks. Decrease clock reference
+ * count and schedule disabling of clock.
+ */
+void mmc_host_clk_gate(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->clk_lock, flags);
+ host->clk_requests--;
+ if (mmc_host_may_gate_card(host->card) &&
+ !host->clk_requests)
+ schedule_work(&host->clk_gate_work);
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+}
+
+/**
+ * mmc_host_clk_rate - get current clock frequency setting
+ * @host: host to get the clock frequency for.
+ *
+ * Returns current clock frequency regardless of gating.
+ */
+unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+ unsigned long freq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->clk_lock, flags);
+ if (host->clk_gated)
+ freq = host->clk_old;
+ else
+ freq = host->ios.clock;
+ spin_unlock_irqrestore(&host->clk_lock, flags);
+ return freq;
+}
+
+/**
+ * mmc_host_clk_init - set up clock gating code
+ * @host: host with potential clock to control
+ */
+static inline void mmc_host_clk_init(struct mmc_host *host)
+{
+ host->clk_requests = 0;
+ /* Hold MCI clock for 8 cycles by default */
+ host->clk_delay = 8;
+ host->clk_gated = false;
+ INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+ spin_lock_init(&host->clk_lock);
+ mutex_init(&host->clk_gate_mutex);
+}
+
+/**
+ * mmc_host_clk_exit - shut down clock gating code
+ * @host: host with potential clock to control
+ */
+static inline void mmc_host_clk_exit(struct mmc_host *host)
+{
+ /*
+ * Wait for any outstanding gate and then make sure we're
+ * ungated before exiting.
+ */
+ if (cancel_work_sync(&host->clk_gate_work))
+ mmc_host_clk_gate_delayed(host);
+ if (host->clk_gated)
+ mmc_host_clk_ungate(host);
+ /* There should be only one user now */
+ WARN_ON(host->clk_requests > 1);
+}
+
+#else
+
+static inline void mmc_host_clk_init(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_exit(struct mmc_host *host)
+{
+}
+
+#endif
+
/**
* mmc_alloc_host - initialise the per-host structure.
* @extra: sizeof private data structure
@@ -82,6 +283,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
host->class_dev.class = &mmc_host_class;
device_initialize(&host->class_dev);
+ mmc_host_clk_init(host);
+
spin_lock_init(&host->lock);
init_waitqueue_head(&host->wq);
INIT_DELAYED_WORK(&host->detect, mmc_rescan);
@@ -163,6 +366,8 @@ void mmc_remove_host(struct mmc_host *host)
device_del(&host->class_dev);
led_trigger_unregister_simple(host->led);
+
+ mmc_host_clk_exit(host);
}
EXPORT_SYMBOL(mmc_remove_host);
@@ -183,4 +388,3 @@ void mmc_free_host(struct mmc_host *host)
}
EXPORT_SYMBOL(mmc_free_host);
-
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 8c87e11..de199f9 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -10,10 +10,31 @@
*/
#ifndef _MMC_CORE_HOST_H
#define _MMC_CORE_HOST_H
+#include <linux/mmc/host.h>
int mmc_register_host_class(void);
void mmc_unregister_host_class(void);
+#ifdef CONFIG_MMC_CLKGATE
+void mmc_host_clk_ungate(struct mmc_host *host);
+void mmc_host_clk_gate(struct mmc_host *host);
+unsigned int mmc_host_clk_rate(struct mmc_host *host);
+
+#else
+static inline void mmc_host_clk_ungate(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_gate(struct mmc_host *host)
+{
+}
+
+static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+ return host->ios.clock;
+}
+#endif
+
void mmc_host_deeper_disable(struct work_struct *work);
#endif
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 77f93c3..16006ef 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -534,39 +534,57 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
*/
if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
- unsigned ext_csd_bit, bus_width;
-
- if (host->caps & MMC_CAP_8_BIT_DATA) {
- if (ddr)
- ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_8;
- else
- ext_csd_bit = EXT_CSD_BUS_WIDTH_8;
- bus_width = MMC_BUS_WIDTH_8;
- } else {
- if (ddr)
- ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_4;
- else
- ext_csd_bit = EXT_CSD_BUS_WIDTH_4;
- bus_width = MMC_BUS_WIDTH_4;
+ static unsigned ext_csd_bits[][2] = {
+ { EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
+ { EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
+ { EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
+ };
+ static unsigned bus_widths[] = {
+ MMC_BUS_WIDTH_8,
+ MMC_BUS_WIDTH_4,
+ MMC_BUS_WIDTH_1
+ };
+ unsigned idx, bus_width = 0;
+
+ if (host->caps & MMC_CAP_8_BIT_DATA)
+ idx = 0;
+ else
+ idx = 1;
+ for (; idx < ARRAY_SIZE(bus_widths); idx++) {
+ bus_width = bus_widths[idx];
+ if (bus_width == MMC_BUS_WIDTH_1)
+ ddr = 0; /* no DDR for 1-bit width */
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_BUS_WIDTH,
+ ext_csd_bits[idx][0]);
+ if (!err) {
+ mmc_set_bus_width_ddr(card->host,
+ bus_width, MMC_SDR_MODE);
+ /*
+ * If controller can't handle bus width test,
+ * use the highest bus width to maintain
+ * compatibility with previous MMC behavior.
+ */
+ if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
+ break;
+ err = mmc_bus_test(card, bus_width);
+ if (!err)
+ break;
+ }
}
- err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_BUS_WIDTH, ext_csd_bit);
-
- if (err && err != -EBADMSG)
- goto free_card;
-
+ if (!err && ddr) {
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_BUS_WIDTH,
+ ext_csd_bits[idx][1]);
+ }
if (err) {
printk(KERN_WARNING "%s: switch to bus width %d ddr %d "
- "failed\n", mmc_hostname(card->host),
- 1 << bus_width, ddr);
- err = 0;
- } else {
- if (ddr)
- mmc_card_set_ddr_mode(card);
- else
- ddr = MMC_SDR_MODE;
-
+ "failed\n", mmc_hostname(card->host),
+ 1 << bus_width, ddr);
+ goto free_card;
+ } else if (ddr) {
+ mmc_card_set_ddr_mode(card);
mmc_set_bus_width_ddr(card->host, bus_width, ddr);
}
}
@@ -737,14 +755,21 @@ static void mmc_attach_bus_ops(struct mmc_host *host)
/*
* Starting point for MMC card init.
*/
-int mmc_attach_mmc(struct mmc_host *host, u32 ocr)
+int mmc_attach_mmc(struct mmc_host *host)
{
int err;
+ u32 ocr;
BUG_ON(!host);
WARN_ON(!host->claimed);
+ err = mmc_send_op_cond(host, 0, &ocr);
+ if (err)
+ return err;
+
mmc_attach_bus_ops(host);
+ if (host->ocr_avail_mmc)
+ host->ocr_avail = host->ocr_avail_mmc;
/*
* We need to get OCR a different way for SPI.
@@ -784,20 +809,20 @@ int mmc_attach_mmc(struct mmc_host *host, u32 ocr)
goto err;
mmc_release_host(host);
-
err = mmc_add_card(host->card);
+ mmc_claim_host(host);
if (err)
goto remove_card;
return 0;
remove_card:
+ mmc_release_host(host);
mmc_remove_card(host->card);
- host->card = NULL;
mmc_claim_host(host);
+ host->card = NULL;
err:
mmc_detach_bus(host);
- mmc_release_host(host);
printk(KERN_ERR "%s: error %d whilst initialising MMC card\n",
mmc_hostname(host), err);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 326447c..60842f8 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -462,3 +462,104 @@ int mmc_send_status(struct mmc_card *card, u32 *status)
return 0;
}
+static int
+mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode,
+ u8 len)
+{
+ struct mmc_request mrq;
+ struct mmc_command cmd;
+ struct mmc_data data;
+ struct scatterlist sg;
+ u8 *data_buf;
+ u8 *test_buf;
+ int i, err;
+ static u8 testdata_8bit[8] = { 0x55, 0xaa, 0, 0, 0, 0, 0, 0 };
+ static u8 testdata_4bit[4] = { 0x5a, 0, 0, 0 };
+
+ /* dma onto stack is unsafe/nonportable, but callers to this
+ * routine normally provide temporary on-stack buffers ...
+ */
+ data_buf = kmalloc(len, GFP_KERNEL);
+ if (!data_buf)
+ return -ENOMEM;
+
+ if (len == 8)
+ test_buf = testdata_8bit;
+ else if (len == 4)
+ test_buf = testdata_4bit;
+ else {
+ printk(KERN_ERR "%s: Invalid bus_width %d\n",
+ mmc_hostname(host), len);
+ kfree(data_buf);
+ return -EINVAL;
+ }
+
+ if (opcode == MMC_BUS_TEST_W)
+ memcpy(data_buf, test_buf, len);
+
+ memset(&mrq, 0, sizeof(struct mmc_request));
+ memset(&cmd, 0, sizeof(struct mmc_command));
+ memset(&data, 0, sizeof(struct mmc_data));
+
+ mrq.cmd = &cmd;
+ mrq.data = &data;
+ cmd.opcode = opcode;
+ cmd.arg = 0;
+
+ /* NOTE HACK: the MMC_RSP_SPI_R1 is always correct here, but we
+ * rely on callers to never use this with "native" calls for reading
+ * CSD or CID. Native versions of those commands use the R2 type,
+ * not R1 plus a data block.
+ */
+ cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+
+ data.blksz = len;
+ data.blocks = 1;
+ if (opcode == MMC_BUS_TEST_R)
+ data.flags = MMC_DATA_READ;
+ else
+ data.flags = MMC_DATA_WRITE;
+
+ data.sg = &sg;
+ data.sg_len = 1;
+ sg_init_one(&sg, data_buf, len);
+ mmc_wait_for_req(host, &mrq);
+ err = 0;
+ if (opcode == MMC_BUS_TEST_R) {
+ for (i = 0; i < len / 4; i++)
+ if ((test_buf[i] ^ data_buf[i]) != 0xff) {
+ err = -EIO;
+ break;
+ }
+ }
+ kfree(data_buf);
+
+ if (cmd.error)
+ return cmd.error;
+ if (data.error)
+ return data.error;
+
+ return err;
+}
+
+int mmc_bus_test(struct mmc_card *card, u8 bus_width)
+{
+ int err, width;
+
+ if (bus_width == MMC_BUS_WIDTH_8)
+ width = 8;
+ else if (bus_width == MMC_BUS_WIDTH_4)
+ width = 4;
+ else if (bus_width == MMC_BUS_WIDTH_1)
+ return 0; /* no need for test */
+ else
+ return -EINVAL;
+
+ /*
+ * Ignore errors from BUS_TEST_W. BUS_TEST_R will fail if there
+ * is a problem. This improves chances that the test will work.
+ */
+ mmc_send_bus_test(card, card->host, MMC_BUS_TEST_W, width);
+ err = mmc_send_bus_test(card, card->host, MMC_BUS_TEST_R, width);
+ return err;
+}
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 653eb8e..e6d44b8 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -26,6 +26,7 @@ int mmc_send_cid(struct mmc_host *host, u32 *cid);
int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
int mmc_card_sleepawake(struct mmc_host *host, int sleep);
+int mmc_bus_test(struct mmc_card *card, u8 bus_width);
#endif
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 49da4df..d18c32b 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -764,14 +764,21 @@ static void mmc_sd_attach_bus_ops(struct mmc_host *host)
/*
* Starting point for SD card init.
*/
-int mmc_attach_sd(struct mmc_host *host, u32 ocr)
+int mmc_attach_sd(struct mmc_host *host)
{
int err;
+ u32 ocr;
BUG_ON(!host);
WARN_ON(!host->claimed);
+ err = mmc_send_app_op_cond(host, 0, &ocr);
+ if (err)
+ return err;
+
mmc_sd_attach_bus_ops(host);
+ if (host->ocr_avail_sd)
+ host->ocr_avail = host->ocr_avail_sd;
/*
* We need to get OCR a different way for SPI.
@@ -795,7 +802,8 @@ int mmc_attach_sd(struct mmc_host *host, u32 ocr)
ocr &= ~0x7F;
}
- if (ocr & MMC_VDD_165_195) {
+ if ((ocr & MMC_VDD_165_195) &&
+ !(host->ocr_avail_sd & MMC_VDD_165_195)) {
printk(KERN_WARNING "%s: SD card claims to support the "
"incompletely defined 'low voltage range'. This "
"will be ignored.\n", mmc_hostname(host));
@@ -820,20 +828,20 @@ int mmc_attach_sd(struct mmc_host *host, u32 ocr)
goto err;
mmc_release_host(host);
-
err = mmc_add_card(host->card);
+ mmc_claim_host(host);
if (err)
goto remove_card;
return 0;
remove_card:
+ mmc_release_host(host);
mmc_remove_card(host->card);
host->card = NULL;
mmc_claim_host(host);
err:
mmc_detach_bus(host);
- mmc_release_host(host);
printk(KERN_ERR "%s: error %d whilst initialising SD card\n",
mmc_hostname(host), err);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index efef5f9..5c4a54d 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -627,15 +627,27 @@ static int mmc_sdio_suspend(struct mmc_host *host)
static int mmc_sdio_resume(struct mmc_host *host)
{
- int i, err;
+ int i, err = 0;
BUG_ON(!host);
BUG_ON(!host->card);
/* Basic card reinitialization. */
mmc_claim_host(host);
- err = mmc_sdio_init_card(host, host->ocr, host->card,
+
+ /* No need to reinitialize powered-resumed nonremovable cards */
+ if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host))
+ err = mmc_sdio_init_card(host, host->ocr, host->card,
(host->pm_flags & MMC_PM_KEEP_POWER));
+ else if (mmc_card_is_powered_resumed(host)) {
+ /* We may have switched to 1-bit mode during suspend */
+ err = sdio_enable_4bit_bus(host->card);
+ if (err > 0) {
+ mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+ err = 0;
+ }
+ }
+
if (!err && host->sdio_irqs)
mmc_signal_sdio_irq(host);
mmc_release_host(host);
@@ -690,16 +702,22 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
/*
* Starting point for SDIO card init.
*/
-int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
+int mmc_attach_sdio(struct mmc_host *host)
{
- int err;
- int i, funcs;
+ int err, i, funcs;
+ u32 ocr;
struct mmc_card *card;
BUG_ON(!host);
WARN_ON(!host->claimed);
+ err = mmc_send_io_op_cond(host, 0, &ocr);
+ if (err)
+ return err;
+
mmc_attach_bus(host, &mmc_sdio_ops);
+ if (host->ocr_avail_sdio)
+ host->ocr_avail = host->ocr_avail_sdio;
/*
* Sanity check the voltages that the card claims to
@@ -769,12 +787,12 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
pm_runtime_enable(&card->sdio_func[i]->dev);
}
- mmc_release_host(host);
-
/*
* First add the card to the driver model...
*/
+ mmc_release_host(host);
err = mmc_add_card(host->card);
+ mmc_claim_host(host);
if (err)
goto remove_added;
@@ -792,15 +810,17 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
remove_added:
/* Remove without lock if the device has been added. */
+ mmc_release_host(host);
mmc_sdio_remove(host);
mmc_claim_host(host);
remove:
/* And with lock if it hasn't been added. */
+ mmc_release_host(host);
if (host->card)
mmc_sdio_remove(host);
+ mmc_claim_host(host);
err:
mmc_detach_bus(host);
- mmc_release_host(host);
printk(KERN_ERR "%s: error %d whilst initialising SDIO card\n",
mmc_hostname(host), err);
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 203da44..d29b9c3 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -197,44 +197,12 @@ out:
#ifdef CONFIG_PM_RUNTIME
-static int sdio_bus_pm_prepare(struct device *dev)
-{
- struct sdio_func *func = dev_to_sdio_func(dev);
-
- /*
- * Resume an SDIO device which was suspended at run time at this
- * point, in order to allow standard SDIO suspend/resume paths
- * to keep working as usual.
- *
- * Ultimately, the SDIO driver itself will decide (in its
- * suspend handler, or lack thereof) whether the card should be
- * removed or kept, and if kept, at what power state.
- *
- * At this point, PM core have increased our use count, so it's
- * safe to directly resume the device. After system is resumed
- * again, PM core will drop back its runtime PM use count, and if
- * needed device will be suspended again.
- *
- * The end result is guaranteed to be a power state that is
- * coherent with the device's runtime PM use count.
- *
- * The return value of pm_runtime_resume is deliberately unchecked
- * since there is little point in failing system suspend if a
- * device can't be resumed.
- */
- if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
- pm_runtime_resume(dev);
-
- return 0;
-}
-
static const struct dev_pm_ops sdio_bus_pm_ops = {
SET_RUNTIME_PM_OPS(
pm_generic_runtime_suspend,
pm_generic_runtime_resume,
pm_generic_runtime_idle
)
- .prepare = sdio_bus_pm_prepare,
};
#define SDIO_PM_OPS_PTR (&sdio_bus_pm_ops)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index e960a93..c22a4c0 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -142,6 +142,27 @@ config MMC_SDHCI_ESDHC_IMX
If unsure, say N.
+config MMC_SDHCI_DOVE
+ bool "SDHCI support on Marvell's Dove SoC"
+ depends on ARCH_DOVE
+ depends on MMC_SDHCI_PLTFM
+ select MMC_SDHCI_IO_ACCESSORS
+ help
+ This selects the Secure Digital Host Controller Interface in
+ Marvell's Dove SoC.
+
+ If unsure, say N.
+
+config MMC_SDHCI_TEGRA
+ tristate "SDHCI platform support for the Tegra SD/MMC Controller"
+ depends on MMC_SDHCI_PLTFM && ARCH_TEGRA
+ select MMC_SDHCI_IO_ACCESSORS
+ help
+ This selects the Tegra SD/MMC controller. If you have a Tegra
+ platform with SD or MMC devices, say Y or M here.
+
+ If unsure, say N.
+
config MMC_SDHCI_S3C
tristate "SDHCI support on Samsung S3C SoC"
depends on MMC_SDHCI && PLAT_SAMSUNG
@@ -460,6 +481,22 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND
help
If you say yes here SD-Cards may work on the EZkit.
+config MMC_DW
+ tristate "Synopsys DesignWare Memory Card Interface"
+ depends on ARM
+ help
+ This selects support for the Synopsys DesignWare Mobile Storage IP
+ block, this provides host support for SD and MMC interfaces, in both
+ PIO and external DMA modes.
+
+config MMC_DW_IDMAC
+ bool "Internal DMAC interface"
+ depends on MMC_DW
+ help
+ This selects support for the internal DMAC block within the Synopsys
+ Designware Mobile Storage IP block. This disables the external DMA
+ interface.
+
config MMC_SH_MMCIF
tristate "SuperH Internal MMCIF support"
depends on MMC_BLOCK && (SUPERH || ARCH_SHMOBILE)
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7b645ff..e834fb2 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_MMC_TMIO) += tmio_mmc.o
obj-$(CONFIG_MMC_CB710) += cb710-mmc.o
obj-$(CONFIG_MMC_VIA_SDMMC) += via-sdmmc.o
obj-$(CONFIG_SDH_BFIN) += bfin_sdh.o
+obj-$(CONFIG_MMC_DW) += dw_mmc.o
obj-$(CONFIG_MMC_SH_MMCIF) += sh_mmcif.o
obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o
obj-$(CONFIG_MMC_USHC) += ushc.o
@@ -39,6 +40,8 @@ obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-platform.o
sdhci-platform-y := sdhci-pltfm.o
sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o
sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o
obj-$(CONFIG_MMC_SDHCI_OF) += sdhci-of.o
sdhci-of-y := sdhci-of-core.o
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index e15547c..0076c74 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -66,8 +66,8 @@
#define DAVINCI_MMCBLNC 0x60
#define DAVINCI_SDIOCTL 0x64
#define DAVINCI_SDIOST0 0x68
-#define DAVINCI_SDIOEN 0x6C
-#define DAVINCI_SDIOST 0x70
+#define DAVINCI_SDIOIEN 0x6C
+#define DAVINCI_SDIOIST 0x70
#define DAVINCI_MMCFIFOCTL 0x74 /* FIFO Control Register */
/* DAVINCI_MMCCTL definitions */
@@ -131,6 +131,14 @@
#define MMCFIFOCTL_ACCWD_2 (2 << 3) /* access width of 2 bytes */
#define MMCFIFOCTL_ACCWD_1 (3 << 3) /* access width of 1 byte */
+/* DAVINCI_SDIOST0 definitions */
+#define SDIOST0_DAT1_HI BIT(0)
+
+/* DAVINCI_SDIOIEN definitions */
+#define SDIOIEN_IOINTEN BIT(0)
+
+/* DAVINCI_SDIOIST definitions */
+#define SDIOIST_IOINT BIT(0)
/* MMCSD Init clock in Hz in opendrain mode */
#define MMCSD_INIT_CLOCK 200000
@@ -164,7 +172,7 @@ struct mmc_davinci_host {
unsigned int mmc_input_clk;
void __iomem *base;
struct resource *mem_res;
- int irq;
+ int mmc_irq, sdio_irq;
unsigned char bus_mode;
#define DAVINCI_MMC_DATADIR_NONE 0
@@ -184,6 +192,7 @@ struct mmc_davinci_host {
u32 rxdma, txdma;
bool use_dma;
bool do_dma;
+ bool sdio_int;
/* Scatterlist DMA uses one or more parameter RAM entries:
* the main one (associated with rxdma or txdma) plus zero or
@@ -480,7 +489,7 @@ static void mmc_davinci_send_dma_request(struct mmc_davinci_host *host,
struct scatterlist *sg;
unsigned sg_len;
unsigned bytes_left = host->bytes_left;
- const unsigned shift = ffs(rw_threshold) - 1;;
+ const unsigned shift = ffs(rw_threshold) - 1;
if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE) {
template = &host->tx_template;
@@ -866,6 +875,19 @@ mmc_davinci_xfer_done(struct mmc_davinci_host *host, struct mmc_data *data)
{
host->data = NULL;
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ) {
+ /*
+ * SDIO Interrupt Detection work-around as suggested by
+ * Davinci Errata (TMS320DM355 Silicon Revision 1.1 Errata
+ * 2.1.6): Signal SDIO interrupt only if it is enabled by core
+ */
+ if (host->sdio_int && !(readl(host->base + DAVINCI_SDIOST0) &
+ SDIOST0_DAT1_HI)) {
+ writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+ mmc_signal_sdio_irq(host->mmc);
+ }
+ }
+
if (host->do_dma) {
davinci_abort_dma(host);
@@ -932,6 +954,21 @@ davinci_abort_data(struct mmc_davinci_host *host, struct mmc_data *data)
mmc_davinci_reset_ctrl(host, 0);
}
+static irqreturn_t mmc_davinci_sdio_irq(int irq, void *dev_id)
+{
+ struct mmc_davinci_host *host = dev_id;
+ unsigned int status;
+
+ status = readl(host->base + DAVINCI_SDIOIST);
+ if (status & SDIOIST_IOINT) {
+ dev_dbg(mmc_dev(host->mmc),
+ "SDIO interrupt status %x\n", status);
+ writel(status | SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+ mmc_signal_sdio_irq(host->mmc);
+ }
+ return IRQ_HANDLED;
+}
+
static irqreturn_t mmc_davinci_irq(int irq, void *dev_id)
{
struct mmc_davinci_host *host = (struct mmc_davinci_host *)dev_id;
@@ -1076,11 +1113,32 @@ static int mmc_davinci_get_ro(struct mmc_host *mmc)
return config->get_ro(pdev->id);
}
+static void mmc_davinci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct mmc_davinci_host *host = mmc_priv(mmc);
+
+ if (enable) {
+ if (!(readl(host->base + DAVINCI_SDIOST0) & SDIOST0_DAT1_HI)) {
+ writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+ mmc_signal_sdio_irq(host->mmc);
+ } else {
+ host->sdio_int = true;
+ writel(readl(host->base + DAVINCI_SDIOIEN) |
+ SDIOIEN_IOINTEN, host->base + DAVINCI_SDIOIEN);
+ }
+ } else {
+ host->sdio_int = false;
+ writel(readl(host->base + DAVINCI_SDIOIEN) & ~SDIOIEN_IOINTEN,
+ host->base + DAVINCI_SDIOIEN);
+ }
+}
+
static struct mmc_host_ops mmc_davinci_ops = {
.request = mmc_davinci_request,
.set_ios = mmc_davinci_set_ios,
.get_cd = mmc_davinci_get_cd,
.get_ro = mmc_davinci_get_ro,
+ .enable_sdio_irq = mmc_davinci_enable_sdio_irq,
};
/*----------------------------------------------------------------------*/
@@ -1209,7 +1267,8 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
host->nr_sg = MAX_NR_SG;
host->use_dma = use_dma;
- host->irq = irq;
+ host->mmc_irq = irq;
+ host->sdio_irq = platform_get_irq(pdev, 1);
if (host->use_dma && davinci_acquire_dma_channels(host) != 0)
host->use_dma = 0;
@@ -1270,6 +1329,13 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
if (ret)
goto out;
+ if (host->sdio_irq >= 0) {
+ ret = request_irq(host->sdio_irq, mmc_davinci_sdio_irq, 0,
+ mmc_hostname(mmc), host);
+ if (!ret)
+ mmc->caps |= MMC_CAP_SDIO_IRQ;
+ }
+
rename_region(mem, mmc_hostname(mmc));
dev_info(mmc_dev(host->mmc), "Using %s, %d-bit mode\n",
@@ -1313,7 +1379,9 @@ static int __exit davinci_mmcsd_remove(struct platform_device *pdev)
mmc_davinci_cpufreq_deregister(host);
mmc_remove_host(host->mmc);
- free_irq(host->irq, host);
+ free_irq(host->mmc_irq, host);
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ free_irq(host->sdio_irq, host);
davinci_release_dma_channels(host);
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
new file mode 100644
index 0000000..2fcc825
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.c
@@ -0,0 +1,1796 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ * (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/dw_mmc.h>
+#include <linux/bitops.h>
+
+#include "dw_mmc.h"
+
+/* Common flag combinations */
+#define DW_MCI_DATA_ERROR_FLAGS (SDMMC_INT_DTO | SDMMC_INT_DCRC | \
+ SDMMC_INT_HTO | SDMMC_INT_SBE | \
+ SDMMC_INT_EBE)
+#define DW_MCI_CMD_ERROR_FLAGS (SDMMC_INT_RTO | SDMMC_INT_RCRC | \
+ SDMMC_INT_RESP_ERR)
+#define DW_MCI_ERROR_FLAGS (DW_MCI_DATA_ERROR_FLAGS | \
+ DW_MCI_CMD_ERROR_FLAGS | SDMMC_INT_HLE)
+#define DW_MCI_SEND_STATUS 1
+#define DW_MCI_RECV_STATUS 2
+#define DW_MCI_DMA_THRESHOLD 16
+
+#ifdef CONFIG_MMC_DW_IDMAC
+struct idmac_desc {
+ u32 des0; /* Control Descriptor */
+#define IDMAC_DES0_DIC BIT(1)
+#define IDMAC_DES0_LD BIT(2)
+#define IDMAC_DES0_FD BIT(3)
+#define IDMAC_DES0_CH BIT(4)
+#define IDMAC_DES0_ER BIT(5)
+#define IDMAC_DES0_CES BIT(30)
+#define IDMAC_DES0_OWN BIT(31)
+
+ u32 des1; /* Buffer sizes */
+#define IDMAC_SET_BUFFER1_SIZE(d, s) \
+ ((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff))
+
+ u32 des2; /* buffer 1 physical address */
+
+ u32 des3; /* buffer 2 physical address */
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+/**
+ * struct dw_mci_slot - MMC slot state
+ * @mmc: The mmc_host representing this slot.
+ * @host: The MMC controller this slot is using.
+ * @ctype: Card type for this slot.
+ * @mrq: mmc_request currently being processed or waiting to be
+ * processed, or NULL when the slot is idle.
+ * @queue_node: List node for placing this node in the @queue list of
+ * &struct dw_mci.
+ * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @flags: Random state bits associated with the slot.
+ * @id: Number of this slot.
+ * @last_detect_state: Most recently observed card detect state.
+ */
+struct dw_mci_slot {
+ struct mmc_host *mmc;
+ struct dw_mci *host;
+
+ u32 ctype;
+
+ struct mmc_request *mrq;
+ struct list_head queue_node;
+
+ unsigned int clock;
+ unsigned long flags;
+#define DW_MMC_CARD_PRESENT 0
+#define DW_MMC_CARD_NEED_INIT 1
+ int id;
+ int last_detect_state;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+static int dw_mci_req_show(struct seq_file *s, void *v)
+{
+ struct dw_mci_slot *slot = s->private;
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ struct mmc_command *stop;
+ struct mmc_data *data;
+
+ /* Make sure we get a consistent snapshot */
+ spin_lock_bh(&slot->host->lock);
+ mrq = slot->mrq;
+
+ if (mrq) {
+ cmd = mrq->cmd;
+ data = mrq->data;
+ stop = mrq->stop;
+
+ if (cmd)
+ seq_printf(s,
+ "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+ cmd->opcode, cmd->arg, cmd->flags,
+ cmd->resp[0], cmd->resp[1], cmd->resp[2],
+ cmd->resp[2], cmd->error);
+ if (data)
+ seq_printf(s, "DATA %u / %u * %u flg %x err %d\n",
+ data->bytes_xfered, data->blocks,
+ data->blksz, data->flags, data->error);
+ if (stop)
+ seq_printf(s,
+ "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+ stop->opcode, stop->arg, stop->flags,
+ stop->resp[0], stop->resp[1], stop->resp[2],
+ stop->resp[2], stop->error);
+ }
+
+ spin_unlock_bh(&slot->host->lock);
+
+ return 0;
+}
+
+static int dw_mci_req_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dw_mci_req_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_req_fops = {
+ .owner = THIS_MODULE,
+ .open = dw_mci_req_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int dw_mci_regs_show(struct seq_file *s, void *v)
+{
+ seq_printf(s, "STATUS:\t0x%08x\n", SDMMC_STATUS);
+ seq_printf(s, "RINTSTS:\t0x%08x\n", SDMMC_RINTSTS);
+ seq_printf(s, "CMD:\t0x%08x\n", SDMMC_CMD);
+ seq_printf(s, "CTRL:\t0x%08x\n", SDMMC_CTRL);
+ seq_printf(s, "INTMASK:\t0x%08x\n", SDMMC_INTMASK);
+ seq_printf(s, "CLKENA:\t0x%08x\n", SDMMC_CLKENA);
+
+ return 0;
+}
+
+static int dw_mci_regs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dw_mci_regs_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_regs_fops = {
+ .owner = THIS_MODULE,
+ .open = dw_mci_regs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
+{
+ struct mmc_host *mmc = slot->mmc;
+ struct dw_mci *host = slot->host;
+ struct dentry *root;
+ struct dentry *node;
+
+ root = mmc->debugfs_root;
+ if (!root)
+ return;
+
+ node = debugfs_create_file("regs", S_IRUSR, root, host,
+ &dw_mci_regs_fops);
+ if (!node)
+ goto err;
+
+ node = debugfs_create_file("req", S_IRUSR, root, slot,
+ &dw_mci_req_fops);
+ if (!node)
+ goto err;
+
+ node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state);
+ if (!node)
+ goto err;
+
+ node = debugfs_create_x32("pending_events", S_IRUSR, root,
+ (u32 *)&host->pending_events);
+ if (!node)
+ goto err;
+
+ node = debugfs_create_x32("completed_events", S_IRUSR, root,
+ (u32 *)&host->completed_events);
+ if (!node)
+ goto err;
+
+ return;
+
+err:
+ dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n");
+}
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+static void dw_mci_set_timeout(struct dw_mci *host)
+{
+ /* timeout (maximum) */
+ mci_writel(host, TMOUT, 0xffffffff);
+}
+
+static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+ struct mmc_data *data;
+ u32 cmdr;
+ cmd->error = -EINPROGRESS;
+
+ cmdr = cmd->opcode;
+
+ if (cmdr == MMC_STOP_TRANSMISSION)
+ cmdr |= SDMMC_CMD_STOP;
+ else
+ cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+
+ if (cmd->flags & MMC_RSP_PRESENT) {
+ /* We expect a response, so set this bit */
+ cmdr |= SDMMC_CMD_RESP_EXP;
+ if (cmd->flags & MMC_RSP_136)
+ cmdr |= SDMMC_CMD_RESP_LONG;
+ }
+
+ if (cmd->flags & MMC_RSP_CRC)
+ cmdr |= SDMMC_CMD_RESP_CRC;
+
+ data = cmd->data;
+ if (data) {
+ cmdr |= SDMMC_CMD_DAT_EXP;
+ if (data->flags & MMC_DATA_STREAM)
+ cmdr |= SDMMC_CMD_STRM_MODE;
+ if (data->flags & MMC_DATA_WRITE)
+ cmdr |= SDMMC_CMD_DAT_WR;
+ }
+
+ return cmdr;
+}
+
+static void dw_mci_start_command(struct dw_mci *host,
+ struct mmc_command *cmd, u32 cmd_flags)
+{
+ host->cmd = cmd;
+ dev_vdbg(&host->pdev->dev,
+ "start command: ARGR=0x%08x CMDR=0x%08x\n",
+ cmd->arg, cmd_flags);
+
+ mci_writel(host, CMDARG, cmd->arg);
+ wmb();
+
+ mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
+}
+
+static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
+{
+ dw_mci_start_command(host, data->stop, host->stop_cmdr);
+}
+
+/* DMA interface functions */
+static void dw_mci_stop_dma(struct dw_mci *host)
+{
+ if (host->use_dma) {
+ host->dma_ops->stop(host);
+ host->dma_ops->cleanup(host);
+ } else {
+ /* Data transfer was stopped by the interrupt handler */
+ set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+ }
+}
+
+#ifdef CONFIG_MMC_DW_IDMAC
+static void dw_mci_dma_cleanup(struct dw_mci *host)
+{
+ struct mmc_data *data = host->data;
+
+ if (data)
+ dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len,
+ ((data->flags & MMC_DATA_WRITE)
+ ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+}
+
+static void dw_mci_idmac_stop_dma(struct dw_mci *host)
+{
+ u32 temp;
+
+ /* Disable and reset the IDMAC interface */
+ temp = mci_readl(host, CTRL);
+ temp &= ~SDMMC_CTRL_USE_IDMAC;
+ temp |= SDMMC_CTRL_DMA_RESET;
+ mci_writel(host, CTRL, temp);
+
+ /* Stop the IDMAC running */
+ temp = mci_readl(host, BMOD);
+ temp &= ~SDMMC_IDMAC_ENABLE;
+ mci_writel(host, BMOD, temp);
+}
+
+static void dw_mci_idmac_complete_dma(struct dw_mci *host)
+{
+ struct mmc_data *data = host->data;
+
+ dev_vdbg(&host->pdev->dev, "DMA complete\n");
+
+ host->dma_ops->cleanup(host);
+
+ /*
+ * If the card was removed, data will be NULL. No point in trying to
+ * send the stop command or waiting for NBUSY in this case.
+ */
+ if (data) {
+ set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+ }
+}
+
+static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data,
+ unsigned int sg_len)
+{
+ int i;
+ struct idmac_desc *desc = host->sg_cpu;
+
+ for (i = 0; i < sg_len; i++, desc++) {
+ unsigned int length = sg_dma_len(&data->sg[i]);
+ u32 mem_addr = sg_dma_address(&data->sg[i]);
+
+ /* Set the OWN bit and disable interrupts for this descriptor */
+ desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | IDMAC_DES0_CH;
+
+ /* Buffer length */
+ IDMAC_SET_BUFFER1_SIZE(desc, length);
+
+ /* Physical address to DMA to/from */
+ desc->des2 = mem_addr;
+ }
+
+ /* Set first descriptor */
+ desc = host->sg_cpu;
+ desc->des0 |= IDMAC_DES0_FD;
+
+ /* Set last descriptor */
+ desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc);
+ desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC);
+ desc->des0 |= IDMAC_DES0_LD;
+
+ wmb();
+}
+
+static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len)
+{
+ u32 temp;
+
+ dw_mci_translate_sglist(host, host->data, sg_len);
+
+ /* Select IDMAC interface */
+ temp = mci_readl(host, CTRL);
+ temp |= SDMMC_CTRL_USE_IDMAC;
+ mci_writel(host, CTRL, temp);
+
+ wmb();
+
+ /* Enable the IDMAC */
+ temp = mci_readl(host, BMOD);
+ temp |= SDMMC_IDMAC_ENABLE;
+ mci_writel(host, BMOD, temp);
+
+ /* Start it running */
+ mci_writel(host, PLDMND, 1);
+}
+
+static int dw_mci_idmac_init(struct dw_mci *host)
+{
+ struct idmac_desc *p;
+ int i;
+
+ /* Number of descriptors in the ring buffer */
+ host->ring_size = PAGE_SIZE / sizeof(struct idmac_desc);
+
+ /* Forward link the descriptor list */
+ for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
+ p->des3 = host->sg_dma + (sizeof(struct idmac_desc) * (i + 1));
+
+ /* Set the last descriptor as the end-of-ring descriptor */
+ p->des3 = host->sg_dma;
+ p->des0 = IDMAC_DES0_ER;
+
+ /* Mask out interrupts - get Tx & Rx complete only */
+ mci_writel(host, IDINTEN, SDMMC_IDMAC_INT_NI | SDMMC_IDMAC_INT_RI |
+ SDMMC_IDMAC_INT_TI);
+
+ /* Set the descriptor base address */
+ mci_writel(host, DBADDR, host->sg_dma);
+ return 0;
+}
+
+static struct dw_mci_dma_ops dw_mci_idmac_ops = {
+ .init = dw_mci_idmac_init,
+ .start = dw_mci_idmac_start_dma,
+ .stop = dw_mci_idmac_stop_dma,
+ .complete = dw_mci_idmac_complete_dma,
+ .cleanup = dw_mci_dma_cleanup,
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
+{
+ struct scatterlist *sg;
+ unsigned int i, direction, sg_len;
+ u32 temp;
+
+ /* If we don't have a channel, we can't do DMA */
+ if (!host->use_dma)
+ return -ENODEV;
+
+ /*
+ * We don't do DMA on "complex" transfers, i.e. with
+ * non-word-aligned buffers or lengths. Also, we don't bother
+ * with all the DMA setup overhead for short transfers.
+ */
+ if (data->blocks * data->blksz < DW_MCI_DMA_THRESHOLD)
+ return -EINVAL;
+ if (data->blksz & 3)
+ return -EINVAL;
+
+ for_each_sg(data->sg, sg, data->sg_len, i) {
+ if (sg->offset & 3 || sg->length & 3)
+ return -EINVAL;
+ }
+
+ if (data->flags & MMC_DATA_READ)
+ direction = DMA_FROM_DEVICE;
+ else
+ direction = DMA_TO_DEVICE;
+
+ sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len,
+ direction);
+
+ dev_vdbg(&host->pdev->dev,
+ "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n",
+ (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma,
+ sg_len);
+
+ /* Enable the DMA interface */
+ temp = mci_readl(host, CTRL);
+ temp |= SDMMC_CTRL_DMA_ENABLE;
+ mci_writel(host, CTRL, temp);
+
+ /* Disable RX/TX IRQs, let DMA handle it */
+ temp = mci_readl(host, INTMASK);
+ temp &= ~(SDMMC_INT_RXDR | SDMMC_INT_TXDR);
+ mci_writel(host, INTMASK, temp);
+
+ host->dma_ops->start(host, sg_len);
+
+ return 0;
+}
+
+static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
+{
+ u32 temp;
+
+ data->error = -EINPROGRESS;
+
+ WARN_ON(host->data);
+ host->sg = NULL;
+ host->data = data;
+
+ if (dw_mci_submit_data_dma(host, data)) {
+ host->sg = data->sg;
+ host->pio_offset = 0;
+ if (data->flags & MMC_DATA_READ)
+ host->dir_status = DW_MCI_RECV_STATUS;
+ else
+ host->dir_status = DW_MCI_SEND_STATUS;
+
+ temp = mci_readl(host, INTMASK);
+ temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
+ mci_writel(host, INTMASK, temp);
+
+ temp = mci_readl(host, CTRL);
+ temp &= ~SDMMC_CTRL_DMA_ENABLE;
+ mci_writel(host, CTRL, temp);
+ }
+}
+
+static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
+{
+ struct dw_mci *host = slot->host;
+ unsigned long timeout = jiffies + msecs_to_jiffies(500);
+ unsigned int cmd_status = 0;
+
+ mci_writel(host, CMDARG, arg);
+ wmb();
+ mci_writel(host, CMD, SDMMC_CMD_START | cmd);
+
+ while (time_before(jiffies, timeout)) {
+ cmd_status = mci_readl(host, CMD);
+ if (!(cmd_status & SDMMC_CMD_START))
+ return;
+ }
+ dev_err(&slot->mmc->class_dev,
+ "Timeout sending command (cmd %#x arg %#x status %#x)\n",
+ cmd, arg, cmd_status);
+}
+
+static void dw_mci_setup_bus(struct dw_mci_slot *slot)
+{
+ struct dw_mci *host = slot->host;
+ u32 div;
+
+ if (slot->clock != host->current_speed) {
+ if (host->bus_hz % slot->clock)
+ /*
+ * move the + 1 after the divide to prevent
+ * over-clocking the card.
+ */
+ div = ((host->bus_hz / slot->clock) >> 1) + 1;
+ else
+ div = (host->bus_hz / slot->clock) >> 1;
+
+ dev_info(&slot->mmc->class_dev,
+ "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ"
+ " div = %d)\n", slot->id, host->bus_hz, slot->clock,
+ div ? ((host->bus_hz / div) >> 1) : host->bus_hz, div);
+
+ /* disable clock */
+ mci_writel(host, CLKENA, 0);
+ mci_writel(host, CLKSRC, 0);
+
+ /* inform CIU */
+ mci_send_cmd(slot,
+ SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+ /* set clock to desired speed */
+ mci_writel(host, CLKDIV, div);
+
+ /* inform CIU */
+ mci_send_cmd(slot,
+ SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+ /* enable clock */
+ mci_writel(host, CLKENA, SDMMC_CLKEN_ENABLE);
+
+ /* inform CIU */
+ mci_send_cmd(slot,
+ SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+ host->current_speed = slot->clock;
+ }
+
+ /* Set the current slot bus width */
+ mci_writel(host, CTYPE, slot->ctype);
+}
+
+static void dw_mci_start_request(struct dw_mci *host,
+ struct dw_mci_slot *slot)
+{
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ struct mmc_data *data;
+ u32 cmdflags;
+
+ mrq = slot->mrq;
+ if (host->pdata->select_slot)
+ host->pdata->select_slot(slot->id);
+
+ /* Slot specific timing and width adjustment */
+ dw_mci_setup_bus(slot);
+
+ host->cur_slot = slot;
+ host->mrq = mrq;
+
+ host->pending_events = 0;
+ host->completed_events = 0;
+ host->data_status = 0;
+
+ data = mrq->data;
+ if (data) {
+ dw_mci_set_timeout(host);
+ mci_writel(host, BYTCNT, data->blksz*data->blocks);
+ mci_writel(host, BLKSIZ, data->blksz);
+ }
+
+ cmd = mrq->cmd;
+ cmdflags = dw_mci_prepare_command(slot->mmc, cmd);
+
+ /* this is the first command, send the initialization clock */
+ if (test_and_clear_bit(DW_MMC_CARD_NEED_INIT, &slot->flags))
+ cmdflags |= SDMMC_CMD_INIT;
+
+ if (data) {
+ dw_mci_submit_data(host, data);
+ wmb();
+ }
+
+ dw_mci_start_command(host, cmd, cmdflags);
+
+ if (mrq->stop)
+ host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
+}
+
+static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
+ struct mmc_request *mrq)
+{
+ dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
+ host->state);
+
+ spin_lock_bh(&host->lock);
+ slot->mrq = mrq;
+
+ if (host->state == STATE_IDLE) {
+ host->state = STATE_SENDING_CMD;
+ dw_mci_start_request(host, slot);
+ } else {
+ list_add_tail(&slot->queue_node, &host->queue);
+ }
+
+ spin_unlock_bh(&host->lock);
+}
+
+static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct dw_mci_slot *slot = mmc_priv(mmc);
+ struct dw_mci *host = slot->host;
+
+ WARN_ON(slot->mrq);
+
+ if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+ mrq->cmd->error = -ENOMEDIUM;
+ mmc_request_done(mmc, mrq);
+ return;
+ }
+
+ /* We don't support multiple blocks of weird lengths. */
+ dw_mci_queue_request(host, slot, mrq);
+}
+
+static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct dw_mci_slot *slot = mmc_priv(mmc);
+
+ /* set default 1 bit mode */
+ slot->ctype = SDMMC_CTYPE_1BIT;
+
+ switch (ios->bus_width) {
+ case MMC_BUS_WIDTH_1:
+ slot->ctype = SDMMC_CTYPE_1BIT;
+ break;
+ case MMC_BUS_WIDTH_4:
+ slot->ctype = SDMMC_CTYPE_4BIT;
+ break;
+ }
+
+ if (ios->clock) {
+ /*
+ * Use mirror of ios->clock to prevent race with mmc
+ * core ios update when finding the minimum.
+ */
+ slot->clock = ios->clock;
+ }
+
+ switch (ios->power_mode) {
+ case MMC_POWER_UP:
+ set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dw_mci_get_ro(struct mmc_host *mmc)
+{
+ int read_only;
+ struct dw_mci_slot *slot = mmc_priv(mmc);
+ struct dw_mci_board *brd = slot->host->pdata;
+
+ /* Use platform get_ro function, else try on board write protect */
+ if (brd->get_ro)
+ read_only = brd->get_ro(slot->id);
+ else
+ read_only =
+ mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0;
+
+ dev_dbg(&mmc->class_dev, "card is %s\n",
+ read_only ? "read-only" : "read-write");
+
+ return read_only;
+}
+
+static int dw_mci_get_cd(struct mmc_host *mmc)
+{
+ int present;
+ struct dw_mci_slot *slot = mmc_priv(mmc);
+ struct dw_mci_board *brd = slot->host->pdata;
+
+ /* Use platform get_cd function, else try onboard card detect */
+ if (brd->get_cd)
+ present = !brd->get_cd(slot->id);
+ else
+ present = (mci_readl(slot->host, CDETECT) & (1 << slot->id))
+ == 0 ? 1 : 0;
+
+ if (present)
+ dev_dbg(&mmc->class_dev, "card is present\n");
+ else
+ dev_dbg(&mmc->class_dev, "card is not present\n");
+
+ return present;
+}
+
+static const struct mmc_host_ops dw_mci_ops = {
+ .request = dw_mci_request,
+ .set_ios = dw_mci_set_ios,
+ .get_ro = dw_mci_get_ro,
+ .get_cd = dw_mci_get_cd,
+};
+
+static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
+ __releases(&host->lock)
+ __acquires(&host->lock)
+{
+ struct dw_mci_slot *slot;
+ struct mmc_host *prev_mmc = host->cur_slot->mmc;
+
+ WARN_ON(host->cmd || host->data);
+
+ host->cur_slot->mrq = NULL;
+ host->mrq = NULL;
+ if (!list_empty(&host->queue)) {
+ slot = list_entry(host->queue.next,
+ struct dw_mci_slot, queue_node);
+ list_del(&slot->queue_node);
+ dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n",
+ mmc_hostname(slot->mmc));
+ host->state = STATE_SENDING_CMD;
+ dw_mci_start_request(host, slot);
+ } else {
+ dev_vdbg(&host->pdev->dev, "list empty\n");
+ host->state = STATE_IDLE;
+ }
+
+ spin_unlock(&host->lock);
+ mmc_request_done(prev_mmc, mrq);
+ spin_lock(&host->lock);
+}
+
+static void dw_mci_command_complete(struct dw_mci *host, struct mmc_command *cmd)
+{
+ u32 status = host->cmd_status;
+
+ host->cmd_status = 0;
+
+ /* Read the response from the card (up to 16 bytes) */
+ if (cmd->flags & MMC_RSP_PRESENT) {
+ if (cmd->flags & MMC_RSP_136) {
+ cmd->resp[3] = mci_readl(host, RESP0);
+ cmd->resp[2] = mci_readl(host, RESP1);
+ cmd->resp[1] = mci_readl(host, RESP2);
+ cmd->resp[0] = mci_readl(host, RESP3);
+ } else {
+ cmd->resp[0] = mci_readl(host, RESP0);
+ cmd->resp[1] = 0;
+ cmd->resp[2] = 0;
+ cmd->resp[3] = 0;
+ }
+ }
+
+ if (status & SDMMC_INT_RTO)
+ cmd->error = -ETIMEDOUT;
+ else if ((cmd->flags & MMC_RSP_CRC) && (status & SDMMC_INT_RCRC))
+ cmd->error = -EILSEQ;
+ else if (status & SDMMC_INT_RESP_ERR)
+ cmd->error = -EIO;
+ else
+ cmd->error = 0;
+
+ if (cmd->error) {
+ /* newer ip versions need a delay between retries */
+ if (host->quirks & DW_MCI_QUIRK_RETRY_DELAY)
+ mdelay(20);
+
+ if (cmd->data) {
+ host->data = NULL;
+ dw_mci_stop_dma(host);
+ }
+ }
+}
+
+static void dw_mci_tasklet_func(unsigned long priv)
+{
+ struct dw_mci *host = (struct dw_mci *)priv;
+ struct mmc_data *data;
+ struct mmc_command *cmd;
+ enum dw_mci_state state;
+ enum dw_mci_state prev_state;
+ u32 status;
+
+ spin_lock(&host->lock);
+
+ state = host->state;
+ data = host->data;
+
+ do {
+ prev_state = state;
+
+ switch (state) {
+ case STATE_IDLE:
+ break;
+
+ case STATE_SENDING_CMD:
+ if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+ &host->pending_events))
+ break;
+
+ cmd = host->cmd;
+ host->cmd = NULL;
+ set_bit(EVENT_CMD_COMPLETE, &host->completed_events);
+ dw_mci_command_complete(host, host->mrq->cmd);
+ if (!host->mrq->data || cmd->error) {
+ dw_mci_request_end(host, host->mrq);
+ goto unlock;
+ }
+
+ prev_state = state = STATE_SENDING_DATA;
+ /* fall through */
+
+ case STATE_SENDING_DATA:
+ if (test_and_clear_bit(EVENT_DATA_ERROR,
+ &host->pending_events)) {
+ dw_mci_stop_dma(host);
+ if (data->stop)
+ send_stop_cmd(host, data);
+ state = STATE_DATA_ERROR;
+ break;
+ }
+
+ if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+ &host->pending_events))
+ break;
+
+ set_bit(EVENT_XFER_COMPLETE, &host->completed_events);
+ prev_state = state = STATE_DATA_BUSY;
+ /* fall through */
+
+ case STATE_DATA_BUSY:
+ if (!test_and_clear_bit(EVENT_DATA_COMPLETE,
+ &host->pending_events))
+ break;
+
+ host->data = NULL;
+ set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
+ status = host->data_status;
+
+ if (status & DW_MCI_DATA_ERROR_FLAGS) {
+ if (status & SDMMC_INT_DTO) {
+ dev_err(&host->pdev->dev,
+ "data timeout error\n");
+ data->error = -ETIMEDOUT;
+ } else if (status & SDMMC_INT_DCRC) {
+ dev_err(&host->pdev->dev,
+ "data CRC error\n");
+ data->error = -EILSEQ;
+ } else {
+ dev_err(&host->pdev->dev,
+ "data FIFO error "
+ "(status=%08x)\n",
+ status);
+ data->error = -EIO;
+ }
+ } else {
+ data->bytes_xfered = data->blocks * data->blksz;
+ data->error = 0;
+ }
+
+ if (!data->stop) {
+ dw_mci_request_end(host, host->mrq);
+ goto unlock;
+ }
+
+ prev_state = state = STATE_SENDING_STOP;
+ if (!data->error)
+ send_stop_cmd(host, data);
+ /* fall through */
+
+ case STATE_SENDING_STOP:
+ if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+ &host->pending_events))
+ break;
+
+ host->cmd = NULL;
+ dw_mci_command_complete(host, host->mrq->stop);
+ dw_mci_request_end(host, host->mrq);
+ goto unlock;
+
+ case STATE_DATA_ERROR:
+ if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+ &host->pending_events))
+ break;
+
+ state = STATE_DATA_BUSY;
+ break;
+ }
+ } while (state != prev_state);
+
+ host->state = state;
+unlock:
+ spin_unlock(&host->lock);
+
+}
+
+static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
+{
+ u16 *pdata = (u16 *)buf;
+
+ WARN_ON(cnt % 2 != 0);
+
+ cnt = cnt >> 1;
+ while (cnt > 0) {
+ mci_writew(host, DATA, *pdata++);
+ cnt--;
+ }
+}
+
+static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
+{
+ u16 *pdata = (u16 *)buf;
+
+ WARN_ON(cnt % 2 != 0);
+
+ cnt = cnt >> 1;
+ while (cnt > 0) {
+ *pdata++ = mci_readw(host, DATA);
+ cnt--;
+ }
+}
+
+static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
+{
+ u32 *pdata = (u32 *)buf;
+
+ WARN_ON(cnt % 4 != 0);
+ WARN_ON((unsigned long)pdata & 0x3);
+
+ cnt = cnt >> 2;
+ while (cnt > 0) {
+ mci_writel(host, DATA, *pdata++);
+ cnt--;
+ }
+}
+
+static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
+{
+ u32 *pdata = (u32 *)buf;
+
+ WARN_ON(cnt % 4 != 0);
+ WARN_ON((unsigned long)pdata & 0x3);
+
+ cnt = cnt >> 2;
+ while (cnt > 0) {
+ *pdata++ = mci_readl(host, DATA);
+ cnt--;
+ }
+}
+
+static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
+{
+ u64 *pdata = (u64 *)buf;
+
+ WARN_ON(cnt % 8 != 0);
+
+ cnt = cnt >> 3;
+ while (cnt > 0) {
+ mci_writeq(host, DATA, *pdata++);
+ cnt--;
+ }
+}
+
+static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
+{
+ u64 *pdata = (u64 *)buf;
+
+ WARN_ON(cnt % 8 != 0);
+
+ cnt = cnt >> 3;
+ while (cnt > 0) {
+ *pdata++ = mci_readq(host, DATA);
+ cnt--;
+ }
+}
+
+static void dw_mci_read_data_pio(struct dw_mci *host)
+{
+ struct scatterlist *sg = host->sg;
+ void *buf = sg_virt(sg);
+ unsigned int offset = host->pio_offset;
+ struct mmc_data *data = host->data;
+ int shift = host->data_shift;
+ u32 status;
+ unsigned int nbytes = 0, len, old_len, count = 0;
+
+ do {
+ len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+ if (count == 0)
+ old_len = len;
+
+ if (offset + len <= sg->length) {
+ host->pull_data(host, (void *)(buf + offset), len);
+
+ offset += len;
+ nbytes += len;
+
+ if (offset == sg->length) {
+ flush_dcache_page(sg_page(sg));
+ host->sg = sg = sg_next(sg);
+ if (!sg)
+ goto done;
+
+ offset = 0;
+ buf = sg_virt(sg);
+ }
+ } else {
+ unsigned int remaining = sg->length - offset;
+ host->pull_data(host, (void *)(buf + offset),
+ remaining);
+ nbytes += remaining;
+
+ flush_dcache_page(sg_page(sg));
+ host->sg = sg = sg_next(sg);
+ if (!sg)
+ goto done;
+
+ offset = len - remaining;
+ buf = sg_virt(sg);
+ host->pull_data(host, buf, offset);
+ nbytes += offset;
+ }
+
+ status = mci_readl(host, MINTSTS);
+ mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+ if (status & DW_MCI_DATA_ERROR_FLAGS) {
+ host->data_status = status;
+ data->bytes_xfered += nbytes;
+ smp_wmb();
+
+ set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+ tasklet_schedule(&host->tasklet);
+ return;
+ }
+ count++;
+ } while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
+ len = SDMMC_GET_FCNT(mci_readl(host, STATUS));
+ host->pio_offset = offset;
+ data->bytes_xfered += nbytes;
+ return;
+
+done:
+ data->bytes_xfered += nbytes;
+ smp_wmb();
+ set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_write_data_pio(struct dw_mci *host)
+{
+ struct scatterlist *sg = host->sg;
+ void *buf = sg_virt(sg);
+ unsigned int offset = host->pio_offset;
+ struct mmc_data *data = host->data;
+ int shift = host->data_shift;
+ u32 status;
+ unsigned int nbytes = 0, len;
+
+ do {
+ len = SDMMC_FIFO_SZ -
+ (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
+ if (offset + len <= sg->length) {
+ host->push_data(host, (void *)(buf + offset), len);
+
+ offset += len;
+ nbytes += len;
+ if (offset == sg->length) {
+ host->sg = sg = sg_next(sg);
+ if (!sg)
+ goto done;
+
+ offset = 0;
+ buf = sg_virt(sg);
+ }
+ } else {
+ unsigned int remaining = sg->length - offset;
+
+ host->push_data(host, (void *)(buf + offset),
+ remaining);
+ nbytes += remaining;
+
+ host->sg = sg = sg_next(sg);
+ if (!sg)
+ goto done;
+
+ offset = len - remaining;
+ buf = sg_virt(sg);
+ host->push_data(host, (void *)buf, offset);
+ nbytes += offset;
+ }
+
+ status = mci_readl(host, MINTSTS);
+ mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+ if (status & DW_MCI_DATA_ERROR_FLAGS) {
+ host->data_status = status;
+ data->bytes_xfered += nbytes;
+
+ smp_wmb();
+
+ set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+ tasklet_schedule(&host->tasklet);
+ return;
+ }
+ } while (status & SDMMC_INT_TXDR); /* if TXDR write again */
+
+ host->pio_offset = offset;
+ data->bytes_xfered += nbytes;
+
+ return;
+
+done:
+ data->bytes_xfered += nbytes;
+ smp_wmb();
+ set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
+{
+ if (!host->cmd_status)
+ host->cmd_status = status;
+
+ smp_wmb();
+
+ set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+}
+
+static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
+{
+ struct dw_mci *host = dev_id;
+ u32 status, pending;
+ unsigned int pass_count = 0;
+
+ do {
+ status = mci_readl(host, RINTSTS);
+ pending = mci_readl(host, MINTSTS); /* read-only mask reg */
+
+ /*
+ * DTO fix - version 2.10a and below, and only if internal DMA
+ * is configured.
+ */
+ if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
+ if (!pending &&
+ ((mci_readl(host, STATUS) >> 17) & 0x1fff))
+ pending |= SDMMC_INT_DATA_OVER;
+ }
+
+ if (!pending)
+ break;
+
+ if (pending & DW_MCI_CMD_ERROR_FLAGS) {
+ mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
+ host->cmd_status = status;
+ smp_wmb();
+ set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+ }
+
+ if (pending & DW_MCI_DATA_ERROR_FLAGS) {
+ /* if there is an error report DATA_ERROR */
+ mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS);
+ host->data_status = status;
+ smp_wmb();
+ set_bit(EVENT_DATA_ERROR, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+ }
+
+ if (pending & SDMMC_INT_DATA_OVER) {
+ mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER);
+ if (!host->data_status)
+ host->data_status = status;
+ smp_wmb();
+ if (host->dir_status == DW_MCI_RECV_STATUS) {
+ if (host->sg != NULL)
+ dw_mci_read_data_pio(host);
+ }
+ set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+ }
+
+ if (pending & SDMMC_INT_RXDR) {
+ mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+ if (host->sg)
+ dw_mci_read_data_pio(host);
+ }
+
+ if (pending & SDMMC_INT_TXDR) {
+ mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+ if (host->sg)
+ dw_mci_write_data_pio(host);
+ }
+
+ if (pending & SDMMC_INT_CMD_DONE) {
+ mci_writel(host, RINTSTS, SDMMC_INT_CMD_DONE);
+ dw_mci_cmd_interrupt(host, status);
+ }
+
+ if (pending & SDMMC_INT_CD) {
+ mci_writel(host, RINTSTS, SDMMC_INT_CD);
+ tasklet_schedule(&host->card_tasklet);
+ }
+
+ } while (pass_count++ < 5);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+ /* Handle DMA interrupts */
+ pending = mci_readl(host, IDSTS);
+ if (pending & (SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI)) {
+ mci_writel(host, IDSTS, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI);
+ mci_writel(host, IDSTS, SDMMC_IDMAC_INT_NI);
+ set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+ host->dma_ops->complete(host);
+ }
+#endif
+
+ return IRQ_HANDLED;
+}
+
+static void dw_mci_tasklet_card(unsigned long data)
+{
+ struct dw_mci *host = (struct dw_mci *)data;
+ int i;
+
+ for (i = 0; i < host->num_slots; i++) {
+ struct dw_mci_slot *slot = host->slot[i];
+ struct mmc_host *mmc = slot->mmc;
+ struct mmc_request *mrq;
+ int present;
+ u32 ctrl;
+
+ present = dw_mci_get_cd(mmc);
+ while (present != slot->last_detect_state) {
+ spin_lock(&host->lock);
+
+ dev_dbg(&slot->mmc->class_dev, "card %s\n",
+ present ? "inserted" : "removed");
+
+ /* Card change detected */
+ slot->last_detect_state = present;
+
+ /* Power up slot */
+ if (present != 0) {
+ if (host->pdata->setpower)
+ host->pdata->setpower(slot->id,
+ mmc->ocr_avail);
+
+ set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+ }
+
+ /* Clean up queue if present */
+ mrq = slot->mrq;
+ if (mrq) {
+ if (mrq == host->mrq) {
+ host->data = NULL;
+ host->cmd = NULL;
+
+ switch (host->state) {
+ case STATE_IDLE:
+ break;
+ case STATE_SENDING_CMD:
+ mrq->cmd->error = -ENOMEDIUM;
+ if (!mrq->data)
+ break;
+ /* fall through */
+ case STATE_SENDING_DATA:
+ mrq->data->error = -ENOMEDIUM;
+ dw_mci_stop_dma(host);
+ break;
+ case STATE_DATA_BUSY:
+ case STATE_DATA_ERROR:
+ if (mrq->data->error == -EINPROGRESS)
+ mrq->data->error = -ENOMEDIUM;
+ if (!mrq->stop)
+ break;
+ /* fall through */
+ case STATE_SENDING_STOP:
+ mrq->stop->error = -ENOMEDIUM;
+ break;
+ }
+
+ dw_mci_request_end(host, mrq);
+ } else {
+ list_del(&slot->queue_node);
+ mrq->cmd->error = -ENOMEDIUM;
+ if (mrq->data)
+ mrq->data->error = -ENOMEDIUM;
+ if (mrq->stop)
+ mrq->stop->error = -ENOMEDIUM;
+
+ spin_unlock(&host->lock);
+ mmc_request_done(slot->mmc, mrq);
+ spin_lock(&host->lock);
+ }
+ }
+
+ /* Power down slot */
+ if (present == 0) {
+ if (host->pdata->setpower)
+ host->pdata->setpower(slot->id, 0);
+ clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+ /*
+ * Clear down the FIFO - doing so generates a
+ * block interrupt, hence setting the
+ * scatter-gather pointer to NULL.
+ */
+ host->sg = NULL;
+
+ ctrl = mci_readl(host, CTRL);
+ ctrl |= SDMMC_CTRL_FIFO_RESET;
+ mci_writel(host, CTRL, ctrl);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+ ctrl = mci_readl(host, BMOD);
+ ctrl |= 0x01; /* Software reset of DMA */
+ mci_writel(host, BMOD, ctrl);
+#endif
+
+ }
+
+ spin_unlock(&host->lock);
+ present = dw_mci_get_cd(mmc);
+ }
+
+ mmc_detect_change(slot->mmc,
+ msecs_to_jiffies(host->pdata->detect_delay_ms));
+ }
+}
+
+static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
+{
+ struct mmc_host *mmc;
+ struct dw_mci_slot *slot;
+
+ mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), &host->pdev->dev);
+ if (!mmc)
+ return -ENOMEM;
+
+ slot = mmc_priv(mmc);
+ slot->id = id;
+ slot->mmc = mmc;
+ slot->host = host;
+
+ mmc->ops = &dw_mci_ops;
+ mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510);
+ mmc->f_max = host->bus_hz;
+
+ if (host->pdata->get_ocr)
+ mmc->ocr_avail = host->pdata->get_ocr(id);
+ else
+ mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+
+ /*
+ * Start with slot power disabled, it will be enabled when a card
+ * is detected.
+ */
+ if (host->pdata->setpower)
+ host->pdata->setpower(id, 0);
+
+ mmc->caps = 0;
+ if (host->pdata->get_bus_wd)
+ if (host->pdata->get_bus_wd(slot->id) >= 4)
+ mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+ if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED)
+ mmc->caps |= MMC_CAP_SD_HIGHSPEED;
+
+#ifdef CONFIG_MMC_DW_IDMAC
+ mmc->max_segs = host->ring_size;
+ mmc->max_blk_size = 65536;
+ mmc->max_blk_count = host->ring_size;
+ mmc->max_seg_size = 0x1000;
+ mmc->max_req_size = mmc->max_seg_size * mmc->max_blk_count;
+#else
+ if (host->pdata->blk_settings) {
+ mmc->max_segs = host->pdata->blk_settings->max_segs;
+ mmc->max_blk_size = host->pdata->blk_settings->max_blk_size;
+ mmc->max_blk_count = host->pdata->blk_settings->max_blk_count;
+ mmc->max_req_size = host->pdata->blk_settings->max_req_size;
+ mmc->max_seg_size = host->pdata->blk_settings->max_seg_size;
+ } else {
+ /* Useful defaults if platform data is unset. */
+ mmc->max_segs = 64;
+ mmc->max_blk_size = 65536; /* BLKSIZ is 16 bits */
+ mmc->max_blk_count = 512;
+ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+ mmc->max_seg_size = mmc->max_req_size;
+ }
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+ if (dw_mci_get_cd(mmc))
+ set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+ else
+ clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+ host->slot[id] = slot;
+ mmc_add_host(mmc);
+
+#if defined(CONFIG_DEBUG_FS)
+ dw_mci_init_debugfs(slot);
+#endif
+
+ /* Card initially undetected */
+ slot->last_detect_state = 0;
+
+ return 0;
+}
+
+static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
+{
+ /* Shutdown detect IRQ */
+ if (slot->host->pdata->exit)
+ slot->host->pdata->exit(id);
+
+ /* Debugfs stuff is cleaned up by mmc core */
+ mmc_remove_host(slot->mmc);
+ slot->host->slot[id] = NULL;
+ mmc_free_host(slot->mmc);
+}
+
+static void dw_mci_init_dma(struct dw_mci *host)
+{
+ /* Alloc memory for sg translation */
+ host->sg_cpu = dma_alloc_coherent(&host->pdev->dev, PAGE_SIZE,
+ &host->sg_dma, GFP_KERNEL);
+ if (!host->sg_cpu) {
+ dev_err(&host->pdev->dev, "%s: could not alloc DMA memory\n",
+ __func__);
+ goto no_dma;
+ }
+
+ /* Determine which DMA interface to use */
+#ifdef CONFIG_MMC_DW_IDMAC
+ host->dma_ops = &dw_mci_idmac_ops;
+ dev_info(&host->pdev->dev, "Using internal DMA controller.\n");
+#endif
+
+ if (!host->dma_ops)
+ goto no_dma;
+
+ if (host->dma_ops->init) {
+ if (host->dma_ops->init(host)) {
+ dev_err(&host->pdev->dev, "%s: Unable to initialize "
+ "DMA Controller.\n", __func__);
+ goto no_dma;
+ }
+ } else {
+ dev_err(&host->pdev->dev, "DMA initialization not found.\n");
+ goto no_dma;
+ }
+
+ host->use_dma = 1;
+ return;
+
+no_dma:
+ dev_info(&host->pdev->dev, "Using PIO mode.\n");
+ host->use_dma = 0;
+ return;
+}
+
+static bool mci_wait_reset(struct device *dev, struct dw_mci *host)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(500);
+ unsigned int ctrl;
+
+ mci_writel(host, CTRL, (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+ SDMMC_CTRL_DMA_RESET));
+
+ /* wait till resets clear */
+ do {
+ ctrl = mci_readl(host, CTRL);
+ if (!(ctrl & (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+ SDMMC_CTRL_DMA_RESET)))
+ return true;
+ } while (time_before(jiffies, timeout));
+
+ dev_err(dev, "Timeout resetting block (ctrl %#x)\n", ctrl);
+
+ return false;
+}
+
+static int dw_mci_probe(struct platform_device *pdev)
+{
+ struct dw_mci *host;
+ struct resource *regs;
+ struct dw_mci_board *pdata;
+ int irq, ret, i, width;
+ u32 fifo_size;
+
+ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!regs)
+ return -ENXIO;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ host = kzalloc(sizeof(struct dw_mci), GFP_KERNEL);
+ if (!host)
+ return -ENOMEM;
+
+ host->pdev = pdev;
+ host->pdata = pdata = pdev->dev.platform_data;
+ if (!pdata || !pdata->init) {
+ dev_err(&pdev->dev,
+ "Platform data must supply init function\n");
+ ret = -ENODEV;
+ goto err_freehost;
+ }
+
+ if (!pdata->select_slot && pdata->num_slots > 1) {
+ dev_err(&pdev->dev,
+ "Platform data must supply select_slot function\n");
+ ret = -ENODEV;
+ goto err_freehost;
+ }
+
+ if (!pdata->bus_hz) {
+ dev_err(&pdev->dev,
+ "Platform data must supply bus speed\n");
+ ret = -ENODEV;
+ goto err_freehost;
+ }
+
+ host->bus_hz = pdata->bus_hz;
+ host->quirks = pdata->quirks;
+
+ spin_lock_init(&host->lock);
+ INIT_LIST_HEAD(&host->queue);
+
+ ret = -ENOMEM;
+ host->regs = ioremap(regs->start, regs->end - regs->start + 1);
+ if (!host->regs)
+ goto err_freehost;
+
+ host->dma_ops = pdata->dma_ops;
+ dw_mci_init_dma(host);
+
+ /*
+ * Get the host data width - this assumes that HCON has been set with
+ * the correct values.
+ */
+ i = (mci_readl(host, HCON) >> 7) & 0x7;
+ if (!i) {
+ host->push_data = dw_mci_push_data16;
+ host->pull_data = dw_mci_pull_data16;
+ width = 16;
+ host->data_shift = 1;
+ } else if (i == 2) {
+ host->push_data = dw_mci_push_data64;
+ host->pull_data = dw_mci_pull_data64;
+ width = 64;
+ host->data_shift = 3;
+ } else {
+ /* Check for a reserved value, and warn if it is */
+ WARN((i != 1),
+ "HCON reports a reserved host data width!\n"
+ "Defaulting to 32-bit access.\n");
+ host->push_data = dw_mci_push_data32;
+ host->pull_data = dw_mci_pull_data32;
+ width = 32;
+ host->data_shift = 2;
+ }
+
+ /* Reset all blocks */
+ if (!mci_wait_reset(&pdev->dev, host)) {
+ ret = -ENODEV;
+ goto err_dmaunmap;
+ }
+
+ /* Clear the interrupts for the host controller */
+ mci_writel(host, RINTSTS, 0xFFFFFFFF);
+ mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+ /* Put in max timeout */
+ mci_writel(host, TMOUT, 0xFFFFFFFF);
+
+ /*
+ * FIFO threshold settings RxMark = fifo_size / 2 - 1,
+ * Tx Mark = fifo_size / 2 DMA Size = 8
+ */
+ fifo_size = mci_readl(host, FIFOTH);
+ fifo_size = (fifo_size >> 16) & 0x7ff;
+ mci_writel(host, FIFOTH, ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
+ ((fifo_size/2) << 0)));
+
+ /* disable clock to CIU */
+ mci_writel(host, CLKENA, 0);
+ mci_writel(host, CLKSRC, 0);
+
+ tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
+ tasklet_init(&host->card_tasklet,
+ dw_mci_tasklet_card, (unsigned long)host);
+
+ ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host);
+ if (ret)
+ goto err_dmaunmap;
+
+ platform_set_drvdata(pdev, host);
+
+ if (host->pdata->num_slots)
+ host->num_slots = host->pdata->num_slots;
+ else
+ host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1;
+
+ /* We need at least one slot to succeed */
+ for (i = 0; i < host->num_slots; i++) {
+ ret = dw_mci_init_slot(host, i);
+ if (ret) {
+ ret = -ENODEV;
+ goto err_init_slot;
+ }
+ }
+
+ /*
+ * Enable interrupts for command done, data over, data empty, card det,
+ * receive ready and error such as transmit, receive timeout, crc error
+ */
+ mci_writel(host, RINTSTS, 0xFFFFFFFF);
+ mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
+ SDMMC_INT_TXDR | SDMMC_INT_RXDR |
+ DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+ mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
+
+ dev_info(&pdev->dev, "DW MMC controller at irq %d, "
+ "%d bit host data width\n", irq, width);
+ if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
+ dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n");
+
+ return 0;
+
+err_init_slot:
+ /* De-init any initialized slots */
+ while (i > 0) {
+ if (host->slot[i])
+ dw_mci_cleanup_slot(host->slot[i], i);
+ i--;
+ }
+ free_irq(irq, host);
+
+err_dmaunmap:
+ if (host->use_dma && host->dma_ops->exit)
+ host->dma_ops->exit(host);
+ dma_free_coherent(&host->pdev->dev, PAGE_SIZE,
+ host->sg_cpu, host->sg_dma);
+ iounmap(host->regs);
+
+err_freehost:
+ kfree(host);
+ return ret;
+}
+
+static int __exit dw_mci_remove(struct platform_device *pdev)
+{
+ struct dw_mci *host = platform_get_drvdata(pdev);
+ int i;
+
+ mci_writel(host, RINTSTS, 0xFFFFFFFF);
+ mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+ platform_set_drvdata(pdev, NULL);
+
+ for (i = 0; i < host->num_slots; i++) {
+ dev_dbg(&pdev->dev, "remove slot %d\n", i);
+ if (host->slot[i])
+ dw_mci_cleanup_slot(host->slot[i], i);
+ }
+
+ /* disable clock to CIU */
+ mci_writel(host, CLKENA, 0);
+ mci_writel(host, CLKSRC, 0);
+
+ free_irq(platform_get_irq(pdev, 0), host);
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
+
+ if (host->use_dma && host->dma_ops->exit)
+ host->dma_ops->exit(host);
+
+ iounmap(host->regs);
+
+ kfree(host);
+ return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * TODO: we should probably disable the clock to the card in the suspend path.
+ */
+static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg)
+{
+ int i, ret;
+ struct dw_mci *host = platform_get_drvdata(pdev);
+
+ for (i = 0; i < host->num_slots; i++) {
+ struct dw_mci_slot *slot = host->slot[i];
+ if (!slot)
+ continue;
+ ret = mmc_suspend_host(slot->mmc);
+ if (ret < 0) {
+ while (--i >= 0) {
+ slot = host->slot[i];
+ if (slot)
+ mmc_resume_host(host->slot[i]->mmc);
+ }
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int dw_mci_resume(struct platform_device *pdev)
+{
+ int i, ret;
+ struct dw_mci *host = platform_get_drvdata(pdev);
+
+ for (i = 0; i < host->num_slots; i++) {
+ struct dw_mci_slot *slot = host->slot[i];
+ if (!slot)
+ continue;
+ ret = mmc_resume_host(host->slot[i]->mmc);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+#else
+#define dw_mci_suspend NULL
+#define dw_mci_resume NULL
+#endif /* CONFIG_PM */
+
+static struct platform_driver dw_mci_driver = {
+ .remove = __exit_p(dw_mci_remove),
+ .suspend = dw_mci_suspend,
+ .resume = dw_mci_resume,
+ .driver = {
+ .name = "dw_mmc",
+ },
+};
+
+static int __init dw_mci_init(void)
+{
+ return platform_driver_probe(&dw_mci_driver, dw_mci_probe);
+}
+
+static void __exit dw_mci_exit(void)
+{
+ platform_driver_unregister(&dw_mci_driver);
+}
+
+module_init(dw_mci_init);
+module_exit(dw_mci_exit);
+
+MODULE_DESCRIPTION("DW Multimedia Card Interface driver");
+MODULE_AUTHOR("NXP Semiconductor VietNam");
+MODULE_AUTHOR("Imagination Technologies Ltd");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
new file mode 100644
index 0000000..5dd55a7
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.h
@@ -0,0 +1,168 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ * (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _DW_MMC_H_
+#define _DW_MMC_H_
+
+#define SDMMC_CTRL 0x000
+#define SDMMC_PWREN 0x004
+#define SDMMC_CLKDIV 0x008
+#define SDMMC_CLKSRC 0x00c
+#define SDMMC_CLKENA 0x010
+#define SDMMC_TMOUT 0x014
+#define SDMMC_CTYPE 0x018
+#define SDMMC_BLKSIZ 0x01c
+#define SDMMC_BYTCNT 0x020
+#define SDMMC_INTMASK 0x024
+#define SDMMC_CMDARG 0x028
+#define SDMMC_CMD 0x02c
+#define SDMMC_RESP0 0x030
+#define SDMMC_RESP1 0x034
+#define SDMMC_RESP2 0x038
+#define SDMMC_RESP3 0x03c
+#define SDMMC_MINTSTS 0x040
+#define SDMMC_RINTSTS 0x044
+#define SDMMC_STATUS 0x048
+#define SDMMC_FIFOTH 0x04c
+#define SDMMC_CDETECT 0x050
+#define SDMMC_WRTPRT 0x054
+#define SDMMC_GPIO 0x058
+#define SDMMC_TCBCNT 0x05c
+#define SDMMC_TBBCNT 0x060
+#define SDMMC_DEBNCE 0x064
+#define SDMMC_USRID 0x068
+#define SDMMC_VERID 0x06c
+#define SDMMC_HCON 0x070
+#define SDMMC_BMOD 0x080
+#define SDMMC_PLDMND 0x084
+#define SDMMC_DBADDR 0x088
+#define SDMMC_IDSTS 0x08c
+#define SDMMC_IDINTEN 0x090
+#define SDMMC_DSCADDR 0x094
+#define SDMMC_BUFADDR 0x098
+#define SDMMC_DATA 0x100
+#define SDMMC_DATA_ADR 0x100
+
+/* shift bit field */
+#define _SBF(f, v) ((v) << (f))
+
+/* Control register defines */
+#define SDMMC_CTRL_USE_IDMAC BIT(25)
+#define SDMMC_CTRL_CEATA_INT_EN BIT(11)
+#define SDMMC_CTRL_SEND_AS_CCSD BIT(10)
+#define SDMMC_CTRL_SEND_CCSD BIT(9)
+#define SDMMC_CTRL_ABRT_READ_DATA BIT(8)
+#define SDMMC_CTRL_SEND_IRQ_RESP BIT(7)
+#define SDMMC_CTRL_READ_WAIT BIT(6)
+#define SDMMC_CTRL_DMA_ENABLE BIT(5)
+#define SDMMC_CTRL_INT_ENABLE BIT(4)
+#define SDMMC_CTRL_DMA_RESET BIT(2)
+#define SDMMC_CTRL_FIFO_RESET BIT(1)
+#define SDMMC_CTRL_RESET BIT(0)
+/* Clock Enable register defines */
+#define SDMMC_CLKEN_LOW_PWR BIT(16)
+#define SDMMC_CLKEN_ENABLE BIT(0)
+/* time-out register defines */
+#define SDMMC_TMOUT_DATA(n) _SBF(8, (n))
+#define SDMMC_TMOUT_DATA_MSK 0xFFFFFF00
+#define SDMMC_TMOUT_RESP(n) ((n) & 0xFF)
+#define SDMMC_TMOUT_RESP_MSK 0xFF
+/* card-type register defines */
+#define SDMMC_CTYPE_8BIT BIT(16)
+#define SDMMC_CTYPE_4BIT BIT(0)
+#define SDMMC_CTYPE_1BIT 0
+/* Interrupt status & mask register defines */
+#define SDMMC_INT_SDIO BIT(16)
+#define SDMMC_INT_EBE BIT(15)
+#define SDMMC_INT_ACD BIT(14)
+#define SDMMC_INT_SBE BIT(13)
+#define SDMMC_INT_HLE BIT(12)
+#define SDMMC_INT_FRUN BIT(11)
+#define SDMMC_INT_HTO BIT(10)
+#define SDMMC_INT_DTO BIT(9)
+#define SDMMC_INT_RTO BIT(8)
+#define SDMMC_INT_DCRC BIT(7)
+#define SDMMC_INT_RCRC BIT(6)
+#define SDMMC_INT_RXDR BIT(5)
+#define SDMMC_INT_TXDR BIT(4)
+#define SDMMC_INT_DATA_OVER BIT(3)
+#define SDMMC_INT_CMD_DONE BIT(2)
+#define SDMMC_INT_RESP_ERR BIT(1)
+#define SDMMC_INT_CD BIT(0)
+#define SDMMC_INT_ERROR 0xbfc2
+/* Command register defines */
+#define SDMMC_CMD_START BIT(31)
+#define SDMMC_CMD_CCS_EXP BIT(23)
+#define SDMMC_CMD_CEATA_RD BIT(22)
+#define SDMMC_CMD_UPD_CLK BIT(21)
+#define SDMMC_CMD_INIT BIT(15)
+#define SDMMC_CMD_STOP BIT(14)
+#define SDMMC_CMD_PRV_DAT_WAIT BIT(13)
+#define SDMMC_CMD_SEND_STOP BIT(12)
+#define SDMMC_CMD_STRM_MODE BIT(11)
+#define SDMMC_CMD_DAT_WR BIT(10)
+#define SDMMC_CMD_DAT_EXP BIT(9)
+#define SDMMC_CMD_RESP_CRC BIT(8)
+#define SDMMC_CMD_RESP_LONG BIT(7)
+#define SDMMC_CMD_RESP_EXP BIT(6)
+#define SDMMC_CMD_INDX(n) ((n) & 0x1F)
+/* Status register defines */
+#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FF)
+#define SDMMC_FIFO_SZ 32
+/* Internal DMAC interrupt defines */
+#define SDMMC_IDMAC_INT_AI BIT(9)
+#define SDMMC_IDMAC_INT_NI BIT(8)
+#define SDMMC_IDMAC_INT_CES BIT(5)
+#define SDMMC_IDMAC_INT_DU BIT(4)
+#define SDMMC_IDMAC_INT_FBE BIT(2)
+#define SDMMC_IDMAC_INT_RI BIT(1)
+#define SDMMC_IDMAC_INT_TI BIT(0)
+/* Internal DMAC bus mode bits */
+#define SDMMC_IDMAC_ENABLE BIT(7)
+#define SDMMC_IDMAC_FB BIT(1)
+#define SDMMC_IDMAC_SWRESET BIT(0)
+
+/* Register access macros */
+#define mci_readl(dev, reg) \
+ __raw_readl(dev->regs + SDMMC_##reg)
+#define mci_writel(dev, reg, value) \
+ __raw_writel((value), dev->regs + SDMMC_##reg)
+
+/* 16-bit FIFO access macros */
+#define mci_readw(dev, reg) \
+ __raw_readw(dev->regs + SDMMC_##reg)
+#define mci_writew(dev, reg, value) \
+ __raw_writew((value), dev->regs + SDMMC_##reg)
+
+/* 64-bit FIFO access macros */
+#ifdef readq
+#define mci_readq(dev, reg) \
+ __raw_readq(dev->regs + SDMMC_##reg)
+#define mci_writeq(dev, reg, value) \
+ __raw_writeq((value), dev->regs + SDMMC_##reg)
+#else
+/*
+ * Dummy readq implementation for architectures that don't define it.
+ *
+ * We would assume that none of these architectures would configure
+ * the IP block with a 64bit FIFO width, so this code will never be
+ * executed on those machines. Defining these macros here keeps the
+ * rest of the code free from ifdefs.
+ */
+#define mci_readq(dev, reg) \
+ (*(volatile u64 __force *)(dev->regs + SDMMC_##reg))
+#define mci_writeq(dev, reg, value) \
+ (*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value)
+#endif
+
+#endif /* _DW_MMC_H_ */
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index bdd2cbb..4428594 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -31,6 +31,7 @@
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/gpio.h>
+#include <linux/regulator/consumer.h>
#include <asm/dma.h>
#include <asm/irq.h>
@@ -141,10 +142,49 @@ struct mxcmci_host {
struct work_struct datawork;
spinlock_t lock;
+
+ struct regulator *vcc;
};
static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
+static inline void mxcmci_init_ocr(struct mxcmci_host *host)
+{
+ host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc");
+
+ if (IS_ERR(host->vcc)) {
+ host->vcc = NULL;
+ } else {
+ host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc);
+ if (host->pdata && host->pdata->ocr_avail)
+ dev_warn(mmc_dev(host->mmc),
+ "pdata->ocr_avail will not be used\n");
+ }
+
+ if (host->vcc == NULL) {
+ /* fall-back to platform data */
+ if (host->pdata && host->pdata->ocr_avail)
+ host->mmc->ocr_avail = host->pdata->ocr_avail;
+ else
+ host->mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+ }
+}
+
+static inline void mxcmci_set_power(struct mxcmci_host *host,
+ unsigned char power_mode,
+ unsigned int vdd)
+{
+ if (host->vcc) {
+ if (power_mode == MMC_POWER_UP)
+ mmc_regulator_set_ocr(host->mmc, host->vcc, vdd);
+ else if (power_mode == MMC_POWER_OFF)
+ mmc_regulator_set_ocr(host->mmc, host->vcc, 0);
+ }
+
+ if (host->pdata && host->pdata->setpower)
+ host->pdata->setpower(mmc_dev(host->mmc), vdd);
+}
+
static inline int mxcmci_use_dma(struct mxcmci_host *host)
{
return host->do_dma;
@@ -680,9 +720,9 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->cmdat &= ~CMD_DAT_CONT_BUS_WIDTH_4;
if (host->power_mode != ios->power_mode) {
- if (host->pdata && host->pdata->setpower)
- host->pdata->setpower(mmc_dev(mmc), ios->vdd);
+ mxcmci_set_power(host, ios->power_mode, ios->vdd);
host->power_mode = ios->power_mode;
+
if (ios->power_mode == MMC_POWER_ON)
host->cmdat |= CMD_DAT_CONT_INIT;
}
@@ -807,10 +847,7 @@ static int mxcmci_probe(struct platform_device *pdev)
host->pdata = pdev->dev.platform_data;
spin_lock_init(&host->lock);
- if (host->pdata && host->pdata->ocr_avail)
- mmc->ocr_avail = host->pdata->ocr_avail;
- else
- mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+ mxcmci_init_ocr(host);
if (host->pdata && host->pdata->dat3_card_detect)
host->default_irq_mask =
@@ -915,6 +952,9 @@ static int mxcmci_remove(struct platform_device *pdev)
mmc_remove_host(mmc);
+ if (host->vcc)
+ regulator_put(host->vcc);
+
if (host->pdata && host->pdata->exit)
host->pdata->exit(&pdev->dev, mmc);
@@ -927,7 +967,6 @@ static int mxcmci_remove(struct platform_device *pdev)
clk_put(host->clk);
release_mem_region(host->res->start, resource_size(host->res));
- release_resource(host->res);
mmc_free_host(mmc);
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
new file mode 100644
index 0000000..2aeef4f
--- /dev/null
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -0,0 +1,70 @@
+/*
+ * sdhci-dove.c Support for SDHCI on Marvell's Dove SoC
+ *
+ * Author: Saeed Bishara <saeed@marvell.com>
+ * Mike Rapoport <mike@compulab.co.il>
+ * Based on sdhci-cns3xxx.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/mmc/host.h>
+
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
+{
+ u16 ret;
+
+ switch (reg) {
+ case SDHCI_HOST_VERSION:
+ case SDHCI_SLOT_INT_STATUS:
+ /* those registers don't exist */
+ return 0;
+ default:
+ ret = readw(host->ioaddr + reg);
+ }
+ return ret;
+}
+
+static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
+{
+ u32 ret;
+
+ switch (reg) {
+ case SDHCI_CAPABILITIES:
+ ret = readl(host->ioaddr + reg);
+ /* Mask the support for 3.0V */
+ ret &= ~SDHCI_CAN_VDD_300;
+ break;
+ default:
+ ret = readl(host->ioaddr + reg);
+ }
+ return ret;
+}
+
+static struct sdhci_ops sdhci_dove_ops = {
+ .read_w = sdhci_dove_readw,
+ .read_l = sdhci_dove_readl,
+};
+
+struct sdhci_pltfm_data sdhci_dove_pdata = {
+ .ops = &sdhci_dove_ops,
+ .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
+ SDHCI_QUIRK_NO_BUSY_IRQ |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+ SDHCI_QUIRK_FORCE_DMA,
+};
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 3d9c246..0dc905b 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -176,6 +176,74 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = {
.quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
};
+/* O2Micro extra registers */
+#define O2_SD_LOCK_WP 0xD3
+#define O2_SD_MULTI_VCC3V 0xEE
+#define O2_SD_CLKREQ 0xEC
+#define O2_SD_CAPS 0xE0
+#define O2_SD_ADMA1 0xE2
+#define O2_SD_ADMA2 0xE7
+#define O2_SD_INF_MOD 0xF1
+
+static int o2_probe(struct sdhci_pci_chip *chip)
+{
+ int ret;
+ u8 scratch;
+
+ switch (chip->pdev->device) {
+ case PCI_DEVICE_ID_O2_8220:
+ case PCI_DEVICE_ID_O2_8221:
+ case PCI_DEVICE_ID_O2_8320:
+ case PCI_DEVICE_ID_O2_8321:
+ /* This extra setup is required due to broken ADMA. */
+ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+ if (ret)
+ return ret;
+ scratch &= 0x7f;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+
+ /* Set Multi 3 to VCC3V# */
+ pci_write_config_byte(chip->pdev, O2_SD_MULTI_VCC3V, 0x08);
+
+ /* Disable CLK_REQ# support after media DET */
+ ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch);
+ if (ret)
+ return ret;
+ scratch |= 0x20;
+ pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch);
+
+ /* Choose capabilities, enable SDMA. We have to write 0x01
+ * to the capabilities register first to unlock it.
+ */
+ ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch);
+ if (ret)
+ return ret;
+ scratch |= 0x01;
+ pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch);
+ pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73);
+
+ /* Disable ADMA1/2 */
+ pci_write_config_byte(chip->pdev, O2_SD_ADMA1, 0x39);
+ pci_write_config_byte(chip->pdev, O2_SD_ADMA2, 0x08);
+
+ /* Disable the infinite transfer mode */
+ ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch);
+ if (ret)
+ return ret;
+ scratch |= 0x08;
+ pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch);
+
+ /* Lock WP */
+ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+ if (ret)
+ return ret;
+ scratch |= 0x80;
+ pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+ }
+
+ return 0;
+}
+
static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
{
u8 scratch;
@@ -204,6 +272,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
static int jmicron_probe(struct sdhci_pci_chip *chip)
{
int ret;
+ u16 mmcdev = 0;
if (chip->pdev->revision == 0) {
chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
@@ -225,12 +294,17 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
* 2. The MMC interface has a lower subfunction number
* than the SD interface.
*/
- if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) {
+ if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD)
+ mmcdev = PCI_DEVICE_ID_JMICRON_JMB38X_MMC;
+ else if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD)
+ mmcdev = PCI_DEVICE_ID_JMICRON_JMB388_ESD;
+
+ if (mmcdev) {
struct pci_dev *sd_dev;
sd_dev = NULL;
while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON,
- PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) {
+ mmcdev, sd_dev)) != NULL) {
if ((PCI_SLOT(chip->pdev->devfn) ==
PCI_SLOT(sd_dev->devfn)) &&
(chip->pdev->bus == sd_dev->bus))
@@ -290,13 +364,25 @@ static int jmicron_probe_slot(struct sdhci_pci_slot *slot)
slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
}
+ /* JM388 MMC doesn't support 1.8V while SD supports it */
+ if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
+ slot->host->ocr_avail_sd = MMC_VDD_32_33 | MMC_VDD_33_34 |
+ MMC_VDD_29_30 | MMC_VDD_30_31 |
+ MMC_VDD_165_195; /* allow 1.8V */
+ slot->host->ocr_avail_mmc = MMC_VDD_32_33 | MMC_VDD_33_34 |
+ MMC_VDD_29_30 | MMC_VDD_30_31; /* no 1.8V for MMC */
+ }
+
/*
* The secondary interface requires a bit set to get the
* interrupts.
*/
- if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+ if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+ slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
jmicron_enable_mmc(slot->host, 1);
+ slot->host->mmc->caps |= MMC_CAP_BUS_WIDTH_TEST;
+
return 0;
}
@@ -305,7 +391,8 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
if (dead)
return;
- if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+ if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+ slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
jmicron_enable_mmc(slot->host, 0);
}
@@ -313,7 +400,8 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state)
{
int i;
- if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+ if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+ chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
for (i = 0;i < chip->num_slots;i++)
jmicron_enable_mmc(chip->slots[i]->host, 0);
}
@@ -325,7 +413,8 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
{
int ret, i;
- if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+ if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+ chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
for (i = 0;i < chip->num_slots;i++)
jmicron_enable_mmc(chip->slots[i]->host, 1);
}
@@ -339,6 +428,10 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
return 0;
}
+static const struct sdhci_pci_fixes sdhci_o2 = {
+ .probe = o2_probe,
+};
+
static const struct sdhci_pci_fixes sdhci_jmicron = {
.probe = jmicron_probe,
@@ -510,6 +603,22 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
},
{
+ .vendor = PCI_VENDOR_ID_JMICRON,
+ .device = PCI_DEVICE_ID_JMICRON_JMB388_SD,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_jmicron,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_JMICRON,
+ .device = PCI_DEVICE_ID_JMICRON_JMB388_ESD,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_jmicron,
+ },
+
+ {
.vendor = PCI_VENDOR_ID_SYSKONNECT,
.device = 0x8000,
.subvendor = PCI_ANY_ID,
@@ -589,6 +698,46 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
.driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
},
+ {
+ .vendor = PCI_VENDOR_ID_O2,
+ .device = PCI_DEVICE_ID_O2_8120,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_o2,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_O2,
+ .device = PCI_DEVICE_ID_O2_8220,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_o2,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_O2,
+ .device = PCI_DEVICE_ID_O2_8221,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_o2,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_O2,
+ .device = PCI_DEVICE_ID_O2_8320,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_o2,
+ },
+
+ {
+ .vendor = PCI_VENDOR_ID_O2,
+ .device = PCI_DEVICE_ID_O2_8321,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (kernel_ulong_t)&sdhci_o2,
+ },
+
{ /* Generic SD host controller */
PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
},
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 0502f89..dbab040 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -170,6 +170,12 @@ static const struct platform_device_id sdhci_pltfm_ids[] = {
#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX
{ "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata },
#endif
+#ifdef CONFIG_MMC_SDHCI_DOVE
+ { "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata },
+#endif
+#ifdef CONFIG_MMC_SDHCI_TEGRA
+ { "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata },
+#endif
{ },
};
MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids);
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index c1bfe48..ea2e44d 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -22,5 +22,7 @@ struct sdhci_pltfm_host {
extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata;
extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata;
+extern struct sdhci_pltfm_data sdhci_dove_pdata;
+extern struct sdhci_pltfm_data sdhci_tegra_pdata;
#endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index aacb862..1720358 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -130,6 +130,15 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
if (!clksrc)
return UINT_MAX;
+ /*
+ * Clock divider's step is different as 1 from that of host controller
+ * when 'clk_type' is S3C_SDHCI_CLK_DIV_EXTERNAL.
+ */
+ if (ourhost->pdata->clk_type) {
+ rate = clk_round_rate(clksrc, wanted);
+ return wanted - rate;
+ }
+
rate = clk_get_rate(clksrc);
for (div = 1; div < 256; div *= 2) {
@@ -232,6 +241,42 @@ static unsigned int sdhci_s3c_get_min_clock(struct sdhci_host *host)
return min;
}
+/* sdhci_cmu_get_max_clk - callback to get maximum clock frequency.*/
+static unsigned int sdhci_cmu_get_max_clock(struct sdhci_host *host)
+{
+ struct sdhci_s3c *ourhost = to_s3c(host);
+
+ return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], UINT_MAX);
+}
+
+/* sdhci_cmu_get_min_clock - callback to get minimal supported clock value. */
+static unsigned int sdhci_cmu_get_min_clock(struct sdhci_host *host)
+{
+ struct sdhci_s3c *ourhost = to_s3c(host);
+
+ /*
+ * initial clock can be in the frequency range of
+ * 100KHz-400KHz, so we set it as max value.
+ */
+ return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], 400000);
+}
+
+/* sdhci_cmu_set_clock - callback on clock change.*/
+static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ struct sdhci_s3c *ourhost = to_s3c(host);
+
+ /* don't bother if the clock is going off */
+ if (clock == 0)
+ return;
+
+ sdhci_s3c_set_clock(host, clock);
+
+ clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock);
+
+ host->clock = clock;
+}
+
static struct sdhci_ops sdhci_s3c_ops = {
.get_max_clock = sdhci_s3c_get_max_clk,
.set_clock = sdhci_s3c_set_clock,
@@ -361,6 +406,13 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
clks++;
sc->clk_bus[ptr] = clk;
+
+ /*
+ * save current clock index to know which clock bus
+ * is used later in overriding functions.
+ */
+ sc->cur_clk = ptr;
+
clk_enable(clk);
dev_info(dev, "clock source %d: %s (%ld Hz)\n",
@@ -427,6 +479,20 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
/* HSMMC on Samsung SoCs uses SDCLK as timeout clock */
host->quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK;
+ /*
+ * If controller does not have internal clock divider,
+ * we can use overriding functions instead of default.
+ */
+ if (pdata->clk_type) {
+ sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
+ sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
+ sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
+ }
+
+ /* It supports additional host capabilities if needed */
+ if (pdata->host_caps)
+ host->mmc->caps |= pdata->host_caps;
+
ret = sdhci_add_host(host);
if (ret) {
dev_err(dev, "sdhci_add_host() failed\n");
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
new file mode 100644
index 0000000..4823ee9
--- /dev/null
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+
+#include <mach/gpio.h>
+#include <mach/sdhci.h>
+
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
+{
+ u32 val;
+
+ if (unlikely(reg == SDHCI_PRESENT_STATE)) {
+ /* Use wp_gpio here instead? */
+ val = readl(host->ioaddr + reg);
+ return val | SDHCI_WRITE_PROTECT;
+ }
+
+ return readl(host->ioaddr + reg);
+}
+
+static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
+{
+ if (unlikely(reg == SDHCI_HOST_VERSION)) {
+ /* Erratum: Version register is invalid in HW. */
+ return SDHCI_SPEC_200;
+ }
+
+ return readw(host->ioaddr + reg);
+}
+
+static void tegra_sdhci_writel(struct sdhci_host *host, u32 val, int reg)
+{
+ /* Seems like we're getting spurious timeout and crc errors, so
+ * disable signalling of them. In case of real errors software
+ * timers should take care of eventually detecting them.
+ */
+ if (unlikely(reg == SDHCI_SIGNAL_ENABLE))
+ val &= ~(SDHCI_INT_TIMEOUT|SDHCI_INT_CRC);
+
+ writel(val, host->ioaddr + reg);
+
+ if (unlikely(reg == SDHCI_INT_ENABLE)) {
+ /* Erratum: Must enable block gap interrupt detection */
+ u8 gap_ctrl = readb(host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
+ if (val & SDHCI_INT_CARD_INT)
+ gap_ctrl |= 0x8;
+ else
+ gap_ctrl &= ~0x8;
+ writeb(gap_ctrl, host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
+ }
+}
+
+static unsigned int tegra_sdhci_get_ro(struct sdhci_host *sdhci)
+{
+ struct platform_device *pdev = to_platform_device(mmc_dev(sdhci->mmc));
+ struct tegra_sdhci_platform_data *plat;
+
+ plat = pdev->dev.platform_data;
+
+ if (!gpio_is_valid(plat->wp_gpio))
+ return -1;
+
+ return gpio_get_value(plat->wp_gpio);
+}
+
+static irqreturn_t carddetect_irq(int irq, void *data)
+{
+ struct sdhci_host *sdhost = (struct sdhci_host *)data;
+
+ tasklet_schedule(&sdhost->card_tasklet);
+ return IRQ_HANDLED;
+};
+
+static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width)
+{
+ struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+ struct tegra_sdhci_platform_data *plat;
+ u32 ctrl;
+
+ plat = pdev->dev.platform_data;
+
+ ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+ if (plat->is_8bit && bus_width == MMC_BUS_WIDTH_8) {
+ ctrl &= ~SDHCI_CTRL_4BITBUS;
+ ctrl |= SDHCI_CTRL_8BITBUS;
+ } else {
+ ctrl &= ~SDHCI_CTRL_8BITBUS;
+ if (bus_width == MMC_BUS_WIDTH_4)
+ ctrl |= SDHCI_CTRL_4BITBUS;
+ else
+ ctrl &= ~SDHCI_CTRL_4BITBUS;
+ }
+ sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+ return 0;
+}
+
+
+static int tegra_sdhci_pltfm_init(struct sdhci_host *host,
+ struct sdhci_pltfm_data *pdata)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+ struct tegra_sdhci_platform_data *plat;
+ struct clk *clk;
+ int rc;
+
+ plat = pdev->dev.platform_data;
+ if (plat == NULL) {
+ dev_err(mmc_dev(host->mmc), "missing platform data\n");
+ return -ENXIO;
+ }
+
+ if (gpio_is_valid(plat->power_gpio)) {
+ rc = gpio_request(plat->power_gpio, "sdhci_power");
+ if (rc) {
+ dev_err(mmc_dev(host->mmc),
+ "failed to allocate power gpio\n");
+ goto out;
+ }
+ tegra_gpio_enable(plat->power_gpio);
+ gpio_direction_output(plat->power_gpio, 1);
+ }
+
+ if (gpio_is_valid(plat->cd_gpio)) {
+ rc = gpio_request(plat->cd_gpio, "sdhci_cd");
+ if (rc) {
+ dev_err(mmc_dev(host->mmc),
+ "failed to allocate cd gpio\n");
+ goto out_power;
+ }
+ tegra_gpio_enable(plat->cd_gpio);
+ gpio_direction_input(plat->cd_gpio);
+
+ rc = request_irq(gpio_to_irq(plat->cd_gpio), carddetect_irq,
+ IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+ mmc_hostname(host->mmc), host);
+
+ if (rc) {
+ dev_err(mmc_dev(host->mmc), "request irq error\n");
+ goto out_cd;
+ }
+
+ }
+
+ if (gpio_is_valid(plat->wp_gpio)) {
+ rc = gpio_request(plat->wp_gpio, "sdhci_wp");
+ if (rc) {
+ dev_err(mmc_dev(host->mmc),
+ "failed to allocate wp gpio\n");
+ goto out_cd;
+ }
+ tegra_gpio_enable(plat->wp_gpio);
+ gpio_direction_input(plat->wp_gpio);
+ }
+
+ clk = clk_get(mmc_dev(host->mmc), NULL);
+ if (IS_ERR(clk)) {
+ dev_err(mmc_dev(host->mmc), "clk err\n");
+ rc = PTR_ERR(clk);
+ goto out_wp;
+ }
+ clk_enable(clk);
+ pltfm_host->clk = clk;
+
+ if (plat->is_8bit)
+ host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
+ return 0;
+
+out_wp:
+ if (gpio_is_valid(plat->wp_gpio)) {
+ tegra_gpio_disable(plat->wp_gpio);
+ gpio_free(plat->wp_gpio);
+ }
+
+out_cd:
+ if (gpio_is_valid(plat->cd_gpio)) {
+ tegra_gpio_disable(plat->cd_gpio);
+ gpio_free(plat->cd_gpio);
+ }
+
+out_power:
+ if (gpio_is_valid(plat->power_gpio)) {
+ tegra_gpio_disable(plat->power_gpio);
+ gpio_free(plat->power_gpio);
+ }
+
+out:
+ return rc;
+}
+
+static void tegra_sdhci_pltfm_exit(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+ struct tegra_sdhci_platform_data *plat;
+
+ plat = pdev->dev.platform_data;
+
+ if (gpio_is_valid(plat->wp_gpio)) {
+ tegra_gpio_disable(plat->wp_gpio);
+ gpio_free(plat->wp_gpio);
+ }
+
+ if (gpio_is_valid(plat->cd_gpio)) {
+ tegra_gpio_disable(plat->cd_gpio);
+ gpio_free(plat->cd_gpio);
+ }
+
+ if (gpio_is_valid(plat->power_gpio)) {
+ tegra_gpio_disable(plat->power_gpio);
+ gpio_free(plat->power_gpio);
+ }
+
+ clk_disable(pltfm_host->clk);
+ clk_put(pltfm_host->clk);
+}
+
+static struct sdhci_ops tegra_sdhci_ops = {
+ .get_ro = tegra_sdhci_get_ro,
+ .read_l = tegra_sdhci_readl,
+ .read_w = tegra_sdhci_readw,
+ .write_l = tegra_sdhci_writel,
+ .platform_8bit_width = tegra_sdhci_8bit,
+};
+
+struct sdhci_pltfm_data sdhci_tegra_pdata = {
+ .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+ SDHCI_QUIRK_SINGLE_POWER_WRITE |
+ SDHCI_QUIRK_NO_HISPD_BIT |
+ SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+ .ops = &tegra_sdhci_ops,
+ .init = tegra_sdhci_pltfm_init,
+ .exit = tegra_sdhci_pltfm_exit,
+};
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a25db42..9e15f41 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -23,6 +23,7 @@
#include <linux/leds.h>
+#include <linux/mmc/mmc.h>
#include <linux/mmc/host.h>
#include "sdhci.h"
@@ -77,8 +78,11 @@ static void sdhci_dumpregs(struct sdhci_host *host)
printk(KERN_DEBUG DRIVER_NAME ": AC12 err: 0x%08x | Slot int: 0x%08x\n",
sdhci_readw(host, SDHCI_ACMD12_ERR),
sdhci_readw(host, SDHCI_SLOT_INT_STATUS));
- printk(KERN_DEBUG DRIVER_NAME ": Caps: 0x%08x | Max curr: 0x%08x\n",
+ printk(KERN_DEBUG DRIVER_NAME ": Caps: 0x%08x | Caps_1: 0x%08x\n",
sdhci_readl(host, SDHCI_CAPABILITIES),
+ sdhci_readl(host, SDHCI_CAPABILITIES_1));
+ printk(KERN_DEBUG DRIVER_NAME ": Cmd: 0x%08x | Max curr: 0x%08x\n",
+ sdhci_readw(host, SDHCI_COMMAND),
sdhci_readl(host, SDHCI_MAX_CURRENT));
if (host->flags & SDHCI_USE_ADMA)
@@ -1518,7 +1522,11 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
if (intmask & SDHCI_INT_DATA_TIMEOUT)
host->data->error = -ETIMEDOUT;
- else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT))
+ else if (intmask & SDHCI_INT_DATA_END_BIT)
+ host->data->error = -EILSEQ;
+ else if ((intmask & SDHCI_INT_DATA_CRC) &&
+ SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
+ != MMC_BUS_TEST_R)
host->data->error = -EILSEQ;
else if (intmask & SDHCI_INT_ADMA_ERROR) {
printk(KERN_ERR "%s: ADMA error\n", mmc_hostname(host->mmc));
@@ -1736,7 +1744,7 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host);
int sdhci_add_host(struct sdhci_host *host)
{
struct mmc_host *mmc;
- unsigned int caps;
+ unsigned int caps, ocr_avail;
int ret;
WARN_ON(host == NULL);
@@ -1890,13 +1898,26 @@ int sdhci_add_host(struct sdhci_host *host)
mmc_card_is_removable(mmc))
mmc->caps |= MMC_CAP_NEEDS_POLL;
- mmc->ocr_avail = 0;
+ ocr_avail = 0;
if (caps & SDHCI_CAN_VDD_330)
- mmc->ocr_avail |= MMC_VDD_32_33|MMC_VDD_33_34;
+ ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34;
if (caps & SDHCI_CAN_VDD_300)
- mmc->ocr_avail |= MMC_VDD_29_30|MMC_VDD_30_31;
+ ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31;
if (caps & SDHCI_CAN_VDD_180)
- mmc->ocr_avail |= MMC_VDD_165_195;
+ ocr_avail |= MMC_VDD_165_195;
+
+ mmc->ocr_avail = ocr_avail;
+ mmc->ocr_avail_sdio = ocr_avail;
+ if (host->ocr_avail_sdio)
+ mmc->ocr_avail_sdio &= host->ocr_avail_sdio;
+ mmc->ocr_avail_sd = ocr_avail;
+ if (host->ocr_avail_sd)
+ mmc->ocr_avail_sd &= host->ocr_avail_sd;
+ else /* normal SD controllers don't support 1.8V */
+ mmc->ocr_avail_sd &= ~MMC_VDD_165_195;
+ mmc->ocr_avail_mmc = ocr_avail;
+ if (host->ocr_avail_mmc)
+ mmc->ocr_avail_mmc &= host->ocr_avail_mmc;
if (mmc->ocr_avail == 0) {
printk(KERN_ERR "%s: Hardware doesn't report any "
@@ -1928,10 +1949,14 @@ int sdhci_add_host(struct sdhci_host *host)
* of bytes. When doing hardware scatter/gather, each entry cannot
* be larger than 64 KiB though.
*/
- if (host->flags & SDHCI_USE_ADMA)
- mmc->max_seg_size = 65536;
- else
+ if (host->flags & SDHCI_USE_ADMA) {
+ if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
+ mmc->max_seg_size = 65535;
+ else
+ mmc->max_seg_size = 65536;
+ } else {
mmc->max_seg_size = mmc->max_req_size;
+ }
/*
* Maximum block size. This varies from controller to controller and
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index e42d7f0..6e0969e 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -52,6 +52,7 @@
#define SDHCI_CMD_RESP_SHORT_BUSY 0x03
#define SDHCI_MAKE_CMD(c, f) (((c & 0xff) << 8) | (f & 0xff))
+#define SDHCI_GET_CMD(c) ((c>>8) & 0x3f)
#define SDHCI_RESPONSE 0x10
@@ -165,7 +166,7 @@
#define SDHCI_CAN_VDD_180 0x04000000
#define SDHCI_CAN_64BIT 0x10000000
-/* 44-47 reserved for more caps */
+#define SDHCI_CAPABILITIES_1 0x44
#define SDHCI_MAX_CURRENT 0x48
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index e7765a8..e3c6ef2 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -25,16 +25,261 @@
* double buffer support
*
*/
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/device.h>
+
#include <linux/delay.h>
+#include <linux/device.h>
#include <linux/dmaengine.h>
-#include <linux/mmc/host.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/mfd/core.h>
#include <linux/mfd/tmio.h>
+#include <linux/mmc/host.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#define CTL_SD_CMD 0x00
+#define CTL_ARG_REG 0x04
+#define CTL_STOP_INTERNAL_ACTION 0x08
+#define CTL_XFER_BLK_COUNT 0xa
+#define CTL_RESPONSE 0x0c
+#define CTL_STATUS 0x1c
+#define CTL_IRQ_MASK 0x20
+#define CTL_SD_CARD_CLK_CTL 0x24
+#define CTL_SD_XFER_LEN 0x26
+#define CTL_SD_MEM_CARD_OPT 0x28
+#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
+#define CTL_SD_DATA_PORT 0x30
+#define CTL_TRANSACTION_CTL 0x34
+#define CTL_SDIO_STATUS 0x36
+#define CTL_SDIO_IRQ_MASK 0x38
+#define CTL_RESET_SD 0xe0
+#define CTL_SDIO_REGS 0x100
+#define CTL_CLK_AND_WAIT_CTL 0x138
+#define CTL_RESET_SDIO 0x1e0
+
+/* Definitions for values the CTRL_STATUS register can take. */
+#define TMIO_STAT_CMDRESPEND 0x00000001
+#define TMIO_STAT_DATAEND 0x00000004
+#define TMIO_STAT_CARD_REMOVE 0x00000008
+#define TMIO_STAT_CARD_INSERT 0x00000010
+#define TMIO_STAT_SIGSTATE 0x00000020
+#define TMIO_STAT_WRPROTECT 0x00000080
+#define TMIO_STAT_CARD_REMOVE_A 0x00000100
+#define TMIO_STAT_CARD_INSERT_A 0x00000200
+#define TMIO_STAT_SIGSTATE_A 0x00000400
+#define TMIO_STAT_CMD_IDX_ERR 0x00010000
+#define TMIO_STAT_CRCFAIL 0x00020000
+#define TMIO_STAT_STOPBIT_ERR 0x00040000
+#define TMIO_STAT_DATATIMEOUT 0x00080000
+#define TMIO_STAT_RXOVERFLOW 0x00100000
+#define TMIO_STAT_TXUNDERRUN 0x00200000
+#define TMIO_STAT_CMDTIMEOUT 0x00400000
+#define TMIO_STAT_RXRDY 0x01000000
+#define TMIO_STAT_TXRQ 0x02000000
+#define TMIO_STAT_ILL_FUNC 0x20000000
+#define TMIO_STAT_CMD_BUSY 0x40000000
+#define TMIO_STAT_ILL_ACCESS 0x80000000
+
+/* Definitions for values the CTRL_SDIO_STATUS register can take. */
+#define TMIO_SDIO_STAT_IOIRQ 0x0001
+#define TMIO_SDIO_STAT_EXPUB52 0x4000
+#define TMIO_SDIO_STAT_EXWT 0x8000
+#define TMIO_SDIO_MASK_ALL 0xc007
+
+/* Define some IRQ masks */
+/* This is the mask used at reset by the chip */
+#define TMIO_MASK_ALL 0x837f031d
+#define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
+#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
+#define TMIO_MASK_CMD (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \
+ TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
+#define TMIO_MASK_IRQ (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
+
+#define enable_mmc_irqs(host, i) \
+ do { \
+ u32 mask;\
+ mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \
+ mask &= ~((i) & TMIO_MASK_IRQ); \
+ sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
+ } while (0)
+
+#define disable_mmc_irqs(host, i) \
+ do { \
+ u32 mask;\
+ mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \
+ mask |= ((i) & TMIO_MASK_IRQ); \
+ sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
+ } while (0)
+
+#define ack_mmc_irqs(host, i) \
+ do { \
+ sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
+ } while (0)
+
+/* This is arbitrary, just noone needed any higher alignment yet */
+#define MAX_ALIGN 4
+
+struct tmio_mmc_host {
+ void __iomem *ctl;
+ unsigned long bus_shift;
+ struct mmc_command *cmd;
+ struct mmc_request *mrq;
+ struct mmc_data *data;
+ struct mmc_host *mmc;
+ int irq;
+ unsigned int sdio_irq_enabled;
+
+ /* Callbacks for clock / power control */
+ void (*set_pwr)(struct platform_device *host, int state);
+ void (*set_clk_div)(struct platform_device *host, int state);
+
+ /* pio related stuff */
+ struct scatterlist *sg_ptr;
+ struct scatterlist *sg_orig;
+ unsigned int sg_len;
+ unsigned int sg_off;
+
+ struct platform_device *pdev;
+
+ /* DMA support */
+ struct dma_chan *chan_rx;
+ struct dma_chan *chan_tx;
+ struct tasklet_struct dma_complete;
+ struct tasklet_struct dma_issue;
+#ifdef CONFIG_TMIO_MMC_DMA
+ unsigned int dma_sglen;
+ u8 bounce_buf[PAGE_CACHE_SIZE] __attribute__((aligned(MAX_ALIGN)));
+ struct scatterlist bounce_sg;
+#endif
+
+ /* Track lost interrupts */
+ struct delayed_work delayed_reset_work;
+ spinlock_t lock;
+ unsigned long last_req_ts;
+};
+
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host);
+
+static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
+{
+ return readw(host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
+ u16 *buf, int count)
+{
+ readsw(host->ctl + (addr << host->bus_shift), buf, count);
+}
-#include "tmio_mmc.h"
+static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+{
+ return readw(host->ctl + (addr << host->bus_shift)) |
+ readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
+}
+
+static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
+{
+ writew(val, host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
+ u16 *buf, int count)
+{
+ writesw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+{
+ writew(val, host->ctl + (addr << host->bus_shift));
+ writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
+}
+
+static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data)
+{
+ host->sg_len = data->sg_len;
+ host->sg_ptr = data->sg;
+ host->sg_orig = data->sg;
+ host->sg_off = 0;
+}
+
+static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
+{
+ host->sg_ptr = sg_next(host->sg_ptr);
+ host->sg_off = 0;
+ return --host->sg_len;
+}
+
+static char *tmio_mmc_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
+{
+ local_irq_save(*flags);
+ return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+}
+
+static void tmio_mmc_kunmap_atomic(void *virt, unsigned long *flags)
+{
+ kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+ local_irq_restore(*flags);
+}
+
+#ifdef CONFIG_MMC_DEBUG
+
+#define STATUS_TO_TEXT(a) \
+ do { \
+ if (status & TMIO_STAT_##a) \
+ printk(#a); \
+ } while (0)
+
+void pr_debug_status(u32 status)
+{
+ printk(KERN_DEBUG "status: %08x = ", status);
+ STATUS_TO_TEXT(CARD_REMOVE);
+ STATUS_TO_TEXT(CARD_INSERT);
+ STATUS_TO_TEXT(SIGSTATE);
+ STATUS_TO_TEXT(WRPROTECT);
+ STATUS_TO_TEXT(CARD_REMOVE_A);
+ STATUS_TO_TEXT(CARD_INSERT_A);
+ STATUS_TO_TEXT(SIGSTATE_A);
+ STATUS_TO_TEXT(CMD_IDX_ERR);
+ STATUS_TO_TEXT(STOPBIT_ERR);
+ STATUS_TO_TEXT(ILL_FUNC);
+ STATUS_TO_TEXT(CMD_BUSY);
+ STATUS_TO_TEXT(CMDRESPEND);
+ STATUS_TO_TEXT(DATAEND);
+ STATUS_TO_TEXT(CRCFAIL);
+ STATUS_TO_TEXT(DATATIMEOUT);
+ STATUS_TO_TEXT(CMDTIMEOUT);
+ STATUS_TO_TEXT(RXOVERFLOW);
+ STATUS_TO_TEXT(TXUNDERRUN);
+ STATUS_TO_TEXT(RXRDY);
+ STATUS_TO_TEXT(TXRQ);
+ STATUS_TO_TEXT(ILL_ACCESS);
+ printk("\n");
+}
+
+#else
+#define pr_debug_status(s) do { } while (0)
+#endif
+
+static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct tmio_mmc_host *host = mmc_priv(mmc);
+
+ if (enable) {
+ host->sdio_irq_enabled = 1;
+ sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001);
+ sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK,
+ (TMIO_SDIO_MASK_ALL & ~TMIO_SDIO_STAT_IOIRQ));
+ } else {
+ sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, TMIO_SDIO_MASK_ALL);
+ sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0000);
+ host->sdio_irq_enabled = 0;
+ }
+}
static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
{
@@ -55,8 +300,23 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
{
+ struct mfd_cell *cell = host->pdev->dev.platform_data;
+ struct tmio_mmc_data *pdata = cell->driver_data;
+
+ /*
+ * Testing on sh-mobile showed that SDIO IRQs are unmasked when
+ * CTL_CLK_AND_WAIT_CTL gets written, so we have to disable the
+ * device IRQ here and restore the SDIO IRQ mask before
+ * re-enabling the device IRQ.
+ */
+ if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+ disable_irq(host->irq);
sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000);
msleep(10);
+ if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+ tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+ enable_irq(host->irq);
+ }
sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 &
sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
msleep(10);
@@ -64,11 +324,21 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
{
+ struct mfd_cell *cell = host->pdev->dev.platform_data;
+ struct tmio_mmc_data *pdata = cell->driver_data;
+
sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 |
sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
msleep(10);
+ /* see comment in tmio_mmc_clk_stop above */
+ if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+ disable_irq(host->irq);
sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
msleep(10);
+ if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+ tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+ enable_irq(host->irq);
+ }
}
static void reset(struct tmio_mmc_host *host)
@@ -82,15 +352,60 @@ static void reset(struct tmio_mmc_host *host)
msleep(10);
}
+static void tmio_mmc_reset_work(struct work_struct *work)
+{
+ struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host,
+ delayed_reset_work.work);
+ struct mmc_request *mrq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+ mrq = host->mrq;
+
+ /* request already finished */
+ if (!mrq
+ || time_is_after_jiffies(host->last_req_ts +
+ msecs_to_jiffies(2000))) {
+ spin_unlock_irqrestore(&host->lock, flags);
+ return;
+ }
+
+ dev_warn(&host->pdev->dev,
+ "timeout waiting for hardware interrupt (CMD%u)\n",
+ mrq->cmd->opcode);
+
+ if (host->data)
+ host->data->error = -ETIMEDOUT;
+ else if (host->cmd)
+ host->cmd->error = -ETIMEDOUT;
+ else
+ mrq->cmd->error = -ETIMEDOUT;
+
+ host->cmd = NULL;
+ host->data = NULL;
+ host->mrq = NULL;
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ reset(host);
+
+ mmc_request_done(host->mmc, mrq);
+}
+
static void
tmio_mmc_finish_request(struct tmio_mmc_host *host)
{
struct mmc_request *mrq = host->mrq;
+ if (!mrq)
+ return;
+
host->mrq = NULL;
host->cmd = NULL;
host->data = NULL;
+ cancel_delayed_work(&host->delayed_reset_work);
+
mmc_request_done(host->mmc, mrq);
}
@@ -200,6 +515,7 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
return;
}
+/* needs to be called with host->lock held */
static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
{
struct mmc_data *data = host->data;
@@ -233,6 +549,8 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
if (data->flags & MMC_DATA_READ) {
if (!host->chan_rx)
disable_mmc_irqs(host, TMIO_MASK_READOP);
+ else
+ tmio_check_bounce_buffer(host);
dev_dbg(&host->pdev->dev, "Complete Rx request %p\n",
host->mrq);
} else {
@@ -254,10 +572,12 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
{
- struct mmc_data *data = host->data;
+ struct mmc_data *data;
+ spin_lock(&host->lock);
+ data = host->data;
if (!data)
- return;
+ goto out;
if (host->chan_tx && (data->flags & MMC_DATA_WRITE)) {
/*
@@ -278,6 +598,8 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
} else {
tmio_mmc_do_data_irq(host);
}
+out:
+ spin_unlock(&host->lock);
}
static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
@@ -286,9 +608,11 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
struct mmc_command *cmd = host->cmd;
int i, addr;
+ spin_lock(&host->lock);
+
if (!host->cmd) {
pr_debug("Spurious CMD irq\n");
- return;
+ goto out;
}
host->cmd = NULL;
@@ -324,8 +648,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
if (!host->chan_rx)
enable_mmc_irqs(host, TMIO_MASK_READOP);
} else {
- struct dma_chan *chan = host->chan_tx;
- if (!chan)
+ if (!host->chan_tx)
enable_mmc_irqs(host, TMIO_MASK_WRITEOP);
else
tasklet_schedule(&host->dma_issue);
@@ -334,13 +657,19 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
tmio_mmc_finish_request(host);
}
+out:
+ spin_unlock(&host->lock);
+
return;
}
static irqreturn_t tmio_mmc_irq(int irq, void *devid)
{
struct tmio_mmc_host *host = devid;
+ struct mfd_cell *cell = host->pdev->dev.platform_data;
+ struct tmio_mmc_data *pdata = cell->driver_data;
unsigned int ireg, irq_mask, status;
+ unsigned int sdio_ireg, sdio_irq_mask, sdio_status;
pr_debug("MMC IRQ begin\n");
@@ -348,6 +677,29 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
ireg = status & TMIO_MASK_IRQ & ~irq_mask;
+ sdio_ireg = 0;
+ if (!ireg && pdata->flags & TMIO_MMC_SDIO_IRQ) {
+ sdio_status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
+ sdio_irq_mask = sd_ctrl_read16(host, CTL_SDIO_IRQ_MASK);
+ sdio_ireg = sdio_status & TMIO_SDIO_MASK_ALL & ~sdio_irq_mask;
+
+ sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status & ~TMIO_SDIO_MASK_ALL);
+
+ if (sdio_ireg && !host->sdio_irq_enabled) {
+ pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n",
+ sdio_status, sdio_irq_mask, sdio_ireg);
+ tmio_mmc_enable_sdio_irq(host->mmc, 0);
+ goto out;
+ }
+
+ if (host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+ sdio_ireg & TMIO_SDIO_STAT_IOIRQ)
+ mmc_signal_sdio_irq(host->mmc);
+
+ if (sdio_ireg)
+ goto out;
+ }
+
pr_debug_status(status);
pr_debug_status(ireg);
@@ -375,8 +727,10 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
*/
/* Command completion */
- if (ireg & TMIO_MASK_CMD) {
- ack_mmc_irqs(host, TMIO_MASK_CMD);
+ if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
+ ack_mmc_irqs(host,
+ TMIO_STAT_CMDRESPEND |
+ TMIO_STAT_CMDTIMEOUT);
tmio_mmc_cmd_irq(host, status);
}
@@ -407,6 +761,16 @@ out:
}
#ifdef CONFIG_TMIO_MMC_DMA
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+ if (host->sg_ptr == &host->bounce_sg) {
+ unsigned long flags;
+ void *sg_vaddr = tmio_mmc_kmap_atomic(host->sg_orig, &flags);
+ memcpy(sg_vaddr, host->bounce_buf, host->bounce_sg.length);
+ tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+ }
+}
+
static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
{
#if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
@@ -427,12 +791,39 @@ static void tmio_dma_complete(void *arg)
enable_mmc_irqs(host, TMIO_STAT_DATAEND);
}
-static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
+static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
{
- struct scatterlist *sg = host->sg_ptr;
+ struct scatterlist *sg = host->sg_ptr, *sg_tmp;
struct dma_async_tx_descriptor *desc = NULL;
struct dma_chan *chan = host->chan_rx;
- int ret;
+ struct mfd_cell *cell = host->pdev->dev.platform_data;
+ struct tmio_mmc_data *pdata = cell->driver_data;
+ dma_cookie_t cookie;
+ int ret, i;
+ bool aligned = true, multiple = true;
+ unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+ for_each_sg(sg, sg_tmp, host->sg_len, i) {
+ if (sg_tmp->offset & align)
+ aligned = false;
+ if (sg_tmp->length & align) {
+ multiple = false;
+ break;
+ }
+ }
+
+ if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+ align >= MAX_ALIGN)) || !multiple) {
+ ret = -EINVAL;
+ goto pio;
+ }
+
+ /* The only sg element can be unaligned, use our bounce buffer then */
+ if (!aligned) {
+ sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+ host->sg_ptr = &host->bounce_sg;
+ sg = host->sg_ptr;
+ }
ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_FROM_DEVICE);
if (ret > 0) {
@@ -442,21 +833,21 @@ static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
}
if (desc) {
- host->desc = desc;
desc->callback = tmio_dma_complete;
desc->callback_param = host;
- host->cookie = desc->tx_submit(desc);
- if (host->cookie < 0) {
- host->desc = NULL;
- ret = host->cookie;
+ cookie = desc->tx_submit(desc);
+ if (cookie < 0) {
+ desc = NULL;
+ ret = cookie;
} else {
chan->device->device_issue_pending(chan);
}
}
dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, host->cookie, host->mrq);
+ __func__, host->sg_len, ret, cookie, host->mrq);
- if (!host->desc) {
+pio:
+ if (!desc) {
/* DMA failed, fall back to PIO */
if (ret >= 0)
ret = -EIO;
@@ -471,24 +862,49 @@ static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
dev_warn(&host->pdev->dev,
"DMA failed: %d, falling back to PIO\n", ret);
tmio_mmc_enable_dma(host, false);
- reset(host);
- /* Fail this request, let above layers recover */
- host->mrq->cmd->error = ret;
- tmio_mmc_finish_request(host);
}
dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
- desc, host->cookie, host->sg_len);
-
- return ret > 0 ? 0 : ret;
+ desc, cookie, host->sg_len);
}
-static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
+static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
{
- struct scatterlist *sg = host->sg_ptr;
+ struct scatterlist *sg = host->sg_ptr, *sg_tmp;
struct dma_async_tx_descriptor *desc = NULL;
struct dma_chan *chan = host->chan_tx;
- int ret;
+ struct mfd_cell *cell = host->pdev->dev.platform_data;
+ struct tmio_mmc_data *pdata = cell->driver_data;
+ dma_cookie_t cookie;
+ int ret, i;
+ bool aligned = true, multiple = true;
+ unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+ for_each_sg(sg, sg_tmp, host->sg_len, i) {
+ if (sg_tmp->offset & align)
+ aligned = false;
+ if (sg_tmp->length & align) {
+ multiple = false;
+ break;
+ }
+ }
+
+ if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+ align >= MAX_ALIGN)) || !multiple) {
+ ret = -EINVAL;
+ goto pio;
+ }
+
+ /* The only sg element can be unaligned, use our bounce buffer then */
+ if (!aligned) {
+ unsigned long flags;
+ void *sg_vaddr = tmio_mmc_kmap_atomic(sg, &flags);
+ sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+ memcpy(host->bounce_buf, sg_vaddr, host->bounce_sg.length);
+ tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+ host->sg_ptr = &host->bounce_sg;
+ sg = host->sg_ptr;
+ }
ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_TO_DEVICE);
if (ret > 0) {
@@ -498,19 +914,19 @@ static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
}
if (desc) {
- host->desc = desc;
desc->callback = tmio_dma_complete;
desc->callback_param = host;
- host->cookie = desc->tx_submit(desc);
- if (host->cookie < 0) {
- host->desc = NULL;
- ret = host->cookie;
+ cookie = desc->tx_submit(desc);
+ if (cookie < 0) {
+ desc = NULL;
+ ret = cookie;
}
}
dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, host->cookie, host->mrq);
+ __func__, host->sg_len, ret, cookie, host->mrq);
- if (!host->desc) {
+pio:
+ if (!desc) {
/* DMA failed, fall back to PIO */
if (ret >= 0)
ret = -EIO;
@@ -525,30 +941,22 @@ static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
dev_warn(&host->pdev->dev,
"DMA failed: %d, falling back to PIO\n", ret);
tmio_mmc_enable_dma(host, false);
- reset(host);
- /* Fail this request, let above layers recover */
- host->mrq->cmd->error = ret;
- tmio_mmc_finish_request(host);
}
dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
- desc, host->cookie);
-
- return ret > 0 ? 0 : ret;
+ desc, cookie);
}
-static int tmio_mmc_start_dma(struct tmio_mmc_host *host,
+static void tmio_mmc_start_dma(struct tmio_mmc_host *host,
struct mmc_data *data)
{
if (data->flags & MMC_DATA_READ) {
if (host->chan_rx)
- return tmio_mmc_start_dma_rx(host);
+ tmio_mmc_start_dma_rx(host);
} else {
if (host->chan_tx)
- return tmio_mmc_start_dma_tx(host);
+ tmio_mmc_start_dma_tx(host);
}
-
- return 0;
}
static void tmio_issue_tasklet_fn(unsigned long priv)
@@ -562,6 +970,12 @@ static void tmio_issue_tasklet_fn(unsigned long priv)
static void tmio_tasklet_fn(unsigned long arg)
{
struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ if (!host->data)
+ goto out;
if (host->data->flags & MMC_DATA_READ)
dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->dma_sglen,
@@ -571,6 +985,8 @@ static void tmio_tasklet_fn(unsigned long arg)
DMA_TO_DEVICE);
tmio_mmc_do_data_irq(host);
+out:
+ spin_unlock_irqrestore(&host->lock, flags);
}
/* It might be necessary to make filter MFD specific */
@@ -584,9 +1000,6 @@ static bool tmio_mmc_filter(struct dma_chan *chan, void *arg)
static void tmio_mmc_request_dma(struct tmio_mmc_host *host,
struct tmio_mmc_data *pdata)
{
- host->cookie = -EINVAL;
- host->desc = NULL;
-
/* We can only either use DMA for both Tx and Rx or not use it at all */
if (pdata->dma) {
dma_cap_mask_t mask;
@@ -632,15 +1045,15 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host)
host->chan_rx = NULL;
dma_release_channel(chan);
}
-
- host->cookie = -EINVAL;
- host->desc = NULL;
}
#else
-static int tmio_mmc_start_dma(struct tmio_mmc_host *host,
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+}
+
+static void tmio_mmc_start_dma(struct tmio_mmc_host *host,
struct mmc_data *data)
{
- return 0;
}
static void tmio_mmc_request_dma(struct tmio_mmc_host *host,
@@ -682,7 +1095,9 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host,
sd_ctrl_write16(host, CTL_SD_XFER_LEN, data->blksz);
sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks);
- return tmio_mmc_start_dma(host, data);
+ tmio_mmc_start_dma(host, data);
+
+ return 0;
}
/* Process requests from the MMC layer */
@@ -694,6 +1109,8 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
if (host->mrq)
pr_debug("request not null\n");
+ host->last_req_ts = jiffies;
+ wmb();
host->mrq = mrq;
if (mrq->data) {
@@ -703,10 +1120,14 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
}
ret = tmio_mmc_start_command(host, mrq->cmd);
- if (!ret)
+ if (!ret) {
+ schedule_delayed_work(&host->delayed_reset_work,
+ msecs_to_jiffies(2000));
return;
+ }
fail:
+ host->mrq = NULL;
mrq->cmd->error = ret;
mmc_request_done(mmc, mrq);
}
@@ -780,6 +1201,7 @@ static const struct mmc_host_ops tmio_mmc_ops = {
.set_ios = tmio_mmc_set_ios,
.get_ro = tmio_mmc_get_ro,
.get_cd = tmio_mmc_get_cd,
+ .enable_sdio_irq = tmio_mmc_enable_sdio_irq,
};
#ifdef CONFIG_PM
@@ -864,10 +1286,15 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
goto host_free;
mmc->ops = &tmio_mmc_ops;
- mmc->caps = MMC_CAP_4_BIT_DATA;
- mmc->caps |= pdata->capabilities;
+ mmc->caps = MMC_CAP_4_BIT_DATA | pdata->capabilities;
mmc->f_max = pdata->hclk;
mmc->f_min = mmc->f_max / 512;
+ mmc->max_segs = 32;
+ mmc->max_blk_size = 512;
+ mmc->max_blk_count = (PAGE_CACHE_SIZE / mmc->max_blk_size) *
+ mmc->max_segs;
+ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+ mmc->max_seg_size = mmc->max_req_size;
if (pdata->ocr_mask)
mmc->ocr_avail = pdata->ocr_mask;
else
@@ -890,12 +1317,19 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
goto cell_disable;
disable_mmc_irqs(host, TMIO_MASK_ALL);
+ if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+ tmio_mmc_enable_sdio_irq(mmc, 0);
ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
if (ret)
goto cell_disable;
+ spin_lock_init(&host->lock);
+
+ /* Init delayed work for request timeouts */
+ INIT_DELAYED_WORK(&host->delayed_reset_work, tmio_mmc_reset_work);
+
/* See if we also get DMA */
tmio_mmc_request_dma(host, pdata);
@@ -934,6 +1368,7 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev)
if (mmc) {
struct tmio_mmc_host *host = mmc_priv(mmc);
mmc_remove_host(mmc);
+ cancel_delayed_work_sync(&host->delayed_reset_work);
tmio_mmc_release_dma(host);
free_irq(host->irq, host);
if (cell->disable)
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
deleted file mode 100644
index 0fedc78..0000000
--- a/drivers/mmc/host/tmio_mmc.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/* Definitons for use with the tmio_mmc.c
- *
- * (c) 2004 Ian Molton <spyro@f2s.com>
- * (c) 2007 Ian Molton <spyro@f2s.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/highmem.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-
-#define CTL_SD_CMD 0x00
-#define CTL_ARG_REG 0x04
-#define CTL_STOP_INTERNAL_ACTION 0x08
-#define CTL_XFER_BLK_COUNT 0xa
-#define CTL_RESPONSE 0x0c
-#define CTL_STATUS 0x1c
-#define CTL_IRQ_MASK 0x20
-#define CTL_SD_CARD_CLK_CTL 0x24
-#define CTL_SD_XFER_LEN 0x26
-#define CTL_SD_MEM_CARD_OPT 0x28
-#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
-#define CTL_SD_DATA_PORT 0x30
-#define CTL_TRANSACTION_CTL 0x34
-#define CTL_RESET_SD 0xe0
-#define CTL_SDIO_REGS 0x100
-#define CTL_CLK_AND_WAIT_CTL 0x138
-#define CTL_RESET_SDIO 0x1e0
-
-/* Definitions for values the CTRL_STATUS register can take. */
-#define TMIO_STAT_CMDRESPEND 0x00000001
-#define TMIO_STAT_DATAEND 0x00000004
-#define TMIO_STAT_CARD_REMOVE 0x00000008
-#define TMIO_STAT_CARD_INSERT 0x00000010
-#define TMIO_STAT_SIGSTATE 0x00000020
-#define TMIO_STAT_WRPROTECT 0x00000080
-#define TMIO_STAT_CARD_REMOVE_A 0x00000100
-#define TMIO_STAT_CARD_INSERT_A 0x00000200
-#define TMIO_STAT_SIGSTATE_A 0x00000400
-#define TMIO_STAT_CMD_IDX_ERR 0x00010000
-#define TMIO_STAT_CRCFAIL 0x00020000
-#define TMIO_STAT_STOPBIT_ERR 0x00040000
-#define TMIO_STAT_DATATIMEOUT 0x00080000
-#define TMIO_STAT_RXOVERFLOW 0x00100000
-#define TMIO_STAT_TXUNDERRUN 0x00200000
-#define TMIO_STAT_CMDTIMEOUT 0x00400000
-#define TMIO_STAT_RXRDY 0x01000000
-#define TMIO_STAT_TXRQ 0x02000000
-#define TMIO_STAT_ILL_FUNC 0x20000000
-#define TMIO_STAT_CMD_BUSY 0x40000000
-#define TMIO_STAT_ILL_ACCESS 0x80000000
-
-/* Define some IRQ masks */
-/* This is the mask used at reset by the chip */
-#define TMIO_MASK_ALL 0x837f031d
-#define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
-#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
-#define TMIO_MASK_CMD (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \
- TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
-#define TMIO_MASK_IRQ (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
-
-
-#define enable_mmc_irqs(host, i) \
- do { \
- u32 mask;\
- mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \
- mask &= ~((i) & TMIO_MASK_IRQ); \
- sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
- } while (0)
-
-#define disable_mmc_irqs(host, i) \
- do { \
- u32 mask;\
- mask = sd_ctrl_read32((host), CTL_IRQ_MASK); \
- mask |= ((i) & TMIO_MASK_IRQ); \
- sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
- } while (0)
-
-#define ack_mmc_irqs(host, i) \
- do { \
- sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
- } while (0)
-
-
-struct tmio_mmc_host {
- void __iomem *ctl;
- unsigned long bus_shift;
- struct mmc_command *cmd;
- struct mmc_request *mrq;
- struct mmc_data *data;
- struct mmc_host *mmc;
- int irq;
-
- /* Callbacks for clock / power control */
- void (*set_pwr)(struct platform_device *host, int state);
- void (*set_clk_div)(struct platform_device *host, int state);
-
- /* pio related stuff */
- struct scatterlist *sg_ptr;
- unsigned int sg_len;
- unsigned int sg_off;
-
- struct platform_device *pdev;
-
- /* DMA support */
- struct dma_chan *chan_rx;
- struct dma_chan *chan_tx;
- struct tasklet_struct dma_complete;
- struct tasklet_struct dma_issue;
-#ifdef CONFIG_TMIO_MMC_DMA
- struct dma_async_tx_descriptor *desc;
- unsigned int dma_sglen;
- dma_cookie_t cookie;
-#endif
-};
-
-#include <linux/io.h>
-
-static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
-{
- return readw(host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
- u16 *buf, int count)
-{
- readsw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
-{
- return readw(host->ctl + (addr << host->bus_shift)) |
- readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
-}
-
-static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr,
- u16 val)
-{
- writew(val, host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
- u16 *buf, int count)
-{
- writesw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
- u32 val)
-{
- writew(val, host->ctl + (addr << host->bus_shift));
- writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
-}
-
-#include <linux/scatterlist.h>
-#include <linux/blkdev.h>
-
-static inline void tmio_mmc_init_sg(struct tmio_mmc_host *host,
- struct mmc_data *data)
-{
- host->sg_len = data->sg_len;
- host->sg_ptr = data->sg;
- host->sg_off = 0;
-}
-
-static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host)
-{
- host->sg_ptr = sg_next(host->sg_ptr);
- host->sg_off = 0;
- return --host->sg_len;
-}
-
-static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
- unsigned long *flags)
-{
- local_irq_save(*flags);
- return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
-}
-
-static inline void tmio_mmc_kunmap_atomic(void *virt,
- unsigned long *flags)
-{
- kunmap_atomic(virt, KM_BIO_SRC_IRQ);
- local_irq_restore(*flags);
-}
-
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a) \
- do { \
- if (status & TMIO_STAT_##a) \
- printk(#a); \
- } while (0)
-
-void pr_debug_status(u32 status)
-{
- printk(KERN_DEBUG "status: %08x = ", status);
- STATUS_TO_TEXT(CARD_REMOVE);
- STATUS_TO_TEXT(CARD_INSERT);
- STATUS_TO_TEXT(SIGSTATE);
- STATUS_TO_TEXT(WRPROTECT);
- STATUS_TO_TEXT(CARD_REMOVE_A);
- STATUS_TO_TEXT(CARD_INSERT_A);
- STATUS_TO_TEXT(SIGSTATE_A);
- STATUS_TO_TEXT(CMD_IDX_ERR);
- STATUS_TO_TEXT(STOPBIT_ERR);
- STATUS_TO_TEXT(ILL_FUNC);
- STATUS_TO_TEXT(CMD_BUSY);
- STATUS_TO_TEXT(CMDRESPEND);
- STATUS_TO_TEXT(DATAEND);
- STATUS_TO_TEXT(CRCFAIL);
- STATUS_TO_TEXT(DATATIMEOUT);
- STATUS_TO_TEXT(CMDTIMEOUT);
- STATUS_TO_TEXT(RXOVERFLOW);
- STATUS_TO_TEXT(TXUNDERRUN);
- STATUS_TO_TEXT(RXRDY);
- STATUS_TO_TEXT(TXRQ);
- STATUS_TO_TEXT(ILL_ACCESS);
- printk("\n");
-}
-
-#else
-#define pr_debug_status(s) do { } while (0)
-#endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 3fda24a..ff652c7 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1944,19 +1944,12 @@ config 68360_ENET
config FEC
bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
depends on M523x || M527x || M5272 || M528x || M520x || M532x || \
- MACH_MX27 || ARCH_MX35 || ARCH_MX25 || ARCH_MX5
+ MACH_MX27 || ARCH_MX35 || ARCH_MX25 || ARCH_MX5 || SOC_IMX28
select PHYLIB
help
Say Y here if you want to use the built-in 10/100 Fast ethernet
controller on some Motorola ColdFire and Freescale i.MX processors.
-config FEC2
- bool "Second FEC ethernet controller (on some ColdFire CPUs)"
- depends on FEC
- help
- Say Y here if you want to use the second built-in 10/100 Fast
- ethernet controller on some Motorola ColdFire processors.
-
config FEC_MPC52xx
tristate "MPC52xx FEC driver"
depends on PPC_MPC52xx && PPC_BESTCOMM
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index ce1e5e9..0b9fc51 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -8,6 +8,11 @@
* Licensed under the GPL-2 or later.
*/
+#define DRV_VERSION "1.1"
+#define DRV_DESC "Blackfin on-chip Ethernet MAC driver"
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -41,12 +46,7 @@
#include "bfin_mac.h"
-#define DRV_NAME "bfin_mac"
-#define DRV_VERSION "1.1"
-#define DRV_AUTHOR "Bryan Wu, Luke Yang"
-#define DRV_DESC "Blackfin on-chip Ethernet MAC driver"
-
-MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_AUTHOR("Bryan Wu, Luke Yang");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(DRV_DESC);
MODULE_ALIAS("platform:bfin_mac");
@@ -189,8 +189,7 @@ static int desc_list_init(void)
/* allocate a new skb for next time receive */
new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
if (!new_skb) {
- printk(KERN_NOTICE DRV_NAME
- ": init: low on mem - packet dropped\n");
+ pr_notice("init: low on mem - packet dropped\n");
goto init_error;
}
skb_reserve(new_skb, NET_IP_ALIGN);
@@ -240,7 +239,7 @@ static int desc_list_init(void)
init_error:
desc_list_free();
- printk(KERN_ERR DRV_NAME ": kmalloc failed\n");
+ pr_err("kmalloc failed\n");
return -ENOMEM;
}
@@ -259,8 +258,7 @@ static int bfin_mdio_poll(void)
while ((bfin_read_EMAC_STAADD()) & STABUSY) {
udelay(1);
if (timeout_cnt-- < 0) {
- printk(KERN_ERR DRV_NAME
- ": wait MDC/MDIO transaction to complete timeout\n");
+ pr_err("wait MDC/MDIO transaction to complete timeout\n");
return -ETIMEDOUT;
}
}
@@ -350,9 +348,9 @@ static void bfin_mac_adjust_link(struct net_device *dev)
opmode &= ~RMII_10;
break;
default:
- printk(KERN_WARNING
- "%s: Ack! Speed (%d) is not 10/100!\n",
- DRV_NAME, phydev->speed);
+ netdev_warn(dev,
+ "Ack! Speed (%d) is not 10/100!\n",
+ phydev->speed);
break;
}
bfin_write_EMAC_OPMODE(opmode);
@@ -417,14 +415,13 @@ static int mii_probe(struct net_device *dev, int phy_mode)
/* now we are supposed to have a proper phydev, to attach to... */
if (!phydev) {
- printk(KERN_INFO "%s: Don't found any phy device at all\n",
- dev->name);
+ netdev_err(dev, "no phy device found\n");
return -ENODEV;
}
if (phy_mode != PHY_INTERFACE_MODE_RMII &&
phy_mode != PHY_INTERFACE_MODE_MII) {
- printk(KERN_INFO "%s: Invalid phy interface mode\n", dev->name);
+ netdev_err(dev, "invalid phy interface mode\n");
return -EINVAL;
}
@@ -432,7 +429,7 @@ static int mii_probe(struct net_device *dev, int phy_mode)
0, phy_mode);
if (IS_ERR(phydev)) {
- printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+ netdev_err(dev, "could not attach PHY\n");
return PTR_ERR(phydev);
}
@@ -453,11 +450,10 @@ static int mii_probe(struct net_device *dev, int phy_mode)
lp->old_duplex = -1;
lp->phydev = phydev;
- printk(KERN_INFO "%s: attached PHY driver [%s] "
- "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)"
- "@sclk=%dMHz)\n",
- DRV_NAME, phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
- MDC_CLK, mdc_div, sclk/1000000);
+ pr_info("attached PHY driver [%s] "
+ "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n",
+ phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
+ MDC_CLK, mdc_div, sclk/1000000);
return 0;
}
@@ -502,7 +498,7 @@ bfin_mac_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
static void bfin_mac_ethtool_getdrvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strcpy(info->driver, DRV_NAME);
+ strcpy(info->driver, KBUILD_MODNAME);
strcpy(info->version, DRV_VERSION);
strcpy(info->fw_version, "N/A");
strcpy(info->bus_info, dev_name(&dev->dev));
@@ -562,7 +558,7 @@ static const struct ethtool_ops bfin_mac_ethtool_ops = {
};
/**************************************************************************/
-void setup_system_regs(struct net_device *dev)
+static void setup_system_regs(struct net_device *dev)
{
struct bfin_mac_local *lp = netdev_priv(dev);
int i;
@@ -592,6 +588,10 @@ void setup_system_regs(struct net_device *dev)
bfin_write_EMAC_MMC_CTL(RSTC | CROLL);
+ /* Set vlan regs to let 1522 bytes long packets pass through */
+ bfin_write_EMAC_VLAN1(lp->vlan1_mask);
+ bfin_write_EMAC_VLAN2(lp->vlan2_mask);
+
/* Initialize the TX DMA channel registers */
bfin_write_DMA2_X_COUNT(0);
bfin_write_DMA2_X_MODIFY(4);
@@ -827,8 +827,7 @@ static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb)
while ((!(bfin_read_EMAC_PTP_ISTAT() & TXTL)) && (--timeout_cnt))
udelay(1);
if (timeout_cnt == 0)
- printk(KERN_ERR DRV_NAME
- ": fails to timestamp the TX packet\n");
+ netdev_err(netdev, "timestamp the TX packet failed\n");
else {
struct skb_shared_hwtstamps shhwtstamps;
u64 ns;
@@ -1083,8 +1082,7 @@ static void bfin_mac_rx(struct net_device *dev)
* we which case we simply drop the packet
*/
if (current_rx_ptr->status.status_word & RX_ERROR_MASK) {
- printk(KERN_NOTICE DRV_NAME
- ": rx: receive error - packet dropped\n");
+ netdev_notice(dev, "rx: receive error - packet dropped\n");
dev->stats.rx_dropped++;
goto out;
}
@@ -1094,8 +1092,7 @@ static void bfin_mac_rx(struct net_device *dev)
new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
if (!new_skb) {
- printk(KERN_NOTICE DRV_NAME
- ": rx: low on mem - packet dropped\n");
+ netdev_notice(dev, "rx: low on mem - packet dropped\n");
dev->stats.rx_dropped++;
goto out;
}
@@ -1213,7 +1210,7 @@ static int bfin_mac_enable(struct phy_device *phydev)
int ret;
u32 opmode;
- pr_debug("%s: %s\n", DRV_NAME, __func__);
+ pr_debug("%s\n", __func__);
/* Set RX DMA */
bfin_write_DMA1_NEXT_DESC_PTR(&(rx_list_head->desc_a));
@@ -1323,7 +1320,7 @@ static void bfin_mac_set_multicast_list(struct net_device *dev)
u32 sysctl;
if (dev->flags & IFF_PROMISC) {
- printk(KERN_INFO "%s: set to promisc mode\n", dev->name);
+ netdev_info(dev, "set promisc mode\n");
sysctl = bfin_read_EMAC_OPMODE();
sysctl |= PR;
bfin_write_EMAC_OPMODE(sysctl);
@@ -1393,7 +1390,7 @@ static int bfin_mac_open(struct net_device *dev)
* address using ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx
*/
if (!is_valid_ether_addr(dev->dev_addr)) {
- printk(KERN_WARNING DRV_NAME ": no valid ethernet hw addr\n");
+ netdev_warn(dev, "no valid ethernet hw addr\n");
return -EINVAL;
}
@@ -1527,6 +1524,9 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
goto out_err_mii_probe;
}
+ lp->vlan1_mask = ETH_P_8021Q | mii_bus_data->vlan1_mask;
+ lp->vlan2_mask = ETH_P_8021Q | mii_bus_data->vlan2_mask;
+
/* Fill in the fields of the device structure with ethernet values. */
ether_setup(ndev);
@@ -1558,7 +1558,7 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
bfin_mac_hwtstamp_init(ndev);
/* now, print out the card info, in a short format.. */
- dev_info(&pdev->dev, "%s, Version %s\n", DRV_DESC, DRV_VERSION);
+ netdev_info(ndev, "%s, Version %s\n", DRV_DESC, DRV_VERSION);
return 0;
@@ -1650,7 +1650,7 @@ static int __devinit bfin_mii_bus_probe(struct platform_device *pdev)
* so set the GPIO pins to Ethernet mode
*/
pin_req = mii_bus_pd->mac_peripherals;
- rc = peripheral_request_list(pin_req, DRV_NAME);
+ rc = peripheral_request_list(pin_req, KBUILD_MODNAME);
if (rc) {
dev_err(&pdev->dev, "Requesting peripherals failed!\n");
return rc;
@@ -1739,7 +1739,7 @@ static struct platform_driver bfin_mac_driver = {
.resume = bfin_mac_resume,
.suspend = bfin_mac_suspend,
.driver = {
- .name = DRV_NAME,
+ .name = KBUILD_MODNAME,
.owner = THIS_MODULE,
},
};
diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index aed68be..f8559ac 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -17,7 +17,14 @@
#include <linux/etherdevice.h>
#include <linux/bfin_mac.h>
+/*
+ * Disable hardware checksum for bug #5600 if writeback cache is
+ * enabled. Otherwize, corrupted RX packet will be sent up stack
+ * without error mark.
+ */
+#ifndef CONFIG_BFIN_EXTMEM_WRITEBACK
#define BFIN_MAC_CSUM_OFFLOAD
+#endif
#define TX_RECLAIM_JIFFIES (HZ / 5)
@@ -68,7 +75,6 @@ struct bfin_mac_local {
*/
struct net_device_stats stats;
- unsigned char Mac[6]; /* MAC address of the board */
spinlock_t lock;
int wol; /* Wake On Lan */
@@ -76,6 +82,9 @@ struct bfin_mac_local {
struct timer_list tx_reclaim_timer;
struct net_device *ndev;
+ /* Data for EMAC_VLAN1 regs */
+ u16 vlan1_mask, vlan2_mask;
+
/* MII and PHY stuffs */
int old_link; /* used by bf537_adjust_link */
int old_speed;
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 77d6c8d..6a858a2 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -636,6 +636,7 @@ struct bnx2x_common {
#define CHIP_METAL(bp) (bp->common.chip_id & 0x00000ff0)
#define CHIP_BOND_ID(bp) (bp->common.chip_id & 0x0000000f)
+#define CHIP_PARITY_ENABLED(bp) (CHIP_IS_E1(bp) || CHIP_IS_E1H(bp))
int flash_size;
#define NVRAM_1MB_SIZE 0x20000 /* 1M bit in bytes */
diff --git a/drivers/net/bnx2x/bnx2x_dump.h b/drivers/net/bnx2x/bnx2x_dump.h
index dc18c25..fb3ff7c 100644
--- a/drivers/net/bnx2x/bnx2x_dump.h
+++ b/drivers/net/bnx2x/bnx2x_dump.h
@@ -1,10 +1,16 @@
/* bnx2x_dump.h: Broadcom Everest network driver.
*
- * Copyright (c) 2009 Broadcom Corporation
+ * Copyright (c) 2011 Broadcom Corporation
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation.
+ * Unless you and Broadcom execute a separate written software license
+ * agreement governing use of this software, this software is licensed to you
+ * under the terms of the GNU General Public License version 2, available
+ * at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html (the "GPL").
+ *
+ * Notwithstanding the above, under no circumstances may you combine this
+ * software in any way with any other Broadcom software provided under a
+ * license other than the GPL, without Broadcom's express prior written
+ * consent.
*/
@@ -17,53 +23,53 @@
#define BNX2X_DUMP_H
-struct dump_sign {
- u32 time_stamp;
- u32 diag_ver;
- u32 grc_dump_ver;
-};
-#define TSTORM_WAITP_ADDR 0x1b8a80
-#define CSTORM_WAITP_ADDR 0x238a80
-#define XSTORM_WAITP_ADDR 0x2b8a80
-#define USTORM_WAITP_ADDR 0x338a80
-#define TSTORM_CAM_MODE 0x1b1440
+/*definitions */
+#define XSTORM_WAITP_ADDR 0x2b8a80
+#define TSTORM_WAITP_ADDR 0x1b8a80
+#define USTORM_WAITP_ADDR 0x338a80
+#define CSTORM_WAITP_ADDR 0x238a80
+#define TSTORM_CAM_MODE 0x1B1440
-#define RI_E1 0x1
-#define RI_E1H 0x2
+#define MAX_TIMER_PENDING 200
+#define TIMER_SCAN_DONT_CARE 0xFF
+#define RI_E1 0x1
+#define RI_E1H 0x2
#define RI_E2 0x4
-#define RI_ONLINE 0x100
+#define RI_ONLINE 0x100
#define RI_PATH0_DUMP 0x200
#define RI_PATH1_DUMP 0x400
-#define RI_E1_OFFLINE (RI_E1)
-#define RI_E1_ONLINE (RI_E1 | RI_ONLINE)
-#define RI_E1H_OFFLINE (RI_E1H)
-#define RI_E1H_ONLINE (RI_E1H | RI_ONLINE)
-#define RI_E2_OFFLINE (RI_E2)
-#define RI_E2_ONLINE (RI_E2 | RI_ONLINE)
-#define RI_E1E1H_OFFLINE (RI_E1 | RI_E1H)
-#define RI_E1E1H_ONLINE (RI_E1 | RI_E1H | RI_ONLINE)
-#define RI_E1HE2_OFFLINE (RI_E2 | RI_E1H)
-#define RI_E1HE2_ONLINE (RI_E2 | RI_E1H | RI_ONLINE)
-#define RI_E1E2_OFFLINE (RI_E2 | RI_E1)
-#define RI_E1E2_ONLINE (RI_E2 | RI_E1 | RI_ONLINE)
-#define RI_ALL_OFFLINE (RI_E1 | RI_E1H | RI_E2)
-#define RI_ALL_ONLINE (RI_E1 | RI_E1H | RI_E2 | RI_ONLINE)
-
-#define MAX_TIMER_PENDING 200
-#define TIMER_SCAN_DONT_CARE 0xFF
+#define RI_E1_OFFLINE (RI_E1)
+#define RI_E1_ONLINE (RI_E1 | RI_ONLINE)
+#define RI_E1H_OFFLINE (RI_E1H)
+#define RI_E1H_ONLINE (RI_E1H | RI_ONLINE)
+#define RI_E2_OFFLINE (RI_E2)
+#define RI_E2_ONLINE (RI_E2 | RI_ONLINE)
+#define RI_E1E1H_OFFLINE (RI_E1 | RI_E1H)
+#define RI_E1E1H_ONLINE (RI_E1 | RI_E1H | RI_ONLINE)
+#define RI_E1HE2_OFFLINE (RI_E2 | RI_E1H)
+#define RI_E1HE2_ONLINE (RI_E2 | RI_E1H | RI_ONLINE)
+#define RI_E1E2_OFFLINE (RI_E2 | RI_E1)
+#define RI_E1E2_ONLINE (RI_E2 | RI_E1 | RI_ONLINE)
+#define RI_ALL_OFFLINE (RI_E1 | RI_E1H | RI_E2)
+#define RI_ALL_ONLINE (RI_E1 | RI_E1H | RI_E2 | RI_ONLINE)
+struct dump_sign {
+ u32 time_stamp;
+ u32 diag_ver;
+ u32 grc_dump_ver;
+};
struct dump_hdr {
- u32 hdr_size; /* in dwords, excluding this field */
- struct dump_sign dump_sign;
- u32 xstorm_waitp;
- u32 tstorm_waitp;
- u32 ustorm_waitp;
- u32 cstorm_waitp;
- u16 info;
- u8 idle_chk;
- u8 reserved;
+ u32 hdr_size; /* in dwords, excluding this field */
+ struct dump_sign dump_sign;
+ u32 xstorm_waitp;
+ u32 tstorm_waitp;
+ u32 ustorm_waitp;
+ u32 cstorm_waitp;
+ u16 info;
+ u8 idle_chk;
+ u8 reserved;
};
struct reg_addr {
@@ -80,202 +86,185 @@ struct wreg_addr {
u16 info;
};
-
-#define REGS_COUNT 558
+#define REGS_COUNT 834
static const struct reg_addr reg_addrs[REGS_COUNT] = {
{ 0x2000, 341, RI_ALL_ONLINE }, { 0x2800, 103, RI_ALL_ONLINE },
{ 0x3000, 287, RI_ALL_ONLINE }, { 0x3800, 331, RI_ALL_ONLINE },
- { 0x8800, 6, RI_E1_ONLINE }, { 0xa000, 223, RI_ALL_ONLINE },
- { 0xa388, 1, RI_ALL_ONLINE }, { 0xa398, 1, RI_ALL_ONLINE },
- { 0xa39c, 7, RI_E1H_ONLINE }, { 0xa3c0, 3, RI_E1H_ONLINE },
- { 0xa3d0, 1, RI_E1H_ONLINE }, { 0xa3d8, 1, RI_E1H_ONLINE },
- { 0xa3e0, 1, RI_E1H_ONLINE }, { 0xa3e8, 1, RI_E1H_ONLINE },
- { 0xa3f0, 1, RI_E1H_ONLINE }, { 0xa3f8, 1, RI_E1H_ONLINE },
- { 0xa400, 69, RI_ALL_ONLINE }, { 0xa518, 1, RI_ALL_ONLINE },
- { 0xa520, 1, RI_ALL_ONLINE }, { 0xa528, 1, RI_ALL_ONLINE },
- { 0xa530, 1, RI_ALL_ONLINE }, { 0xa538, 1, RI_ALL_ONLINE },
- { 0xa540, 1, RI_ALL_ONLINE }, { 0xa548, 1, RI_ALL_ONLINE },
- { 0xa550, 1, RI_ALL_ONLINE }, { 0xa558, 1, RI_ALL_ONLINE },
- { 0xa560, 1, RI_ALL_ONLINE }, { 0xa568, 1, RI_ALL_ONLINE },
- { 0xa570, 1, RI_ALL_ONLINE }, { 0xa580, 1, RI_ALL_ONLINE },
- { 0xa590, 1, RI_ALL_ONLINE }, { 0xa5a0, 1, RI_ALL_ONLINE },
- { 0xa5c0, 1, RI_ALL_ONLINE }, { 0xa5e0, 1, RI_E1H_ONLINE },
- { 0xa5e8, 1, RI_E1H_ONLINE }, { 0xa5f0, 1, RI_E1H_ONLINE },
- { 0xa5f8, 10, RI_E1H_ONLINE }, { 0x10000, 236, RI_ALL_ONLINE },
- { 0x103bc, 1, RI_ALL_ONLINE }, { 0x103cc, 1, RI_ALL_ONLINE },
- { 0x103dc, 1, RI_ALL_ONLINE }, { 0x10400, 57, RI_ALL_ONLINE },
- { 0x104e8, 2, RI_ALL_ONLINE }, { 0x104f4, 2, RI_ALL_ONLINE },
- { 0x10500, 146, RI_ALL_ONLINE }, { 0x10750, 2, RI_ALL_ONLINE },
- { 0x10760, 2, RI_ALL_ONLINE }, { 0x10770, 2, RI_ALL_ONLINE },
- { 0x10780, 2, RI_ALL_ONLINE }, { 0x10790, 2, RI_ALL_ONLINE },
- { 0x107a0, 2, RI_ALL_ONLINE }, { 0x107b0, 2, RI_ALL_ONLINE },
- { 0x107c0, 2, RI_ALL_ONLINE }, { 0x107d0, 2, RI_ALL_ONLINE },
- { 0x107e0, 2, RI_ALL_ONLINE }, { 0x10880, 2, RI_ALL_ONLINE },
- { 0x10900, 2, RI_ALL_ONLINE }, { 0x12000, 1, RI_ALL_ONLINE },
- { 0x14000, 1, RI_ALL_ONLINE }, { 0x16000, 26, RI_E1H_ONLINE },
- { 0x16070, 18, RI_E1H_ONLINE }, { 0x160c0, 27, RI_E1H_ONLINE },
- { 0x16140, 1, RI_E1H_ONLINE }, { 0x16160, 1, RI_E1H_ONLINE },
- { 0x16180, 2, RI_E1H_ONLINE }, { 0x161c0, 2, RI_E1H_ONLINE },
- { 0x16204, 5, RI_E1H_ONLINE }, { 0x18000, 1, RI_E1H_ONLINE },
- { 0x18008, 1, RI_E1H_ONLINE }, { 0x20000, 24, RI_ALL_ONLINE },
- { 0x20060, 8, RI_ALL_ONLINE }, { 0x20080, 138, RI_ALL_ONLINE },
- { 0x202b4, 1, RI_ALL_ONLINE }, { 0x202c4, 1, RI_ALL_ONLINE },
- { 0x20400, 2, RI_ALL_ONLINE }, { 0x2040c, 8, RI_ALL_ONLINE },
- { 0x2042c, 18, RI_E1H_ONLINE }, { 0x20480, 1, RI_ALL_ONLINE },
- { 0x20500, 1, RI_ALL_ONLINE }, { 0x20600, 1, RI_ALL_ONLINE },
- { 0x28000, 1, RI_ALL_ONLINE }, { 0x28004, 8191, RI_ALL_OFFLINE },
- { 0x30000, 1, RI_ALL_ONLINE }, { 0x30004, 16383, RI_ALL_OFFLINE },
- { 0x40000, 98, RI_ALL_ONLINE }, { 0x40194, 1, RI_ALL_ONLINE },
- { 0x401a4, 1, RI_ALL_ONLINE }, { 0x401a8, 11, RI_E1H_ONLINE },
- { 0x40200, 4, RI_ALL_ONLINE }, { 0x40400, 43, RI_ALL_ONLINE },
- { 0x404b8, 1, RI_ALL_ONLINE }, { 0x404c8, 1, RI_ALL_ONLINE },
- { 0x404cc, 3, RI_E1H_ONLINE }, { 0x40500, 2, RI_ALL_ONLINE },
+ { 0x8800, 6, RI_ALL_ONLINE }, { 0x8818, 1, RI_E1HE2_ONLINE },
+ { 0x9000, 164, RI_E2_ONLINE }, { 0x9400, 33, RI_E2_ONLINE },
+ { 0xa000, 27, RI_ALL_ONLINE }, { 0xa06c, 1, RI_E1E1H_ONLINE },
+ { 0xa070, 71, RI_ALL_ONLINE }, { 0xa18c, 4, RI_E1E1H_ONLINE },
+ { 0xa19c, 62, RI_ALL_ONLINE }, { 0xa294, 2, RI_E1E1H_ONLINE },
+ { 0xa29c, 56, RI_ALL_ONLINE }, { 0xa39c, 7, RI_E1HE2_ONLINE },
+ { 0xa3c0, 3, RI_E1HE2_ONLINE }, { 0xa3d0, 1, RI_E1HE2_ONLINE },
+ { 0xa3d8, 1, RI_E1HE2_ONLINE }, { 0xa3e0, 1, RI_E1HE2_ONLINE },
+ { 0xa3e8, 1, RI_E1HE2_ONLINE }, { 0xa3f0, 1, RI_E1HE2_ONLINE },
+ { 0xa3f8, 1, RI_E1HE2_ONLINE }, { 0xa400, 43, RI_ALL_ONLINE },
+ { 0xa4ac, 2, RI_E1E1H_ONLINE }, { 0xa4b4, 1, RI_ALL_ONLINE },
+ { 0xa4b8, 2, RI_E1E1H_ONLINE }, { 0xa4c0, 3, RI_ALL_ONLINE },
+ { 0xa4cc, 5, RI_E1E1H_ONLINE }, { 0xa4e0, 9, RI_ALL_ONLINE },
+ { 0xa504, 1, RI_E1E1H_ONLINE }, { 0xa508, 3, RI_ALL_ONLINE },
+ { 0xa518, 1, RI_ALL_ONLINE }, { 0xa520, 1, RI_ALL_ONLINE },
+ { 0xa528, 1, RI_ALL_ONLINE }, { 0xa530, 1, RI_ALL_ONLINE },
+ { 0xa538, 1, RI_ALL_ONLINE }, { 0xa540, 1, RI_ALL_ONLINE },
+ { 0xa548, 1, RI_E1E1H_ONLINE }, { 0xa550, 1, RI_E1E1H_ONLINE },
+ { 0xa558, 1, RI_E1E1H_ONLINE }, { 0xa560, 1, RI_E1E1H_ONLINE },
+ { 0xa568, 1, RI_E1E1H_ONLINE }, { 0xa570, 1, RI_ALL_ONLINE },
+ { 0xa580, 1, RI_ALL_ONLINE }, { 0xa590, 1, RI_ALL_ONLINE },
+ { 0xa5a0, 1, RI_ALL_ONLINE }, { 0xa5c0, 1, RI_ALL_ONLINE },
+ { 0xa5e0, 1, RI_E1HE2_ONLINE }, { 0xa5e8, 1, RI_E1HE2_ONLINE },
+ { 0xa5f0, 1, RI_E1HE2_ONLINE }, { 0xa5f8, 10, RI_E1HE2_ONLINE },
+ { 0xa620, 111, RI_E2_ONLINE }, { 0xa800, 51, RI_E2_ONLINE },
+ { 0xa8d4, 4, RI_E2_ONLINE }, { 0xa8e8, 1, RI_E2_ONLINE },
+ { 0xa8f0, 1, RI_E2_ONLINE }, { 0x10000, 236, RI_ALL_ONLINE },
+ { 0x10400, 57, RI_ALL_ONLINE }, { 0x104e8, 2, RI_ALL_ONLINE },
+ { 0x104f4, 2, RI_ALL_ONLINE }, { 0x10500, 146, RI_ALL_ONLINE },
+ { 0x10750, 2, RI_ALL_ONLINE }, { 0x10760, 2, RI_ALL_ONLINE },
+ { 0x10770, 2, RI_ALL_ONLINE }, { 0x10780, 2, RI_ALL_ONLINE },
+ { 0x10790, 2, RI_ALL_ONLINE }, { 0x107a0, 2, RI_ALL_ONLINE },
+ { 0x107b0, 2, RI_ALL_ONLINE }, { 0x107c0, 2, RI_ALL_ONLINE },
+ { 0x107d0, 2, RI_ALL_ONLINE }, { 0x107e0, 2, RI_ALL_ONLINE },
+ { 0x10880, 2, RI_ALL_ONLINE }, { 0x10900, 2, RI_ALL_ONLINE },
+ { 0x16000, 26, RI_E1HE2_ONLINE }, { 0x16070, 18, RI_E1HE2_ONLINE },
+ { 0x160c0, 27, RI_E1HE2_ONLINE }, { 0x16140, 1, RI_E1HE2_ONLINE },
+ { 0x16160, 1, RI_E1HE2_ONLINE }, { 0x16180, 2, RI_E1HE2_ONLINE },
+ { 0x161c0, 2, RI_E1HE2_ONLINE }, { 0x16204, 5, RI_E1HE2_ONLINE },
+ { 0x18000, 1, RI_E1HE2_ONLINE }, { 0x18008, 1, RI_E1HE2_ONLINE },
+ { 0x18010, 35, RI_E2_ONLINE }, { 0x180a4, 2, RI_E2_ONLINE },
+ { 0x180c0, 191, RI_E2_ONLINE }, { 0x18440, 1, RI_E2_ONLINE },
+ { 0x18460, 1, RI_E2_ONLINE }, { 0x18480, 2, RI_E2_ONLINE },
+ { 0x184c0, 2, RI_E2_ONLINE }, { 0x18500, 15, RI_E2_ONLINE },
+ { 0x20000, 24, RI_ALL_ONLINE }, { 0x20060, 8, RI_ALL_ONLINE },
+ { 0x20080, 94, RI_ALL_ONLINE }, { 0x201f8, 1, RI_E1E1H_ONLINE },
+ { 0x201fc, 1, RI_ALL_ONLINE }, { 0x20200, 1, RI_E1E1H_ONLINE },
+ { 0x20204, 1, RI_ALL_ONLINE }, { 0x20208, 1, RI_E1E1H_ONLINE },
+ { 0x2020c, 39, RI_ALL_ONLINE }, { 0x202c8, 1, RI_E2_ONLINE },
+ { 0x202d8, 4, RI_E2_ONLINE }, { 0x20400, 2, RI_ALL_ONLINE },
+ { 0x2040c, 8, RI_ALL_ONLINE }, { 0x2042c, 18, RI_E1HE2_ONLINE },
+ { 0x20480, 1, RI_ALL_ONLINE }, { 0x20500, 1, RI_ALL_ONLINE },
+ { 0x20600, 1, RI_ALL_ONLINE }, { 0x28000, 1, RI_ALL_ONLINE },
+ { 0x28004, 8191, RI_ALL_OFFLINE }, { 0x30000, 1, RI_ALL_ONLINE },
+ { 0x30004, 16383, RI_ALL_OFFLINE }, { 0x40000, 98, RI_ALL_ONLINE },
+ { 0x401a8, 8, RI_E1HE2_ONLINE }, { 0x401c8, 1, RI_E1H_ONLINE },
+ { 0x401cc, 2, RI_E1HE2_ONLINE }, { 0x401d4, 2, RI_E2_ONLINE },
+ { 0x40200, 4, RI_ALL_ONLINE }, { 0x40220, 18, RI_E2_ONLINE },
+ { 0x40400, 43, RI_ALL_ONLINE }, { 0x404cc, 3, RI_E1HE2_ONLINE },
+ { 0x404e0, 1, RI_E2_ONLINE }, { 0x40500, 2, RI_ALL_ONLINE },
{ 0x40510, 2, RI_ALL_ONLINE }, { 0x40520, 2, RI_ALL_ONLINE },
{ 0x40530, 2, RI_ALL_ONLINE }, { 0x40540, 2, RI_ALL_ONLINE },
- { 0x42000, 164, RI_ALL_ONLINE }, { 0x4229c, 1, RI_ALL_ONLINE },
- { 0x422ac, 1, RI_ALL_ONLINE }, { 0x422bc, 1, RI_ALL_ONLINE },
- { 0x422d4, 5, RI_E1H_ONLINE }, { 0x42400, 49, RI_ALL_ONLINE },
- { 0x424c8, 38, RI_ALL_ONLINE }, { 0x42568, 2, RI_ALL_ONLINE },
- { 0x42800, 1, RI_ALL_ONLINE }, { 0x50000, 20, RI_ALL_ONLINE },
- { 0x50050, 8, RI_ALL_ONLINE }, { 0x50070, 88, RI_ALL_ONLINE },
- { 0x501dc, 1, RI_ALL_ONLINE }, { 0x501ec, 1, RI_ALL_ONLINE },
- { 0x501f0, 4, RI_E1H_ONLINE }, { 0x50200, 2, RI_ALL_ONLINE },
- { 0x5020c, 7, RI_ALL_ONLINE }, { 0x50228, 6, RI_E1H_ONLINE },
- { 0x50240, 1, RI_ALL_ONLINE }, { 0x50280, 1, RI_ALL_ONLINE },
+ { 0x40550, 10, RI_E2_ONLINE }, { 0x40610, 2, RI_E2_ONLINE },
+ { 0x42000, 164, RI_ALL_ONLINE }, { 0x422c0, 4, RI_E2_ONLINE },
+ { 0x422d4, 5, RI_E1HE2_ONLINE }, { 0x422e8, 1, RI_E2_ONLINE },
+ { 0x42400, 49, RI_ALL_ONLINE }, { 0x424c8, 38, RI_ALL_ONLINE },
+ { 0x42568, 2, RI_ALL_ONLINE }, { 0x42640, 5, RI_E2_ONLINE },
+ { 0x42800, 1, RI_ALL_ONLINE }, { 0x50000, 1, RI_ALL_ONLINE },
+ { 0x50004, 19, RI_ALL_ONLINE }, { 0x50050, 8, RI_ALL_ONLINE },
+ { 0x50070, 88, RI_ALL_ONLINE }, { 0x501f0, 4, RI_E1HE2_ONLINE },
+ { 0x50200, 2, RI_ALL_ONLINE }, { 0x5020c, 7, RI_ALL_ONLINE },
+ { 0x50228, 6, RI_E1HE2_ONLINE }, { 0x50240, 1, RI_ALL_ONLINE },
+ { 0x50280, 1, RI_ALL_ONLINE }, { 0x50300, 1, RI_E2_ONLINE },
+ { 0x5030c, 1, RI_E2_ONLINE }, { 0x50318, 1, RI_E2_ONLINE },
+ { 0x5031c, 1, RI_E2_ONLINE }, { 0x50320, 2, RI_E2_ONLINE },
{ 0x52000, 1, RI_ALL_ONLINE }, { 0x54000, 1, RI_ALL_ONLINE },
{ 0x54004, 3327, RI_ALL_OFFLINE }, { 0x58000, 1, RI_ALL_ONLINE },
- { 0x58004, 8191, RI_ALL_OFFLINE }, { 0x60000, 71, RI_ALL_ONLINE },
- { 0x60128, 1, RI_ALL_ONLINE }, { 0x60138, 1, RI_ALL_ONLINE },
- { 0x6013c, 24, RI_E1H_ONLINE }, { 0x60200, 1, RI_ALL_ONLINE },
+ { 0x58004, 8191, RI_E1E1H_OFFLINE }, { 0x60000, 26, RI_ALL_ONLINE },
+ { 0x60068, 8, RI_E1E1H_ONLINE }, { 0x60088, 12, RI_ALL_ONLINE },
+ { 0x600b8, 9, RI_E1E1H_ONLINE }, { 0x600dc, 1, RI_ALL_ONLINE },
+ { 0x600e0, 5, RI_E1E1H_ONLINE }, { 0x600f4, 1, RI_ALL_ONLINE },
+ { 0x600f8, 1, RI_E1E1H_ONLINE }, { 0x600fc, 8, RI_ALL_ONLINE },
+ { 0x6013c, 24, RI_E1H_ONLINE }, { 0x6019c, 2, RI_E2_ONLINE },
+ { 0x601ac, 18, RI_E2_ONLINE }, { 0x60200, 1, RI_ALL_ONLINE },
+ { 0x60204, 2, RI_ALL_OFFLINE }, { 0x60210, 13, RI_E2_ONLINE },
{ 0x61000, 1, RI_ALL_ONLINE }, { 0x61004, 511, RI_ALL_OFFLINE },
- { 0x70000, 8, RI_ALL_ONLINE }, { 0x70020, 21496, RI_ALL_OFFLINE },
- { 0x85000, 3, RI_ALL_ONLINE }, { 0x8500c, 4, RI_ALL_OFFLINE },
- { 0x8501c, 7, RI_ALL_ONLINE }, { 0x85038, 4, RI_ALL_OFFLINE },
- { 0x85048, 1, RI_ALL_ONLINE }, { 0x8504c, 109, RI_ALL_OFFLINE },
- { 0x85200, 32, RI_ALL_ONLINE }, { 0x85280, 11104, RI_ALL_OFFLINE },
- { 0xa0000, 16384, RI_ALL_ONLINE }, { 0xb0000, 16384, RI_E1H_ONLINE },
- { 0xc1000, 7, RI_ALL_ONLINE }, { 0xc1028, 1, RI_ALL_ONLINE },
- { 0xc1038, 1, RI_ALL_ONLINE }, { 0xc1800, 2, RI_ALL_ONLINE },
- { 0xc2000, 164, RI_ALL_ONLINE }, { 0xc229c, 1, RI_ALL_ONLINE },
- { 0xc22ac, 1, RI_ALL_ONLINE }, { 0xc22bc, 1, RI_ALL_ONLINE },
+ { 0x70000, 8, RI_ALL_ONLINE }, { 0x70020, 8184, RI_ALL_OFFLINE },
+ { 0x85000, 3, RI_ALL_ONLINE }, { 0x8501c, 7, RI_ALL_ONLINE },
+ { 0x85048, 1, RI_ALL_ONLINE }, { 0x85200, 32, RI_ALL_ONLINE },
+ { 0xc1000, 7, RI_ALL_ONLINE }, { 0xc103c, 2, RI_E2_ONLINE },
+ { 0xc1800, 2, RI_ALL_ONLINE }, { 0xc2000, 164, RI_ALL_ONLINE },
+ { 0xc22c0, 5, RI_E2_ONLINE }, { 0xc22d8, 4, RI_E2_ONLINE },
{ 0xc2400, 49, RI_ALL_ONLINE }, { 0xc24c8, 38, RI_ALL_ONLINE },
{ 0xc2568, 2, RI_ALL_ONLINE }, { 0xc2600, 1, RI_ALL_ONLINE },
- { 0xc4000, 165, RI_ALL_ONLINE }, { 0xc42a0, 1, RI_ALL_ONLINE },
- { 0xc42b0, 1, RI_ALL_ONLINE }, { 0xc42c0, 1, RI_ALL_ONLINE },
- { 0xc42e0, 7, RI_E1H_ONLINE }, { 0xc4400, 51, RI_ALL_ONLINE },
- { 0xc44d0, 38, RI_ALL_ONLINE }, { 0xc4570, 2, RI_ALL_ONLINE },
+ { 0xc4000, 165, RI_ALL_ONLINE }, { 0xc42d8, 2, RI_E2_ONLINE },
+ { 0xc42e0, 7, RI_E1HE2_ONLINE }, { 0xc42fc, 1, RI_E2_ONLINE },
+ { 0xc4400, 51, RI_ALL_ONLINE }, { 0xc44d0, 38, RI_ALL_ONLINE },
+ { 0xc4570, 2, RI_ALL_ONLINE }, { 0xc4578, 5, RI_E2_ONLINE },
{ 0xc4600, 1, RI_ALL_ONLINE }, { 0xd0000, 19, RI_ALL_ONLINE },
{ 0xd004c, 8, RI_ALL_ONLINE }, { 0xd006c, 91, RI_ALL_ONLINE },
- { 0xd01e4, 1, RI_ALL_ONLINE }, { 0xd01f4, 1, RI_ALL_ONLINE },
- { 0xd0200, 2, RI_ALL_ONLINE }, { 0xd020c, 7, RI_ALL_ONLINE },
- { 0xd0228, 18, RI_E1H_ONLINE }, { 0xd0280, 1, RI_ALL_ONLINE },
- { 0xd0300, 1, RI_ALL_ONLINE }, { 0xd0400, 1, RI_ALL_ONLINE },
- { 0xd4000, 1, RI_ALL_ONLINE }, { 0xd4004, 2559, RI_ALL_OFFLINE },
- { 0xd8000, 1, RI_ALL_ONLINE }, { 0xd8004, 8191, RI_ALL_OFFLINE },
- { 0xe0000, 21, RI_ALL_ONLINE }, { 0xe0054, 8, RI_ALL_ONLINE },
- { 0xe0074, 85, RI_ALL_ONLINE }, { 0xe01d4, 1, RI_ALL_ONLINE },
- { 0xe01e4, 1, RI_ALL_ONLINE }, { 0xe0200, 2, RI_ALL_ONLINE },
- { 0xe020c, 8, RI_ALL_ONLINE }, { 0xe022c, 18, RI_E1H_ONLINE },
+ { 0xd01fc, 1, RI_E2_ONLINE }, { 0xd0200, 2, RI_ALL_ONLINE },
+ { 0xd020c, 7, RI_ALL_ONLINE }, { 0xd0228, 18, RI_E1HE2_ONLINE },
+ { 0xd0280, 1, RI_ALL_ONLINE }, { 0xd0300, 1, RI_ALL_ONLINE },
+ { 0xd0400, 1, RI_ALL_ONLINE }, { 0xd4000, 1, RI_ALL_ONLINE },
+ { 0xd4004, 2559, RI_ALL_OFFLINE }, { 0xd8000, 1, RI_ALL_ONLINE },
+ { 0xd8004, 8191, RI_ALL_OFFLINE }, { 0xe0000, 21, RI_ALL_ONLINE },
+ { 0xe0054, 8, RI_ALL_ONLINE }, { 0xe0074, 49, RI_ALL_ONLINE },
+ { 0xe0138, 1, RI_E1E1H_ONLINE }, { 0xe013c, 35, RI_ALL_ONLINE },
+ { 0xe01f4, 2, RI_E2_ONLINE }, { 0xe0200, 2, RI_ALL_ONLINE },
+ { 0xe020c, 8, RI_ALL_ONLINE }, { 0xe022c, 18, RI_E1HE2_ONLINE },
{ 0xe0280, 1, RI_ALL_ONLINE }, { 0xe0300, 1, RI_ALL_ONLINE },
{ 0xe1000, 1, RI_ALL_ONLINE }, { 0xe2000, 1, RI_ALL_ONLINE },
{ 0xe2004, 2047, RI_ALL_OFFLINE }, { 0xf0000, 1, RI_ALL_ONLINE },
{ 0xf0004, 16383, RI_ALL_OFFLINE }, { 0x101000, 12, RI_ALL_ONLINE },
- { 0x10103c, 1, RI_ALL_ONLINE }, { 0x10104c, 1, RI_ALL_ONLINE },
- { 0x101050, 1, RI_E1H_ONLINE }, { 0x101100, 1, RI_ALL_ONLINE },
- { 0x101800, 8, RI_ALL_ONLINE }, { 0x102000, 18, RI_ALL_ONLINE },
- { 0x102054, 1, RI_ALL_ONLINE }, { 0x102064, 1, RI_ALL_ONLINE },
+ { 0x101050, 1, RI_E1HE2_ONLINE }, { 0x101054, 3, RI_E2_ONLINE },
+ { 0x101100, 1, RI_ALL_ONLINE }, { 0x101800, 8, RI_ALL_ONLINE },
+ { 0x102000, 18, RI_ALL_ONLINE }, { 0x102068, 6, RI_E2_ONLINE },
{ 0x102080, 17, RI_ALL_ONLINE }, { 0x1020c8, 8, RI_E1H_ONLINE },
- { 0x102400, 1, RI_ALL_ONLINE }, { 0x103000, 26, RI_ALL_ONLINE },
- { 0x103074, 1, RI_ALL_ONLINE }, { 0x103084, 1, RI_ALL_ONLINE },
- { 0x103094, 1, RI_ALL_ONLINE }, { 0x103098, 5, RI_E1H_ONLINE },
+ { 0x1020e8, 9, RI_E2_ONLINE }, { 0x102400, 1, RI_ALL_ONLINE },
+ { 0x103000, 26, RI_ALL_ONLINE }, { 0x103098, 5, RI_E1HE2_ONLINE },
+ { 0x1030ac, 10, RI_E2_ONLINE }, { 0x1030d8, 8, RI_E2_ONLINE },
+ { 0x103400, 1, RI_E2_ONLINE }, { 0x103404, 135, RI_E2_OFFLINE },
{ 0x103800, 8, RI_ALL_ONLINE }, { 0x104000, 63, RI_ALL_ONLINE },
- { 0x104108, 1, RI_ALL_ONLINE }, { 0x104118, 1, RI_ALL_ONLINE },
- { 0x104200, 17, RI_ALL_ONLINE }, { 0x104400, 64, RI_ALL_ONLINE },
- { 0x104500, 192, RI_ALL_OFFLINE }, { 0x104800, 64, RI_ALL_ONLINE },
- { 0x104900, 192, RI_ALL_OFFLINE }, { 0x105000, 7, RI_ALL_ONLINE },
- { 0x10501c, 1, RI_ALL_OFFLINE }, { 0x105020, 3, RI_ALL_ONLINE },
- { 0x10502c, 1, RI_ALL_OFFLINE }, { 0x105030, 3, RI_ALL_ONLINE },
- { 0x10503c, 1, RI_ALL_OFFLINE }, { 0x105040, 3, RI_ALL_ONLINE },
- { 0x10504c, 1, RI_ALL_OFFLINE }, { 0x105050, 3, RI_ALL_ONLINE },
- { 0x10505c, 1, RI_ALL_OFFLINE }, { 0x105060, 3, RI_ALL_ONLINE },
- { 0x10506c, 1, RI_ALL_OFFLINE }, { 0x105070, 3, RI_ALL_ONLINE },
- { 0x10507c, 1, RI_ALL_OFFLINE }, { 0x105080, 3, RI_ALL_ONLINE },
- { 0x10508c, 1, RI_ALL_OFFLINE }, { 0x105090, 3, RI_ALL_ONLINE },
- { 0x10509c, 1, RI_ALL_OFFLINE }, { 0x1050a0, 3, RI_ALL_ONLINE },
- { 0x1050ac, 1, RI_ALL_OFFLINE }, { 0x1050b0, 3, RI_ALL_ONLINE },
- { 0x1050bc, 1, RI_ALL_OFFLINE }, { 0x1050c0, 3, RI_ALL_ONLINE },
- { 0x1050cc, 1, RI_ALL_OFFLINE }, { 0x1050d0, 3, RI_ALL_ONLINE },
- { 0x1050dc, 1, RI_ALL_OFFLINE }, { 0x1050e0, 3, RI_ALL_ONLINE },
- { 0x1050ec, 1, RI_ALL_OFFLINE }, { 0x1050f0, 3, RI_ALL_ONLINE },
- { 0x1050fc, 1, RI_ALL_OFFLINE }, { 0x105100, 3, RI_ALL_ONLINE },
- { 0x10510c, 1, RI_ALL_OFFLINE }, { 0x105110, 3, RI_ALL_ONLINE },
- { 0x10511c, 1, RI_ALL_OFFLINE }, { 0x105120, 3, RI_ALL_ONLINE },
- { 0x10512c, 1, RI_ALL_OFFLINE }, { 0x105130, 3, RI_ALL_ONLINE },
- { 0x10513c, 1, RI_ALL_OFFLINE }, { 0x105140, 3, RI_ALL_ONLINE },
- { 0x10514c, 1, RI_ALL_OFFLINE }, { 0x105150, 3, RI_ALL_ONLINE },
- { 0x10515c, 1, RI_ALL_OFFLINE }, { 0x105160, 3, RI_ALL_ONLINE },
- { 0x10516c, 1, RI_ALL_OFFLINE }, { 0x105170, 3, RI_ALL_ONLINE },
- { 0x10517c, 1, RI_ALL_OFFLINE }, { 0x105180, 3, RI_ALL_ONLINE },
- { 0x10518c, 1, RI_ALL_OFFLINE }, { 0x105190, 3, RI_ALL_ONLINE },
- { 0x10519c, 1, RI_ALL_OFFLINE }, { 0x1051a0, 3, RI_ALL_ONLINE },
- { 0x1051ac, 1, RI_ALL_OFFLINE }, { 0x1051b0, 3, RI_ALL_ONLINE },
- { 0x1051bc, 1, RI_ALL_OFFLINE }, { 0x1051c0, 3, RI_ALL_ONLINE },
- { 0x1051cc, 1, RI_ALL_OFFLINE }, { 0x1051d0, 3, RI_ALL_ONLINE },
- { 0x1051dc, 1, RI_ALL_OFFLINE }, { 0x1051e0, 3, RI_ALL_ONLINE },
- { 0x1051ec, 1, RI_ALL_OFFLINE }, { 0x1051f0, 3, RI_ALL_ONLINE },
- { 0x1051fc, 1, RI_ALL_OFFLINE }, { 0x105200, 3, RI_ALL_ONLINE },
- { 0x10520c, 1, RI_ALL_OFFLINE }, { 0x105210, 3, RI_ALL_ONLINE },
- { 0x10521c, 1, RI_ALL_OFFLINE }, { 0x105220, 3, RI_ALL_ONLINE },
- { 0x10522c, 1, RI_ALL_OFFLINE }, { 0x105230, 3, RI_ALL_ONLINE },
- { 0x10523c, 1, RI_ALL_OFFLINE }, { 0x105240, 3, RI_ALL_ONLINE },
- { 0x10524c, 1, RI_ALL_OFFLINE }, { 0x105250, 3, RI_ALL_ONLINE },
- { 0x10525c, 1, RI_ALL_OFFLINE }, { 0x105260, 3, RI_ALL_ONLINE },
- { 0x10526c, 1, RI_ALL_OFFLINE }, { 0x105270, 3, RI_ALL_ONLINE },
- { 0x10527c, 1, RI_ALL_OFFLINE }, { 0x105280, 3, RI_ALL_ONLINE },
- { 0x10528c, 1, RI_ALL_OFFLINE }, { 0x105290, 3, RI_ALL_ONLINE },
- { 0x10529c, 1, RI_ALL_OFFLINE }, { 0x1052a0, 3, RI_ALL_ONLINE },
- { 0x1052ac, 1, RI_ALL_OFFLINE }, { 0x1052b0, 3, RI_ALL_ONLINE },
- { 0x1052bc, 1, RI_ALL_OFFLINE }, { 0x1052c0, 3, RI_ALL_ONLINE },
- { 0x1052cc, 1, RI_ALL_OFFLINE }, { 0x1052d0, 3, RI_ALL_ONLINE },
- { 0x1052dc, 1, RI_ALL_OFFLINE }, { 0x1052e0, 3, RI_ALL_ONLINE },
- { 0x1052ec, 1, RI_ALL_OFFLINE }, { 0x1052f0, 3, RI_ALL_ONLINE },
- { 0x1052fc, 1, RI_ALL_OFFLINE }, { 0x105300, 3, RI_ALL_ONLINE },
- { 0x10530c, 1, RI_ALL_OFFLINE }, { 0x105310, 3, RI_ALL_ONLINE },
- { 0x10531c, 1, RI_ALL_OFFLINE }, { 0x105320, 3, RI_ALL_ONLINE },
- { 0x10532c, 1, RI_ALL_OFFLINE }, { 0x105330, 3, RI_ALL_ONLINE },
- { 0x10533c, 1, RI_ALL_OFFLINE }, { 0x105340, 3, RI_ALL_ONLINE },
- { 0x10534c, 1, RI_ALL_OFFLINE }, { 0x105350, 3, RI_ALL_ONLINE },
- { 0x10535c, 1, RI_ALL_OFFLINE }, { 0x105360, 3, RI_ALL_ONLINE },
- { 0x10536c, 1, RI_ALL_OFFLINE }, { 0x105370, 3, RI_ALL_ONLINE },
- { 0x10537c, 1, RI_ALL_OFFLINE }, { 0x105380, 3, RI_ALL_ONLINE },
- { 0x10538c, 1, RI_ALL_OFFLINE }, { 0x105390, 3, RI_ALL_ONLINE },
- { 0x10539c, 1, RI_ALL_OFFLINE }, { 0x1053a0, 3, RI_ALL_ONLINE },
- { 0x1053ac, 1, RI_ALL_OFFLINE }, { 0x1053b0, 3, RI_ALL_ONLINE },
- { 0x1053bc, 1, RI_ALL_OFFLINE }, { 0x1053c0, 3, RI_ALL_ONLINE },
- { 0x1053cc, 1, RI_ALL_OFFLINE }, { 0x1053d0, 3, RI_ALL_ONLINE },
- { 0x1053dc, 1, RI_ALL_OFFLINE }, { 0x1053e0, 3, RI_ALL_ONLINE },
- { 0x1053ec, 1, RI_ALL_OFFLINE }, { 0x1053f0, 3, RI_ALL_ONLINE },
- { 0x1053fc, 769, RI_ALL_OFFLINE }, { 0x108000, 33, RI_ALL_ONLINE },
- { 0x108090, 1, RI_ALL_ONLINE }, { 0x1080a0, 1, RI_ALL_ONLINE },
- { 0x1080ac, 5, RI_E1H_ONLINE }, { 0x108100, 5, RI_ALL_ONLINE },
- { 0x108120, 5, RI_ALL_ONLINE }, { 0x108200, 74, RI_ALL_ONLINE },
- { 0x108400, 74, RI_ALL_ONLINE }, { 0x108800, 152, RI_ALL_ONLINE },
- { 0x109000, 1, RI_ALL_ONLINE }, { 0x120000, 347, RI_ALL_ONLINE },
- { 0x120578, 1, RI_ALL_ONLINE }, { 0x120588, 1, RI_ALL_ONLINE },
- { 0x120598, 1, RI_ALL_ONLINE }, { 0x12059c, 23, RI_E1H_ONLINE },
- { 0x120614, 1, RI_E1H_ONLINE }, { 0x12061c, 30, RI_E1H_ONLINE },
- { 0x12080c, 65, RI_ALL_ONLINE }, { 0x120a00, 2, RI_ALL_ONLINE },
- { 0x122000, 2, RI_ALL_ONLINE }, { 0x128000, 2, RI_E1H_ONLINE },
- { 0x140000, 114, RI_ALL_ONLINE }, { 0x1401d4, 1, RI_ALL_ONLINE },
- { 0x1401e4, 1, RI_ALL_ONLINE }, { 0x140200, 6, RI_ALL_ONLINE },
- { 0x144000, 4, RI_ALL_ONLINE }, { 0x148000, 4, RI_ALL_ONLINE },
- { 0x14c000, 4, RI_ALL_ONLINE }, { 0x150000, 4, RI_ALL_ONLINE },
- { 0x154000, 4, RI_ALL_ONLINE }, { 0x158000, 4, RI_ALL_ONLINE },
- { 0x15c000, 7, RI_E1H_ONLINE }, { 0x161000, 7, RI_ALL_ONLINE },
- { 0x161028, 1, RI_ALL_ONLINE }, { 0x161038, 1, RI_ALL_ONLINE },
- { 0x161800, 2, RI_ALL_ONLINE }, { 0x164000, 60, RI_ALL_ONLINE },
- { 0x1640fc, 1, RI_ALL_ONLINE }, { 0x16410c, 1, RI_ALL_ONLINE },
- { 0x164110, 2, RI_E1H_ONLINE }, { 0x164200, 1, RI_ALL_ONLINE },
+ { 0x10411c, 16, RI_E2_ONLINE }, { 0x104200, 17, RI_ALL_ONLINE },
+ { 0x104400, 64, RI_ALL_ONLINE }, { 0x104500, 192, RI_ALL_OFFLINE },
+ { 0x104800, 64, RI_ALL_ONLINE }, { 0x104900, 192, RI_ALL_OFFLINE },
+ { 0x105000, 256, RI_ALL_ONLINE }, { 0x105400, 768, RI_ALL_OFFLINE },
+ { 0x107000, 7, RI_E2_ONLINE }, { 0x108000, 33, RI_E1E1H_ONLINE },
+ { 0x1080ac, 5, RI_E1H_ONLINE }, { 0x108100, 5, RI_E1E1H_ONLINE },
+ { 0x108120, 5, RI_E1E1H_ONLINE }, { 0x108200, 74, RI_E1E1H_ONLINE },
+ { 0x108400, 74, RI_E1E1H_ONLINE }, { 0x108800, 152, RI_E1E1H_ONLINE },
+ { 0x110000, 111, RI_E2_ONLINE }, { 0x110200, 4, RI_E2_ONLINE },
+ { 0x120000, 2, RI_ALL_ONLINE }, { 0x120008, 4, RI_ALL_ONLINE },
+ { 0x120018, 3, RI_ALL_ONLINE }, { 0x120024, 4, RI_ALL_ONLINE },
+ { 0x120034, 3, RI_ALL_ONLINE }, { 0x120040, 4, RI_ALL_ONLINE },
+ { 0x120050, 3, RI_ALL_ONLINE }, { 0x12005c, 4, RI_ALL_ONLINE },
+ { 0x12006c, 3, RI_ALL_ONLINE }, { 0x120078, 4, RI_ALL_ONLINE },
+ { 0x120088, 3, RI_ALL_ONLINE }, { 0x120094, 4, RI_ALL_ONLINE },
+ { 0x1200a4, 3, RI_ALL_ONLINE }, { 0x1200b0, 4, RI_ALL_ONLINE },
+ { 0x1200c0, 3, RI_ALL_ONLINE }, { 0x1200cc, 4, RI_ALL_ONLINE },
+ { 0x1200dc, 3, RI_ALL_ONLINE }, { 0x1200e8, 4, RI_ALL_ONLINE },
+ { 0x1200f8, 3, RI_ALL_ONLINE }, { 0x120104, 4, RI_ALL_ONLINE },
+ { 0x120114, 1, RI_ALL_ONLINE }, { 0x120118, 22, RI_ALL_ONLINE },
+ { 0x120170, 2, RI_E1E1H_ONLINE }, { 0x120178, 243, RI_ALL_ONLINE },
+ { 0x120544, 4, RI_E1E1H_ONLINE }, { 0x120554, 7, RI_ALL_ONLINE },
+ { 0x12059c, 6, RI_E1HE2_ONLINE }, { 0x1205b4, 1, RI_E1HE2_ONLINE },
+ { 0x1205b8, 16, RI_E1HE2_ONLINE }, { 0x1205f8, 4, RI_E2_ONLINE },
+ { 0x120618, 1, RI_E2_ONLINE }, { 0x12061c, 20, RI_E1HE2_ONLINE },
+ { 0x12066c, 11, RI_E1HE2_ONLINE }, { 0x120698, 5, RI_E2_ONLINE },
+ { 0x1206b0, 76, RI_E2_ONLINE }, { 0x1207fc, 1, RI_E2_ONLINE },
+ { 0x120808, 66, RI_ALL_ONLINE }, { 0x120910, 7, RI_E2_ONLINE },
+ { 0x120930, 9, RI_E2_ONLINE }, { 0x120a00, 2, RI_ALL_ONLINE },
+ { 0x122000, 2, RI_ALL_ONLINE }, { 0x122008, 2046, RI_E1_OFFLINE },
+ { 0x128000, 2, RI_E1HE2_ONLINE }, { 0x128008, 6142, RI_E1HE2_OFFLINE },
+ { 0x130000, 35, RI_E2_ONLINE }, { 0x130100, 29, RI_E2_ONLINE },
+ { 0x130180, 1, RI_E2_ONLINE }, { 0x130200, 1, RI_E2_ONLINE },
+ { 0x130280, 1, RI_E2_ONLINE }, { 0x130300, 5, RI_E2_ONLINE },
+ { 0x130380, 1, RI_E2_ONLINE }, { 0x130400, 1, RI_E2_ONLINE },
+ { 0x130480, 5, RI_E2_ONLINE }, { 0x130800, 72, RI_E2_ONLINE },
+ { 0x131000, 136, RI_E2_ONLINE }, { 0x132000, 148, RI_E2_ONLINE },
+ { 0x134000, 544, RI_E2_ONLINE }, { 0x140000, 64, RI_ALL_ONLINE },
+ { 0x140100, 5, RI_E1E1H_ONLINE }, { 0x140114, 45, RI_ALL_ONLINE },
+ { 0x140200, 6, RI_ALL_ONLINE }, { 0x140220, 4, RI_E2_ONLINE },
+ { 0x140240, 4, RI_E2_ONLINE }, { 0x140260, 4, RI_E2_ONLINE },
+ { 0x140280, 4, RI_E2_ONLINE }, { 0x1402a0, 4, RI_E2_ONLINE },
+ { 0x1402c0, 4, RI_E2_ONLINE }, { 0x1402e0, 13, RI_E2_ONLINE },
+ { 0x144000, 4, RI_E1E1H_ONLINE }, { 0x148000, 4, RI_E1E1H_ONLINE },
+ { 0x14c000, 4, RI_E1E1H_ONLINE }, { 0x150000, 4, RI_E1E1H_ONLINE },
+ { 0x154000, 4, RI_E1E1H_ONLINE }, { 0x158000, 4, RI_E1E1H_ONLINE },
+ { 0x15c000, 2, RI_E1HE2_ONLINE }, { 0x15c008, 5, RI_E1H_ONLINE },
+ { 0x15c020, 27, RI_E2_ONLINE }, { 0x15c090, 13, RI_E2_ONLINE },
+ { 0x15c0c8, 34, RI_E2_ONLINE }, { 0x161000, 7, RI_ALL_ONLINE },
+ { 0x16103c, 2, RI_E2_ONLINE }, { 0x161800, 2, RI_ALL_ONLINE },
+ { 0x164000, 60, RI_ALL_ONLINE }, { 0x164110, 2, RI_E1HE2_ONLINE },
+ { 0x164118, 15, RI_E2_ONLINE }, { 0x164200, 1, RI_ALL_ONLINE },
{ 0x164208, 1, RI_ALL_ONLINE }, { 0x164210, 1, RI_ALL_ONLINE },
{ 0x164218, 1, RI_ALL_ONLINE }, { 0x164220, 1, RI_ALL_ONLINE },
{ 0x164228, 1, RI_ALL_ONLINE }, { 0x164230, 1, RI_ALL_ONLINE },
@@ -284,169 +273,298 @@ static const struct reg_addr reg_addrs[REGS_COUNT] = {
{ 0x164258, 1, RI_ALL_ONLINE }, { 0x164260, 1, RI_ALL_ONLINE },
{ 0x164270, 2, RI_ALL_ONLINE }, { 0x164280, 2, RI_ALL_ONLINE },
{ 0x164800, 2, RI_ALL_ONLINE }, { 0x165000, 2, RI_ALL_ONLINE },
- { 0x166000, 164, RI_ALL_ONLINE }, { 0x16629c, 1, RI_ALL_ONLINE },
- { 0x1662ac, 1, RI_ALL_ONLINE }, { 0x1662bc, 1, RI_ALL_ONLINE },
+ { 0x166000, 164, RI_ALL_ONLINE }, { 0x1662cc, 7, RI_E2_ONLINE },
{ 0x166400, 49, RI_ALL_ONLINE }, { 0x1664c8, 38, RI_ALL_ONLINE },
- { 0x166568, 2, RI_ALL_ONLINE }, { 0x166800, 1, RI_ALL_ONLINE },
- { 0x168000, 270, RI_ALL_ONLINE }, { 0x168444, 1, RI_ALL_ONLINE },
- { 0x168454, 1, RI_ALL_ONLINE }, { 0x168800, 19, RI_ALL_ONLINE },
- { 0x168900, 1, RI_ALL_ONLINE }, { 0x168a00, 128, RI_ALL_ONLINE },
- { 0x16a000, 1, RI_ALL_ONLINE }, { 0x16a004, 1535, RI_ALL_OFFLINE },
- { 0x16c000, 1, RI_ALL_ONLINE }, { 0x16c004, 1535, RI_ALL_OFFLINE },
- { 0x16e000, 16, RI_E1H_ONLINE }, { 0x16e100, 1, RI_E1H_ONLINE },
- { 0x16e200, 2, RI_E1H_ONLINE }, { 0x16e400, 183, RI_E1H_ONLINE },
- { 0x170000, 93, RI_ALL_ONLINE }, { 0x170180, 1, RI_ALL_ONLINE },
- { 0x170190, 1, RI_ALL_ONLINE }, { 0x170200, 4, RI_ALL_ONLINE },
- { 0x170214, 1, RI_ALL_ONLINE }, { 0x178000, 1, RI_ALL_ONLINE },
- { 0x180000, 61, RI_ALL_ONLINE }, { 0x180100, 1, RI_ALL_ONLINE },
- { 0x180110, 1, RI_ALL_ONLINE }, { 0x180120, 1, RI_ALL_ONLINE },
- { 0x180130, 1, RI_ALL_ONLINE }, { 0x18013c, 2, RI_E1H_ONLINE },
- { 0x180200, 58, RI_ALL_ONLINE }, { 0x180340, 4, RI_ALL_ONLINE },
- { 0x180400, 1, RI_ALL_ONLINE }, { 0x180404, 255, RI_ALL_OFFLINE },
+ { 0x166568, 2, RI_ALL_ONLINE }, { 0x166570, 5, RI_E2_ONLINE },
+ { 0x166800, 1, RI_ALL_ONLINE }, { 0x168000, 137, RI_ALL_ONLINE },
+ { 0x168224, 2, RI_E1E1H_ONLINE }, { 0x16822c, 29, RI_ALL_ONLINE },
+ { 0x1682a0, 12, RI_E1E1H_ONLINE }, { 0x1682d0, 12, RI_ALL_ONLINE },
+ { 0x168300, 2, RI_E1E1H_ONLINE }, { 0x168308, 68, RI_ALL_ONLINE },
+ { 0x168418, 2, RI_E1E1H_ONLINE }, { 0x168420, 6, RI_ALL_ONLINE },
+ { 0x168800, 19, RI_ALL_ONLINE }, { 0x168900, 1, RI_ALL_ONLINE },
+ { 0x168a00, 128, RI_ALL_ONLINE }, { 0x16a000, 1, RI_ALL_ONLINE },
+ { 0x16a004, 1535, RI_ALL_OFFLINE }, { 0x16c000, 1, RI_ALL_ONLINE },
+ { 0x16c004, 1535, RI_ALL_OFFLINE }, { 0x16e000, 16, RI_E1H_ONLINE },
+ { 0x16e040, 8, RI_E2_ONLINE }, { 0x16e100, 1, RI_E1H_ONLINE },
+ { 0x16e200, 2, RI_E1H_ONLINE }, { 0x16e400, 161, RI_E1H_ONLINE },
+ { 0x16e684, 2, RI_E1HE2_ONLINE }, { 0x16e68c, 12, RI_E1H_ONLINE },
+ { 0x16e6bc, 4, RI_E1HE2_ONLINE }, { 0x16e6cc, 4, RI_E1H_ONLINE },
+ { 0x16e6e0, 12, RI_E2_ONLINE }, { 0x16e768, 17, RI_E2_ONLINE },
+ { 0x170000, 24, RI_ALL_ONLINE }, { 0x170060, 4, RI_E1E1H_ONLINE },
+ { 0x170070, 65, RI_ALL_ONLINE }, { 0x170194, 11, RI_E2_ONLINE },
+ { 0x1701c4, 1, RI_E2_ONLINE }, { 0x1701cc, 7, RI_E2_ONLINE },
+ { 0x1701ec, 1, RI_E2_ONLINE }, { 0x1701f4, 1, RI_E2_ONLINE },
+ { 0x170200, 4, RI_ALL_ONLINE }, { 0x170214, 1, RI_ALL_ONLINE },
+ { 0x170218, 77, RI_E2_ONLINE }, { 0x170400, 64, RI_E2_ONLINE },
+ { 0x178000, 1, RI_ALL_ONLINE }, { 0x180000, 61, RI_ALL_ONLINE },
+ { 0x18013c, 2, RI_E1HE2_ONLINE }, { 0x180200, 58, RI_ALL_ONLINE },
+ { 0x180340, 4, RI_ALL_ONLINE }, { 0x180380, 1, RI_E2_ONLINE },
+ { 0x180388, 1, RI_E2_ONLINE }, { 0x180390, 1, RI_E2_ONLINE },
+ { 0x180398, 1, RI_E2_ONLINE }, { 0x1803a0, 5, RI_E2_ONLINE },
+ { 0x180400, 1, RI_ALL_ONLINE }, { 0x180404, 255, RI_E1E1H_OFFLINE },
{ 0x181000, 4, RI_ALL_ONLINE }, { 0x181010, 1020, RI_ALL_OFFLINE },
- { 0x1a0000, 1, RI_ALL_ONLINE }, { 0x1a0004, 1023, RI_ALL_OFFLINE },
- { 0x1a1000, 1, RI_ALL_ONLINE }, { 0x1a1004, 4607, RI_ALL_OFFLINE },
- { 0x1a5800, 2560, RI_E1H_OFFLINE }, { 0x1a8000, 64, RI_ALL_OFFLINE },
- { 0x1a8100, 1984, RI_E1H_OFFLINE }, { 0x1aa000, 1, RI_E1H_ONLINE },
- { 0x1aa004, 6655, RI_E1H_OFFLINE }, { 0x1b1800, 128, RI_ALL_OFFLINE },
- { 0x1b1c00, 128, RI_ALL_OFFLINE }, { 0x1b2000, 1, RI_ALL_OFFLINE },
- { 0x1b2400, 64, RI_E1H_OFFLINE }, { 0x1b8200, 1, RI_ALL_ONLINE },
+ { 0x1a0000, 1, RI_ALL_ONLINE }, { 0x1a0004, 5631, RI_ALL_OFFLINE },
+ { 0x1a5800, 2560, RI_E1HE2_OFFLINE }, { 0x1a8000, 1, RI_ALL_ONLINE },
+ { 0x1a8004, 8191, RI_E1HE2_OFFLINE }, { 0x1b0000, 1, RI_ALL_ONLINE },
+ { 0x1b0004, 15, RI_E1H_OFFLINE }, { 0x1b0040, 1, RI_E1HE2_ONLINE },
+ { 0x1b0044, 239, RI_E1H_OFFLINE }, { 0x1b0400, 1, RI_ALL_ONLINE },
+ { 0x1b0404, 255, RI_E1H_OFFLINE }, { 0x1b0800, 1, RI_ALL_ONLINE },
+ { 0x1b0840, 1, RI_E1HE2_ONLINE }, { 0x1b0c00, 1, RI_ALL_ONLINE },
+ { 0x1b1000, 1, RI_ALL_ONLINE }, { 0x1b1040, 1, RI_E1HE2_ONLINE },
+ { 0x1b1400, 1, RI_ALL_ONLINE }, { 0x1b1440, 1, RI_E1HE2_ONLINE },
+ { 0x1b1480, 1, RI_E1HE2_ONLINE }, { 0x1b14c0, 1, RI_E1HE2_ONLINE },
+ { 0x1b1800, 128, RI_ALL_OFFLINE }, { 0x1b1c00, 128, RI_ALL_OFFLINE },
+ { 0x1b2000, 1, RI_ALL_ONLINE }, { 0x1b2400, 1, RI_E1HE2_ONLINE },
+ { 0x1b2404, 5631, RI_E2_OFFLINE }, { 0x1b8000, 1, RI_ALL_ONLINE },
+ { 0x1b8040, 1, RI_ALL_ONLINE }, { 0x1b8080, 1, RI_ALL_ONLINE },
+ { 0x1b80c0, 1, RI_ALL_ONLINE }, { 0x1b8100, 1, RI_ALL_ONLINE },
+ { 0x1b8140, 1, RI_ALL_ONLINE }, { 0x1b8180, 1, RI_ALL_ONLINE },
+ { 0x1b81c0, 1, RI_ALL_ONLINE }, { 0x1b8200, 1, RI_ALL_ONLINE },
{ 0x1b8240, 1, RI_ALL_ONLINE }, { 0x1b8280, 1, RI_ALL_ONLINE },
- { 0x1b82c0, 1, RI_ALL_ONLINE }, { 0x1b8a00, 1, RI_ALL_ONLINE },
- { 0x1b8a80, 1, RI_ALL_ONLINE }, { 0x1c0000, 2, RI_ALL_ONLINE },
- { 0x200000, 65, RI_ALL_ONLINE }, { 0x200110, 1, RI_ALL_ONLINE },
- { 0x200120, 1, RI_ALL_ONLINE }, { 0x200130, 1, RI_ALL_ONLINE },
- { 0x200140, 1, RI_ALL_ONLINE }, { 0x20014c, 2, RI_E1H_ONLINE },
- { 0x200200, 58, RI_ALL_ONLINE }, { 0x200340, 4, RI_ALL_ONLINE },
- { 0x200400, 1, RI_ALL_ONLINE }, { 0x200404, 255, RI_ALL_OFFLINE },
- { 0x202000, 4, RI_ALL_ONLINE }, { 0x202010, 2044, RI_ALL_OFFLINE },
- { 0x220000, 1, RI_ALL_ONLINE }, { 0x220004, 1023, RI_ALL_OFFLINE },
- { 0x221000, 1, RI_ALL_ONLINE }, { 0x221004, 4607, RI_ALL_OFFLINE },
- { 0x225800, 1536, RI_E1H_OFFLINE }, { 0x227000, 1, RI_E1H_ONLINE },
- { 0x227004, 1023, RI_E1H_OFFLINE }, { 0x228000, 64, RI_ALL_OFFLINE },
- { 0x228100, 8640, RI_E1H_OFFLINE }, { 0x231800, 128, RI_ALL_OFFLINE },
- { 0x231c00, 128, RI_ALL_OFFLINE }, { 0x232000, 1, RI_ALL_OFFLINE },
- { 0x232400, 64, RI_E1H_OFFLINE }, { 0x238200, 1, RI_ALL_ONLINE },
- { 0x238240, 1, RI_ALL_ONLINE }, { 0x238280, 1, RI_ALL_ONLINE },
- { 0x2382c0, 1, RI_ALL_ONLINE }, { 0x238a00, 1, RI_ALL_ONLINE },
- { 0x238a80, 1, RI_ALL_ONLINE }, { 0x240000, 2, RI_ALL_ONLINE },
- { 0x280000, 65, RI_ALL_ONLINE }, { 0x280110, 1, RI_ALL_ONLINE },
- { 0x280120, 1, RI_ALL_ONLINE }, { 0x280130, 1, RI_ALL_ONLINE },
- { 0x280140, 1, RI_ALL_ONLINE }, { 0x28014c, 2, RI_E1H_ONLINE },
- { 0x280200, 58, RI_ALL_ONLINE }, { 0x280340, 4, RI_ALL_ONLINE },
- { 0x280400, 1, RI_ALL_ONLINE }, { 0x280404, 255, RI_ALL_OFFLINE },
- { 0x282000, 4, RI_ALL_ONLINE }, { 0x282010, 2044, RI_ALL_OFFLINE },
- { 0x2a0000, 1, RI_ALL_ONLINE }, { 0x2a0004, 1023, RI_ALL_OFFLINE },
- { 0x2a1000, 1, RI_ALL_ONLINE }, { 0x2a1004, 4607, RI_ALL_OFFLINE },
- { 0x2a5800, 2560, RI_E1H_OFFLINE }, { 0x2a8000, 64, RI_ALL_OFFLINE },
- { 0x2a8100, 960, RI_E1H_OFFLINE }, { 0x2a9000, 1, RI_E1H_ONLINE },
- { 0x2a9004, 7679, RI_E1H_OFFLINE }, { 0x2b1800, 128, RI_ALL_OFFLINE },
- { 0x2b1c00, 128, RI_ALL_OFFLINE }, { 0x2b2000, 1, RI_ALL_OFFLINE },
- { 0x2b2400, 64, RI_E1H_OFFLINE }, { 0x2b8200, 1, RI_ALL_ONLINE },
- { 0x2b8240, 1, RI_ALL_ONLINE }, { 0x2b8280, 1, RI_ALL_ONLINE },
- { 0x2b82c0, 1, RI_ALL_ONLINE }, { 0x2b8a00, 1, RI_ALL_ONLINE },
- { 0x2b8a80, 1, RI_ALL_ONLINE }, { 0x2c0000, 2, RI_ALL_ONLINE },
- { 0x300000, 65, RI_ALL_ONLINE }, { 0x300110, 1, RI_ALL_ONLINE },
- { 0x300120, 1, RI_ALL_ONLINE }, { 0x300130, 1, RI_ALL_ONLINE },
- { 0x300140, 1, RI_ALL_ONLINE }, { 0x30014c, 2, RI_E1H_ONLINE },
+ { 0x1b82c0, 1, RI_ALL_ONLINE }, { 0x1b8300, 1, RI_ALL_ONLINE },
+ { 0x1b8340, 1, RI_ALL_ONLINE }, { 0x1b8380, 1, RI_ALL_ONLINE },
+ { 0x1b83c0, 1, RI_ALL_ONLINE }, { 0x1b8400, 1, RI_ALL_ONLINE },
+ { 0x1b8440, 1, RI_ALL_ONLINE }, { 0x1b8480, 1, RI_ALL_ONLINE },
+ { 0x1b84c0, 1, RI_ALL_ONLINE }, { 0x1b8500, 1, RI_ALL_ONLINE },
+ { 0x1b8540, 1, RI_ALL_ONLINE }, { 0x1b8580, 1, RI_ALL_ONLINE },
+ { 0x1b85c0, 19, RI_E2_ONLINE }, { 0x1b8800, 1, RI_ALL_ONLINE },
+ { 0x1b8840, 1, RI_ALL_ONLINE }, { 0x1b8880, 1, RI_ALL_ONLINE },
+ { 0x1b88c0, 1, RI_ALL_ONLINE }, { 0x1b8900, 1, RI_ALL_ONLINE },
+ { 0x1b8940, 1, RI_ALL_ONLINE }, { 0x1b8980, 1, RI_ALL_ONLINE },
+ { 0x1b89c0, 1, RI_ALL_ONLINE }, { 0x1b8a00, 1, RI_ALL_ONLINE },
+ { 0x1b8a40, 1, RI_ALL_ONLINE }, { 0x1b8a80, 1, RI_ALL_ONLINE },
+ { 0x1b8ac0, 1, RI_ALL_ONLINE }, { 0x1b8b00, 1, RI_ALL_ONLINE },
+ { 0x1b8b40, 1, RI_ALL_ONLINE }, { 0x1b8b80, 1, RI_ALL_ONLINE },
+ { 0x1b8bc0, 1, RI_ALL_ONLINE }, { 0x1b8c00, 1, RI_ALL_ONLINE },
+ { 0x1b8c40, 1, RI_ALL_ONLINE }, { 0x1b8c80, 1, RI_ALL_ONLINE },
+ { 0x1b8cc0, 1, RI_ALL_ONLINE }, { 0x1b8cc4, 1, RI_E2_ONLINE },
+ { 0x1b8d00, 1, RI_ALL_ONLINE }, { 0x1b8d40, 1, RI_ALL_ONLINE },
+ { 0x1b8d80, 1, RI_ALL_ONLINE }, { 0x1b8dc0, 1, RI_ALL_ONLINE },
+ { 0x1b8e00, 1, RI_ALL_ONLINE }, { 0x1b8e40, 1, RI_ALL_ONLINE },
+ { 0x1b8e80, 1, RI_ALL_ONLINE }, { 0x1b8e84, 1, RI_E2_ONLINE },
+ { 0x1b8ec0, 1, RI_E1HE2_ONLINE }, { 0x1b8f00, 1, RI_E1HE2_ONLINE },
+ { 0x1b8f40, 1, RI_E1HE2_ONLINE }, { 0x1b8f80, 1, RI_E1HE2_ONLINE },
+ { 0x1b8fc0, 1, RI_E1HE2_ONLINE }, { 0x1b8fc4, 2, RI_E2_ONLINE },
+ { 0x1b8fd0, 6, RI_E2_ONLINE }, { 0x1b9000, 1, RI_E2_ONLINE },
+ { 0x1b9040, 3, RI_E2_ONLINE }, { 0x1b9400, 14, RI_E2_ONLINE },
+ { 0x1b943c, 19, RI_E2_ONLINE }, { 0x1b9490, 10, RI_E2_ONLINE },
+ { 0x1c0000, 2, RI_ALL_ONLINE }, { 0x200000, 65, RI_ALL_ONLINE },
+ { 0x20014c, 2, RI_E1HE2_ONLINE }, { 0x200200, 58, RI_ALL_ONLINE },
+ { 0x200340, 4, RI_ALL_ONLINE }, { 0x200380, 1, RI_E2_ONLINE },
+ { 0x200388, 1, RI_E2_ONLINE }, { 0x200390, 1, RI_E2_ONLINE },
+ { 0x200398, 1, RI_E2_ONLINE }, { 0x2003a0, 1, RI_E2_ONLINE },
+ { 0x2003a8, 2, RI_E2_ONLINE }, { 0x200400, 1, RI_ALL_ONLINE },
+ { 0x200404, 255, RI_E1E1H_OFFLINE }, { 0x202000, 4, RI_ALL_ONLINE },
+ { 0x202010, 2044, RI_ALL_OFFLINE }, { 0x220000, 1, RI_ALL_ONLINE },
+ { 0x220004, 5631, RI_ALL_OFFLINE }, { 0x225800, 2560, RI_E1HE2_OFFLINE},
+ { 0x228000, 1, RI_ALL_ONLINE }, { 0x228004, 8191, RI_E1HE2_OFFLINE },
+ { 0x230000, 1, RI_ALL_ONLINE }, { 0x230004, 15, RI_E1H_OFFLINE },
+ { 0x230040, 1, RI_E1HE2_ONLINE }, { 0x230044, 239, RI_E1H_OFFLINE },
+ { 0x230400, 1, RI_ALL_ONLINE }, { 0x230404, 255, RI_E1H_OFFLINE },
+ { 0x230800, 1, RI_ALL_ONLINE }, { 0x230840, 1, RI_E1HE2_ONLINE },
+ { 0x230c00, 1, RI_ALL_ONLINE }, { 0x231000, 1, RI_ALL_ONLINE },
+ { 0x231040, 1, RI_E1HE2_ONLINE }, { 0x231400, 1, RI_ALL_ONLINE },
+ { 0x231440, 1, RI_E1HE2_ONLINE }, { 0x231480, 1, RI_E1HE2_ONLINE },
+ { 0x2314c0, 1, RI_E1HE2_ONLINE }, { 0x231800, 128, RI_ALL_OFFLINE },
+ { 0x231c00, 128, RI_ALL_OFFLINE }, { 0x232000, 1, RI_ALL_ONLINE },
+ { 0x232400, 1, RI_E1HE2_ONLINE }, { 0x232404, 5631, RI_E2_OFFLINE },
+ { 0x238000, 1, RI_ALL_ONLINE }, { 0x238040, 1, RI_ALL_ONLINE },
+ { 0x238080, 1, RI_ALL_ONLINE }, { 0x2380c0, 1, RI_ALL_ONLINE },
+ { 0x238100, 1, RI_ALL_ONLINE }, { 0x238140, 1, RI_ALL_ONLINE },
+ { 0x238180, 1, RI_ALL_ONLINE }, { 0x2381c0, 1, RI_ALL_ONLINE },
+ { 0x238200, 1, RI_ALL_ONLINE }, { 0x238240, 1, RI_ALL_ONLINE },
+ { 0x238280, 1, RI_ALL_ONLINE }, { 0x2382c0, 1, RI_ALL_ONLINE },
+ { 0x238300, 1, RI_ALL_ONLINE }, { 0x238340, 1, RI_ALL_ONLINE },
+ { 0x238380, 1, RI_ALL_ONLINE }, { 0x2383c0, 1, RI_ALL_ONLINE },
+ { 0x238400, 1, RI_ALL_ONLINE }, { 0x238440, 1, RI_ALL_ONLINE },
+ { 0x238480, 1, RI_ALL_ONLINE }, { 0x2384c0, 1, RI_ALL_ONLINE },
+ { 0x238500, 1, RI_ALL_ONLINE }, { 0x238540, 1, RI_ALL_ONLINE },
+ { 0x238580, 1, RI_ALL_ONLINE }, { 0x2385c0, 19, RI_E2_ONLINE },
+ { 0x238800, 1, RI_ALL_ONLINE }, { 0x238840, 1, RI_ALL_ONLINE },
+ { 0x238880, 1, RI_ALL_ONLINE }, { 0x2388c0, 1, RI_ALL_ONLINE },
+ { 0x238900, 1, RI_ALL_ONLINE }, { 0x238940, 1, RI_ALL_ONLINE },
+ { 0x238980, 1, RI_ALL_ONLINE }, { 0x2389c0, 1, RI_ALL_ONLINE },
+ { 0x238a00, 1, RI_ALL_ONLINE }, { 0x238a40, 1, RI_ALL_ONLINE },
+ { 0x238a80, 1, RI_ALL_ONLINE }, { 0x238ac0, 1, RI_ALL_ONLINE },
+ { 0x238b00, 1, RI_ALL_ONLINE }, { 0x238b40, 1, RI_ALL_ONLINE },
+ { 0x238b80, 1, RI_ALL_ONLINE }, { 0x238bc0, 1, RI_ALL_ONLINE },
+ { 0x238c00, 1, RI_ALL_ONLINE }, { 0x238c40, 1, RI_ALL_ONLINE },
+ { 0x238c80, 1, RI_ALL_ONLINE }, { 0x238cc0, 1, RI_ALL_ONLINE },
+ { 0x238cc4, 1, RI_E2_ONLINE }, { 0x238d00, 1, RI_ALL_ONLINE },
+ { 0x238d40, 1, RI_ALL_ONLINE }, { 0x238d80, 1, RI_ALL_ONLINE },
+ { 0x238dc0, 1, RI_ALL_ONLINE }, { 0x238e00, 1, RI_ALL_ONLINE },
+ { 0x238e40, 1, RI_ALL_ONLINE }, { 0x238e80, 1, RI_ALL_ONLINE },
+ { 0x238e84, 1, RI_E2_ONLINE }, { 0x238ec0, 1, RI_E1HE2_ONLINE },
+ { 0x238f00, 1, RI_E1HE2_ONLINE }, { 0x238f40, 1, RI_E1HE2_ONLINE },
+ { 0x238f80, 1, RI_E1HE2_ONLINE }, { 0x238fc0, 1, RI_E1HE2_ONLINE },
+ { 0x238fc4, 2, RI_E2_ONLINE }, { 0x238fd0, 6, RI_E2_ONLINE },
+ { 0x239000, 1, RI_E2_ONLINE }, { 0x239040, 3, RI_E2_ONLINE },
+ { 0x240000, 2, RI_ALL_ONLINE }, { 0x280000, 65, RI_ALL_ONLINE },
+ { 0x28014c, 2, RI_E1HE2_ONLINE }, { 0x280200, 58, RI_ALL_ONLINE },
+ { 0x280340, 4, RI_ALL_ONLINE }, { 0x280380, 1, RI_E2_ONLINE },
+ { 0x280388, 1, RI_E2_ONLINE }, { 0x280390, 1, RI_E2_ONLINE },
+ { 0x280398, 1, RI_E2_ONLINE }, { 0x2803a0, 1, RI_E2_ONLINE },
+ { 0x2803a8, 2, RI_E2_ONLINE }, { 0x280400, 1, RI_ALL_ONLINE },
+ { 0x280404, 255, RI_E1E1H_OFFLINE }, { 0x282000, 4, RI_ALL_ONLINE },
+ { 0x282010, 2044, RI_ALL_OFFLINE }, { 0x2a0000, 1, RI_ALL_ONLINE },
+ { 0x2a0004, 5631, RI_ALL_OFFLINE }, { 0x2a5800, 2560, RI_E1HE2_OFFLINE},
+ { 0x2a8000, 1, RI_ALL_ONLINE }, { 0x2a8004, 8191, RI_E1HE2_OFFLINE },
+ { 0x2b0000, 1, RI_ALL_ONLINE }, { 0x2b0004, 15, RI_E1H_OFFLINE },
+ { 0x2b0040, 1, RI_E1HE2_ONLINE }, { 0x2b0044, 239, RI_E1H_OFFLINE },
+ { 0x2b0400, 1, RI_ALL_ONLINE }, { 0x2b0404, 255, RI_E1H_OFFLINE },
+ { 0x2b0800, 1, RI_ALL_ONLINE }, { 0x2b0840, 1, RI_E1HE2_ONLINE },
+ { 0x2b0c00, 1, RI_ALL_ONLINE }, { 0x2b1000, 1, RI_ALL_ONLINE },
+ { 0x2b1040, 1, RI_E1HE2_ONLINE }, { 0x2b1400, 1, RI_ALL_ONLINE },
+ { 0x2b1440, 1, RI_E1HE2_ONLINE }, { 0x2b1480, 1, RI_E1HE2_ONLINE },
+ { 0x2b14c0, 1, RI_E1HE2_ONLINE }, { 0x2b1800, 128, RI_ALL_OFFLINE },
+ { 0x2b1c00, 128, RI_ALL_OFFLINE }, { 0x2b2000, 1, RI_ALL_ONLINE },
+ { 0x2b2400, 1, RI_E1HE2_ONLINE }, { 0x2b2404, 5631, RI_E2_OFFLINE },
+ { 0x2b8000, 1, RI_ALL_ONLINE }, { 0x2b8040, 1, RI_ALL_ONLINE },
+ { 0x2b8080, 1, RI_ALL_ONLINE }, { 0x2b80c0, 1, RI_ALL_ONLINE },
+ { 0x2b8100, 1, RI_ALL_ONLINE }, { 0x2b8140, 1, RI_ALL_ONLINE },
+ { 0x2b8180, 1, RI_ALL_ONLINE }, { 0x2b81c0, 1, RI_ALL_ONLINE },
+ { 0x2b8200, 1, RI_ALL_ONLINE }, { 0x2b8240, 1, RI_ALL_ONLINE },
+ { 0x2b8280, 1, RI_ALL_ONLINE }, { 0x2b82c0, 1, RI_ALL_ONLINE },
+ { 0x2b8300, 1, RI_ALL_ONLINE }, { 0x2b8340, 1, RI_ALL_ONLINE },
+ { 0x2b8380, 1, RI_ALL_ONLINE }, { 0x2b83c0, 1, RI_ALL_ONLINE },
+ { 0x2b8400, 1, RI_ALL_ONLINE }, { 0x2b8440, 1, RI_ALL_ONLINE },
+ { 0x2b8480, 1, RI_ALL_ONLINE }, { 0x2b84c0, 1, RI_ALL_ONLINE },
+ { 0x2b8500, 1, RI_ALL_ONLINE }, { 0x2b8540, 1, RI_ALL_ONLINE },
+ { 0x2b8580, 1, RI_ALL_ONLINE }, { 0x2b85c0, 19, RI_E2_ONLINE },
+ { 0x2b8800, 1, RI_ALL_ONLINE }, { 0x2b8840, 1, RI_ALL_ONLINE },
+ { 0x2b8880, 1, RI_ALL_ONLINE }, { 0x2b88c0, 1, RI_ALL_ONLINE },
+ { 0x2b8900, 1, RI_ALL_ONLINE }, { 0x2b8940, 1, RI_ALL_ONLINE },
+ { 0x2b8980, 1, RI_ALL_ONLINE }, { 0x2b89c0, 1, RI_ALL_ONLINE },
+ { 0x2b8a00, 1, RI_ALL_ONLINE }, { 0x2b8a40, 1, RI_ALL_ONLINE },
+ { 0x2b8a80, 1, RI_ALL_ONLINE }, { 0x2b8ac0, 1, RI_ALL_ONLINE },
+ { 0x2b8b00, 1, RI_ALL_ONLINE }, { 0x2b8b40, 1, RI_ALL_ONLINE },
+ { 0x2b8b80, 1, RI_ALL_ONLINE }, { 0x2b8bc0, 1, RI_ALL_ONLINE },
+ { 0x2b8c00, 1, RI_ALL_ONLINE }, { 0x2b8c40, 1, RI_ALL_ONLINE },
+ { 0x2b8c80, 1, RI_ALL_ONLINE }, { 0x2b8cc0, 1, RI_ALL_ONLINE },
+ { 0x2b8cc4, 1, RI_E2_ONLINE }, { 0x2b8d00, 1, RI_ALL_ONLINE },
+ { 0x2b8d40, 1, RI_ALL_ONLINE }, { 0x2b8d80, 1, RI_ALL_ONLINE },
+ { 0x2b8dc0, 1, RI_ALL_ONLINE }, { 0x2b8e00, 1, RI_ALL_ONLINE },
+ { 0x2b8e40, 1, RI_ALL_ONLINE }, { 0x2b8e80, 1, RI_ALL_ONLINE },
+ { 0x2b8e84, 1, RI_E2_ONLINE }, { 0x2b8ec0, 1, RI_E1HE2_ONLINE },
+ { 0x2b8f00, 1, RI_E1HE2_ONLINE }, { 0x2b8f40, 1, RI_E1HE2_ONLINE },
+ { 0x2b8f80, 1, RI_E1HE2_ONLINE }, { 0x2b8fc0, 1, RI_E1HE2_ONLINE },
+ { 0x2b8fc4, 2, RI_E2_ONLINE }, { 0x2b8fd0, 6, RI_E2_ONLINE },
+ { 0x2b9000, 1, RI_E2_ONLINE }, { 0x2b9040, 3, RI_E2_ONLINE },
+ { 0x2b9400, 14, RI_E2_ONLINE }, { 0x2b943c, 19, RI_E2_ONLINE },
+ { 0x2b9490, 10, RI_E2_ONLINE }, { 0x2c0000, 2, RI_ALL_ONLINE },
+ { 0x300000, 65, RI_ALL_ONLINE }, { 0x30014c, 2, RI_E1HE2_ONLINE },
{ 0x300200, 58, RI_ALL_ONLINE }, { 0x300340, 4, RI_ALL_ONLINE },
- { 0x300400, 1, RI_ALL_ONLINE }, { 0x300404, 255, RI_ALL_OFFLINE },
+ { 0x300380, 1, RI_E2_ONLINE }, { 0x300388, 1, RI_E2_ONLINE },
+ { 0x300390, 1, RI_E2_ONLINE }, { 0x300398, 1, RI_E2_ONLINE },
+ { 0x3003a0, 1, RI_E2_ONLINE }, { 0x3003a8, 2, RI_E2_ONLINE },
+ { 0x300400, 1, RI_ALL_ONLINE }, { 0x300404, 255, RI_E1E1H_OFFLINE },
{ 0x302000, 4, RI_ALL_ONLINE }, { 0x302010, 2044, RI_ALL_OFFLINE },
- { 0x320000, 1, RI_ALL_ONLINE }, { 0x320004, 1023, RI_ALL_OFFLINE },
- { 0x321000, 1, RI_ALL_ONLINE }, { 0x321004, 4607, RI_ALL_OFFLINE },
- { 0x325800, 2560, RI_E1H_OFFLINE }, { 0x328000, 64, RI_ALL_OFFLINE },
- { 0x328100, 536, RI_E1H_OFFLINE }, { 0x328960, 1, RI_E1H_ONLINE },
- { 0x328964, 8103, RI_E1H_OFFLINE }, { 0x331800, 128, RI_ALL_OFFLINE },
- { 0x331c00, 128, RI_ALL_OFFLINE }, { 0x332000, 1, RI_ALL_OFFLINE },
- { 0x332400, 64, RI_E1H_OFFLINE }, { 0x338200, 1, RI_ALL_ONLINE },
+ { 0x320000, 1, RI_ALL_ONLINE }, { 0x320004, 5631, RI_ALL_OFFLINE },
+ { 0x325800, 2560, RI_E1HE2_OFFLINE }, { 0x328000, 1, RI_ALL_ONLINE },
+ { 0x328004, 8191, RI_E1HE2_OFFLINE }, { 0x330000, 1, RI_ALL_ONLINE },
+ { 0x330004, 15, RI_E1H_OFFLINE }, { 0x330040, 1, RI_E1HE2_ONLINE },
+ { 0x330044, 239, RI_E1H_OFFLINE }, { 0x330400, 1, RI_ALL_ONLINE },
+ { 0x330404, 255, RI_E1H_OFFLINE }, { 0x330800, 1, RI_ALL_ONLINE },
+ { 0x330840, 1, RI_E1HE2_ONLINE }, { 0x330c00, 1, RI_ALL_ONLINE },
+ { 0x331000, 1, RI_ALL_ONLINE }, { 0x331040, 1, RI_E1HE2_ONLINE },
+ { 0x331400, 1, RI_ALL_ONLINE }, { 0x331440, 1, RI_E1HE2_ONLINE },
+ { 0x331480, 1, RI_E1HE2_ONLINE }, { 0x3314c0, 1, RI_E1HE2_ONLINE },
+ { 0x331800, 128, RI_ALL_OFFLINE }, { 0x331c00, 128, RI_ALL_OFFLINE },
+ { 0x332000, 1, RI_ALL_ONLINE }, { 0x332400, 1, RI_E1HE2_ONLINE },
+ { 0x332404, 5631, RI_E2_OFFLINE }, { 0x338000, 1, RI_ALL_ONLINE },
+ { 0x338040, 1, RI_ALL_ONLINE }, { 0x338080, 1, RI_ALL_ONLINE },
+ { 0x3380c0, 1, RI_ALL_ONLINE }, { 0x338100, 1, RI_ALL_ONLINE },
+ { 0x338140, 1, RI_ALL_ONLINE }, { 0x338180, 1, RI_ALL_ONLINE },
+ { 0x3381c0, 1, RI_ALL_ONLINE }, { 0x338200, 1, RI_ALL_ONLINE },
{ 0x338240, 1, RI_ALL_ONLINE }, { 0x338280, 1, RI_ALL_ONLINE },
- { 0x3382c0, 1, RI_ALL_ONLINE }, { 0x338a00, 1, RI_ALL_ONLINE },
- { 0x338a80, 1, RI_ALL_ONLINE }, { 0x340000, 2, RI_ALL_ONLINE }
+ { 0x3382c0, 1, RI_ALL_ONLINE }, { 0x338300, 1, RI_ALL_ONLINE },
+ { 0x338340, 1, RI_ALL_ONLINE }, { 0x338380, 1, RI_ALL_ONLINE },
+ { 0x3383c0, 1, RI_ALL_ONLINE }, { 0x338400, 1, RI_ALL_ONLINE },
+ { 0x338440, 1, RI_ALL_ONLINE }, { 0x338480, 1, RI_ALL_ONLINE },
+ { 0x3384c0, 1, RI_ALL_ONLINE }, { 0x338500, 1, RI_ALL_ONLINE },
+ { 0x338540, 1, RI_ALL_ONLINE }, { 0x338580, 1, RI_ALL_ONLINE },
+ { 0x3385c0, 19, RI_E2_ONLINE }, { 0x338800, 1, RI_ALL_ONLINE },
+ { 0x338840, 1, RI_ALL_ONLINE }, { 0x338880, 1, RI_ALL_ONLINE },
+ { 0x3388c0, 1, RI_ALL_ONLINE }, { 0x338900, 1, RI_ALL_ONLINE },
+ { 0x338940, 1, RI_ALL_ONLINE }, { 0x338980, 1, RI_ALL_ONLINE },
+ { 0x3389c0, 1, RI_ALL_ONLINE }, { 0x338a00, 1, RI_ALL_ONLINE },
+ { 0x338a40, 1, RI_ALL_ONLINE }, { 0x338a80, 1, RI_ALL_ONLINE },
+ { 0x338ac0, 1, RI_ALL_ONLINE }, { 0x338b00, 1, RI_ALL_ONLINE },
+ { 0x338b40, 1, RI_ALL_ONLINE }, { 0x338b80, 1, RI_ALL_ONLINE },
+ { 0x338bc0, 1, RI_ALL_ONLINE }, { 0x338c00, 1, RI_ALL_ONLINE },
+ { 0x338c40, 1, RI_ALL_ONLINE }, { 0x338c80, 1, RI_ALL_ONLINE },
+ { 0x338cc0, 1, RI_ALL_ONLINE }, { 0x338cc4, 1, RI_E2_ONLINE },
+ { 0x338d00, 1, RI_ALL_ONLINE }, { 0x338d40, 1, RI_ALL_ONLINE },
+ { 0x338d80, 1, RI_ALL_ONLINE }, { 0x338dc0, 1, RI_ALL_ONLINE },
+ { 0x338e00, 1, RI_ALL_ONLINE }, { 0x338e40, 1, RI_ALL_ONLINE },
+ { 0x338e80, 1, RI_ALL_ONLINE }, { 0x338e84, 1, RI_E2_ONLINE },
+ { 0x338ec0, 1, RI_E1HE2_ONLINE }, { 0x338f00, 1, RI_E1HE2_ONLINE },
+ { 0x338f40, 1, RI_E1HE2_ONLINE }, { 0x338f80, 1, RI_E1HE2_ONLINE },
+ { 0x338fc0, 1, RI_E1HE2_ONLINE }, { 0x338fc4, 2, RI_E2_ONLINE },
+ { 0x338fd0, 6, RI_E2_ONLINE }, { 0x339000, 1, RI_E2_ONLINE },
+ { 0x339040, 3, RI_E2_ONLINE }, { 0x340000, 2, RI_ALL_ONLINE },
};
-
-#define IDLE_REGS_COUNT 277
+#define IDLE_REGS_COUNT 237
static const struct reg_addr idle_addrs[IDLE_REGS_COUNT] = {
- { 0x2114, 1, RI_ALL_ONLINE }, { 0x2120, 1, RI_ALL_ONLINE },
- { 0x212c, 4, RI_ALL_ONLINE }, { 0x2814, 1, RI_ALL_ONLINE },
- { 0x281c, 2, RI_ALL_ONLINE }, { 0xa38c, 1, RI_ALL_ONLINE },
+ { 0x2104, 1, RI_ALL_ONLINE }, { 0x2110, 2, RI_ALL_ONLINE },
+ { 0x211c, 8, RI_ALL_ONLINE }, { 0x2814, 1, RI_ALL_ONLINE },
+ { 0x281c, 2, RI_ALL_ONLINE }, { 0x2854, 1, RI_ALL_ONLINE },
+ { 0x285c, 1, RI_ALL_ONLINE }, { 0x9010, 7, RI_E2_ONLINE },
+ { 0x9030, 1, RI_E2_ONLINE }, { 0x9068, 16, RI_E2_ONLINE },
+ { 0x9230, 2, RI_E2_ONLINE }, { 0x9244, 1, RI_E2_ONLINE },
+ { 0x9298, 1, RI_E2_ONLINE }, { 0x92a8, 1, RI_E2_ONLINE },
+ { 0xa38c, 1, RI_ALL_ONLINE }, { 0xa3c4, 1, RI_E1HE2_ONLINE },
{ 0xa408, 1, RI_ALL_ONLINE }, { 0xa42c, 12, RI_ALL_ONLINE },
- { 0xa600, 5, RI_E1H_ONLINE }, { 0xa618, 1, RI_E1H_ONLINE },
- { 0xc09c, 1, RI_ALL_ONLINE }, { 0x103b0, 1, RI_ALL_ONLINE },
- { 0x103c0, 1, RI_ALL_ONLINE }, { 0x103d0, 1, RI_E1H_ONLINE },
- { 0x2021c, 11, RI_ALL_ONLINE }, { 0x202a8, 1, RI_ALL_ONLINE },
- { 0x202b8, 1, RI_ALL_ONLINE }, { 0x20404, 1, RI_ALL_ONLINE },
- { 0x2040c, 2, RI_ALL_ONLINE }, { 0x2041c, 2, RI_ALL_ONLINE },
- { 0x40154, 14, RI_ALL_ONLINE }, { 0x40198, 1, RI_ALL_ONLINE },
- { 0x404ac, 1, RI_ALL_ONLINE }, { 0x404bc, 1, RI_ALL_ONLINE },
- { 0x42290, 1, RI_ALL_ONLINE }, { 0x422a0, 1, RI_ALL_ONLINE },
- { 0x422b0, 1, RI_ALL_ONLINE }, { 0x42548, 1, RI_ALL_ONLINE },
- { 0x42550, 1, RI_ALL_ONLINE }, { 0x42558, 1, RI_ALL_ONLINE },
- { 0x50160, 8, RI_ALL_ONLINE }, { 0x501d0, 1, RI_ALL_ONLINE },
- { 0x501e0, 1, RI_ALL_ONLINE }, { 0x50204, 1, RI_ALL_ONLINE },
- { 0x5020c, 2, RI_ALL_ONLINE }, { 0x5021c, 1, RI_ALL_ONLINE },
- { 0x60090, 1, RI_ALL_ONLINE }, { 0x6011c, 1, RI_ALL_ONLINE },
- { 0x6012c, 1, RI_ALL_ONLINE }, { 0xc101c, 1, RI_ALL_ONLINE },
- { 0xc102c, 1, RI_ALL_ONLINE }, { 0xc2290, 1, RI_ALL_ONLINE },
- { 0xc22a0, 1, RI_ALL_ONLINE }, { 0xc22b0, 1, RI_ALL_ONLINE },
- { 0xc2548, 1, RI_ALL_ONLINE }, { 0xc2550, 1, RI_ALL_ONLINE },
- { 0xc2558, 1, RI_ALL_ONLINE }, { 0xc4294, 1, RI_ALL_ONLINE },
- { 0xc42a4, 1, RI_ALL_ONLINE }, { 0xc42b4, 1, RI_ALL_ONLINE },
- { 0xc4550, 1, RI_ALL_ONLINE }, { 0xc4558, 1, RI_ALL_ONLINE },
- { 0xc4560, 1, RI_ALL_ONLINE }, { 0xd016c, 8, RI_ALL_ONLINE },
- { 0xd01d8, 1, RI_ALL_ONLINE }, { 0xd01e8, 1, RI_ALL_ONLINE },
- { 0xd0204, 1, RI_ALL_ONLINE }, { 0xd020c, 3, RI_ALL_ONLINE },
- { 0xe0154, 8, RI_ALL_ONLINE }, { 0xe01c8, 1, RI_ALL_ONLINE },
- { 0xe01d8, 1, RI_ALL_ONLINE }, { 0xe0204, 1, RI_ALL_ONLINE },
- { 0xe020c, 2, RI_ALL_ONLINE }, { 0xe021c, 2, RI_ALL_ONLINE },
- { 0x101014, 1, RI_ALL_ONLINE }, { 0x101030, 1, RI_ALL_ONLINE },
- { 0x101040, 1, RI_ALL_ONLINE }, { 0x102058, 1, RI_ALL_ONLINE },
- { 0x102080, 16, RI_ALL_ONLINE }, { 0x103004, 2, RI_ALL_ONLINE },
- { 0x103068, 1, RI_ALL_ONLINE }, { 0x103078, 1, RI_ALL_ONLINE },
- { 0x103088, 1, RI_ALL_ONLINE }, { 0x10309c, 2, RI_E1H_ONLINE },
+ { 0xa600, 5, RI_E1HE2_ONLINE }, { 0xa618, 1, RI_E1HE2_ONLINE },
+ { 0xa714, 1, RI_E2_ONLINE }, { 0xa720, 1, RI_E2_ONLINE },
+ { 0xa750, 1, RI_E2_ONLINE }, { 0xc09c, 1, RI_E1E1H_ONLINE },
+ { 0x103b0, 1, RI_ALL_ONLINE }, { 0x103c0, 1, RI_ALL_ONLINE },
+ { 0x103d0, 1, RI_E1H_ONLINE }, { 0x183bc, 1, RI_E2_ONLINE },
+ { 0x183cc, 1, RI_E2_ONLINE }, { 0x2021c, 11, RI_ALL_ONLINE },
+ { 0x202a8, 1, RI_ALL_ONLINE }, { 0x202b8, 1, RI_ALL_ONLINE },
+ { 0x20404, 1, RI_ALL_ONLINE }, { 0x2040c, 2, RI_ALL_ONLINE },
+ { 0x2041c, 2, RI_ALL_ONLINE }, { 0x40154, 14, RI_ALL_ONLINE },
+ { 0x40198, 1, RI_ALL_ONLINE }, { 0x404ac, 1, RI_ALL_ONLINE },
+ { 0x404bc, 1, RI_ALL_ONLINE }, { 0x42290, 1, RI_ALL_ONLINE },
+ { 0x422a0, 1, RI_ALL_ONLINE }, { 0x422b0, 1, RI_ALL_ONLINE },
+ { 0x42548, 1, RI_ALL_ONLINE }, { 0x42550, 1, RI_ALL_ONLINE },
+ { 0x42558, 1, RI_ALL_ONLINE }, { 0x50160, 8, RI_ALL_ONLINE },
+ { 0x501d0, 1, RI_ALL_ONLINE }, { 0x501e0, 1, RI_ALL_ONLINE },
+ { 0x50204, 1, RI_ALL_ONLINE }, { 0x5020c, 2, RI_ALL_ONLINE },
+ { 0x5021c, 1, RI_ALL_ONLINE }, { 0x60090, 1, RI_ALL_ONLINE },
+ { 0x6011c, 1, RI_ALL_ONLINE }, { 0x6012c, 1, RI_ALL_ONLINE },
+ { 0xc101c, 1, RI_ALL_ONLINE }, { 0xc102c, 1, RI_ALL_ONLINE },
+ { 0xc2290, 1, RI_ALL_ONLINE }, { 0xc22a0, 1, RI_ALL_ONLINE },
+ { 0xc22b0, 1, RI_ALL_ONLINE }, { 0xc2548, 1, RI_ALL_ONLINE },
+ { 0xc2550, 1, RI_ALL_ONLINE }, { 0xc2558, 1, RI_ALL_ONLINE },
+ { 0xc4294, 1, RI_ALL_ONLINE }, { 0xc42a4, 1, RI_ALL_ONLINE },
+ { 0xc42b4, 1, RI_ALL_ONLINE }, { 0xc4550, 1, RI_ALL_ONLINE },
+ { 0xc4558, 1, RI_ALL_ONLINE }, { 0xc4560, 1, RI_ALL_ONLINE },
+ { 0xd016c, 8, RI_ALL_ONLINE }, { 0xd01d8, 1, RI_ALL_ONLINE },
+ { 0xd01e8, 1, RI_ALL_ONLINE }, { 0xd0204, 1, RI_ALL_ONLINE },
+ { 0xd020c, 3, RI_ALL_ONLINE }, { 0xe0154, 8, RI_ALL_ONLINE },
+ { 0xe01c8, 1, RI_ALL_ONLINE }, { 0xe01d8, 1, RI_ALL_ONLINE },
+ { 0xe0204, 1, RI_ALL_ONLINE }, { 0xe020c, 2, RI_ALL_ONLINE },
+ { 0xe021c, 2, RI_ALL_ONLINE }, { 0x101014, 1, RI_ALL_ONLINE },
+ { 0x101030, 1, RI_ALL_ONLINE }, { 0x101040, 1, RI_ALL_ONLINE },
+ { 0x102058, 1, RI_ALL_ONLINE }, { 0x102080, 16, RI_ALL_ONLINE },
+ { 0x103004, 2, RI_ALL_ONLINE }, { 0x103068, 1, RI_ALL_ONLINE },
+ { 0x103078, 1, RI_ALL_ONLINE }, { 0x103088, 1, RI_ALL_ONLINE },
+ { 0x10309c, 2, RI_E1HE2_ONLINE }, { 0x1030b8, 2, RI_E2_ONLINE },
+ { 0x1030cc, 1, RI_E2_ONLINE }, { 0x1030e0, 1, RI_E2_ONLINE },
{ 0x104004, 1, RI_ALL_ONLINE }, { 0x104018, 1, RI_ALL_ONLINE },
{ 0x104020, 1, RI_ALL_ONLINE }, { 0x10403c, 1, RI_ALL_ONLINE },
{ 0x1040fc, 1, RI_ALL_ONLINE }, { 0x10410c, 1, RI_ALL_ONLINE },
{ 0x104400, 64, RI_ALL_ONLINE }, { 0x104800, 64, RI_ALL_ONLINE },
- { 0x105000, 3, RI_ALL_ONLINE }, { 0x105010, 3, RI_ALL_ONLINE },
- { 0x105020, 3, RI_ALL_ONLINE }, { 0x105030, 3, RI_ALL_ONLINE },
- { 0x105040, 3, RI_ALL_ONLINE }, { 0x105050, 3, RI_ALL_ONLINE },
- { 0x105060, 3, RI_ALL_ONLINE }, { 0x105070, 3, RI_ALL_ONLINE },
- { 0x105080, 3, RI_ALL_ONLINE }, { 0x105090, 3, RI_ALL_ONLINE },
- { 0x1050a0, 3, RI_ALL_ONLINE }, { 0x1050b0, 3, RI_ALL_ONLINE },
- { 0x1050c0, 3, RI_ALL_ONLINE }, { 0x1050d0, 3, RI_ALL_ONLINE },
- { 0x1050e0, 3, RI_ALL_ONLINE }, { 0x1050f0, 3, RI_ALL_ONLINE },
- { 0x105100, 3, RI_ALL_ONLINE }, { 0x105110, 3, RI_ALL_ONLINE },
- { 0x105120, 3, RI_ALL_ONLINE }, { 0x105130, 3, RI_ALL_ONLINE },
- { 0x105140, 3, RI_ALL_ONLINE }, { 0x105150, 3, RI_ALL_ONLINE },
- { 0x105160, 3, RI_ALL_ONLINE }, { 0x105170, 3, RI_ALL_ONLINE },
- { 0x105180, 3, RI_ALL_ONLINE }, { 0x105190, 3, RI_ALL_ONLINE },
- { 0x1051a0, 3, RI_ALL_ONLINE }, { 0x1051b0, 3, RI_ALL_ONLINE },
- { 0x1051c0, 3, RI_ALL_ONLINE }, { 0x1051d0, 3, RI_ALL_ONLINE },
- { 0x1051e0, 3, RI_ALL_ONLINE }, { 0x1051f0, 3, RI_ALL_ONLINE },
- { 0x105200, 3, RI_ALL_ONLINE }, { 0x105210, 3, RI_ALL_ONLINE },
- { 0x105220, 3, RI_ALL_ONLINE }, { 0x105230, 3, RI_ALL_ONLINE },
- { 0x105240, 3, RI_ALL_ONLINE }, { 0x105250, 3, RI_ALL_ONLINE },
- { 0x105260, 3, RI_ALL_ONLINE }, { 0x105270, 3, RI_ALL_ONLINE },
- { 0x105280, 3, RI_ALL_ONLINE }, { 0x105290, 3, RI_ALL_ONLINE },
- { 0x1052a0, 3, RI_ALL_ONLINE }, { 0x1052b0, 3, RI_ALL_ONLINE },
- { 0x1052c0, 3, RI_ALL_ONLINE }, { 0x1052d0, 3, RI_ALL_ONLINE },
- { 0x1052e0, 3, RI_ALL_ONLINE }, { 0x1052f0, 3, RI_ALL_ONLINE },
- { 0x105300, 3, RI_ALL_ONLINE }, { 0x105310, 3, RI_ALL_ONLINE },
- { 0x105320, 3, RI_ALL_ONLINE }, { 0x105330, 3, RI_ALL_ONLINE },
- { 0x105340, 3, RI_ALL_ONLINE }, { 0x105350, 3, RI_ALL_ONLINE },
- { 0x105360, 3, RI_ALL_ONLINE }, { 0x105370, 3, RI_ALL_ONLINE },
- { 0x105380, 3, RI_ALL_ONLINE }, { 0x105390, 3, RI_ALL_ONLINE },
- { 0x1053a0, 3, RI_ALL_ONLINE }, { 0x1053b0, 3, RI_ALL_ONLINE },
- { 0x1053c0, 3, RI_ALL_ONLINE }, { 0x1053d0, 3, RI_ALL_ONLINE },
- { 0x1053e0, 3, RI_ALL_ONLINE }, { 0x1053f0, 3, RI_ALL_ONLINE },
- { 0x108094, 1, RI_ALL_ONLINE }, { 0x1201b0, 2, RI_ALL_ONLINE },
- { 0x12032c, 1, RI_ALL_ONLINE }, { 0x12036c, 3, RI_ALL_ONLINE },
- { 0x120408, 2, RI_ALL_ONLINE }, { 0x120414, 15, RI_ALL_ONLINE },
- { 0x120478, 2, RI_ALL_ONLINE }, { 0x12052c, 1, RI_ALL_ONLINE },
- { 0x120564, 3, RI_ALL_ONLINE }, { 0x12057c, 1, RI_ALL_ONLINE },
- { 0x12058c, 1, RI_ALL_ONLINE }, { 0x120608, 1, RI_E1H_ONLINE },
- { 0x120808, 1, RI_E1_ONLINE }, { 0x12080c, 2, RI_ALL_ONLINE },
+ { 0x105000, 256, RI_ALL_ONLINE }, { 0x108094, 1, RI_E1E1H_ONLINE },
+ { 0x1201b0, 2, RI_ALL_ONLINE }, { 0x12032c, 1, RI_ALL_ONLINE },
+ { 0x12036c, 3, RI_ALL_ONLINE }, { 0x120408, 2, RI_ALL_ONLINE },
+ { 0x120414, 15, RI_ALL_ONLINE }, { 0x120478, 2, RI_ALL_ONLINE },
+ { 0x12052c, 1, RI_ALL_ONLINE }, { 0x120564, 3, RI_ALL_ONLINE },
+ { 0x12057c, 1, RI_ALL_ONLINE }, { 0x12058c, 1, RI_ALL_ONLINE },
+ { 0x120608, 1, RI_E1HE2_ONLINE }, { 0x120738, 1, RI_E2_ONLINE },
+ { 0x120778, 2, RI_E2_ONLINE }, { 0x120808, 3, RI_ALL_ONLINE },
{ 0x120818, 1, RI_ALL_ONLINE }, { 0x120820, 1, RI_ALL_ONLINE },
{ 0x120828, 1, RI_ALL_ONLINE }, { 0x120830, 1, RI_ALL_ONLINE },
{ 0x120838, 1, RI_ALL_ONLINE }, { 0x120840, 1, RI_ALL_ONLINE },
@@ -462,48 +580,50 @@ static const struct reg_addr idle_addrs[IDLE_REGS_COUNT] = {
{ 0x1208d8, 1, RI_ALL_ONLINE }, { 0x1208e0, 1, RI_ALL_ONLINE },
{ 0x1208e8, 1, RI_ALL_ONLINE }, { 0x1208f0, 1, RI_ALL_ONLINE },
{ 0x1208f8, 1, RI_ALL_ONLINE }, { 0x120900, 1, RI_ALL_ONLINE },
- { 0x120908, 1, RI_ALL_ONLINE }, { 0x14005c, 2, RI_ALL_ONLINE },
- { 0x1400d0, 2, RI_ALL_ONLINE }, { 0x1400e0, 1, RI_ALL_ONLINE },
- { 0x1401c8, 1, RI_ALL_ONLINE }, { 0x140200, 6, RI_ALL_ONLINE },
- { 0x16101c, 1, RI_ALL_ONLINE }, { 0x16102c, 1, RI_ALL_ONLINE },
- { 0x164014, 2, RI_ALL_ONLINE }, { 0x1640f0, 1, RI_ALL_ONLINE },
- { 0x166290, 1, RI_ALL_ONLINE }, { 0x1662a0, 1, RI_ALL_ONLINE },
- { 0x1662b0, 1, RI_ALL_ONLINE }, { 0x166548, 1, RI_ALL_ONLINE },
- { 0x166550, 1, RI_ALL_ONLINE }, { 0x166558, 1, RI_ALL_ONLINE },
- { 0x168000, 1, RI_ALL_ONLINE }, { 0x168008, 1, RI_ALL_ONLINE },
- { 0x168010, 1, RI_ALL_ONLINE }, { 0x168018, 1, RI_ALL_ONLINE },
- { 0x168028, 2, RI_ALL_ONLINE }, { 0x168058, 4, RI_ALL_ONLINE },
- { 0x168070, 1, RI_ALL_ONLINE }, { 0x168238, 1, RI_ALL_ONLINE },
- { 0x1682d0, 2, RI_ALL_ONLINE }, { 0x1682e0, 1, RI_ALL_ONLINE },
- { 0x168300, 67, RI_ALL_ONLINE }, { 0x168410, 2, RI_ALL_ONLINE },
+ { 0x120908, 1, RI_ALL_ONLINE }, { 0x120940, 5, RI_E2_ONLINE },
+ { 0x130030, 1, RI_E2_ONLINE }, { 0x13004c, 3, RI_E2_ONLINE },
+ { 0x130064, 2, RI_E2_ONLINE }, { 0x13009c, 1, RI_E2_ONLINE },
+ { 0x130130, 1, RI_E2_ONLINE }, { 0x13016c, 1, RI_E2_ONLINE },
+ { 0x130300, 1, RI_E2_ONLINE }, { 0x130480, 1, RI_E2_ONLINE },
+ { 0x14005c, 2, RI_ALL_ONLINE }, { 0x1400d0, 2, RI_ALL_ONLINE },
+ { 0x1400e0, 1, RI_ALL_ONLINE }, { 0x1401c8, 1, RI_ALL_ONLINE },
+ { 0x140200, 6, RI_ALL_ONLINE }, { 0x16101c, 1, RI_ALL_ONLINE },
+ { 0x16102c, 1, RI_ALL_ONLINE }, { 0x164014, 2, RI_ALL_ONLINE },
+ { 0x1640f0, 1, RI_ALL_ONLINE }, { 0x166290, 1, RI_ALL_ONLINE },
+ { 0x1662a0, 1, RI_ALL_ONLINE }, { 0x1662b0, 1, RI_ALL_ONLINE },
+ { 0x166548, 1, RI_ALL_ONLINE }, { 0x166550, 1, RI_ALL_ONLINE },
+ { 0x166558, 1, RI_ALL_ONLINE }, { 0x168000, 1, RI_ALL_ONLINE },
+ { 0x168008, 1, RI_ALL_ONLINE }, { 0x168010, 1, RI_ALL_ONLINE },
+ { 0x168018, 1, RI_ALL_ONLINE }, { 0x168028, 2, RI_ALL_ONLINE },
+ { 0x168058, 4, RI_ALL_ONLINE }, { 0x168070, 1, RI_ALL_ONLINE },
+ { 0x168238, 1, RI_ALL_ONLINE }, { 0x1682d0, 2, RI_ALL_ONLINE },
+ { 0x1682e0, 1, RI_ALL_ONLINE }, { 0x168300, 2, RI_E1E1H_ONLINE },
+ { 0x168308, 65, RI_ALL_ONLINE }, { 0x168410, 2, RI_ALL_ONLINE },
{ 0x168438, 1, RI_ALL_ONLINE }, { 0x168448, 1, RI_ALL_ONLINE },
{ 0x168a00, 128, RI_ALL_ONLINE }, { 0x16e200, 128, RI_E1H_ONLINE },
- { 0x16e404, 2, RI_E1H_ONLINE }, { 0x16e584, 70, RI_E1H_ONLINE },
- { 0x1700a4, 1, RI_ALL_ONLINE }, { 0x1700ac, 2, RI_ALL_ONLINE },
- { 0x1700c0, 1, RI_ALL_ONLINE }, { 0x170174, 1, RI_ALL_ONLINE },
- { 0x170184, 1, RI_ALL_ONLINE }, { 0x1800f4, 1, RI_ALL_ONLINE },
- { 0x180104, 1, RI_ALL_ONLINE }, { 0x180114, 1, RI_ALL_ONLINE },
- { 0x180124, 1, RI_ALL_ONLINE }, { 0x18026c, 1, RI_ALL_ONLINE },
- { 0x1802a0, 1, RI_ALL_ONLINE }, { 0x1a1000, 1, RI_ALL_ONLINE },
- { 0x1aa000, 1, RI_E1H_ONLINE }, { 0x1b8000, 1, RI_ALL_ONLINE },
- { 0x1b8040, 1, RI_ALL_ONLINE }, { 0x1b8080, 1, RI_ALL_ONLINE },
- { 0x1b80c0, 1, RI_ALL_ONLINE }, { 0x200104, 1, RI_ALL_ONLINE },
- { 0x200114, 1, RI_ALL_ONLINE }, { 0x200124, 1, RI_ALL_ONLINE },
- { 0x200134, 1, RI_ALL_ONLINE }, { 0x20026c, 1, RI_ALL_ONLINE },
- { 0x2002a0, 1, RI_ALL_ONLINE }, { 0x221000, 1, RI_ALL_ONLINE },
- { 0x227000, 1, RI_E1H_ONLINE }, { 0x238000, 1, RI_ALL_ONLINE },
- { 0x238040, 1, RI_ALL_ONLINE }, { 0x238080, 1, RI_ALL_ONLINE },
- { 0x2380c0, 1, RI_ALL_ONLINE }, { 0x280104, 1, RI_ALL_ONLINE },
- { 0x280114, 1, RI_ALL_ONLINE }, { 0x280124, 1, RI_ALL_ONLINE },
- { 0x280134, 1, RI_ALL_ONLINE }, { 0x28026c, 1, RI_ALL_ONLINE },
- { 0x2802a0, 1, RI_ALL_ONLINE }, { 0x2a1000, 1, RI_ALL_ONLINE },
- { 0x2a9000, 1, RI_E1H_ONLINE }, { 0x2b8000, 1, RI_ALL_ONLINE },
- { 0x2b8040, 1, RI_ALL_ONLINE }, { 0x2b8080, 1, RI_ALL_ONLINE },
- { 0x2b80c0, 1, RI_ALL_ONLINE }, { 0x300104, 1, RI_ALL_ONLINE },
+ { 0x16e404, 2, RI_E1H_ONLINE }, { 0x16e584, 64, RI_E1H_ONLINE },
+ { 0x16e684, 2, RI_E1HE2_ONLINE }, { 0x16e68c, 4, RI_E1H_ONLINE },
+ { 0x16e6fc, 4, RI_E2_ONLINE }, { 0x1700a4, 1, RI_ALL_ONLINE },
+ { 0x1700ac, 2, RI_ALL_ONLINE }, { 0x1700c0, 1, RI_ALL_ONLINE },
+ { 0x170174, 1, RI_ALL_ONLINE }, { 0x170184, 1, RI_ALL_ONLINE },
+ { 0x1800f4, 1, RI_ALL_ONLINE }, { 0x180104, 1, RI_ALL_ONLINE },
+ { 0x180114, 1, RI_ALL_ONLINE }, { 0x180124, 1, RI_ALL_ONLINE },
+ { 0x18026c, 1, RI_ALL_ONLINE }, { 0x1802a0, 1, RI_ALL_ONLINE },
+ { 0x1b8000, 1, RI_ALL_ONLINE }, { 0x1b8040, 1, RI_ALL_ONLINE },
+ { 0x1b8080, 1, RI_ALL_ONLINE }, { 0x1b80c0, 1, RI_ALL_ONLINE },
+ { 0x200104, 1, RI_ALL_ONLINE }, { 0x200114, 1, RI_ALL_ONLINE },
+ { 0x200124, 1, RI_ALL_ONLINE }, { 0x200134, 1, RI_ALL_ONLINE },
+ { 0x20026c, 1, RI_ALL_ONLINE }, { 0x2002a0, 1, RI_ALL_ONLINE },
+ { 0x238000, 1, RI_ALL_ONLINE }, { 0x238040, 1, RI_ALL_ONLINE },
+ { 0x238080, 1, RI_ALL_ONLINE }, { 0x2380c0, 1, RI_ALL_ONLINE },
+ { 0x280104, 1, RI_ALL_ONLINE }, { 0x280114, 1, RI_ALL_ONLINE },
+ { 0x280124, 1, RI_ALL_ONLINE }, { 0x280134, 1, RI_ALL_ONLINE },
+ { 0x28026c, 1, RI_ALL_ONLINE }, { 0x2802a0, 1, RI_ALL_ONLINE },
+ { 0x2b8000, 1, RI_ALL_ONLINE }, { 0x2b8040, 1, RI_ALL_ONLINE },
+ { 0x2b8080, 1, RI_ALL_ONLINE }, { 0x300104, 1, RI_ALL_ONLINE },
{ 0x300114, 1, RI_ALL_ONLINE }, { 0x300124, 1, RI_ALL_ONLINE },
{ 0x300134, 1, RI_ALL_ONLINE }, { 0x30026c, 1, RI_ALL_ONLINE },
- { 0x3002a0, 1, RI_ALL_ONLINE }, { 0x321000, 1, RI_ALL_ONLINE },
- { 0x328960, 1, RI_E1H_ONLINE }, { 0x338000, 1, RI_ALL_ONLINE },
+ { 0x3002a0, 1, RI_ALL_ONLINE }, { 0x338000, 1, RI_ALL_ONLINE },
{ 0x338040, 1, RI_ALL_ONLINE }, { 0x338080, 1, RI_ALL_ONLINE },
{ 0x3380c0, 1, RI_ALL_ONLINE }
};
@@ -515,7 +635,6 @@ static const struct wreg_addr wreg_addrs_e1[WREGS_COUNT_E1] = {
{ 0x1b0c00, 192, 1, read_reg_e1_0, RI_E1_OFFLINE }
};
-
#define WREGS_COUNT_E1H 1
static const u32 read_reg_e1h_0[] = { 0x1b1040, 0x1b1000 };
@@ -530,22 +649,53 @@ static const struct wreg_addr wreg_addrs_e2[WREGS_COUNT_E2] = {
{ 0x1b0c00, 128, 2, read_reg_e2_0, RI_E2_OFFLINE }
};
-static const struct dump_sign dump_sign_all = { 0x49aa93ee, 0x40835, 0x22 };
-
+static const struct dump_sign dump_sign_all = { 0x4d18b0a4, 0x60010, 0x3a };
#define TIMER_REGS_COUNT_E1 2
-static const u32 timer_status_regs_e1[TIMER_REGS_COUNT_E1] =
- { 0x164014, 0x164018 };
-static const u32 timer_scan_regs_e1[TIMER_REGS_COUNT_E1] =
- { 0x1640d0, 0x1640d4 };
+static const u32 timer_status_regs_e1[TIMER_REGS_COUNT_E1] = {
+ 0x164014, 0x164018 };
+static const u32 timer_scan_regs_e1[TIMER_REGS_COUNT_E1] = {
+ 0x1640d0, 0x1640d4 };
#define TIMER_REGS_COUNT_E1H 2
-static const u32 timer_status_regs_e1h[TIMER_REGS_COUNT_E1H] =
- { 0x164014, 0x164018 };
-static const u32 timer_scan_regs_e1h[TIMER_REGS_COUNT_E1H] =
- { 0x1640d0, 0x1640d4 };
+static const u32 timer_status_regs_e1h[TIMER_REGS_COUNT_E1H] = {
+ 0x164014, 0x164018 };
+static const u32 timer_scan_regs_e1h[TIMER_REGS_COUNT_E1H] = {
+ 0x1640d0, 0x1640d4 };
+
+#define TIMER_REGS_COUNT_E2 2
+
+static const u32 timer_status_regs_e2[TIMER_REGS_COUNT_E2] = {
+ 0x164014, 0x164018 };
+static const u32 timer_scan_regs_e2[TIMER_REGS_COUNT_E2] = {
+ 0x1640d0, 0x1640d4 };
+
+#define PAGE_MODE_VALUES_E1 0
+
+#define PAGE_READ_REGS_E1 0
+
+#define PAGE_WRITE_REGS_E1 0
+
+static const u32 page_vals_e1[] = { 0 };
+
+static const u32 page_write_regs_e1[] = { 0 };
+
+static const struct reg_addr page_read_regs_e1[] = { { 0x0, 0, RI_E1_ONLINE } };
+
+#define PAGE_MODE_VALUES_E1H 0
+
+#define PAGE_READ_REGS_E1H 0
+
+#define PAGE_WRITE_REGS_E1H 0
+
+static const u32 page_vals_e1h[] = { 0 };
+
+static const u32 page_write_regs_e1h[] = { 0 };
+
+static const struct reg_addr page_read_regs_e1h[] = {
+ { 0x0, 0, RI_E1H_ONLINE } };
#define PAGE_MODE_VALUES_E2 2
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 99c672d..5b44a8b 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -24,6 +24,7 @@
#include "bnx2x.h"
#include "bnx2x_cmn.h"
#include "bnx2x_dump.h"
+#include "bnx2x_init.h"
/* Note: in the format strings below %s is replaced by the queue-name which is
* either its index or 'fcoe' for the fcoe queue. Make sure the format string
@@ -472,7 +473,7 @@ static int bnx2x_get_regs_len(struct net_device *dev)
{
struct bnx2x *bp = netdev_priv(dev);
int regdump_len = 0;
- int i;
+ int i, j, k;
if (CHIP_IS_E1(bp)) {
for (i = 0; i < REGS_COUNT; i++)
@@ -502,6 +503,15 @@ static int bnx2x_get_regs_len(struct net_device *dev)
if (IS_E2_ONLINE(wreg_addrs_e2[i].info))
regdump_len += wreg_addrs_e2[i].size *
(1 + wreg_addrs_e2[i].read_regs_count);
+
+ for (i = 0; i < PAGE_MODE_VALUES_E2; i++)
+ for (j = 0; j < PAGE_WRITE_REGS_E2; j++) {
+ for (k = 0; k < PAGE_READ_REGS_E2; k++)
+ if (IS_E2_ONLINE(page_read_regs_e2[k].
+ info))
+ regdump_len +=
+ page_read_regs_e2[k].size;
+ }
}
regdump_len *= 4;
regdump_len += sizeof(struct dump_hdr);
@@ -539,6 +549,12 @@ static void bnx2x_get_regs(struct net_device *dev,
if (!netif_running(bp->dev))
return;
+ /* Disable parity attentions as long as following dump may
+ * cause false alarms by reading never written registers. We
+ * will re-enable parity attentions right after the dump.
+ */
+ bnx2x_disable_blocks_parity(bp);
+
dump_hdr.hdr_size = (sizeof(struct dump_hdr) / 4) - 1;
dump_hdr.dump_sign = dump_sign_all;
dump_hdr.xstorm_waitp = REG_RD(bp, XSTORM_WAITP_ADDR);
@@ -580,6 +596,10 @@ static void bnx2x_get_regs(struct net_device *dev,
bnx2x_read_pages_regs_e2(bp, p);
}
+ /* Re-enable parity attentions */
+ bnx2x_clear_blocks_parity(bp);
+ if (CHIP_PARITY_ENABLED(bp))
+ bnx2x_enable_blocks_parity(bp);
}
#define PHY_FW_VER_LEN 20
diff --git a/drivers/net/bnx2x/bnx2x_init.h b/drivers/net/bnx2x/bnx2x_init.h
index a9d5487..5a268e9 100644
--- a/drivers/net/bnx2x/bnx2x_init.h
+++ b/drivers/net/bnx2x/bnx2x_init.h
@@ -192,5 +192,225 @@ struct src_ent {
u64 next;
};
+/****************************************************************************
+* Parity configuration
+****************************************************************************/
+#define BLOCK_PRTY_INFO(block, en_mask, m1, m1h, m2) \
+{ \
+ block##_REG_##block##_PRTY_MASK, \
+ block##_REG_##block##_PRTY_STS_CLR, \
+ en_mask, {m1, m1h, m2}, #block \
+}
+
+#define BLOCK_PRTY_INFO_0(block, en_mask, m1, m1h, m2) \
+{ \
+ block##_REG_##block##_PRTY_MASK_0, \
+ block##_REG_##block##_PRTY_STS_CLR_0, \
+ en_mask, {m1, m1h, m2}, #block"_0" \
+}
+
+#define BLOCK_PRTY_INFO_1(block, en_mask, m1, m1h, m2) \
+{ \
+ block##_REG_##block##_PRTY_MASK_1, \
+ block##_REG_##block##_PRTY_STS_CLR_1, \
+ en_mask, {m1, m1h, m2}, #block"_1" \
+}
+
+static const struct {
+ u32 mask_addr;
+ u32 sts_clr_addr;
+ u32 en_mask; /* Mask to enable parity attentions */
+ struct {
+ u32 e1; /* 57710 */
+ u32 e1h; /* 57711 */
+ u32 e2; /* 57712 */
+ } reg_mask; /* Register mask (all valid bits) */
+ char name[7]; /* Block's longest name is 6 characters long
+ * (name + suffix)
+ */
+} bnx2x_blocks_parity_data[] = {
+ /* bit 19 masked */
+ /* REG_WR(bp, PXP_REG_PXP_PRTY_MASK, 0x80000); */
+ /* bit 5,18,20-31 */
+ /* REG_WR(bp, PXP2_REG_PXP2_PRTY_MASK_0, 0xfff40020); */
+ /* bit 5 */
+ /* REG_WR(bp, PXP2_REG_PXP2_PRTY_MASK_1, 0x20); */
+ /* REG_WR(bp, HC_REG_HC_PRTY_MASK, 0x0); */
+ /* REG_WR(bp, MISC_REG_MISC_PRTY_MASK, 0x0); */
+
+ /* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't
+ * want to handle "system kill" flow at the moment.
+ */
+ BLOCK_PRTY_INFO(PXP, 0x3ffffff, 0x3ffffff, 0x3ffffff, 0x3ffffff),
+ BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
+ BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff),
+ BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0),
+ BLOCK_PRTY_INFO(IGU, 0x7ff, 0, 0, 0x7ff),
+ BLOCK_PRTY_INFO(MISC, 0x1, 0x1, 0x1, 0x1),
+ BLOCK_PRTY_INFO(QM, 0, 0x1ff, 0xfff, 0xfff),
+ BLOCK_PRTY_INFO(DORQ, 0, 0x3, 0x3, 0x3),
+ {GRCBASE_UPB + PB_REG_PB_PRTY_MASK,
+ GRCBASE_UPB + PB_REG_PB_PRTY_STS_CLR, 0,
+ {0xf, 0xf, 0xf}, "UPB"},
+ {GRCBASE_XPB + PB_REG_PB_PRTY_MASK,
+ GRCBASE_XPB + PB_REG_PB_PRTY_STS_CLR, 0,
+ {0xf, 0xf, 0xf}, "XPB"},
+ BLOCK_PRTY_INFO(SRC, 0x4, 0x7, 0x7, 0x7),
+ BLOCK_PRTY_INFO(CDU, 0, 0x1f, 0x1f, 0x1f),
+ BLOCK_PRTY_INFO(CFC, 0, 0xf, 0xf, 0xf),
+ BLOCK_PRTY_INFO(DBG, 0, 0x1, 0x1, 0x1),
+ BLOCK_PRTY_INFO(DMAE, 0, 0xf, 0xf, 0xf),
+ BLOCK_PRTY_INFO(BRB1, 0, 0xf, 0xf, 0xf),
+ BLOCK_PRTY_INFO(PRS, (1<<6), 0xff, 0xff, 0xff),
+ BLOCK_PRTY_INFO(TSDM, 0x18, 0x7ff, 0x7ff, 0x7ff),
+ BLOCK_PRTY_INFO(CSDM, 0x8, 0x7ff, 0x7ff, 0x7ff),
+ BLOCK_PRTY_INFO(USDM, 0x38, 0x7ff, 0x7ff, 0x7ff),
+ BLOCK_PRTY_INFO(XSDM, 0x8, 0x7ff, 0x7ff, 0x7ff),
+ BLOCK_PRTY_INFO_0(TSEM, 0, 0xffffffff, 0xffffffff, 0xffffffff),
+ BLOCK_PRTY_INFO_1(TSEM, 0, 0x3, 0x1f, 0x3f),
+ BLOCK_PRTY_INFO_0(USEM, 0, 0xffffffff, 0xffffffff, 0xffffffff),
+ BLOCK_PRTY_INFO_1(USEM, 0, 0x3, 0x1f, 0x1f),
+ BLOCK_PRTY_INFO_0(CSEM, 0, 0xffffffff, 0xffffffff, 0xffffffff),
+ BLOCK_PRTY_INFO_1(CSEM, 0, 0x3, 0x1f, 0x1f),
+ BLOCK_PRTY_INFO_0(XSEM, 0, 0xffffffff, 0xffffffff, 0xffffffff),
+ BLOCK_PRTY_INFO_1(XSEM, 0, 0x3, 0x1f, 0x3f),
+};
+
+
+/* [28] MCP Latched rom_parity
+ * [29] MCP Latched ump_rx_parity
+ * [30] MCP Latched ump_tx_parity
+ * [31] MCP Latched scpad_parity
+ */
+#define MISC_AEU_ENABLE_MCP_PRTY_BITS \
+ (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY | \
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY)
+
+/* Below registers control the MCP parity attention output. When
+ * MISC_AEU_ENABLE_MCP_PRTY_BITS are set - attentions are
+ * enabled, when cleared - disabled.
+ */
+static const u32 mcp_attn_ctl_regs[] = {
+ MISC_REG_AEU_ENABLE4_FUNC_0_OUT_0,
+ MISC_REG_AEU_ENABLE4_NIG_0,
+ MISC_REG_AEU_ENABLE4_PXP_0,
+ MISC_REG_AEU_ENABLE4_FUNC_1_OUT_0,
+ MISC_REG_AEU_ENABLE4_NIG_1,
+ MISC_REG_AEU_ENABLE4_PXP_1
+};
+
+static inline void bnx2x_set_mcp_parity(struct bnx2x *bp, u8 enable)
+{
+ int i;
+ u32 reg_val;
+
+ for (i = 0; i < ARRAY_SIZE(mcp_attn_ctl_regs); i++) {
+ reg_val = REG_RD(bp, mcp_attn_ctl_regs[i]);
+
+ if (enable)
+ reg_val |= MISC_AEU_ENABLE_MCP_PRTY_BITS;
+ else
+ reg_val &= ~MISC_AEU_ENABLE_MCP_PRTY_BITS;
+
+ REG_WR(bp, mcp_attn_ctl_regs[i], reg_val);
+ }
+}
+
+static inline u32 bnx2x_parity_reg_mask(struct bnx2x *bp, int idx)
+{
+ if (CHIP_IS_E1(bp))
+ return bnx2x_blocks_parity_data[idx].reg_mask.e1;
+ else if (CHIP_IS_E1H(bp))
+ return bnx2x_blocks_parity_data[idx].reg_mask.e1h;
+ else
+ return bnx2x_blocks_parity_data[idx].reg_mask.e2;
+}
+
+static inline void bnx2x_disable_blocks_parity(struct bnx2x *bp)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bnx2x_blocks_parity_data); i++) {
+ u32 dis_mask = bnx2x_parity_reg_mask(bp, i);
+
+ if (dis_mask) {
+ REG_WR(bp, bnx2x_blocks_parity_data[i].mask_addr,
+ dis_mask);
+ DP(NETIF_MSG_HW, "Setting parity mask "
+ "for %s to\t\t0x%x\n",
+ bnx2x_blocks_parity_data[i].name, dis_mask);
+ }
+ }
+
+ /* Disable MCP parity attentions */
+ bnx2x_set_mcp_parity(bp, false);
+}
+
+/**
+ * Clear the parity error status registers.
+ */
+static inline void bnx2x_clear_blocks_parity(struct bnx2x *bp)
+{
+ int i;
+ u32 reg_val, mcp_aeu_bits =
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY |
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY |
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY |
+ AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY;
+
+ /* Clear SEM_FAST parities */
+ REG_WR(bp, XSEM_REG_FAST_MEMORY + SEM_FAST_REG_PARITY_RST, 0x1);
+ REG_WR(bp, TSEM_REG_FAST_MEMORY + SEM_FAST_REG_PARITY_RST, 0x1);
+ REG_WR(bp, USEM_REG_FAST_MEMORY + SEM_FAST_REG_PARITY_RST, 0x1);
+ REG_WR(bp, CSEM_REG_FAST_MEMORY + SEM_FAST_REG_PARITY_RST, 0x1);
+
+ for (i = 0; i < ARRAY_SIZE(bnx2x_blocks_parity_data); i++) {
+ u32 reg_mask = bnx2x_parity_reg_mask(bp, i);
+
+ if (reg_mask) {
+ reg_val = REG_RD(bp, bnx2x_blocks_parity_data[i].
+ sts_clr_addr);
+ if (reg_val & reg_mask)
+ DP(NETIF_MSG_HW,
+ "Parity errors in %s: 0x%x\n",
+ bnx2x_blocks_parity_data[i].name,
+ reg_val & reg_mask);
+ }
+ }
+
+ /* Check if there were parity attentions in MCP */
+ reg_val = REG_RD(bp, MISC_REG_AEU_AFTER_INVERT_4_MCP);
+ if (reg_val & mcp_aeu_bits)
+ DP(NETIF_MSG_HW, "Parity error in MCP: 0x%x\n",
+ reg_val & mcp_aeu_bits);
+
+ /* Clear parity attentions in MCP:
+ * [7] clears Latched rom_parity
+ * [8] clears Latched ump_rx_parity
+ * [9] clears Latched ump_tx_parity
+ * [10] clears Latched scpad_parity (both ports)
+ */
+ REG_WR(bp, MISC_REG_AEU_CLR_LATCH_SIGNAL, 0x780);
+}
+
+static inline void bnx2x_enable_blocks_parity(struct bnx2x *bp)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bnx2x_blocks_parity_data); i++) {
+ u32 reg_mask = bnx2x_parity_reg_mask(bp, i);
+
+ if (reg_mask)
+ REG_WR(bp, bnx2x_blocks_parity_data[i].mask_addr,
+ bnx2x_blocks_parity_data[i].en_mask & reg_mask);
+ }
+
+ /* Enable MCP parity attentions */
+ bnx2x_set_mcp_parity(bp, true);
+}
+
+
#endif /* BNX2X_INIT_H */
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 489a5512..84e1af4 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -3152,7 +3152,6 @@ static inline void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn)
#define LOAD_COUNTER_MASK (((u32)0x1 << LOAD_COUNTER_BITS) - 1)
#define RESET_DONE_FLAG_MASK (~LOAD_COUNTER_MASK)
#define RESET_DONE_FLAG_SHIFT LOAD_COUNTER_BITS
-#define CHIP_PARITY_SUPPORTED(bp) (CHIP_IS_E1(bp) || CHIP_IS_E1H(bp))
/*
* should be run under rtnl lock
@@ -3527,7 +3526,7 @@ static void bnx2x_attn_int_deasserted(struct bnx2x *bp, u32 deasserted)
try to handle this event */
bnx2x_acquire_alr(bp);
- if (bnx2x_chk_parity_attn(bp)) {
+ if (CHIP_PARITY_ENABLED(bp) && bnx2x_chk_parity_attn(bp)) {
bp->recovery_state = BNX2X_RECOVERY_INIT;
bnx2x_set_reset_in_progress(bp);
schedule_delayed_work(&bp->reset_task, 0);
@@ -4754,7 +4753,7 @@ static int bnx2x_int_mem_test(struct bnx2x *bp)
return 0; /* OK */
}
-static void enable_blocks_attention(struct bnx2x *bp)
+static void bnx2x_enable_blocks_attention(struct bnx2x *bp)
{
REG_WR(bp, PXP_REG_PXP_INT_MASK_0, 0);
if (CHIP_IS_E2(bp))
@@ -4808,53 +4807,9 @@ static void enable_blocks_attention(struct bnx2x *bp)
REG_WR(bp, CDU_REG_CDU_INT_MASK, 0);
REG_WR(bp, DMAE_REG_DMAE_INT_MASK, 0);
/* REG_WR(bp, MISC_REG_MISC_INT_MASK, 0); */
- REG_WR(bp, PBF_REG_PBF_INT_MASK, 0X18); /* bit 3,4 masked */
+ REG_WR(bp, PBF_REG_PBF_INT_MASK, 0x18); /* bit 3,4 masked */
}
-static const struct {
- u32 addr;
- u32 mask;
-} bnx2x_parity_mask[] = {
- {PXP_REG_PXP_PRTY_MASK, 0x3ffffff},
- {PXP2_REG_PXP2_PRTY_MASK_0, 0xffffffff},
- {PXP2_REG_PXP2_PRTY_MASK_1, 0x7f},
- {HC_REG_HC_PRTY_MASK, 0x7},
- {MISC_REG_MISC_PRTY_MASK, 0x1},
- {QM_REG_QM_PRTY_MASK, 0x0},
- {DORQ_REG_DORQ_PRTY_MASK, 0x0},
- {GRCBASE_UPB + PB_REG_PB_PRTY_MASK, 0x0},
- {GRCBASE_XPB + PB_REG_PB_PRTY_MASK, 0x0},
- {SRC_REG_SRC_PRTY_MASK, 0x4}, /* bit 2 */
- {CDU_REG_CDU_PRTY_MASK, 0x0},
- {CFC_REG_CFC_PRTY_MASK, 0x0},
- {DBG_REG_DBG_PRTY_MASK, 0x0},
- {DMAE_REG_DMAE_PRTY_MASK, 0x0},
- {BRB1_REG_BRB1_PRTY_MASK, 0x0},
- {PRS_REG_PRS_PRTY_MASK, (1<<6)},/* bit 6 */
- {TSDM_REG_TSDM_PRTY_MASK, 0x18}, /* bit 3,4 */
- {CSDM_REG_CSDM_PRTY_MASK, 0x8}, /* bit 3 */
- {USDM_REG_USDM_PRTY_MASK, 0x38}, /* bit 3,4,5 */
- {XSDM_REG_XSDM_PRTY_MASK, 0x8}, /* bit 3 */
- {TSEM_REG_TSEM_PRTY_MASK_0, 0x0},
- {TSEM_REG_TSEM_PRTY_MASK_1, 0x0},
- {USEM_REG_USEM_PRTY_MASK_0, 0x0},
- {USEM_REG_USEM_PRTY_MASK_1, 0x0},
- {CSEM_REG_CSEM_PRTY_MASK_0, 0x0},
- {CSEM_REG_CSEM_PRTY_MASK_1, 0x0},
- {XSEM_REG_XSEM_PRTY_MASK_0, 0x0},
- {XSEM_REG_XSEM_PRTY_MASK_1, 0x0}
-};
-
-static void enable_blocks_parity(struct bnx2x *bp)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(bnx2x_parity_mask); i++)
- REG_WR(bp, bnx2x_parity_mask[i].addr,
- bnx2x_parity_mask[i].mask);
-}
-
-
static void bnx2x_reset_common(struct bnx2x *bp)
{
/* reset_common */
@@ -5350,9 +5305,9 @@ static int bnx2x_init_hw_common(struct bnx2x *bp, u32 load_code)
/* clear PXP2 attentions */
REG_RD(bp, PXP2_REG_PXP2_INT_STS_CLR_0);
- enable_blocks_attention(bp);
- if (CHIP_PARITY_SUPPORTED(bp))
- enable_blocks_parity(bp);
+ bnx2x_enable_blocks_attention(bp);
+ if (CHIP_PARITY_ENABLED(bp))
+ bnx2x_enable_blocks_parity(bp);
if (!BP_NOMCP(bp)) {
/* In E2 2-PORT mode, same ext phy is used for the two paths */
@@ -8751,13 +8706,6 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
dev_err(&bp->pdev->dev, "MCP disabled, "
"must load devices in order!\n");
- /* Set multi queue mode */
- if ((multi_mode != ETH_RSS_MODE_DISABLED) &&
- ((int_mode == INT_MODE_INTx) || (int_mode == INT_MODE_MSI))) {
- dev_err(&bp->pdev->dev, "Multi disabled since int_mode "
- "requested is not MSI-X\n");
- multi_mode = ETH_RSS_MODE_DISABLED;
- }
bp->multi_mode = multi_mode;
bp->int_mode = int_mode;
@@ -9560,9 +9508,15 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
/* Delete all NAPI objects */
bnx2x_del_all_napi(bp);
+ /* Power on: we can't let PCI layer write to us while we are in D3 */
+ bnx2x_set_power_state(bp, PCI_D0);
+
/* Disable MSI/MSI-X */
bnx2x_disable_msi(bp);
+ /* Power off */
+ bnx2x_set_power_state(bp, PCI_D3hot);
+
/* Make sure RESET task is not scheduled before continuing */
cancel_delayed_work_sync(&bp->reset_task);
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h
index bfd875b..38ef7ca 100644
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -18,6 +18,8 @@
* WR - Write Clear (write 1 to clear the bit)
*
*/
+#ifndef BNX2X_REG_H
+#define BNX2X_REG_H
#define ATC_ATC_INT_STS_REG_ADDRESS_ERROR (0x1<<0)
#define ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS (0x1<<2)
@@ -39,6 +41,8 @@
#define BRB1_REG_BRB1_PRTY_MASK 0x60138
/* [R 4] Parity register #0 read */
#define BRB1_REG_BRB1_PRTY_STS 0x6012c
+/* [RC 4] Parity register #0 read clear */
+#define BRB1_REG_BRB1_PRTY_STS_CLR 0x60130
/* [RW 10] At address BRB1_IND_FREE_LIST_PRS_CRDT initialize free head. At
* address BRB1_IND_FREE_LIST_PRS_CRDT+1 initialize free tail. At address
* BRB1_IND_FREE_LIST_PRS_CRDT+2 initialize parser initial credit. Warning -
@@ -132,8 +136,12 @@
#define CCM_REG_CCM_INT_MASK 0xd01e4
/* [R 11] Interrupt register #0 read */
#define CCM_REG_CCM_INT_STS 0xd01d8
+/* [RW 27] Parity mask register #0 read/write */
+#define CCM_REG_CCM_PRTY_MASK 0xd01f4
/* [R 27] Parity register #0 read */
#define CCM_REG_CCM_PRTY_STS 0xd01e8
+/* [RC 27] Parity register #0 read clear */
+#define CCM_REG_CCM_PRTY_STS_CLR 0xd01ec
/* [RW 3] The size of AG context region 0 in REG-pairs. Designates the MS
REG-pair number (e.g. if region 0 is 6 REG-pairs; the value should be 5).
Is used to determine the number of the AG context REG-pairs written back;
@@ -350,6 +358,8 @@
#define CDU_REG_CDU_PRTY_MASK 0x10104c
/* [R 5] Parity register #0 read */
#define CDU_REG_CDU_PRTY_STS 0x101040
+/* [RC 5] Parity register #0 read clear */
+#define CDU_REG_CDU_PRTY_STS_CLR 0x101044
/* [RC 32] logging of error data in case of a CDU load error:
{expected_cid[15:0]; xpected_type[2:0]; xpected_region[2:0]; ctive_error;
ype_error; ctual_active; ctual_compressed_context}; */
@@ -381,6 +391,8 @@
#define CFC_REG_CFC_PRTY_MASK 0x104118
/* [R 4] Parity register #0 read */
#define CFC_REG_CFC_PRTY_STS 0x10410c
+/* [RC 4] Parity register #0 read clear */
+#define CFC_REG_CFC_PRTY_STS_CLR 0x104110
/* [RW 21] CID cam access (21:1 - Data; alid - 0) */
#define CFC_REG_CID_CAM 0x104800
#define CFC_REG_CONTROL0 0x104028
@@ -466,6 +478,8 @@
#define CSDM_REG_CSDM_PRTY_MASK 0xc22bc
/* [R 11] Parity register #0 read */
#define CSDM_REG_CSDM_PRTY_STS 0xc22b0
+/* [RC 11] Parity register #0 read clear */
+#define CSDM_REG_CSDM_PRTY_STS_CLR 0xc22b4
#define CSDM_REG_ENABLE_IN1 0xc2238
#define CSDM_REG_ENABLE_IN2 0xc223c
#define CSDM_REG_ENABLE_OUT1 0xc2240
@@ -556,6 +570,9 @@
/* [R 32] Parity register #0 read */
#define CSEM_REG_CSEM_PRTY_STS_0 0x200124
#define CSEM_REG_CSEM_PRTY_STS_1 0x200134
+/* [RC 32] Parity register #0 read clear */
+#define CSEM_REG_CSEM_PRTY_STS_CLR_0 0x200128
+#define CSEM_REG_CSEM_PRTY_STS_CLR_1 0x200138
#define CSEM_REG_ENABLE_IN 0x2000a4
#define CSEM_REG_ENABLE_OUT 0x2000a8
/* [RW 32] This address space contains all registers and memories that are
@@ -648,6 +665,8 @@
#define DBG_REG_DBG_PRTY_MASK 0xc0a8
/* [R 1] Parity register #0 read */
#define DBG_REG_DBG_PRTY_STS 0xc09c
+/* [RC 1] Parity register #0 read clear */
+#define DBG_REG_DBG_PRTY_STS_CLR 0xc0a0
/* [RW 1] When set the DMAE will process the commands as in E1.5. 1.The
* function that is used is always SRC-PCI; 2.VF_Valid = 0; 3.VFID=0;
* 4.Completion function=0; 5.Error handling=0 */
@@ -668,6 +687,8 @@
#define DMAE_REG_DMAE_PRTY_MASK 0x102064
/* [R 4] Parity register #0 read */
#define DMAE_REG_DMAE_PRTY_STS 0x102058
+/* [RC 4] Parity register #0 read clear */
+#define DMAE_REG_DMAE_PRTY_STS_CLR 0x10205c
/* [RW 1] Command 0 go. */
#define DMAE_REG_GO_C0 0x102080
/* [RW 1] Command 1 go. */
@@ -734,6 +755,8 @@
#define DORQ_REG_DORQ_PRTY_MASK 0x170190
/* [R 2] Parity register #0 read */
#define DORQ_REG_DORQ_PRTY_STS 0x170184
+/* [RC 2] Parity register #0 read clear */
+#define DORQ_REG_DORQ_PRTY_STS_CLR 0x170188
/* [RW 8] The address to write the DPM CID to STORM. */
#define DORQ_REG_DPM_CID_ADDR 0x170044
/* [RW 5] The DPM mode CID extraction offset. */
@@ -842,8 +865,12 @@
/* [R 1] data availble for error memory. If this bit is clear do not red
* from error_handling_memory. */
#define IGU_REG_ERROR_HANDLING_DATA_VALID 0x130130
+/* [RW 11] Parity mask register #0 read/write */
+#define IGU_REG_IGU_PRTY_MASK 0x1300a8
/* [R 11] Parity register #0 read */
#define IGU_REG_IGU_PRTY_STS 0x13009c
+/* [RC 11] Parity register #0 read clear */
+#define IGU_REG_IGU_PRTY_STS_CLR 0x1300a0
/* [R 4] Debug: int_handle_fsm */
#define IGU_REG_INT_HANDLE_FSM 0x130050
#define IGU_REG_LEADING_EDGE_LATCH 0x130134
@@ -1501,6 +1528,8 @@
#define MISC_REG_MISC_PRTY_MASK 0xa398
/* [R 1] Parity register #0 read */
#define MISC_REG_MISC_PRTY_STS 0xa38c
+/* [RC 1] Parity register #0 read clear */
+#define MISC_REG_MISC_PRTY_STS_CLR 0xa390
#define MISC_REG_NIG_WOL_P0 0xa270
#define MISC_REG_NIG_WOL_P1 0xa274
/* [R 1] If set indicate that the pcie_rst_b was asserted without perst
@@ -2082,6 +2111,10 @@
#define PBF_REG_PBF_INT_MASK 0x1401d4
/* [R 5] Interrupt register #0 read */
#define PBF_REG_PBF_INT_STS 0x1401c8
+/* [RW 20] Parity mask register #0 read/write */
+#define PBF_REG_PBF_PRTY_MASK 0x1401e4
+/* [RC 20] Parity register #0 read clear */
+#define PBF_REG_PBF_PRTY_STS_CLR 0x1401dc
#define PB_REG_CONTROL 0
/* [RW 2] Interrupt mask register #0 read/write */
#define PB_REG_PB_INT_MASK 0x28
@@ -2091,6 +2124,8 @@
#define PB_REG_PB_PRTY_MASK 0x38
/* [R 4] Parity register #0 read */
#define PB_REG_PB_PRTY_STS 0x2c
+/* [RC 4] Parity register #0 read clear */
+#define PB_REG_PB_PRTY_STS_CLR 0x30
#define PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR (0x1<<0)
#define PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW (0x1<<8)
#define PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR (0x1<<1)
@@ -2446,6 +2481,8 @@
#define PRS_REG_PRS_PRTY_MASK 0x401a4
/* [R 8] Parity register #0 read */
#define PRS_REG_PRS_PRTY_STS 0x40198
+/* [RC 8] Parity register #0 read clear */
+#define PRS_REG_PRS_PRTY_STS_CLR 0x4019c
/* [RW 8] Context region for pure acknowledge packets. Used in CFC load
request message */
#define PRS_REG_PURE_REGIONS 0x40024
@@ -2599,6 +2636,9 @@
/* [R 32] Parity register #0 read */
#define PXP2_REG_PXP2_PRTY_STS_0 0x12057c
#define PXP2_REG_PXP2_PRTY_STS_1 0x12058c
+/* [RC 32] Parity register #0 read clear */
+#define PXP2_REG_PXP2_PRTY_STS_CLR_0 0x120580
+#define PXP2_REG_PXP2_PRTY_STS_CLR_1 0x120590
/* [R 1] Debug only: The 'almost full' indication from each fifo (gives
indication about backpressure) */
#define PXP2_REG_RD_ALMOST_FULL_0 0x120424
@@ -3001,6 +3041,8 @@
#define PXP_REG_PXP_PRTY_MASK 0x103094
/* [R 26] Parity register #0 read */
#define PXP_REG_PXP_PRTY_STS 0x103088
+/* [RC 27] Parity register #0 read clear */
+#define PXP_REG_PXP_PRTY_STS_CLR 0x10308c
/* [RW 4] The activity counter initial increment value sent in the load
request */
#define QM_REG_ACTCTRINITVAL_0 0x168040
@@ -3157,6 +3199,8 @@
#define QM_REG_QM_PRTY_MASK 0x168454
/* [R 12] Parity register #0 read */
#define QM_REG_QM_PRTY_STS 0x168448
+/* [RC 12] Parity register #0 read clear */
+#define QM_REG_QM_PRTY_STS_CLR 0x16844c
/* [R 32] Current queues in pipeline: Queues from 32 to 63 */
#define QM_REG_QSTATUS_HIGH 0x16802c
/* [R 32] Current queues in pipeline: Queues from 96 to 127 */
@@ -3442,6 +3486,8 @@
#define QM_REG_WRRWEIGHTS_9 0x168848
/* [R 6] Keep the fill level of the fifo from write client 1 */
#define QM_REG_XQM_WRC_FIFOLVL 0x168000
+/* [W 1] reset to parity interrupt */
+#define SEM_FAST_REG_PARITY_RST 0x18840
#define SRC_REG_COUNTFREE0 0x40500
/* [RW 1] If clr the searcher is compatible to E1 A0 - support only two
ports. If set the searcher support 8 functions. */
@@ -3470,6 +3516,8 @@
#define SRC_REG_SRC_PRTY_MASK 0x404c8
/* [R 3] Parity register #0 read */
#define SRC_REG_SRC_PRTY_STS 0x404bc
+/* [RC 3] Parity register #0 read clear */
+#define SRC_REG_SRC_PRTY_STS_CLR 0x404c0
/* [R 4] Used to read the value of the XX protection CAM occupancy counter. */
#define TCM_REG_CAM_OCCUP 0x5017c
/* [RW 1] CDU AG read Interface enable. If 0 - the request input is
@@ -3596,8 +3644,12 @@
#define TCM_REG_TCM_INT_MASK 0x501dc
/* [R 11] Interrupt register #0 read */
#define TCM_REG_TCM_INT_STS 0x501d0
+/* [RW 27] Parity mask register #0 read/write */
+#define TCM_REG_TCM_PRTY_MASK 0x501ec
/* [R 27] Parity register #0 read */
#define TCM_REG_TCM_PRTY_STS 0x501e0
+/* [RC 27] Parity register #0 read clear */
+#define TCM_REG_TCM_PRTY_STS_CLR 0x501e4
/* [RW 3] The size of AG context region 0 in REG-pairs. Designates the MS
REG-pair number (e.g. if region 0 is 6 REG-pairs; the value should be 5).
Is used to determine the number of the AG context REG-pairs written back;
@@ -3755,6 +3807,10 @@
#define TM_REG_TM_INT_MASK 0x1640fc
/* [R 1] Interrupt register #0 read */
#define TM_REG_TM_INT_STS 0x1640f0
+/* [RW 7] Parity mask register #0 read/write */
+#define TM_REG_TM_PRTY_MASK 0x16410c
+/* [RC 7] Parity register #0 read clear */
+#define TM_REG_TM_PRTY_STS_CLR 0x164104
/* [RW 8] The event id for aggregated interrupt 0 */
#define TSDM_REG_AGG_INT_EVENT_0 0x42038
#define TSDM_REG_AGG_INT_EVENT_1 0x4203c
@@ -3835,6 +3891,8 @@
#define TSDM_REG_TSDM_PRTY_MASK 0x422bc
/* [R 11] Parity register #0 read */
#define TSDM_REG_TSDM_PRTY_STS 0x422b0
+/* [RC 11] Parity register #0 read clear */
+#define TSDM_REG_TSDM_PRTY_STS_CLR 0x422b4
/* [RW 5] The number of time_slots in the arbitration cycle */
#define TSEM_REG_ARB_CYCLE_SIZE 0x180034
/* [RW 3] The source that is associated with arbitration element 0. Source
@@ -3914,6 +3972,9 @@
#define TSEM_REG_SLOW_EXT_STORE_EMPTY 0x1802a0
/* [RW 8] List of free threads . There is a bit per thread. */
#define TSEM_REG_THREADS_LIST 0x1802e4
+/* [RC 32] Parity register #0 read clear */
+#define TSEM_REG_TSEM_PRTY_STS_CLR_0 0x180118
+#define TSEM_REG_TSEM_PRTY_STS_CLR_1 0x180128
/* [RW 3] The arbitration scheme of time_slot 0 */
#define TSEM_REG_TS_0_AS 0x180038
/* [RW 3] The arbitration scheme of time_slot 10 */
@@ -4116,6 +4177,8 @@
#define UCM_REG_UCM_INT_STS 0xe01c8
/* [R 27] Parity register #0 read */
#define UCM_REG_UCM_PRTY_STS 0xe01d8
+/* [RC 27] Parity register #0 read clear */
+#define UCM_REG_UCM_PRTY_STS_CLR 0xe01dc
/* [RW 2] The size of AG context region 0 in REG-pairs. Designates the MS
REG-pair number (e.g. if region 0 is 6 REG-pairs; the value should be 5).
Is used to determine the number of the AG context REG-pairs written back;
@@ -4292,6 +4355,8 @@
#define USDM_REG_USDM_PRTY_MASK 0xc42c0
/* [R 11] Parity register #0 read */
#define USDM_REG_USDM_PRTY_STS 0xc42b4
+/* [RC 11] Parity register #0 read clear */
+#define USDM_REG_USDM_PRTY_STS_CLR 0xc42b8
/* [RW 5] The number of time_slots in the arbitration cycle */
#define USEM_REG_ARB_CYCLE_SIZE 0x300034
/* [RW 3] The source that is associated with arbitration element 0. Source
@@ -4421,6 +4486,9 @@
/* [R 32] Parity register #0 read */
#define USEM_REG_USEM_PRTY_STS_0 0x300124
#define USEM_REG_USEM_PRTY_STS_1 0x300134
+/* [RC 32] Parity register #0 read clear */
+#define USEM_REG_USEM_PRTY_STS_CLR_0 0x300128
+#define USEM_REG_USEM_PRTY_STS_CLR_1 0x300138
/* [W 7] VF or PF ID for reset error bit. Values 0-63 reset error bit for 64
* VF; values 64-67 reset error for 4 PF; values 68-127 are not valid. */
#define USEM_REG_VFPF_ERR_NUM 0x300380
@@ -4797,6 +4865,8 @@
#define XSDM_REG_XSDM_PRTY_MASK 0x1662bc
/* [R 11] Parity register #0 read */
#define XSDM_REG_XSDM_PRTY_STS 0x1662b0
+/* [RC 11] Parity register #0 read clear */
+#define XSDM_REG_XSDM_PRTY_STS_CLR 0x1662b4
/* [RW 5] The number of time_slots in the arbitration cycle */
#define XSEM_REG_ARB_CYCLE_SIZE 0x280034
/* [RW 3] The source that is associated with arbitration element 0. Source
@@ -4929,6 +4999,9 @@
/* [R 32] Parity register #0 read */
#define XSEM_REG_XSEM_PRTY_STS_0 0x280124
#define XSEM_REG_XSEM_PRTY_STS_1 0x280134
+/* [RC 32] Parity register #0 read clear */
+#define XSEM_REG_XSEM_PRTY_STS_CLR_0 0x280128
+#define XSEM_REG_XSEM_PRTY_STS_CLR_1 0x280138
#define MCPR_NVM_ACCESS_ENABLE_EN (1L<<0)
#define MCPR_NVM_ACCESS_ENABLE_WR_EN (1L<<1)
#define MCPR_NVM_ADDR_NVM_ADDR_VALUE (0xffffffL<<0)
@@ -6316,3 +6389,4 @@ static inline u8 calc_crc8(u32 data, u8 crc)
}
+#endif /* BNX2X_REG_H */
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index 6e4d9b1..bda60d5 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -158,6 +158,11 @@ static void bnx2x_storm_stats_post(struct bnx2x *bp)
spin_lock_bh(&bp->stats_lock);
+ if (bp->stats_pending) {
+ spin_unlock_bh(&bp->stats_lock);
+ return;
+ }
+
ramrod_data.drv_counter = bp->stats_counter++;
ramrod_data.collect_port = bp->port.pmf ? 1 : 0;
for_each_eth_queue(bp, i)
diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c
index 3c403f8..56166ae 100644
--- a/drivers/net/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/cxgb4vf/cxgb4vf_main.c
@@ -749,13 +749,19 @@ static int cxgb4vf_open(struct net_device *dev)
netif_set_real_num_tx_queues(dev, pi->nqsets);
err = netif_set_real_num_rx_queues(dev, pi->nqsets);
if (err)
- return err;
- set_bit(pi->port_id, &adapter->open_device_map);
+ goto err_unwind;
err = link_start(dev);
if (err)
- return err;
+ goto err_unwind;
+
netif_tx_start_all_queues(dev);
+ set_bit(pi->port_id, &adapter->open_device_map);
return 0;
+
+err_unwind:
+ if (adapter->open_device_map == 0)
+ adapter_down(adapter);
+ return err;
}
/*
@@ -764,13 +770,12 @@ static int cxgb4vf_open(struct net_device *dev)
*/
static int cxgb4vf_stop(struct net_device *dev)
{
- int ret;
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
netif_tx_stop_all_queues(dev);
netif_carrier_off(dev);
- ret = t4vf_enable_vi(adapter, pi->viid, false, false);
+ t4vf_enable_vi(adapter, pi->viid, false, false);
pi->link_cfg.link_ok = 0;
clear_bit(pi->port_id, &adapter->open_device_map);
diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c
index e4bec78..0f51c80 100644
--- a/drivers/net/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/cxgb4vf/t4vf_hw.c
@@ -147,9 +147,20 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
/*
* Write the command array into the Mailbox Data register array and
* transfer ownership of the mailbox to the firmware.
+ *
+ * For the VFs, the Mailbox Data "registers" are actually backed by
+ * T4's "MA" interface rather than PL Registers (as is the case for
+ * the PFs). Because these are in different coherency domains, the
+ * write to the VF's PL-register-backed Mailbox Control can race in
+ * front of the writes to the MA-backed VF Mailbox Data "registers".
+ * So we need to do a read-back on at least one byte of the VF Mailbox
+ * Data registers before doing the write to the VF Mailbox Control
+ * register.
*/
for (i = 0, p = cmd; i < size; i += 8)
t4_write_reg64(adapter, mbox_data + i, be64_to_cpu(*p++));
+ t4_read_reg(adapter, mbox_data); /* flush write */
+
t4_write_reg(adapter, mbox_ctl,
MBMSGVALID | MBOWNER(MBOX_OWNER_FW));
t4_read_reg(adapter, mbox_ctl); /* flush write */
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index 77d08e6..aed223b 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -130,10 +130,15 @@ static s32 e1000_set_phy_type(struct e1000_hw *hw)
if (hw->mac_type == e1000_82541 ||
hw->mac_type == e1000_82541_rev_2 ||
hw->mac_type == e1000_82547 ||
- hw->mac_type == e1000_82547_rev_2) {
+ hw->mac_type == e1000_82547_rev_2)
hw->phy_type = e1000_phy_igp;
- break;
- }
+ break;
+ case RTL8211B_PHY_ID:
+ hw->phy_type = e1000_phy_8211;
+ break;
+ case RTL8201N_PHY_ID:
+ hw->phy_type = e1000_phy_8201;
+ break;
default:
/* Should never have loaded on this device */
hw->phy_type = e1000_phy_undefined;
@@ -318,6 +323,9 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_82547GI:
hw->mac_type = e1000_82547_rev_2;
break;
+ case E1000_DEV_ID_INTEL_CE4100_GBE:
+ hw->mac_type = e1000_ce4100;
+ break;
default:
/* Should never have loaded on this device */
return -E1000_ERR_MAC_TYPE;
@@ -372,6 +380,9 @@ void e1000_set_media_type(struct e1000_hw *hw)
case e1000_82542_rev2_1:
hw->media_type = e1000_media_type_fiber;
break;
+ case e1000_ce4100:
+ hw->media_type = e1000_media_type_copper;
+ break;
default:
status = er32(STATUS);
if (status & E1000_STATUS_TBIMODE) {
@@ -460,6 +471,7 @@ s32 e1000_reset_hw(struct e1000_hw *hw)
/* Reset is performed on a shadow of the control register */
ew32(CTRL_DUP, (ctrl | E1000_CTRL_RST));
break;
+ case e1000_ce4100:
default:
ew32(CTRL, (ctrl | E1000_CTRL_RST));
break;
@@ -952,6 +964,67 @@ static s32 e1000_setup_fiber_serdes_link(struct e1000_hw *hw)
}
/**
+ * e1000_copper_link_rtl_setup - Copper link setup for e1000_phy_rtl series.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Commits changes to PHY configuration by calling e1000_phy_reset().
+ */
+static s32 e1000_copper_link_rtl_setup(struct e1000_hw *hw)
+{
+ s32 ret_val;
+
+ /* SW reset the PHY so all changes take effect */
+ ret_val = e1000_phy_reset(hw);
+ if (ret_val) {
+ e_dbg("Error Resetting the PHY\n");
+ return ret_val;
+ }
+
+ return E1000_SUCCESS;
+}
+
+static s32 gbe_dhg_phy_setup(struct e1000_hw *hw)
+{
+ s32 ret_val;
+ u32 ctrl_aux;
+
+ switch (hw->phy_type) {
+ case e1000_phy_8211:
+ ret_val = e1000_copper_link_rtl_setup(hw);
+ if (ret_val) {
+ e_dbg("e1000_copper_link_rtl_setup failed!\n");
+ return ret_val;
+ }
+ break;
+ case e1000_phy_8201:
+ /* Set RMII mode */
+ ctrl_aux = er32(CTL_AUX);
+ ctrl_aux |= E1000_CTL_AUX_RMII;
+ ew32(CTL_AUX, ctrl_aux);
+ E1000_WRITE_FLUSH();
+
+ /* Disable the J/K bits required for receive */
+ ctrl_aux = er32(CTL_AUX);
+ ctrl_aux |= 0x4;
+ ctrl_aux &= ~0x2;
+ ew32(CTL_AUX, ctrl_aux);
+ E1000_WRITE_FLUSH();
+ ret_val = e1000_copper_link_rtl_setup(hw);
+
+ if (ret_val) {
+ e_dbg("e1000_copper_link_rtl_setup failed!\n");
+ return ret_val;
+ }
+ break;
+ default:
+ e_dbg("Error Resetting the PHY\n");
+ return E1000_ERR_PHY_TYPE;
+ }
+
+ return E1000_SUCCESS;
+}
+
+/**
* e1000_copper_link_preconfig - early configuration for copper
* @hw: Struct containing variables accessed by shared code
*
@@ -1286,6 +1359,10 @@ static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
if (hw->autoneg_advertised == 0)
hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+ /* IFE/RTL8201N PHY only supports 10/100 */
+ if (hw->phy_type == e1000_phy_8201)
+ hw->autoneg_advertised &= AUTONEG_ADVERTISE_10_100_ALL;
+
e_dbg("Reconfiguring auto-neg advertisement params\n");
ret_val = e1000_phy_setup_autoneg(hw);
if (ret_val) {
@@ -1341,7 +1418,7 @@ static s32 e1000_copper_link_postconfig(struct e1000_hw *hw)
s32 ret_val;
e_dbg("e1000_copper_link_postconfig");
- if (hw->mac_type >= e1000_82544) {
+ if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100)) {
e1000_config_collision_dist(hw);
} else {
ret_val = e1000_config_mac_to_phy(hw);
@@ -1395,6 +1472,12 @@ static s32 e1000_setup_copper_link(struct e1000_hw *hw)
ret_val = e1000_copper_link_mgp_setup(hw);
if (ret_val)
return ret_val;
+ } else {
+ ret_val = gbe_dhg_phy_setup(hw);
+ if (ret_val) {
+ e_dbg("gbe_dhg_phy_setup failed!\n");
+ return ret_val;
+ }
}
if (hw->autoneg) {
@@ -1461,10 +1544,11 @@ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
return ret_val;
/* Read the MII 1000Base-T Control Register (Address 9). */
- ret_val =
- e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg);
+ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg);
if (ret_val)
return ret_val;
+ else if (hw->phy_type == e1000_phy_8201)
+ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
/* Need to parse both autoneg_advertised and fc and set up
* the appropriate PHY registers. First we will parse for
@@ -1577,9 +1661,14 @@ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
- ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg);
- if (ret_val)
- return ret_val;
+ if (hw->phy_type == e1000_phy_8201) {
+ mii_1000t_ctrl_reg = 0;
+ } else {
+ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL,
+ mii_1000t_ctrl_reg);
+ if (ret_val)
+ return ret_val;
+ }
return E1000_SUCCESS;
}
@@ -1860,7 +1949,7 @@ static s32 e1000_config_mac_to_phy(struct e1000_hw *hw)
/* 82544 or newer MAC, Auto Speed Detection takes care of
* MAC speed/duplex configuration.*/
- if (hw->mac_type >= e1000_82544)
+ if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100))
return E1000_SUCCESS;
/* Read the Device Control Register and set the bits to Force Speed
@@ -1870,27 +1959,49 @@ static s32 e1000_config_mac_to_phy(struct e1000_hw *hw)
ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS);
- /* Set up duplex in the Device Control and Transmit Control
- * registers depending on negotiated values.
- */
- ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
- if (ret_val)
- return ret_val;
+ switch (hw->phy_type) {
+ case e1000_phy_8201:
+ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
+ if (ret_val)
+ return ret_val;
- if (phy_data & M88E1000_PSSR_DPLX)
- ctrl |= E1000_CTRL_FD;
- else
- ctrl &= ~E1000_CTRL_FD;
+ if (phy_data & RTL_PHY_CTRL_FD)
+ ctrl |= E1000_CTRL_FD;
+ else
+ ctrl &= ~E1000_CTRL_FD;
- e1000_config_collision_dist(hw);
+ if (phy_data & RTL_PHY_CTRL_SPD_100)
+ ctrl |= E1000_CTRL_SPD_100;
+ else
+ ctrl |= E1000_CTRL_SPD_10;
- /* Set up speed in the Device Control register depending on
- * negotiated values.
- */
- if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
- ctrl |= E1000_CTRL_SPD_1000;
- else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS)
- ctrl |= E1000_CTRL_SPD_100;
+ e1000_config_collision_dist(hw);
+ break;
+ default:
+ /* Set up duplex in the Device Control and Transmit Control
+ * registers depending on negotiated values.
+ */
+ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+
+ if (phy_data & M88E1000_PSSR_DPLX)
+ ctrl |= E1000_CTRL_FD;
+ else
+ ctrl &= ~E1000_CTRL_FD;
+
+ e1000_config_collision_dist(hw);
+
+ /* Set up speed in the Device Control register depending on
+ * negotiated values.
+ */
+ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
+ ctrl |= E1000_CTRL_SPD_1000;
+ else if ((phy_data & M88E1000_PSSR_SPEED) ==
+ M88E1000_PSSR_100MBS)
+ ctrl |= E1000_CTRL_SPD_100;
+ }
/* Write the configured values back to the Device Control Reg. */
ew32(CTRL, ctrl);
@@ -2401,7 +2512,8 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
* speed/duplex on the MAC to the current PHY speed/duplex
* settings.
*/
- if (hw->mac_type >= e1000_82544)
+ if ((hw->mac_type >= e1000_82544) &&
+ (hw->mac_type != e1000_ce4100))
e1000_config_collision_dist(hw);
else {
ret_val = e1000_config_mac_to_phy(hw);
@@ -2738,7 +2850,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
{
u32 i;
u32 mdic = 0;
- const u32 phy_addr = 1;
+ const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1;
e_dbg("e1000_read_phy_reg_ex");
@@ -2752,28 +2864,61 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
- (phy_addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
+ if (hw->mac_type == e1000_ce4100) {
+ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (INTEL_CE_GBE_MDIC_OP_READ) |
+ (INTEL_CE_GBE_MDIC_GO));
- ew32(MDIC, mdic);
+ writel(mdic, E1000_MDIO_CMD);
- /* Poll the ready bit to see if the MDI read completed */
- for (i = 0; i < 64; i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Read did not complete\n");
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Error\n");
- return -E1000_ERR_PHY;
+ /* Poll the ready bit to see if the MDI read
+ * completed
+ */
+ for (i = 0; i < 64; i++) {
+ udelay(50);
+ mdic = readl(E1000_MDIO_CMD);
+ if (!(mdic & INTEL_CE_GBE_MDIC_GO))
+ break;
+ }
+
+ if (mdic & INTEL_CE_GBE_MDIC_GO) {
+ e_dbg("MDI Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+
+ mdic = readl(E1000_MDIO_STS);
+ if (mdic & INTEL_CE_GBE_MDIC_READ_ERROR) {
+ e_dbg("MDI Read Error\n");
+ return -E1000_ERR_PHY;
+ }
+ *phy_data = (u16) mdic;
+ } else {
+ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
+
+ ew32(MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read
+ * completed
+ */
+ for (i = 0; i < 64; i++) {
+ udelay(50);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Read did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Error\n");
+ return -E1000_ERR_PHY;
+ }
+ *phy_data = (u16) mdic;
}
- *phy_data = (u16) mdic;
} else {
/* We must first send a preamble through the MDIO pin to signal the
* beginning of an MII instruction. This is done by sending 32
@@ -2840,7 +2985,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
{
u32 i;
u32 mdic = 0;
- const u32 phy_addr = 1;
+ const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1;
e_dbg("e1000_write_phy_reg_ex");
@@ -2850,27 +2995,54 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
}
if (hw->mac_type > e1000_82543) {
- /* Set up Op-code, Phy Address, register address, and data intended
- * for the PHY register in the MDI Control register. The MAC will take
- * care of interfacing with the PHY to send the desired data.
+ /* Set up Op-code, Phy Address, register address, and data
+ * intended for the PHY register in the MDI Control register.
+ * The MAC will take care of interfacing with the PHY to send
+ * the desired data.
*/
- mdic = (((u32) phy_data) |
- (reg_addr << E1000_MDIC_REG_SHIFT) |
- (phy_addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
+ if (hw->mac_type == e1000_ce4100) {
+ mdic = (((u32) phy_data) |
+ (reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (INTEL_CE_GBE_MDIC_OP_WRITE) |
+ (INTEL_CE_GBE_MDIC_GO));
- ew32(MDIC, mdic);
+ writel(mdic, E1000_MDIO_CMD);
- /* Poll the ready bit to see if the MDI read completed */
- for (i = 0; i < 641; i++) {
- udelay(5);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Write did not complete\n");
- return -E1000_ERR_PHY;
+ /* Poll the ready bit to see if the MDI read
+ * completed
+ */
+ for (i = 0; i < 640; i++) {
+ udelay(5);
+ mdic = readl(E1000_MDIO_CMD);
+ if (!(mdic & INTEL_CE_GBE_MDIC_GO))
+ break;
+ }
+ if (mdic & INTEL_CE_GBE_MDIC_GO) {
+ e_dbg("MDI Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
+ } else {
+ mdic = (((u32) phy_data) |
+ (reg_addr << E1000_MDIC_REG_SHIFT) |
+ (phy_addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
+
+ ew32(MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read
+ * completed
+ */
+ for (i = 0; i < 641; i++) {
+ udelay(5);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Write did not complete\n");
+ return -E1000_ERR_PHY;
+ }
}
} else {
/* We'll need to use the SW defined pins to shift the write command
@@ -3048,6 +3220,11 @@ static s32 e1000_detect_gig_phy(struct e1000_hw *hw)
if (hw->phy_id == M88E1011_I_PHY_ID)
match = true;
break;
+ case e1000_ce4100:
+ if ((hw->phy_id == RTL8211B_PHY_ID) ||
+ (hw->phy_id == RTL8201N_PHY_ID))
+ match = true;
+ break;
case e1000_82541:
case e1000_82541_rev_2:
case e1000_82547:
@@ -3291,6 +3468,9 @@ s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info)
if (hw->phy_type == e1000_phy_igp)
return e1000_phy_igp_get_info(hw, phy_info);
+ else if ((hw->phy_type == e1000_phy_8211) ||
+ (hw->phy_type == e1000_phy_8201))
+ return E1000_SUCCESS;
else
return e1000_phy_m88_get_info(hw, phy_info);
}
@@ -3742,6 +3922,12 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
e_dbg("e1000_read_eeprom");
+ if (hw->mac_type == e1000_ce4100) {
+ GBE_CONFIG_FLASH_READ(GBE_CONFIG_BASE_VIRT, offset, words,
+ data);
+ return E1000_SUCCESS;
+ }
+
/* If eeprom is not yet detected, do so now */
if (eeprom->word_size == 0)
e1000_init_eeprom_params(hw);
@@ -3904,6 +4090,12 @@ static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
e_dbg("e1000_write_eeprom");
+ if (hw->mac_type == e1000_ce4100) {
+ GBE_CONFIG_FLASH_WRITE(GBE_CONFIG_BASE_VIRT, offset, words,
+ data);
+ return E1000_SUCCESS;
+ }
+
/* If eeprom is not yet detected, do so now */
if (eeprom->word_size == 0)
e1000_init_eeprom_params(hw);
diff --git a/drivers/net/e1000/e1000_hw.h b/drivers/net/e1000/e1000_hw.h
index ecd9f6c..f5514a0 100644
--- a/drivers/net/e1000/e1000_hw.h
+++ b/drivers/net/e1000/e1000_hw.h
@@ -52,6 +52,7 @@ typedef enum {
e1000_82545,
e1000_82545_rev_3,
e1000_82546,
+ e1000_ce4100,
e1000_82546_rev_3,
e1000_82541,
e1000_82541_rev_2,
@@ -209,9 +210,11 @@ typedef enum {
} e1000_1000t_rx_status;
typedef enum {
- e1000_phy_m88 = 0,
- e1000_phy_igp,
- e1000_phy_undefined = 0xFF
+ e1000_phy_m88 = 0,
+ e1000_phy_igp,
+ e1000_phy_8211,
+ e1000_phy_8201,
+ e1000_phy_undefined = 0xFF
} e1000_phy_type;
typedef enum {
@@ -442,6 +445,7 @@ void e1000_io_write(struct e1000_hw *hw, unsigned long port, u32 value);
#define E1000_DEV_ID_82547EI 0x1019
#define E1000_DEV_ID_82547EI_MOBILE 0x101A
#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
+#define E1000_DEV_ID_INTEL_CE4100_GBE 0x2E6E
#define NODE_ADDRESS_SIZE 6
#define ETH_LENGTH_OF_ADDRESS 6
@@ -808,6 +812,16 @@ struct e1000_ffvt_entry {
#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */
#define E1000_FLA 0x0001C /* Flash Access - RW */
#define E1000_MDIC 0x00020 /* MDI Control - RW */
+
+extern void __iomem *ce4100_gbe_mdio_base_virt;
+#define INTEL_CE_GBE_MDIO_RCOMP_BASE (ce4100_gbe_mdio_base_virt)
+#define E1000_MDIO_STS (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0)
+#define E1000_MDIO_CMD (INTEL_CE_GBE_MDIO_RCOMP_BASE + 4)
+#define E1000_MDIO_DRV (INTEL_CE_GBE_MDIO_RCOMP_BASE + 8)
+#define E1000_MDC_CMD (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0xC)
+#define E1000_RCOMP_CTL (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0x20)
+#define E1000_RCOMP_STS (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0x24)
+
#define E1000_SCTL 0x00024 /* SerDes Control - RW */
#define E1000_FEXTNVM 0x00028 /* Future Extended NVM register */
#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */
@@ -820,6 +834,34 @@ struct e1000_ffvt_entry {
#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */
#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */
#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */
+
+/* Auxiliary Control Register. This register is CE4100 specific,
+ * RMII/RGMII function is switched by this register - RW
+ * Following are bits definitions of the Auxiliary Control Register
+ */
+#define E1000_CTL_AUX 0x000E0
+#define E1000_CTL_AUX_END_SEL_SHIFT 10
+#define E1000_CTL_AUX_ENDIANESS_SHIFT 8
+#define E1000_CTL_AUX_RGMII_RMII_SHIFT 0
+
+/* descriptor and packet transfer use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_DES_PKT (0x0 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* descriptor use CTL_AUX.ENDIANESS, packet use default */
+#define E1000_CTL_AUX_DES (0x1 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* descriptor use default, packet use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_PKT (0x2 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* all use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_ALL (0x3 << E1000_CTL_AUX_END_SEL_SHIFT)
+
+#define E1000_CTL_AUX_RGMII (0x0 << E1000_CTL_AUX_RGMII_RMII_SHIFT)
+#define E1000_CTL_AUX_RMII (0x1 << E1000_CTL_AUX_RGMII_RMII_SHIFT)
+
+/* LW little endian, Byte big endian */
+#define E1000_CTL_AUX_LWLE_BBE (0x0 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWLE_BLE (0x1 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWBE_BBE (0x2 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWBE_BLE (0x3 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+
#define E1000_RCTL 0x00100 /* RX Control - RW */
#define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */
#define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */
@@ -1011,6 +1053,7 @@ struct e1000_ffvt_entry {
* in more current versions of the 8254x. Despite the difference in location,
* the registers function in the same manner.
*/
+#define E1000_82542_CTL_AUX E1000_CTL_AUX
#define E1000_82542_CTRL E1000_CTRL
#define E1000_82542_CTRL_DUP E1000_CTRL_DUP
#define E1000_82542_STATUS E1000_STATUS
@@ -1571,6 +1614,11 @@ struct e1000_hw {
#define E1000_MDIC_INT_EN 0x20000000
#define E1000_MDIC_ERROR 0x40000000
+#define INTEL_CE_GBE_MDIC_OP_WRITE 0x04000000
+#define INTEL_CE_GBE_MDIC_OP_READ 0x00000000
+#define INTEL_CE_GBE_MDIC_GO 0x80000000
+#define INTEL_CE_GBE_MDIC_READ_ERROR 0x80000000
+
#define E1000_KUMCTRLSTA_MASK 0x0000FFFF
#define E1000_KUMCTRLSTA_OFFSET 0x001F0000
#define E1000_KUMCTRLSTA_OFFSET_SHIFT 16
@@ -2871,6 +2919,11 @@ struct e1000_host_command_info {
#define M88E1111_I_PHY_ID 0x01410CC0
#define L1LXT971A_PHY_ID 0x001378E0
+#define RTL8211B_PHY_ID 0x001CC910
+#define RTL8201N_PHY_ID 0x8200
+#define RTL_PHY_CTRL_FD 0x0100 /* Full duplex.0=half; 1=full */
+#define RTL_PHY_CTRL_SPD_100 0x200000 /* Force 100Mb */
+
/* Bits...
* 15-5: page
* 4-0: register offset
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 340e12d..4ff88a6 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -28,6 +28,12 @@
#include "e1000.h"
#include <net/ip6_checksum.h>
+#include <linux/io.h>
+
+/* Intel Media SOC GbE MDIO physical base address */
+static unsigned long ce4100_gbe_mdio_base_phy;
+/* Intel Media SOC GbE MDIO virtual base address */
+void __iomem *ce4100_gbe_mdio_base_virt;
char e1000_driver_name[] = "e1000";
static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
@@ -79,6 +85,7 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = {
INTEL_E1000_ETHERNET_DEVICE(0x108A),
INTEL_E1000_ETHERNET_DEVICE(0x1099),
INTEL_E1000_ETHERNET_DEVICE(0x10B5),
+ INTEL_E1000_ETHERNET_DEVICE(0x2E6E),
/* required last entry */
{0,}
};
@@ -459,6 +466,7 @@ static void e1000_power_down_phy(struct e1000_adapter *adapter)
case e1000_82545:
case e1000_82545_rev_3:
case e1000_82546:
+ case e1000_ce4100:
case e1000_82546_rev_3:
case e1000_82541:
case e1000_82541_rev_2:
@@ -573,6 +581,7 @@ void e1000_reset(struct e1000_adapter *adapter)
case e1000_82545:
case e1000_82545_rev_3:
case e1000_82546:
+ case e1000_ce4100:
case e1000_82546_rev_3:
pba = E1000_PBA_48K;
break;
@@ -894,6 +903,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
static int global_quad_port_a = 0; /* global ksp3 port a indication */
int i, err, pci_using_dac;
u16 eeprom_data = 0;
+ u16 tmp = 0;
u16 eeprom_apme_mask = E1000_EEPROM_APME;
int bars, need_ioport;
@@ -996,6 +1006,14 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
goto err_sw_init;
err = -EIO;
+ if (hw->mac_type == e1000_ce4100) {
+ ce4100_gbe_mdio_base_phy = pci_resource_start(pdev, BAR_1);
+ ce4100_gbe_mdio_base_virt = ioremap(ce4100_gbe_mdio_base_phy,
+ pci_resource_len(pdev, BAR_1));
+
+ if (!ce4100_gbe_mdio_base_virt)
+ goto err_mdio_ioremap;
+ }
if (hw->mac_type >= e1000_82543) {
netdev->features = NETIF_F_SG |
@@ -1135,6 +1153,20 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
adapter->wol = adapter->eeprom_wol;
device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+ /* Auto detect PHY address */
+ if (hw->mac_type == e1000_ce4100) {
+ for (i = 0; i < 32; i++) {
+ hw->phy_addr = i;
+ e1000_read_phy_reg(hw, PHY_ID2, &tmp);
+ if (tmp == 0 || tmp == 0xFF) {
+ if (i == 31)
+ goto err_eeprom;
+ continue;
+ } else
+ break;
+ }
+ }
+
/* reset the hardware with the new settings */
e1000_reset(adapter);
@@ -1171,6 +1203,8 @@ err_eeprom:
kfree(adapter->rx_ring);
err_dma:
err_sw_init:
+err_mdio_ioremap:
+ iounmap(ce4100_gbe_mdio_base_virt);
iounmap(hw->hw_addr);
err_ioremap:
free_netdev(netdev);
@@ -1409,6 +1443,7 @@ static bool e1000_check_64k_bound(struct e1000_adapter *adapter, void *start,
/* First rev 82545 and 82546 need to not allow any memory
* write location to cross 64k boundary due to errata 23 */
if (hw->mac_type == e1000_82545 ||
+ hw->mac_type == e1000_ce4100 ||
hw->mac_type == e1000_82546) {
return ((begin ^ (end - 1)) >> 16) != 0 ? false : true;
}
diff --git a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h
index edd1c75..55c1711 100644
--- a/drivers/net/e1000/e1000_osdep.h
+++ b/drivers/net/e1000/e1000_osdep.h
@@ -34,12 +34,21 @@
#ifndef _E1000_OSDEP_H_
#define _E1000_OSDEP_H_
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
#include <asm/io.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
+
+#define CONFIG_RAM_BASE 0x60000
+#define GBE_CONFIG_OFFSET 0x0
+
+#define GBE_CONFIG_RAM_BASE \
+ ((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET))
+
+#define GBE_CONFIG_BASE_VIRT phys_to_virt(GBE_CONFIG_RAM_BASE)
+
+#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \
+ (iowrite16_rep(base + offset, data, count))
+
+#define GBE_CONFIG_FLASH_READ(base, offset, count, data) \
+ (ioread16_rep(base + (offset << 1), data, count))
#define er32(reg) \
(readl(hw->hw_addr + ((hw->mac_type >= e1000_82543) \
diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index e57e409..cb6c7b1 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -78,6 +78,8 @@ static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw);
static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw);
static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw);
static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw);
+static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active);
+static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active);
/**
* e1000_init_phy_params_82571 - Init PHY func ptrs.
@@ -113,6 +115,8 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw)
phy->type = e1000_phy_bm;
phy->ops.acquire = e1000_get_hw_semaphore_82574;
phy->ops.release = e1000_put_hw_semaphore_82574;
+ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82574;
+ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82574;
break;
default:
return -E1000_ERR_PHY;
@@ -121,29 +125,36 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw)
/* This can only be done after all function pointers are setup. */
ret_val = e1000_get_phy_id_82571(hw);
+ if (ret_val) {
+ e_dbg("Error getting PHY ID\n");
+ return ret_val;
+ }
/* Verify phy id */
switch (hw->mac.type) {
case e1000_82571:
case e1000_82572:
if (phy->id != IGP01E1000_I_PHY_ID)
- return -E1000_ERR_PHY;
+ ret_val = -E1000_ERR_PHY;
break;
case e1000_82573:
if (phy->id != M88E1111_I_PHY_ID)
- return -E1000_ERR_PHY;
+ ret_val = -E1000_ERR_PHY;
break;
case e1000_82574:
case e1000_82583:
if (phy->id != BME1000_E_PHY_ID_R2)
- return -E1000_ERR_PHY;
+ ret_val = -E1000_ERR_PHY;
break;
default:
- return -E1000_ERR_PHY;
+ ret_val = -E1000_ERR_PHY;
break;
}
- return 0;
+ if (ret_val)
+ e_dbg("PHY ID unknown: type = 0x%08x\n", phy->id);
+
+ return ret_val;
}
/**
@@ -649,6 +660,58 @@ static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
}
/**
+ * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state
+ * @hw: pointer to the HW structure
+ * @active: true to enable LPLU, false to disable
+ *
+ * Sets the LPLU D0 state according to the active flag.
+ * LPLU will not be activated unless the
+ * device autonegotiation advertisement meets standards of
+ * either 10 or 10/100 or 10/100/1000 at all duplexes.
+ * This is a function pointer entry point only called by
+ * PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
+{
+ u16 data = er32(POEMB);
+
+ if (active)
+ data |= E1000_PHY_CTRL_D0A_LPLU;
+ else
+ data &= ~E1000_PHY_CTRL_D0A_LPLU;
+
+ ew32(POEMB, data);
+ return 0;
+}
+
+/**
+ * e1000_set_d3_lplu_state_82574 - Sets low power link up state for D3
+ * @hw: pointer to the HW structure
+ * @active: boolean used to enable/disable lplu
+ *
+ * The low power link up (lplu) state is set to the power management level D3
+ * when active is true, else clear lplu for D3. LPLU
+ * is used during Dx states where the power conservation is most important.
+ * During driver activity, SmartSpeed should be enabled so performance is
+ * maintained.
+ **/
+static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active)
+{
+ u16 data = er32(POEMB);
+
+ if (!active) {
+ data &= ~E1000_PHY_CTRL_NOND0A_LPLU;
+ } else if ((hw->phy.autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+ (hw->phy.autoneg_advertised == E1000_ALL_NOT_GIG) ||
+ (hw->phy.autoneg_advertised == E1000_ALL_10_SPEED)) {
+ data |= E1000_PHY_CTRL_NOND0A_LPLU;
+ }
+
+ ew32(POEMB, data);
+ return 0;
+}
+
+/**
* e1000_acquire_nvm_82571 - Request for access to the EEPROM
* @hw: pointer to the HW structure
*
@@ -956,7 +1019,7 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active)
**/
static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
{
- u32 ctrl, ctrl_ext, icr;
+ u32 ctrl, ctrl_ext;
s32 ret_val;
/*
@@ -1040,7 +1103,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
/* Clear any pending interrupt events. */
ew32(IMC, 0xffffffff);
- icr = er32(ICR);
+ er32(ICR);
if (hw->mac.type == e1000_82571) {
/* Install any alternate MAC address into RAR0 */
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 2c913b8..5255be7 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -38,6 +38,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
+#include <linux/crc32.h>
#include "hw.h"
@@ -496,6 +497,8 @@ extern void e1000e_free_tx_resources(struct e1000_adapter *adapter);
extern void e1000e_update_stats(struct e1000_adapter *adapter);
extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter);
extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter);
+extern void e1000e_get_hw_control(struct e1000_adapter *adapter);
+extern void e1000e_release_hw_control(struct e1000_adapter *adapter);
extern void e1000e_disable_aspm(struct pci_dev *pdev, u16 state);
extern unsigned int copybreak;
diff --git a/drivers/net/e1000e/es2lan.c b/drivers/net/e1000e/es2lan.c
index b18c644..e45a61c 100644
--- a/drivers/net/e1000e/es2lan.c
+++ b/drivers/net/e1000e/es2lan.c
@@ -784,7 +784,7 @@ static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed,
**/
static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
{
- u32 ctrl, icr;
+ u32 ctrl;
s32 ret_val;
/*
@@ -818,7 +818,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
/* Clear any pending interrupt events. */
ew32(IMC, 0xffffffff);
- icr = er32(ICR);
+ er32(ICR);
ret_val = e1000_check_alt_mac_addr_generic(hw);
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index affcacf..f8ed03d 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -624,20 +624,24 @@ static void e1000_get_drvinfo(struct net_device *netdev,
struct e1000_adapter *adapter = netdev_priv(netdev);
char firmware_version[32];
- strncpy(drvinfo->driver, e1000e_driver_name, 32);
- strncpy(drvinfo->version, e1000e_driver_version, 32);
+ strncpy(drvinfo->driver, e1000e_driver_name,
+ sizeof(drvinfo->driver) - 1);
+ strncpy(drvinfo->version, e1000e_driver_version,
+ sizeof(drvinfo->version) - 1);
/*
* EEPROM image version # is reported as firmware version # for
* PCI-E controllers
*/
- sprintf(firmware_version, "%d.%d-%d",
+ snprintf(firmware_version, sizeof(firmware_version), "%d.%d-%d",
(adapter->eeprom_vers & 0xF000) >> 12,
(adapter->eeprom_vers & 0x0FF0) >> 4,
(adapter->eeprom_vers & 0x000F));
- strncpy(drvinfo->fw_version, firmware_version, 32);
- strncpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
+ strncpy(drvinfo->fw_version, firmware_version,
+ sizeof(drvinfo->fw_version) - 1);
+ strncpy(drvinfo->bus_info, pci_name(adapter->pdev),
+ sizeof(drvinfo->bus_info) - 1);
drvinfo->regdump_len = e1000_get_regs_len(netdev);
drvinfo->eedump_len = e1000_get_eeprom_len(netdev);
}
@@ -1704,6 +1708,19 @@ static void e1000_diag_test(struct net_device *netdev,
bool if_running = netif_running(netdev);
set_bit(__E1000_TESTING, &adapter->state);
+
+ if (!if_running) {
+ /* Get control of and reset hardware */
+ if (adapter->flags & FLAG_HAS_AMT)
+ e1000e_get_hw_control(adapter);
+
+ e1000e_power_up_phy(adapter);
+
+ adapter->hw.phy.autoneg_wait_to_complete = 1;
+ e1000e_reset(adapter);
+ adapter->hw.phy.autoneg_wait_to_complete = 0;
+ }
+
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
/* Offline tests */
@@ -1717,8 +1734,6 @@ static void e1000_diag_test(struct net_device *netdev,
if (if_running)
/* indicate we're in test mode */
dev_close(netdev);
- else
- e1000e_reset(adapter);
if (e1000_reg_test(adapter, &data[0]))
eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1732,8 +1747,6 @@ static void e1000_diag_test(struct net_device *netdev,
eth_test->flags |= ETH_TEST_FL_FAILED;
e1000e_reset(adapter);
- /* make sure the phy is powered up */
- e1000e_power_up_phy(adapter);
if (e1000_loopback_test(adapter, &data[3]))
eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1755,28 +1768,29 @@ static void e1000_diag_test(struct net_device *netdev,
if (if_running)
dev_open(netdev);
} else {
- if (!if_running && (adapter->flags & FLAG_HAS_AMT)) {
- clear_bit(__E1000_TESTING, &adapter->state);
- dev_open(netdev);
- set_bit(__E1000_TESTING, &adapter->state);
- }
+ /* Online tests */
e_info("online testing starting\n");
- /* Online tests */
- if (e1000_link_test(adapter, &data[4]))
- eth_test->flags |= ETH_TEST_FL_FAILED;
- /* Online tests aren't run; pass by default */
+ /* register, eeprom, intr and loopback tests not run online */
data[0] = 0;
data[1] = 0;
data[2] = 0;
data[3] = 0;
- if (!if_running && (adapter->flags & FLAG_HAS_AMT))
- dev_close(netdev);
+ if (e1000_link_test(adapter, &data[4]))
+ eth_test->flags |= ETH_TEST_FL_FAILED;
clear_bit(__E1000_TESTING, &adapter->state);
}
+
+ if (!if_running) {
+ e1000e_reset(adapter);
+
+ if (adapter->flags & FLAG_HAS_AMT)
+ e1000e_release_hw_control(adapter);
+ }
+
msleep_interruptible(4 * 1000);
}
diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h
index ba302a5..e774380 100644
--- a/drivers/net/e1000e/hw.h
+++ b/drivers/net/e1000e/hw.h
@@ -83,6 +83,7 @@ enum e1e_registers {
E1000_EXTCNF_CTRL = 0x00F00, /* Extended Configuration Control */
E1000_EXTCNF_SIZE = 0x00F08, /* Extended Configuration Size */
E1000_PHY_CTRL = 0x00F10, /* PHY Control Register in CSR */
+#define E1000_POEMB E1000_PHY_CTRL /* PHY OEM Bits */
E1000_PBA = 0x01000, /* Packet Buffer Allocation - RW */
E1000_PBS = 0x01008, /* Packet Buffer Size */
E1000_EEMNGCTL = 0x01010, /* MNG EEprom Control */
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index d86cc08..5328a292 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -1395,22 +1395,6 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw)
}
}
-static u32 e1000_calc_rx_da_crc(u8 mac[])
-{
- u32 poly = 0xEDB88320; /* Polynomial for 802.3 CRC calculation */
- u32 i, j, mask, crc;
-
- crc = 0xffffffff;
- for (i = 0; i < 6; i++) {
- crc = crc ^ mac[i];
- for (j = 8; j > 0; j--) {
- mask = (crc & 1) * (-1);
- crc = (crc >> 1) ^ (poly & mask);
- }
- }
- return ~crc;
-}
-
/**
* e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation
* with 82579 PHY
@@ -1453,8 +1437,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
mac_addr[4] = (addr_high & 0xFF);
mac_addr[5] = ((addr_high >> 8) & 0xFF);
- ew32(PCH_RAICC(i),
- e1000_calc_rx_da_crc(mac_addr));
+ ew32(PCH_RAICC(i), ~ether_crc_le(ETH_ALEN, mac_addr));
}
/* Write Rx addresses to the PHY */
@@ -2977,7 +2960,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
{
struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
u16 reg;
- u32 ctrl, icr, kab;
+ u32 ctrl, kab;
s32 ret_val;
/*
@@ -3067,7 +3050,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
ew32(CRC_OFFSET, 0x65656565);
ew32(IMC, 0xffffffff);
- icr = er32(ICR);
+ er32(ICR);
kab = er32(KABGTXD);
kab |= E1000_KABGTXD_BGSQLBIAS;
@@ -3118,7 +3101,7 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw)
* Reset the phy after disabling host wakeup to reset the Rx buffer.
*/
if (hw->phy.type == e1000_phy_82578) {
- hw->phy.ops.read_reg(hw, BM_WUC, &i);
+ e1e_rphy(hw, BM_WUC, &i);
ret_val = e1000_phy_hw_reset_ich8lan(hw);
if (ret_val)
return ret_val;
@@ -3276,9 +3259,8 @@ static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw)
(hw->phy.type == e1000_phy_82577)) {
ew32(FCRTV_PCH, hw->fc.refresh_time);
- ret_val = hw->phy.ops.write_reg(hw,
- PHY_REG(BM_PORT_CTRL_PAGE, 27),
- hw->fc.pause_time);
+ ret_val = e1e_wphy(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27),
+ hw->fc.pause_time);
if (ret_val)
return ret_val;
}
@@ -3342,8 +3324,7 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
return ret_val;
break;
case e1000_phy_ife:
- ret_val = hw->phy.ops.read_reg(hw, IFE_PHY_MDIX_CONTROL,
- &reg_data);
+ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &reg_data);
if (ret_val)
return ret_val;
@@ -3361,8 +3342,7 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
reg_data |= IFE_PMC_AUTO_MDIX;
break;
}
- ret_val = hw->phy.ops.write_reg(hw, IFE_PHY_MDIX_CONTROL,
- reg_data);
+ ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, reg_data);
if (ret_val)
return ret_val;
break;
@@ -3646,7 +3626,8 @@ static s32 e1000_led_off_ich8lan(struct e1000_hw *hw)
{
if (hw->phy.type == e1000_phy_ife)
return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED,
- (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF));
+ (IFE_PSCL_PROBE_MODE |
+ IFE_PSCL_PROBE_LEDS_OFF));
ew32(LEDCTL, hw->mac.ledctl_mode1);
return 0;
@@ -3660,8 +3641,7 @@ static s32 e1000_led_off_ich8lan(struct e1000_hw *hw)
**/
static s32 e1000_setup_led_pchlan(struct e1000_hw *hw)
{
- return hw->phy.ops.write_reg(hw, HV_LED_CONFIG,
- (u16)hw->mac.ledctl_mode1);
+ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1);
}
/**
@@ -3672,8 +3652,7 @@ static s32 e1000_setup_led_pchlan(struct e1000_hw *hw)
**/
static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw)
{
- return hw->phy.ops.write_reg(hw, HV_LED_CONFIG,
- (u16)hw->mac.ledctl_default);
+ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default);
}
/**
@@ -3704,7 +3683,7 @@ static s32 e1000_led_on_pchlan(struct e1000_hw *hw)
}
}
- return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data);
+ return e1e_wphy(hw, HV_LED_CONFIG, data);
}
/**
@@ -3735,7 +3714,7 @@ static s32 e1000_led_off_pchlan(struct e1000_hw *hw)
}
}
- return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data);
+ return e1e_wphy(hw, HV_LED_CONFIG, data);
}
/**
@@ -3844,20 +3823,20 @@ static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw)
if ((hw->phy.type == e1000_phy_82578) ||
(hw->phy.type == e1000_phy_82579) ||
(hw->phy.type == e1000_phy_82577)) {
- hw->phy.ops.read_reg(hw, HV_SCC_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_SCC_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_ECOL_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_ECOL_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_MCC_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_MCC_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_LATECOL_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_LATECOL_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_COLC_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_COLC_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_DC_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_DC_LOWER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_TNCRS_UPPER, &phy_data);
- hw->phy.ops.read_reg(hw, HV_TNCRS_LOWER, &phy_data);
+ e1e_rphy(hw, HV_SCC_UPPER, &phy_data);
+ e1e_rphy(hw, HV_SCC_LOWER, &phy_data);
+ e1e_rphy(hw, HV_ECOL_UPPER, &phy_data);
+ e1e_rphy(hw, HV_ECOL_LOWER, &phy_data);
+ e1e_rphy(hw, HV_MCC_UPPER, &phy_data);
+ e1e_rphy(hw, HV_MCC_LOWER, &phy_data);
+ e1e_rphy(hw, HV_LATECOL_UPPER, &phy_data);
+ e1e_rphy(hw, HV_LATECOL_LOWER, &phy_data);
+ e1e_rphy(hw, HV_COLC_UPPER, &phy_data);
+ e1e_rphy(hw, HV_COLC_LOWER, &phy_data);
+ e1e_rphy(hw, HV_DC_UPPER, &phy_data);
+ e1e_rphy(hw, HV_DC_LOWER, &phy_data);
+ e1e_rphy(hw, HV_TNCRS_UPPER, &phy_data);
+ e1e_rphy(hw, HV_TNCRS_LOWER, &phy_data);
}
}
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c
index 7e55170..ff28721 100644
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -1135,7 +1135,8 @@ s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw)
ret_val = e1e_rphy(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg);
if (ret_val)
return ret_val;
- ret_val = e1e_rphy(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg);
+ ret_val =
+ e1e_rphy(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg);
if (ret_val)
return ret_val;
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index fe50242..fa5b604 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -1980,15 +1980,15 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
}
/**
- * e1000_get_hw_control - get control of the h/w from f/w
+ * e1000e_get_hw_control - get control of the h/w from f/w
* @adapter: address of board private structure
*
- * e1000_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit.
+ * e1000e_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that
* the driver is loaded. For AMT version (only with 82573)
* of the f/w this means that the network i/f is open.
**/
-static void e1000_get_hw_control(struct e1000_adapter *adapter)
+void e1000e_get_hw_control(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl_ext;
@@ -2005,16 +2005,16 @@ static void e1000_get_hw_control(struct e1000_adapter *adapter)
}
/**
- * e1000_release_hw_control - release control of the h/w to f/w
+ * e1000e_release_hw_control - release control of the h/w to f/w
* @adapter: address of board private structure
*
- * e1000_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit.
+ * e1000e_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that the
* driver is no longer loaded. For AMT version (only with 82573) i
* of the f/w this means that the network i/f is closed.
*
**/
-static void e1000_release_hw_control(struct e1000_adapter *adapter)
+void e1000e_release_hw_control(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl_ext;
@@ -2445,7 +2445,7 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
(vid == adapter->mng_vlan_id)) {
/* release control to f/w */
- e1000_release_hw_control(adapter);
+ e1000e_release_hw_control(adapter);
return;
}
@@ -2734,6 +2734,9 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true);
else
ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false);
+
+ if (ret_val)
+ e_dbg("failed to enable jumbo frame workaround mode\n");
}
/* Program MC offset vector base */
@@ -3184,7 +3187,6 @@ void e1000e_reset(struct e1000_adapter *adapter)
ew32(PBA, pba);
}
-
/*
* flow control settings
*
@@ -3272,7 +3274,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
* that the network interface is in control
*/
if (adapter->flags & FLAG_HAS_AMT)
- e1000_get_hw_control(adapter);
+ e1000e_get_hw_control(adapter);
ew32(WUC, 0);
@@ -3285,6 +3287,13 @@ void e1000e_reset(struct e1000_adapter *adapter)
ew32(VET, ETH_P_8021Q);
e1000e_reset_adaptive(hw);
+
+ if (!netif_running(adapter->netdev) &&
+ !test_bit(__E1000_TESTING, &adapter->state)) {
+ e1000_power_down_phy(adapter);
+ return;
+ }
+
e1000_get_phy_info(hw);
if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) &&
@@ -3570,7 +3579,7 @@ static int e1000_open(struct net_device *netdev)
* interface is now open and reset the part to a known state.
*/
if (adapter->flags & FLAG_HAS_AMT) {
- e1000_get_hw_control(adapter);
+ e1000e_get_hw_control(adapter);
e1000e_reset(adapter);
}
@@ -3634,7 +3643,7 @@ static int e1000_open(struct net_device *netdev)
return 0;
err_req_irq:
- e1000_release_hw_control(adapter);
+ e1000e_release_hw_control(adapter);
e1000_power_down_phy(adapter);
e1000e_free_rx_resources(adapter);
err_setup_rx:
@@ -3689,8 +3698,9 @@ static int e1000_close(struct net_device *netdev)
* If AMT is enabled, let the firmware know that the network
* interface is now closed
*/
- if (adapter->flags & FLAG_HAS_AMT)
- e1000_release_hw_control(adapter);
+ if ((adapter->flags & FLAG_HAS_AMT) &&
+ !test_bit(__E1000_TESTING, &adapter->state))
+ e1000e_release_hw_control(adapter);
if ((adapter->flags & FLAG_HAS_ERT) ||
(adapter->hw.mac.type == e1000_pch2lan))
@@ -5209,7 +5219,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake,
* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
- e1000_release_hw_control(adapter);
+ e1000e_release_hw_control(adapter);
pci_disable_device(pdev);
@@ -5366,7 +5376,7 @@ static int __e1000_resume(struct pci_dev *pdev)
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
- e1000_get_hw_control(adapter);
+ e1000e_get_hw_control(adapter);
return 0;
}
@@ -5613,7 +5623,7 @@ static void e1000_io_resume(struct pci_dev *pdev)
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
- e1000_get_hw_control(adapter);
+ e1000e_get_hw_control(adapter);
}
@@ -5636,7 +5646,7 @@ static void e1000_print_device_info(struct e1000_adapter *adapter)
ret_val = e1000_read_pba_string_generic(hw, pba_str,
E1000_PBANUM_LENGTH);
if (ret_val)
- strcpy(pba_str, "Unknown");
+ strncpy((char *)pba_str, "Unknown", sizeof(pba_str) - 1);
e_info("MAC: %d, PHY: %d, PBA No: %s\n",
hw->mac.type, hw->phy.type, pba_str);
}
@@ -5963,9 +5973,9 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
- e1000_get_hw_control(adapter);
+ e1000e_get_hw_control(adapter);
- strcpy(netdev->name, "eth%d");
+ strncpy(netdev->name, "eth%d", sizeof(netdev->name) - 1);
err = register_netdev(netdev);
if (err)
goto err_register;
@@ -5982,12 +5992,11 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
err_register:
if (!(adapter->flags & FLAG_HAS_AMT))
- e1000_release_hw_control(adapter);
+ e1000e_release_hw_control(adapter);
err_eeprom:
if (!e1000_check_reset_block(&adapter->hw))
e1000_phy_hw_reset(&adapter->hw);
err_hw_init:
-
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
err_sw_init:
@@ -6053,7 +6062,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
- e1000_release_hw_control(adapter);
+ e1000e_release_hw_control(adapter);
e1000e_reset_interrupt_capability(adapter);
kfree(adapter->tx_ring);
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c
index 1781efe..a640f1c 100644
--- a/drivers/net/e1000e/phy.c
+++ b/drivers/net/e1000e/phy.c
@@ -637,12 +637,11 @@ s32 e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data)
**/
s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
{
- struct e1000_phy_info *phy = &hw->phy;
s32 ret_val;
u16 phy_data;
/* Enable CRS on TX. This must be set for half-duplex operation. */
- ret_val = phy->ops.read_reg(hw, I82577_CFG_REG, &phy_data);
+ ret_val = e1e_rphy(hw, I82577_CFG_REG, &phy_data);
if (ret_val)
goto out;
@@ -651,7 +650,7 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
/* Enable downshift */
phy_data |= I82577_CFG_ENABLE_DOWNSHIFT;
- ret_val = phy->ops.write_reg(hw, I82577_CFG_REG, phy_data);
+ ret_val = e1e_wphy(hw, I82577_CFG_REG, phy_data);
out:
return ret_val;
@@ -774,16 +773,14 @@ s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw)
}
if (phy->type == e1000_phy_82578) {
- ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
- &phy_data);
+ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
if (ret_val)
return ret_val;
/* 82578 PHY - set the downshift count to 1x. */
phy_data |= I82578_EPSCR_DOWNSHIFT_ENABLE;
phy_data &= ~I82578_EPSCR_DOWNSHIFT_COUNTER_MASK;
- ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
- phy_data);
+ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
if (ret_val)
return ret_val;
}
@@ -1319,9 +1316,8 @@ s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw)
* We didn't get link.
* Reset the DSP and cross our fingers.
*/
- ret_val = e1e_wphy(hw,
- M88E1000_PHY_PAGE_SELECT,
- 0x001d);
+ ret_val = e1e_wphy(hw, M88E1000_PHY_PAGE_SELECT,
+ 0x001d);
if (ret_val)
return ret_val;
ret_val = e1000e_phy_reset_dsp(hw);
@@ -3071,12 +3067,12 @@ s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw)
goto out;
/* Do not apply workaround if in PHY loopback bit 14 set */
- hw->phy.ops.read_reg(hw, PHY_CONTROL, &data);
+ e1e_rphy(hw, PHY_CONTROL, &data);
if (data & PHY_CONTROL_LB)
goto out;
/* check if link is up and at 1Gbps */
- ret_val = hw->phy.ops.read_reg(hw, BM_CS_STATUS, &data);
+ ret_val = e1e_rphy(hw, BM_CS_STATUS, &data);
if (ret_val)
goto out;
@@ -3092,14 +3088,12 @@ s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw)
mdelay(200);
/* flush the packets in the fifo buffer */
- ret_val = hw->phy.ops.write_reg(hw, HV_MUX_DATA_CTRL,
- HV_MUX_DATA_CTRL_GEN_TO_MAC |
- HV_MUX_DATA_CTRL_FORCE_SPEED);
+ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC |
+ HV_MUX_DATA_CTRL_FORCE_SPEED);
if (ret_val)
goto out;
- ret_val = hw->phy.ops.write_reg(hw, HV_MUX_DATA_CTRL,
- HV_MUX_DATA_CTRL_GEN_TO_MAC);
+ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC);
out:
return ret_val;
@@ -3119,7 +3113,7 @@ s32 e1000_check_polarity_82577(struct e1000_hw *hw)
s32 ret_val;
u16 data;
- ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data);
if (!ret_val)
phy->cable_polarity = (data & I82577_PHY_STATUS2_REV_POLARITY)
@@ -3142,13 +3136,13 @@ s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw)
u16 phy_data;
bool link;
- ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data);
if (ret_val)
goto out;
e1000e_phy_force_speed_duplex_setup(hw, &phy_data);
- ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data);
if (ret_val)
goto out;
@@ -3212,7 +3206,7 @@ s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
if (ret_val)
goto out;
- ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data);
if (ret_val)
goto out;
@@ -3224,7 +3218,7 @@ s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
if (ret_val)
goto out;
- ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &data);
if (ret_val)
goto out;
@@ -3258,7 +3252,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
s32 ret_val;
u16 phy_data, length;
- ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data);
+ ret_val = e1e_rphy(hw, I82577_PHY_DIAG_STATUS, &phy_data);
if (ret_val)
goto out;
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index a724a2d..6c7257b 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
#include <asm/io.h>
#define DRV_NAME "ehea"
-#define DRV_VERSION "EHEA_0106"
+#define DRV_VERSION "EHEA_0107"
/* eHEA capability flags */
#define DLPAR_PORT_ADD_REM 1
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 1032b5b..f75d314 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -437,7 +437,7 @@ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a)
}
}
/* Ring doorbell */
- ehea_update_rq1a(pr->qp, i);
+ ehea_update_rq1a(pr->qp, i - 1);
}
static int ehea_refill_rq_def(struct ehea_port_res *pr,
@@ -1329,9 +1329,7 @@ static int ehea_fill_port_res(struct ehea_port_res *pr)
int ret;
struct ehea_qp_init_attr *init_attr = &pr->qp->init_attr;
- ehea_init_fill_rq1(pr, init_attr->act_nr_rwqes_rq1
- - init_attr->act_nr_rwqes_rq2
- - init_attr->act_nr_rwqes_rq3 - 1);
+ ehea_init_fill_rq1(pr, pr->rq1_skba.len);
ret = ehea_refill_rq2(pr, init_attr->act_nr_rwqes_rq2 - 1);
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index cce32d4..2a71373 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -17,6 +17,8 @@
*
* Bug fixes and cleanup by Philippe De Muyter (phdm@macqel.be)
* Copyright (c) 2004-2006 Macq Electronique SA.
+ *
+ * Copyright (C) 2010 Freescale Semiconductor, Inc.
*/
#include <linux/module.h>
@@ -45,29 +47,41 @@
#include <asm/cacheflush.h>
-#ifndef CONFIG_ARCH_MXC
+#ifndef CONFIG_ARM
#include <asm/coldfire.h>
#include <asm/mcfsim.h>
#endif
#include "fec.h"
-#ifdef CONFIG_ARCH_MXC
-#include <mach/hardware.h>
+#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
#define FEC_ALIGNMENT 0xf
#else
#define FEC_ALIGNMENT 0x3
#endif
-/*
- * Define the fixed address of the FEC hardware.
- */
-#if defined(CONFIG_M5272)
+#define DRIVER_NAME "fec"
+
+/* Controller is ENET-MAC */
+#define FEC_QUIRK_ENET_MAC (1 << 0)
+/* Controller needs driver to swap frame */
+#define FEC_QUIRK_SWAP_FRAME (1 << 1)
-static unsigned char fec_mac_default[] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+static struct platform_device_id fec_devtype[] = {
+ {
+ .name = DRIVER_NAME,
+ .driver_data = 0,
+ }, {
+ .name = "imx28-fec",
+ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME,
+ }
};
+static unsigned char macaddr[ETH_ALEN];
+module_param_array(macaddr, byte, NULL, 0);
+MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
+
+#if defined(CONFIG_M5272)
/*
* Some hardware gets it MAC address out of local flash memory.
* if this is non-zero then assume it is the address to get MAC from.
@@ -133,7 +147,8 @@ static unsigned char fec_mac_default[] = {
* account when setting it.
*/
#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
- defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARCH_MXC)
+ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
+ defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16)
#else
#define OPT_FRAME_SIZE 0
@@ -186,7 +201,6 @@ struct fec_enet_private {
int mii_timeout;
uint phy_speed;
phy_interface_t phy_interface;
- int index;
int link;
int full_duplex;
struct completion mdio_done;
@@ -213,10 +227,23 @@ static void fec_stop(struct net_device *dev);
/* Transmitter timeout */
#define TX_TIMEOUT (2 * HZ)
+static void *swap_buffer(void *bufaddr, int len)
+{
+ int i;
+ unsigned int *buf = bufaddr;
+
+ for (i = 0; i < (len + 3) / 4; i++, buf++)
+ *buf = cpu_to_be32(*buf);
+
+ return bufaddr;
+}
+
static netdev_tx_t
fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct fec_enet_private *fep = netdev_priv(dev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
struct bufdesc *bdp;
void *bufaddr;
unsigned short status;
@@ -261,6 +288,14 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
bufaddr = fep->tx_bounce[index];
}
+ /*
+ * Some design made an incorrect assumption on endian mode of
+ * the system that it's running on. As the result, driver has to
+ * swap every frame going to and coming from the controller.
+ */
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(bufaddr, skb->len);
+
/* Save skb pointer */
fep->tx_skbuff[fep->skb_cur] = skb;
@@ -429,6 +464,8 @@ static void
fec_enet_rx(struct net_device *dev)
{
struct fec_enet_private *fep = netdev_priv(dev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
struct bufdesc *bdp;
unsigned short status;
struct sk_buff *skb;
@@ -492,6 +529,9 @@ fec_enet_rx(struct net_device *dev)
dma_unmap_single(NULL, bdp->cbd_bufaddr, bdp->cbd_datlen,
DMA_FROM_DEVICE);
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(data, pkt_len);
+
/* This does 16 byte alignment, exactly what we need.
* The packet length includes FCS, but we don't want to
* include that when passing upstream as it messes up
@@ -538,37 +578,50 @@ rx_processing_done:
}
/* ------------------------------------------------------------------------- */
-#ifdef CONFIG_M5272
static void __inline__ fec_get_mac(struct net_device *dev)
{
struct fec_enet_private *fep = netdev_priv(dev);
+ struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
unsigned char *iap, tmpaddr[ETH_ALEN];
- if (FEC_FLASHMAC) {
- /*
- * Get MAC address from FLASH.
- * If it is all 1's or 0's, use the default.
- */
- iap = (unsigned char *)FEC_FLASHMAC;
- if ((iap[0] == 0) && (iap[1] == 0) && (iap[2] == 0) &&
- (iap[3] == 0) && (iap[4] == 0) && (iap[5] == 0))
- iap = fec_mac_default;
- if ((iap[0] == 0xff) && (iap[1] == 0xff) && (iap[2] == 0xff) &&
- (iap[3] == 0xff) && (iap[4] == 0xff) && (iap[5] == 0xff))
- iap = fec_mac_default;
- } else {
- *((unsigned long *) &tmpaddr[0]) = readl(fep->hwp + FEC_ADDR_LOW);
- *((unsigned short *) &tmpaddr[4]) = (readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
+ /*
+ * try to get mac address in following order:
+ *
+ * 1) module parameter via kernel command line in form
+ * fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0
+ */
+ iap = macaddr;
+
+ /*
+ * 2) from flash or fuse (via platform data)
+ */
+ if (!is_valid_ether_addr(iap)) {
+#ifdef CONFIG_M5272
+ if (FEC_FLASHMAC)
+ iap = (unsigned char *)FEC_FLASHMAC;
+#else
+ if (pdata)
+ memcpy(iap, pdata->mac, ETH_ALEN);
+#endif
+ }
+
+ /*
+ * 3) FEC mac registers set by bootloader
+ */
+ if (!is_valid_ether_addr(iap)) {
+ *((unsigned long *) &tmpaddr[0]) =
+ be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
+ *((unsigned short *) &tmpaddr[4]) =
+ be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
iap = &tmpaddr[0];
}
memcpy(dev->dev_addr, iap, ETH_ALEN);
- /* Adjust MAC if using default MAC address */
- if (iap == fec_mac_default)
- dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->index;
+ /* Adjust MAC if using macaddr */
+ if (iap == macaddr)
+ dev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->pdev->id;
}
-#endif
/* ------------------------------------------------------------------------- */
@@ -651,8 +704,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
fep->mii_timeout = 0;
init_completion(&fep->mdio_done);
- /* start a read op */
- writel(FEC_MMFR_ST | FEC_MMFR_OP_READ |
+ /* start a write op */
+ writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE |
FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
FEC_MMFR_TA | FEC_MMFR_DATA(value),
fep->hwp + FEC_MII_DATA);
@@ -681,6 +734,7 @@ static int fec_enet_mii_probe(struct net_device *dev)
char mdio_bus_id[MII_BUS_ID_SIZE];
char phy_name[MII_BUS_ID_SIZE + 3];
int phy_id;
+ int dev_id = fep->pdev->id;
fep->phy_dev = NULL;
@@ -692,6 +746,8 @@ static int fec_enet_mii_probe(struct net_device *dev)
continue;
if (fep->mii_bus->phy_map[phy_id]->phy_id == 0)
continue;
+ if (dev_id--)
+ continue;
strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
break;
}
@@ -729,10 +785,35 @@ static int fec_enet_mii_probe(struct net_device *dev)
static int fec_enet_mii_init(struct platform_device *pdev)
{
+ static struct mii_bus *fec0_mii_bus;
struct net_device *dev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(dev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
int err = -ENXIO, i;
+ /*
+ * The dual fec interfaces are not equivalent with enet-mac.
+ * Here are the differences:
+ *
+ * - fec0 supports MII & RMII modes while fec1 only supports RMII
+ * - fec0 acts as the 1588 time master while fec1 is slave
+ * - external phys can only be configured by fec0
+ *
+ * That is to say fec1 can not work independently. It only works
+ * when fec0 is working. The reason behind this design is that the
+ * second interface is added primarily for Switch mode.
+ *
+ * Because of the last point above, both phys are attached on fec0
+ * mdio interface in board design, and need to be configured by
+ * fec0 mii_bus.
+ */
+ if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && pdev->id) {
+ /* fec1 uses fec0 mii_bus */
+ fep->mii_bus = fec0_mii_bus;
+ return 0;
+ }
+
fep->mii_timeout = 0;
/*
@@ -769,6 +850,10 @@ static int fec_enet_mii_init(struct platform_device *pdev)
if (mdiobus_register(fep->mii_bus))
goto err_out_free_mdio_irq;
+ /* save fec0 mii_bus */
+ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC)
+ fec0_mii_bus = fep->mii_bus;
+
return 0;
err_out_free_mdio_irq:
@@ -1067,9 +1152,8 @@ static const struct net_device_ops fec_netdev_ops = {
/*
* XXX: We need to clean up on failure exits here.
*
- * index is only used in legacy code
*/
-static int fec_enet_init(struct net_device *dev, int index)
+static int fec_enet_init(struct net_device *dev)
{
struct fec_enet_private *fep = netdev_priv(dev);
struct bufdesc *cbd_base;
@@ -1086,26 +1170,11 @@ static int fec_enet_init(struct net_device *dev, int index)
spin_lock_init(&fep->hw_lock);
- fep->index = index;
fep->hwp = (void __iomem *)dev->base_addr;
fep->netdev = dev;
- /* Set the Ethernet address */
-#ifdef CONFIG_M5272
+ /* Get the Ethernet address */
fec_get_mac(dev);
-#else
- {
- unsigned long l;
- l = readl(fep->hwp + FEC_ADDR_LOW);
- dev->dev_addr[0] = (unsigned char)((l & 0xFF000000) >> 24);
- dev->dev_addr[1] = (unsigned char)((l & 0x00FF0000) >> 16);
- dev->dev_addr[2] = (unsigned char)((l & 0x0000FF00) >> 8);
- dev->dev_addr[3] = (unsigned char)((l & 0x000000FF) >> 0);
- l = readl(fep->hwp + FEC_ADDR_HIGH);
- dev->dev_addr[4] = (unsigned char)((l & 0xFF000000) >> 24);
- dev->dev_addr[5] = (unsigned char)((l & 0x00FF0000) >> 16);
- }
-#endif
/* Set receive and transmit descriptor base. */
fep->rx_bd_base = cbd_base;
@@ -1156,12 +1225,25 @@ static void
fec_restart(struct net_device *dev, int duplex)
{
struct fec_enet_private *fep = netdev_priv(dev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
int i;
+ u32 val, temp_mac[2];
/* Whack a reset. We should wait for this. */
writel(1, fep->hwp + FEC_ECNTRL);
udelay(10);
+ /*
+ * enet-mac reset will reset mac address registers too,
+ * so need to reconfigure it.
+ */
+ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) {
+ memcpy(&temp_mac, dev->dev_addr, ETH_ALEN);
+ writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
+ writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);
+ }
+
/* Clear any outstanding interrupt. */
writel(0xffc00000, fep->hwp + FEC_IEVENT);
@@ -1208,20 +1290,45 @@ fec_restart(struct net_device *dev, int duplex)
/* Set MII speed */
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
-#ifdef FEC_MIIGSK_ENR
- if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) {
- /* disable the gasket and wait */
- writel(0, fep->hwp + FEC_MIIGSK_ENR);
- while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
- udelay(1);
+ /*
+ * The phy interface and speed need to get configured
+ * differently on enet-mac.
+ */
+ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) {
+ val = readl(fep->hwp + FEC_R_CNTRL);
- /* configure the gasket: RMII, 50 MHz, no loopback, no echo */
- writel(1, fep->hwp + FEC_MIIGSK_CFGR);
+ /* MII or RMII */
+ if (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
+ val |= (1 << 8);
+ else
+ val &= ~(1 << 8);
- /* re-enable the gasket */
- writel(2, fep->hwp + FEC_MIIGSK_ENR);
- }
+ /* 10M or 100M */
+ if (fep->phy_dev && fep->phy_dev->speed == SPEED_100)
+ val &= ~(1 << 9);
+ else
+ val |= (1 << 9);
+
+ writel(val, fep->hwp + FEC_R_CNTRL);
+ } else {
+#ifdef FEC_MIIGSK_ENR
+ if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) {
+ /* disable the gasket and wait */
+ writel(0, fep->hwp + FEC_MIIGSK_ENR);
+ while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
+ udelay(1);
+
+ /*
+ * configure the gasket:
+ * RMII, 50 MHz, no loopback, no echo
+ */
+ writel(1, fep->hwp + FEC_MIIGSK_CFGR);
+
+ /* re-enable the gasket */
+ writel(2, fep->hwp + FEC_MIIGSK_ENR);
+ }
#endif
+ }
/* And last, enable the transmit and receive processing */
writel(2, fep->hwp + FEC_ECNTRL);
@@ -1316,7 +1423,7 @@ fec_probe(struct platform_device *pdev)
}
clk_enable(fep->clk);
- ret = fec_enet_init(ndev, 0);
+ ret = fec_enet_init(ndev);
if (ret)
goto failed_init;
@@ -1380,8 +1487,10 @@ fec_suspend(struct device *dev)
if (ndev) {
fep = netdev_priv(ndev);
- if (netif_running(ndev))
- fec_enet_close(ndev);
+ if (netif_running(ndev)) {
+ fec_stop(ndev);
+ netif_device_detach(ndev);
+ }
clk_disable(fep->clk);
}
return 0;
@@ -1396,8 +1505,10 @@ fec_resume(struct device *dev)
if (ndev) {
fep = netdev_priv(ndev);
clk_enable(fep->clk);
- if (netif_running(ndev))
- fec_enet_open(ndev);
+ if (netif_running(ndev)) {
+ fec_restart(ndev, fep->full_duplex);
+ netif_device_attach(ndev);
+ }
}
return 0;
}
@@ -1414,12 +1525,13 @@ static const struct dev_pm_ops fec_pm_ops = {
static struct platform_driver fec_driver = {
.driver = {
- .name = "fec",
+ .name = DRIVER_NAME,
.owner = THIS_MODULE,
#ifdef CONFIG_PM
.pm = &fec_pm_ops,
#endif
},
+ .id_table = fec_devtype,
.probe = fec_probe,
.remove = __devexit_p(fec_drv_remove),
};
diff --git a/drivers/net/fec.h b/drivers/net/fec.h
index 2c48b25..ace318d 100644
--- a/drivers/net/fec.h
+++ b/drivers/net/fec.h
@@ -14,7 +14,8 @@
/****************************************************************************/
#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
- defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARCH_MXC)
+ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \
+ defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
/*
* Just figures, Motorola would have to change the offsets for
* registers in the same peripheral device on different models
@@ -78,7 +79,7 @@
/*
* Define the buffer descriptor structure.
*/
-#ifdef CONFIG_ARCH_MXC
+#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28)
struct bufdesc {
unsigned short cbd_datlen; /* Data length */
unsigned short cbd_sc; /* Control and status info */
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index cd2d72d..af09296 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3949,6 +3949,7 @@ static int nv_set_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
writel(flags, base + NvRegWakeUpFlags);
spin_unlock_irq(&np->lock);
}
+ device_set_wakeup_enable(&np->pci_dev->dev, np->wolenabled);
return 0;
}
@@ -5488,14 +5489,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
/* set mac address */
nv_copy_mac_to_hw(dev);
- /* Workaround current PCI init glitch: wakeup bits aren't
- * being set from PCI PM capability.
- */
- device_init_wakeup(&pci_dev->dev, 1);
-
/* disable WOL */
writel(0, base + NvRegWakeUpFlags);
np->wolenabled = 0;
+ device_set_wakeup_enable(&pci_dev->dev, false);
if (id->driver_data & DEV_HAS_POWER_CNTRL) {
@@ -5746,8 +5743,9 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
}
#ifdef CONFIG_PM
-static int nv_suspend(struct pci_dev *pdev, pm_message_t state)
+static int nv_suspend(struct device *device)
{
+ struct pci_dev *pdev = to_pci_dev(device);
struct net_device *dev = pci_get_drvdata(pdev);
struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev);
@@ -5763,25 +5761,17 @@ static int nv_suspend(struct pci_dev *pdev, pm_message_t state)
for (i = 0; i <= np->register_size/sizeof(u32); i++)
np->saved_config_space[i] = readl(base + i*sizeof(u32));
- pci_save_state(pdev);
- pci_enable_wake(pdev, pci_choose_state(pdev, state), np->wolenabled);
- pci_disable_device(pdev);
- pci_set_power_state(pdev, pci_choose_state(pdev, state));
return 0;
}
-static int nv_resume(struct pci_dev *pdev)
+static int nv_resume(struct device *device)
{
+ struct pci_dev *pdev = to_pci_dev(device);
struct net_device *dev = pci_get_drvdata(pdev);
struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev);
int i, rc = 0;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- /* ack any pending wake events, disable PME */
- pci_enable_wake(pdev, PCI_D0, 0);
-
/* restore non-pci configuration space */
for (i = 0; i <= np->register_size/sizeof(u32); i++)
writel(np->saved_config_space[i], base+i*sizeof(u32));
@@ -5800,6 +5790,9 @@ static int nv_resume(struct pci_dev *pdev)
return rc;
}
+static SIMPLE_DEV_PM_OPS(nv_pm_ops, nv_suspend, nv_resume);
+#define NV_PM_OPS (&nv_pm_ops)
+
static void nv_shutdown(struct pci_dev *pdev)
{
struct net_device *dev = pci_get_drvdata(pdev);
@@ -5822,15 +5815,13 @@ static void nv_shutdown(struct pci_dev *pdev)
* only put the device into D3 if we really go for poweroff.
*/
if (system_state == SYSTEM_POWER_OFF) {
- if (pci_enable_wake(pdev, PCI_D3cold, np->wolenabled))
- pci_enable_wake(pdev, PCI_D3hot, np->wolenabled);
+ pci_wake_from_d3(pdev, np->wolenabled);
pci_set_power_state(pdev, PCI_D3hot);
}
}
#else
-#define nv_suspend NULL
+#define NV_PM_OPS NULL
#define nv_shutdown NULL
-#define nv_resume NULL
#endif /* CONFIG_PM */
static DEFINE_PCI_DEVICE_TABLE(pci_tbl) = {
@@ -6002,9 +5993,8 @@ static struct pci_driver driver = {
.id_table = pci_tbl,
.probe = nv_probe,
.remove = __devexit_p(nv_remove),
- .suspend = nv_suspend,
- .resume = nv_resume,
.shutdown = nv_shutdown,
+ .driver.pm = NV_PM_OPS,
};
static int __init init_nic(void)
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 4e7d1d0..7d9ced0 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -396,7 +396,7 @@ static unsigned char *add_mcs(unsigned char *bits, int bitrate,
while (p) {
if (p->bitrate == bitrate) {
memcpy(p->bits, bits, YAM_FPGA_SIZE);
- return p->bits;
+ goto out;
}
p = p->next;
}
@@ -411,7 +411,7 @@ static unsigned char *add_mcs(unsigned char *bits, int bitrate,
p->bitrate = bitrate;
p->next = yam_data;
yam_data = p;
-
+ out:
release_firmware(fw);
return p->bits;
}
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 3ae30b8..3b8c924 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -508,6 +508,8 @@ extern void ixgbe_free_rx_resources(struct ixgbe_ring *);
extern void ixgbe_free_tx_resources(struct ixgbe_ring *);
extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *,struct ixgbe_ring *);
extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *,struct ixgbe_ring *);
+extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *);
extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
@@ -524,26 +526,13 @@ extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
extern s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
- struct ixgbe_atr_input *input,
+ union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common,
u8 queue);
extern s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
- struct ixgbe_atr_input *input,
+ union ixgbe_atr_input *input,
struct ixgbe_atr_input_masks *input_masks,
u16 soft_id, u8 queue);
-extern s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input,
- u16 vlan_id);
-extern s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input,
- u32 src_addr);
-extern s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input,
- u32 dst_addr);
-extern s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input,
- u16 src_port);
-extern s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input,
- u16 dst_port);
-extern s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input,
- u16 flex_byte);
-extern s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input,
- u8 l4type);
extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
struct ixgbe_ring *ring);
extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index bfd3c22..8d316d9 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -1003,7 +1003,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
udelay(10);
}
if (i >= IXGBE_FDIRCMD_CMD_POLL) {
- hw_dbg(hw ,"Flow Director previous command isn't complete, "
+ hw_dbg(hw, "Flow Director previous command isn't complete, "
"aborting table re-initialization.\n");
return IXGBE_ERR_FDIR_REINIT_FAILED;
}
@@ -1113,13 +1113,10 @@ s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc)
/* Move the flexible bytes to use the ethertype - shift 6 words */
fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
- fdirctrl |= IXGBE_FDIRCTRL_REPORT_STATUS;
/* Prime the keys for hashing */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY,
- htonl(IXGBE_ATR_BUCKET_HASH_KEY));
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY,
- htonl(IXGBE_ATR_SIGNATURE_HASH_KEY));
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
/*
* Poll init-done after we write the register. Estimated times:
@@ -1209,10 +1206,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
/* Prime the keys for hashing */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY,
- htonl(IXGBE_ATR_BUCKET_HASH_KEY));
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY,
- htonl(IXGBE_ATR_SIGNATURE_HASH_KEY));
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
/*
* Poll init-done after we write the register. Estimated times:
@@ -1251,8 +1246,8 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
* @stream: input bitstream to compute the hash on
* @key: 32-bit hash key
**/
-static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input,
- u32 key)
+static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input,
+ u32 key)
{
/*
* The algorithm is as follows:
@@ -1272,410 +1267,250 @@ static u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *atr_input,
* To simplify for programming, the algorithm is implemented
* in software this way:
*
- * Key[31:0], Stream[335:0]
+ * key[31:0], hi_hash_dword[31:0], lo_hash_dword[31:0], hash[15:0]
+ *
+ * for (i = 0; i < 352; i+=32)
+ * hi_hash_dword[31:0] ^= Stream[(i+31):i];
+ *
+ * lo_hash_dword[15:0] ^= Stream[15:0];
+ * lo_hash_dword[15:0] ^= hi_hash_dword[31:16];
+ * lo_hash_dword[31:16] ^= hi_hash_dword[15:0];
*
- * tmp_key[11 * 32 - 1:0] = 11{Key[31:0] = key concatenated 11 times
- * int_key[350:0] = tmp_key[351:1]
- * int_stream[365:0] = Stream[14:0] | Stream[335:0] | Stream[335:321]
+ * hi_hash_dword[31:0] ^= Stream[351:320];
*
- * hash[15:0] = 0;
- * for (i = 0; i < 351; i++) {
- * if (int_key[i])
- * hash ^= int_stream[(i + 15):i];
+ * if(key[0])
+ * hash[15:0] ^= Stream[15:0];
+ *
+ * for (i = 0; i < 16; i++) {
+ * if (key[i])
+ * hash[15:0] ^= lo_hash_dword[(i+15):i];
+ * if (key[i + 16])
+ * hash[15:0] ^= hi_hash_dword[(i+15):i];
* }
+ *
*/
+ __be32 common_hash_dword = 0;
+ u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+ u32 hash_result = 0;
+ u8 i;
- union {
- u64 fill[6];
- u32 key[11];
- u8 key_stream[44];
- } tmp_key;
+ /* record the flow_vm_vlan bits as they are a key part to the hash */
+ flow_vm_vlan = ntohl(atr_input->dword_stream[0]);
- u8 *stream = (u8 *)atr_input;
- u8 int_key[44]; /* upper-most bit unused */
- u8 hash_str[46]; /* upper-most 2 bits unused */
- u16 hash_result = 0;
- int i, j, k, h;
+ /* generate common hash dword */
+ for (i = 10; i; i -= 2)
+ common_hash_dword ^= atr_input->dword_stream[i] ^
+ atr_input->dword_stream[i - 1];
- /*
- * Initialize the fill member to prevent warnings
- * on some compilers
- */
- tmp_key.fill[0] = 0;
+ hi_hash_dword = ntohl(common_hash_dword);
- /* First load the temporary key stream */
- for (i = 0; i < 6; i++) {
- u64 fillkey = ((u64)key << 32) | key;
- tmp_key.fill[i] = fillkey;
- }
+ /* low dword is word swapped version of common */
+ lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
- /*
- * Set the interim key for the hashing. Bit 352 is unused, so we must
- * shift and compensate when building the key.
- */
+ /* apply flow ID/VM pool/VLAN ID bits to hash words */
+ hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
- int_key[0] = tmp_key.key_stream[0] >> 1;
- for (i = 1, j = 0; i < 44; i++) {
- unsigned int this_key = tmp_key.key_stream[j] << 7;
- j++;
- int_key[i] = (u8)(this_key | (tmp_key.key_stream[j] >> 1));
- }
-
- /*
- * Set the interim bit string for the hashing. Bits 368 and 367 are
- * unused, so shift and compensate when building the string.
- */
- hash_str[0] = (stream[40] & 0x7f) >> 1;
- for (i = 1, j = 40; i < 46; i++) {
- unsigned int this_str = stream[j] << 7;
- j++;
- if (j > 41)
- j = 0;
- hash_str[i] = (u8)(this_str | (stream[j] >> 1));
- }
+ /* Process bits 0 and 16 */
+ if (key & 0x0001) hash_result ^= lo_hash_dword;
+ if (key & 0x00010000) hash_result ^= hi_hash_dword;
/*
- * Now compute the hash. i is the index into hash_str, j is into our
- * key stream, k is counting the number of bits, and h interates within
- * each byte.
+ * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+ * delay this because bit 0 of the stream should not be processed
+ * so we do not add the vlan until after bit 0 was processed
*/
- for (i = 45, j = 43, k = 0; k < 351 && i >= 2 && j >= 0; i--, j--) {
- for (h = 0; h < 8 && k < 351; h++, k++) {
- if (int_key[j] & (1 << h)) {
- /*
- * Key bit is set, XOR in the current 16-bit
- * string. Example of processing:
- * h = 0,
- * tmp = (hash_str[i - 2] & 0 << 16) |
- * (hash_str[i - 1] & 0xff << 8) |
- * (hash_str[i] & 0xff >> 0)
- * So tmp = hash_str[15 + k:k], since the
- * i + 2 clause rolls off the 16-bit value
- * h = 7,
- * tmp = (hash_str[i - 2] & 0x7f << 9) |
- * (hash_str[i - 1] & 0xff << 1) |
- * (hash_str[i] & 0x80 >> 7)
- */
- int tmp = (hash_str[i] >> h);
- tmp |= (hash_str[i - 1] << (8 - h));
- tmp |= (int)(hash_str[i - 2] & ((1 << h) - 1))
- << (16 - h);
- hash_result ^= (u16)tmp;
- }
- }
- }
-
- return hash_result;
-}
-
-/**
- * ixgbe_atr_set_vlan_id_82599 - Sets the VLAN id in the ATR input stream
- * @input: input stream to modify
- * @vlan: the VLAN id to load
- **/
-s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan)
-{
- input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] = vlan >> 8;
- input->byte_stream[IXGBE_ATR_VLAN_OFFSET] = vlan & 0xff;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_set_src_ipv4_82599 - Sets the source IPv4 address
- * @input: input stream to modify
- * @src_addr: the IP address to load
- **/
-s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr)
-{
- input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] = src_addr >> 24;
- input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] =
- (src_addr >> 16) & 0xff;
- input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] =
- (src_addr >> 8) & 0xff;
- input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET] = src_addr & 0xff;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_set_dst_ipv4_82599 - Sets the destination IPv4 address
- * @input: input stream to modify
- * @dst_addr: the IP address to load
- **/
-s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr)
-{
- input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] = dst_addr >> 24;
- input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] =
- (dst_addr >> 16) & 0xff;
- input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] =
- (dst_addr >> 8) & 0xff;
- input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET] = dst_addr & 0xff;
-
- return 0;
-}
+ lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
-/**
- * ixgbe_atr_set_src_port_82599 - Sets the source port
- * @input: input stream to modify
- * @src_port: the source port to load
- **/
-s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port)
-{
- input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1] = src_port >> 8;
- input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] = src_port & 0xff;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_set_dst_port_82599 - Sets the destination port
- * @input: input stream to modify
- * @dst_port: the destination port to load
- **/
-s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port)
-{
- input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1] = dst_port >> 8;
- input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] = dst_port & 0xff;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_set_flex_byte_82599 - Sets the flexible bytes
- * @input: input stream to modify
- * @flex_bytes: the flexible bytes to load
- **/
-s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte)
-{
- input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] = flex_byte >> 8;
- input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET] = flex_byte & 0xff;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_set_l4type_82599 - Sets the layer 4 packet type
- * @input: input stream to modify
- * @l4type: the layer 4 type value to load
- **/
-s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type)
-{
- input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET] = l4type;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_get_vlan_id_82599 - Gets the VLAN id from the ATR input stream
- * @input: input stream to search
- * @vlan: the VLAN id to load
- **/
-static s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan)
-{
- *vlan = input->byte_stream[IXGBE_ATR_VLAN_OFFSET];
- *vlan |= input->byte_stream[IXGBE_ATR_VLAN_OFFSET + 1] << 8;
-
- return 0;
-}
-
-/**
- * ixgbe_atr_get_src_ipv4_82599 - Gets the source IPv4 address
- * @input: input stream to search
- * @src_addr: the IP address to load
- **/
-static s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input,
- u32 *src_addr)
-{
- *src_addr = input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET];
- *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 1] << 8;
- *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 2] << 16;
- *src_addr |= input->byte_stream[IXGBE_ATR_SRC_IPV4_OFFSET + 3] << 24;
-
- return 0;
-}
-/**
- * ixgbe_atr_get_dst_ipv4_82599 - Gets the destination IPv4 address
- * @input: input stream to search
- * @dst_addr: the IP address to load
- **/
-static s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input,
- u32 *dst_addr)
-{
- *dst_addr = input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET];
- *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 1] << 8;
- *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 2] << 16;
- *dst_addr |= input->byte_stream[IXGBE_ATR_DST_IPV4_OFFSET + 3] << 24;
+ /* process the remaining 30 bits in the key 2 bits at a time */
+ for (i = 15; i; i-- ) {
+ if (key & (0x0001 << i)) hash_result ^= lo_hash_dword >> i;
+ if (key & (0x00010000 << i)) hash_result ^= hi_hash_dword >> i;
+ }
- return 0;
+ return hash_result & IXGBE_ATR_HASH_MASK;
}
-/**
- * ixgbe_atr_get_src_ipv6_82599 - Gets the source IPv6 address
- * @input: input stream to search
- * @src_addr_1: the first 4 bytes of the IP address to load
- * @src_addr_2: the second 4 bytes of the IP address to load
- * @src_addr_3: the third 4 bytes of the IP address to load
- * @src_addr_4: the fourth 4 bytes of the IP address to load
- **/
-static s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input,
- u32 *src_addr_1, u32 *src_addr_2,
- u32 *src_addr_3, u32 *src_addr_4)
-{
- *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 12];
- *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 13] << 8;
- *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 14] << 16;
- *src_addr_1 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 15] << 24;
-
- *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 8];
- *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 9] << 8;
- *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 10] << 16;
- *src_addr_2 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 11] << 24;
-
- *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 4];
- *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 5] << 8;
- *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 6] << 16;
- *src_addr_3 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 7] << 24;
-
- *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET];
- *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 1] << 8;
- *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 2] << 16;
- *src_addr_4 = input->byte_stream[IXGBE_ATR_SRC_IPV6_OFFSET + 3] << 24;
-
- return 0;
-}
+/*
+ * These defines allow us to quickly generate all of the necessary instructions
+ * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
+ * for values 0 through 15
+ */
+#define IXGBE_ATR_COMMON_HASH_KEY \
+ (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
+#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
+do { \
+ u32 n = (_n); \
+ if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
+ common_hash ^= lo_hash_dword >> n; \
+ else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+ bucket_hash ^= lo_hash_dword >> n; \
+ else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
+ sig_hash ^= lo_hash_dword << (16 - n); \
+ if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
+ common_hash ^= hi_hash_dword >> n; \
+ else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+ bucket_hash ^= hi_hash_dword >> n; \
+ else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
+ sig_hash ^= hi_hash_dword << (16 - n); \
+} while (0);
/**
- * ixgbe_atr_get_src_port_82599 - Gets the source port
- * @input: input stream to modify
- * @src_port: the source port to load
+ * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
+ * @stream: input bitstream to compute the hash on
*
- * Even though the input is given in big-endian, the FDIRPORT registers
- * expect the ports to be programmed in little-endian. Hence the need to swap
- * endianness when retrieving the data. This can be confusing since the
- * internal hash engine expects it to be big-endian.
+ * This function is almost identical to the function above but contains
+ * several optomizations such as unwinding all of the loops, letting the
+ * compiler work out all of the conditional ifs since the keys are static
+ * defines, and computing two keys at once since the hashed dword stream
+ * will be the same for both keys.
**/
-static s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input,
- u16 *src_port)
+static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common)
{
- *src_port = input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET] << 8;
- *src_port |= input->byte_stream[IXGBE_ATR_SRC_PORT_OFFSET + 1];
+ u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+ u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
- return 0;
-}
+ /* record the flow_vm_vlan bits as they are a key part to the hash */
+ flow_vm_vlan = ntohl(input.dword);
-/**
- * ixgbe_atr_get_dst_port_82599 - Gets the destination port
- * @input: input stream to modify
- * @dst_port: the destination port to load
- *
- * Even though the input is given in big-endian, the FDIRPORT registers
- * expect the ports to be programmed in little-endian. Hence the need to swap
- * endianness when retrieving the data. This can be confusing since the
- * internal hash engine expects it to be big-endian.
- **/
-static s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input,
- u16 *dst_port)
-{
- *dst_port = input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET] << 8;
- *dst_port |= input->byte_stream[IXGBE_ATR_DST_PORT_OFFSET + 1];
+ /* generate common hash dword */
+ hi_hash_dword = ntohl(common.dword);
- return 0;
-}
+ /* low dword is word swapped version of common */
+ lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
-/**
- * ixgbe_atr_get_flex_byte_82599 - Gets the flexible bytes
- * @input: input stream to modify
- * @flex_bytes: the flexible bytes to load
- **/
-static s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input,
- u16 *flex_byte)
-{
- *flex_byte = input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET];
- *flex_byte |= input->byte_stream[IXGBE_ATR_FLEX_BYTE_OFFSET + 1] << 8;
+ /* apply flow ID/VM pool/VLAN ID bits to hash words */
+ hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
- return 0;
-}
+ /* Process bits 0 and 16 */
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
-/**
- * ixgbe_atr_get_l4type_82599 - Gets the layer 4 packet type
- * @input: input stream to modify
- * @l4type: the layer 4 type value to load
- **/
-static s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input,
- u8 *l4type)
-{
- *l4type = input->byte_stream[IXGBE_ATR_L4TYPE_OFFSET];
+ /*
+ * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+ * delay this because bit 0 of the stream should not be processed
+ * so we do not add the vlan until after bit 0 was processed
+ */
+ lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+ /* Process remaining 30 bit of the key */
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
+ IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
+
+ /* combine common_hash result with signature and bucket hashes */
+ bucket_hash ^= common_hash;
+ bucket_hash &= IXGBE_ATR_HASH_MASK;
- return 0;
+ sig_hash ^= common_hash << 16;
+ sig_hash &= IXGBE_ATR_HASH_MASK << 16;
+
+ /* return completed signature hash */
+ return sig_hash ^ bucket_hash;
}
/**
* ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
* @hw: pointer to hardware structure
- * @stream: input bitstream
+ * @input: unique input dword
+ * @common: compressed common input dword
* @queue: queue index to direct traffic to
**/
s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
- struct ixgbe_atr_input *input,
+ union ixgbe_atr_hash_dword input,
+ union ixgbe_atr_hash_dword common,
u8 queue)
{
u64 fdirhashcmd;
- u64 fdircmd;
- u32 fdirhash;
- u16 bucket_hash, sig_hash;
- u8 l4type;
-
- bucket_hash = ixgbe_atr_compute_hash_82599(input,
- IXGBE_ATR_BUCKET_HASH_KEY);
-
- /* bucket_hash is only 15 bits */
- bucket_hash &= IXGBE_ATR_HASH_MASK;
-
- sig_hash = ixgbe_atr_compute_hash_82599(input,
- IXGBE_ATR_SIGNATURE_HASH_KEY);
-
- /* Get the l4type in order to program FDIRCMD properly */
- /* lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 */
- ixgbe_atr_get_l4type_82599(input, &l4type);
+ u32 fdircmd;
/*
- * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
- * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH.
+ * Get the flow_type in order to program FDIRCMD properly
+ * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
*/
- fdirhash = sig_hash << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT | bucket_hash;
-
- fdircmd = (IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
- IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN);
-
- switch (l4type & IXGBE_ATR_L4TYPE_MASK) {
- case IXGBE_ATR_L4TYPE_TCP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_TCP;
- break;
- case IXGBE_ATR_L4TYPE_UDP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_UDP;
- break;
- case IXGBE_ATR_L4TYPE_SCTP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_SCTP;
+ switch (input.formatted.flow_type) {
+ case IXGBE_ATR_FLOW_TYPE_TCPV4:
+ case IXGBE_ATR_FLOW_TYPE_UDPV4:
+ case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+ case IXGBE_ATR_FLOW_TYPE_TCPV6:
+ case IXGBE_ATR_FLOW_TYPE_UDPV6:
+ case IXGBE_ATR_FLOW_TYPE_SCTPV6:
break;
default:
- hw_dbg(hw, "Error on l4type input\n");
+ hw_dbg(hw, " Error on flow type input\n");
return IXGBE_ERR_CONFIG;
}
- if (l4type & IXGBE_ATR_L4TYPE_IPV6_MASK)
- fdircmd |= IXGBE_FDIRCMD_IPV6;
+ /* configure FDIRCMD register */
+ fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+ IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+ fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+ fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
- fdircmd |= ((u64)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT);
- fdirhashcmd = ((fdircmd << 32) | fdirhash);
+ /*
+ * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
+ * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH.
+ */
+ fdirhashcmd = (u64)fdircmd << 32;
+ fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
+ hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
+
return 0;
}
/**
+ * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
+ * @input_mask: mask to be bit swapped
+ *
+ * The source and destination port masks for flow director are bit swapped
+ * in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc. In order to
+ * generate a correctly swapped value we need to bit swap the mask and that
+ * is what is accomplished by this function.
+ **/
+static u32 ixgbe_get_fdirtcpm_82599(struct ixgbe_atr_input_masks *input_masks)
+{
+ u32 mask = ntohs(input_masks->dst_port_mask);
+ mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
+ mask |= ntohs(input_masks->src_port_mask);
+ mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
+ mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
+ mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
+ return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8);
+}
+
+/*
+ * These two macros are meant to address the fact that we have registers
+ * that are either all or in part big-endian. As a result on big-endian
+ * systems we will end up byte swapping the value to little-endian before
+ * it is byte swapped again and written to the hardware in the original
+ * big-endian format.
+ */
+#define IXGBE_STORE_AS_BE32(_value) \
+ (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
+ (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
+
+#define IXGBE_WRITE_REG_BE32(a, reg, value) \
+ IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value)))
+
+#define IXGBE_STORE_AS_BE16(_value) \
+ (((u16)(_value) >> 8) | ((u16)(_value) << 8))
+
+/**
* ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
* @hw: pointer to hardware structure
* @input: input bitstream
@@ -1687,135 +1522,139 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
* hardware writes must be protected from one another.
**/
s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
- struct ixgbe_atr_input *input,
+ union ixgbe_atr_input *input,
struct ixgbe_atr_input_masks *input_masks,
u16 soft_id, u8 queue)
{
- u32 fdircmd = 0;
u32 fdirhash;
- u32 src_ipv4 = 0, dst_ipv4 = 0;
- u32 src_ipv6_1, src_ipv6_2, src_ipv6_3, src_ipv6_4;
- u16 src_port, dst_port, vlan_id, flex_bytes;
- u16 bucket_hash;
- u8 l4type;
- u8 fdirm = 0;
-
- /* Get our input values */
- ixgbe_atr_get_l4type_82599(input, &l4type);
+ u32 fdircmd;
+ u32 fdirport, fdirtcpm;
+ u32 fdirvlan;
+ /* start with VLAN, flex bytes, VM pool, and IPv6 destination masked */
+ u32 fdirm = IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP | IXGBE_FDIRM_FLEX |
+ IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6;
/*
- * Check l4type formatting, and bail out before we touch the hardware
+ * Check flow_type formatting, and bail out before we touch the hardware
* if there's a configuration issue
*/
- switch (l4type & IXGBE_ATR_L4TYPE_MASK) {
- case IXGBE_ATR_L4TYPE_TCP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_TCP;
- break;
- case IXGBE_ATR_L4TYPE_UDP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_UDP;
- break;
- case IXGBE_ATR_L4TYPE_SCTP:
- fdircmd |= IXGBE_FDIRCMD_L4TYPE_SCTP;
+ switch (input->formatted.flow_type) {
+ case IXGBE_ATR_FLOW_TYPE_IPV4:
+ /* use the L4 protocol mask for raw IPv4/IPv6 traffic */
+ fdirm |= IXGBE_FDIRM_L4P;
+ case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+ if (input_masks->dst_port_mask || input_masks->src_port_mask) {
+ hw_dbg(hw, " Error on src/dst port mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+ case IXGBE_ATR_FLOW_TYPE_TCPV4:
+ case IXGBE_ATR_FLOW_TYPE_UDPV4:
break;
default:
- hw_dbg(hw, "Error on l4type input\n");
+ hw_dbg(hw, " Error on flow type input\n");
return IXGBE_ERR_CONFIG;
}
- bucket_hash = ixgbe_atr_compute_hash_82599(input,
- IXGBE_ATR_BUCKET_HASH_KEY);
-
- /* bucket_hash is only 15 bits */
- bucket_hash &= IXGBE_ATR_HASH_MASK;
-
- ixgbe_atr_get_vlan_id_82599(input, &vlan_id);
- ixgbe_atr_get_src_port_82599(input, &src_port);
- ixgbe_atr_get_dst_port_82599(input, &dst_port);
- ixgbe_atr_get_flex_byte_82599(input, &flex_bytes);
-
- fdirhash = soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT | bucket_hash;
-
- /* Now figure out if we're IPv4 or IPv6 */
- if (l4type & IXGBE_ATR_L4TYPE_IPV6_MASK) {
- /* IPv6 */
- ixgbe_atr_get_src_ipv6_82599(input, &src_ipv6_1, &src_ipv6_2,
- &src_ipv6_3, &src_ipv6_4);
-
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSIPv6(0), src_ipv6_1);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSIPv6(1), src_ipv6_2);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSIPv6(2), src_ipv6_3);
- /* The last 4 bytes is the same register as IPv4 */
- IXGBE_WRITE_REG(hw, IXGBE_FDIRIPSA, src_ipv6_4);
-
- fdircmd |= IXGBE_FDIRCMD_IPV6;
- fdircmd |= IXGBE_FDIRCMD_IPv6DMATCH;
- } else {
- /* IPv4 */
- ixgbe_atr_get_src_ipv4_82599(input, &src_ipv4);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRIPSA, src_ipv4);
- }
-
- ixgbe_atr_get_dst_ipv4_82599(input, &dst_ipv4);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRIPDA, dst_ipv4);
-
- IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, (vlan_id |
- (flex_bytes << IXGBE_FDIRVLAN_FLEX_SHIFT)));
- IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, (src_port |
- (dst_port << IXGBE_FDIRPORT_DESTINATION_SHIFT)));
-
/*
- * Program the relevant mask registers. L4type cannot be
- * masked out in this implementation.
+ * Program the relevant mask registers. If src/dst_port or src/dst_addr
+ * are zero, then assume a full mask for that field. Also assume that
+ * a VLAN of 0 is unspecified, so mask that out as well. L4type
+ * cannot be masked out in this implementation.
*
* This also assumes IPv4 only. IPv6 masking isn't supported at this
* point in time.
*/
- IXGBE_WRITE_REG(hw, IXGBE_FDIRSIP4M, input_masks->src_ip_mask);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRDIP4M, input_masks->dst_ip_mask);
-
- switch (l4type & IXGBE_ATR_L4TYPE_MASK) {
- case IXGBE_ATR_L4TYPE_TCP:
- IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, input_masks->src_port_mask);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM,
- (IXGBE_READ_REG(hw, IXGBE_FDIRTCPM) |
- (input_masks->dst_port_mask << 16)));
+
+ /* Program FDIRM */
+ switch (ntohs(input_masks->vlan_id_mask) & 0xEFFF) {
+ case 0xEFFF:
+ /* Unmask VLAN ID - bit 0 and fall through to unmask prio */
+ fdirm &= ~IXGBE_FDIRM_VLANID;
+ case 0xE000:
+ /* Unmask VLAN prio - bit 1 */
+ fdirm &= ~IXGBE_FDIRM_VLANP;
break;
- case IXGBE_ATR_L4TYPE_UDP:
- IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, input_masks->src_port_mask);
- IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM,
- (IXGBE_READ_REG(hw, IXGBE_FDIRUDPM) |
- (input_masks->src_port_mask << 16)));
+ case 0x0FFF:
+ /* Unmask VLAN ID - bit 0 */
+ fdirm &= ~IXGBE_FDIRM_VLANID;
break;
- default:
- /* this already would have failed above */
+ case 0x0000:
+ /* do nothing, vlans already masked */
break;
+ default:
+ hw_dbg(hw, " Error on VLAN mask\n");
+ return IXGBE_ERR_CONFIG;
}
- /* Program the last mask register, FDIRM */
- if (input_masks->vlan_id_mask)
- /* Mask both VLAN and VLANP - bits 0 and 1 */
- fdirm |= 0x3;
-
- if (input_masks->data_mask)
- /* Flex bytes need masking, so mask the whole thing - bit 4 */
- fdirm |= 0x10;
+ if (input_masks->flex_mask & 0xFFFF) {
+ if ((input_masks->flex_mask & 0xFFFF) != 0xFFFF) {
+ hw_dbg(hw, " Error on flexible byte mask\n");
+ return IXGBE_ERR_CONFIG;
+ }
+ /* Unmask Flex Bytes - bit 4 */
+ fdirm &= ~IXGBE_FDIRM_FLEX;
+ }
/* Now mask VM pool and destination IPv6 - bits 5 and 2 */
- fdirm |= 0x24;
-
IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
- fdircmd |= IXGBE_FDIRCMD_CMD_ADD_FLOW;
- fdircmd |= IXGBE_FDIRCMD_FILTER_UPDATE;
- fdircmd |= IXGBE_FDIRCMD_LAST;
- fdircmd |= IXGBE_FDIRCMD_QUEUE_EN;
- fdircmd |= queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+ /* store the TCP/UDP port masks, bit reversed from port layout */
+ fdirtcpm = ixgbe_get_fdirtcpm_82599(input_masks);
+
+ /* write both the same so that UDP and TCP use the same mask */
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
+
+ /* store source and destination IP masks (big-enian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
+ ~input_masks->src_ip_mask[0]);
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
+ ~input_masks->dst_ip_mask[0]);
+
+ /* Apply masks to input data */
+ input->formatted.vlan_id &= input_masks->vlan_id_mask;
+ input->formatted.flex_bytes &= input_masks->flex_mask;
+ input->formatted.src_port &= input_masks->src_port_mask;
+ input->formatted.dst_port &= input_masks->dst_port_mask;
+ input->formatted.src_ip[0] &= input_masks->src_ip_mask[0];
+ input->formatted.dst_ip[0] &= input_masks->dst_ip_mask[0];
+
+ /* record vlan (little-endian) and flex_bytes(big-endian) */
+ fdirvlan =
+ IXGBE_STORE_AS_BE16(ntohs(input->formatted.flex_bytes));
+ fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+ fdirvlan |= ntohs(input->formatted.vlan_id);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+
+ /* record source and destination port (little-endian)*/
+ fdirport = ntohs(input->formatted.dst_port);
+ fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
+ fdirport |= ntohs(input->formatted.src_port);
+ IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
+
+ /* record the first 32 bits of the destination address (big-endian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+
+ /* record the source address (big-endian) */
+ IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+ /* configure FDIRCMD register */
+ fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+ IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+ fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+ fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+
+ /* we only want the bucket hash so drop the upper 16 bits */
+ fdirhash = ixgbe_atr_compute_hash_82599(input,
+ IXGBE_ATR_BUCKET_HASH_KEY);
+ fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
return 0;
}
+
/**
* ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
* @hw: pointer to hardware structure
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 23ff23e..2002ea8 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -1477,9 +1477,7 @@ static void ixgbe_free_desc_rings(struct ixgbe_adapter *adapter)
reg_ctl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
reg_ctl &= ~IXGBE_RXCTRL_RXEN;
IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg_ctl);
- reg_ctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->reg_idx));
- reg_ctl &= ~IXGBE_RXDCTL_ENABLE;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->reg_idx), reg_ctl);
+ ixgbe_disable_rx_queue(adapter, rx_ring);
/* now Tx */
reg_ctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(tx_ring->reg_idx));
@@ -2279,10 +2277,11 @@ static int ixgbe_set_rx_ntuple(struct net_device *dev,
struct ethtool_rx_ntuple *cmd)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
- struct ethtool_rx_ntuple_flow_spec fs = cmd->fs;
- struct ixgbe_atr_input input_struct;
+ struct ethtool_rx_ntuple_flow_spec *fs = &cmd->fs;
+ union ixgbe_atr_input input_struct;
struct ixgbe_atr_input_masks input_masks;
int target_queue;
+ int err;
if (adapter->hw.mac.type == ixgbe_mac_82598EB)
return -EOPNOTSUPP;
@@ -2291,67 +2290,122 @@ static int ixgbe_set_rx_ntuple(struct net_device *dev,
* Don't allow programming if the action is a queue greater than
* the number of online Tx queues.
*/
- if ((fs.action >= adapter->num_tx_queues) ||
- (fs.action < ETHTOOL_RXNTUPLE_ACTION_DROP))
+ if ((fs->action >= adapter->num_tx_queues) ||
+ (fs->action < ETHTOOL_RXNTUPLE_ACTION_DROP))
return -EINVAL;
- memset(&input_struct, 0, sizeof(struct ixgbe_atr_input));
+ memset(&input_struct, 0, sizeof(union ixgbe_atr_input));
memset(&input_masks, 0, sizeof(struct ixgbe_atr_input_masks));
- input_masks.src_ip_mask = fs.m_u.tcp_ip4_spec.ip4src;
- input_masks.dst_ip_mask = fs.m_u.tcp_ip4_spec.ip4dst;
- input_masks.src_port_mask = fs.m_u.tcp_ip4_spec.psrc;
- input_masks.dst_port_mask = fs.m_u.tcp_ip4_spec.pdst;
- input_masks.vlan_id_mask = fs.vlan_tag_mask;
- /* only use the lowest 2 bytes for flex bytes */
- input_masks.data_mask = (fs.data_mask & 0xffff);
-
- switch (fs.flow_type) {
+ /* record flow type */
+ switch (fs->flow_type) {
+ case IPV4_FLOW:
+ input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+ break;
case TCP_V4_FLOW:
- ixgbe_atr_set_l4type_82599(&input_struct, IXGBE_ATR_L4TYPE_TCP);
+ input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
break;
case UDP_V4_FLOW:
- ixgbe_atr_set_l4type_82599(&input_struct, IXGBE_ATR_L4TYPE_UDP);
+ input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
break;
case SCTP_V4_FLOW:
- ixgbe_atr_set_l4type_82599(&input_struct, IXGBE_ATR_L4TYPE_SCTP);
+ input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
break;
default:
return -1;
}
- /* Mask bits from the inputs based on user-supplied mask */
- ixgbe_atr_set_src_ipv4_82599(&input_struct,
- (fs.h_u.tcp_ip4_spec.ip4src & ~fs.m_u.tcp_ip4_spec.ip4src));
- ixgbe_atr_set_dst_ipv4_82599(&input_struct,
- (fs.h_u.tcp_ip4_spec.ip4dst & ~fs.m_u.tcp_ip4_spec.ip4dst));
- /* 82599 expects these to be byte-swapped for perfect filtering */
- ixgbe_atr_set_src_port_82599(&input_struct,
- ((ntohs(fs.h_u.tcp_ip4_spec.psrc)) & ~fs.m_u.tcp_ip4_spec.psrc));
- ixgbe_atr_set_dst_port_82599(&input_struct,
- ((ntohs(fs.h_u.tcp_ip4_spec.pdst)) & ~fs.m_u.tcp_ip4_spec.pdst));
-
- /* VLAN and Flex bytes are either completely masked or not */
- if (!fs.vlan_tag_mask)
- ixgbe_atr_set_vlan_id_82599(&input_struct, fs.vlan_tag);
-
- if (!input_masks.data_mask)
- /* make sure we only use the first 2 bytes of user data */
- ixgbe_atr_set_flex_byte_82599(&input_struct,
- (fs.data & 0xffff));
+ /* copy vlan tag minus the CFI bit */
+ if ((fs->vlan_tag & 0xEFFF) || (~fs->vlan_tag_mask & 0xEFFF)) {
+ input_struct.formatted.vlan_id = htons(fs->vlan_tag & 0xEFFF);
+ if (!fs->vlan_tag_mask) {
+ input_masks.vlan_id_mask = htons(0xEFFF);
+ } else {
+ switch (~fs->vlan_tag_mask & 0xEFFF) {
+ /* all of these are valid vlan-mask values */
+ case 0xEFFF:
+ case 0xE000:
+ case 0x0FFF:
+ case 0x0000:
+ input_masks.vlan_id_mask =
+ htons(~fs->vlan_tag_mask);
+ break;
+ /* exit with error if vlan-mask is invalid */
+ default:
+ e_err(drv, "Partial VLAN ID or "
+ "priority mask in vlan-mask is not "
+ "supported by hardware\n");
+ return -1;
+ }
+ }
+ }
+
+ /* make sure we only use the first 2 bytes of user data */
+ if ((fs->data & 0xFFFF) || (~fs->data_mask & 0xFFFF)) {
+ input_struct.formatted.flex_bytes = htons(fs->data & 0xFFFF);
+ if (!(fs->data_mask & 0xFFFF)) {
+ input_masks.flex_mask = 0xFFFF;
+ } else if (~fs->data_mask & 0xFFFF) {
+ e_err(drv, "Partial user-def-mask is not "
+ "supported by hardware\n");
+ return -1;
+ }
+ }
+
+ /*
+ * Copy input into formatted structures
+ *
+ * These assignments are based on the following logic
+ * If neither input or mask are set assume value is masked out.
+ * If input is set, but mask is not mask should default to accept all.
+ * If input is not set, but mask is set then mask likely results in 0.
+ * If input is set and mask is set then assign both.
+ */
+ if (fs->h_u.tcp_ip4_spec.ip4src || ~fs->m_u.tcp_ip4_spec.ip4src) {
+ input_struct.formatted.src_ip[0] = fs->h_u.tcp_ip4_spec.ip4src;
+ if (!fs->m_u.tcp_ip4_spec.ip4src)
+ input_masks.src_ip_mask[0] = 0xFFFFFFFF;
+ else
+ input_masks.src_ip_mask[0] =
+ ~fs->m_u.tcp_ip4_spec.ip4src;
+ }
+ if (fs->h_u.tcp_ip4_spec.ip4dst || ~fs->m_u.tcp_ip4_spec.ip4dst) {
+ input_struct.formatted.dst_ip[0] = fs->h_u.tcp_ip4_spec.ip4dst;
+ if (!fs->m_u.tcp_ip4_spec.ip4dst)
+ input_masks.dst_ip_mask[0] = 0xFFFFFFFF;
+ else
+ input_masks.dst_ip_mask[0] =
+ ~fs->m_u.tcp_ip4_spec.ip4dst;
+ }
+ if (fs->h_u.tcp_ip4_spec.psrc || ~fs->m_u.tcp_ip4_spec.psrc) {
+ input_struct.formatted.src_port = fs->h_u.tcp_ip4_spec.psrc;
+ if (!fs->m_u.tcp_ip4_spec.psrc)
+ input_masks.src_port_mask = 0xFFFF;
+ else
+ input_masks.src_port_mask = ~fs->m_u.tcp_ip4_spec.psrc;
+ }
+ if (fs->h_u.tcp_ip4_spec.pdst || ~fs->m_u.tcp_ip4_spec.pdst) {
+ input_struct.formatted.dst_port = fs->h_u.tcp_ip4_spec.pdst;
+ if (!fs->m_u.tcp_ip4_spec.pdst)
+ input_masks.dst_port_mask = 0xFFFF;
+ else
+ input_masks.dst_port_mask = ~fs->m_u.tcp_ip4_spec.pdst;
+ }
/* determine if we need to drop or route the packet */
- if (fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
+ if (fs->action == ETHTOOL_RXNTUPLE_ACTION_DROP)
target_queue = MAX_RX_QUEUES - 1;
else
- target_queue = fs.action;
+ target_queue = fs->action;
spin_lock(&adapter->fdir_perfect_lock);
- ixgbe_fdir_add_perfect_filter_82599(&adapter->hw, &input_struct,
- &input_masks, 0, target_queue);
+ err = ixgbe_fdir_add_perfect_filter_82599(&adapter->hw,
+ &input_struct,
+ &input_masks, 0,
+ target_queue);
spin_unlock(&adapter->fdir_perfect_lock);
- return 0;
+ return err ? -1 : 0;
}
static const struct ethtool_ops ixgbe_ethtool_ops = {
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 38ab4f3..a060610 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3024,6 +3024,36 @@ static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter,
}
}
+void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *ring)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ int wait_loop = IXGBE_MAX_RX_DESC_POLL;
+ u32 rxdctl;
+ u8 reg_idx = ring->reg_idx;
+
+ rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+ rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+
+ /* write value back with RXDCTL.ENABLE bit cleared */
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+
+ if (hw->mac.type == ixgbe_mac_82598EB &&
+ !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+ return;
+
+ /* the hardware may take up to 100us to really disable the rx queue */
+ do {
+ udelay(10);
+ rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+ } while (--wait_loop && (rxdctl & IXGBE_RXDCTL_ENABLE));
+
+ if (!wait_loop) {
+ e_err(drv, "RXDCTL.ENABLE on Rx queue %d not cleared within "
+ "the polling period\n", reg_idx);
+ }
+}
+
void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
struct ixgbe_ring *ring)
{
@@ -3034,9 +3064,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
/* disable queue to avoid issues while updating state */
rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx),
- rxdctl & ~IXGBE_RXDCTL_ENABLE);
- IXGBE_WRITE_FLUSH(hw);
+ ixgbe_disable_rx_queue(adapter, ring);
IXGBE_WRITE_REG(hw, IXGBE_RDBAL(reg_idx), (rdba & DMA_BIT_MASK(32)));
IXGBE_WRITE_REG(hw, IXGBE_RDBAH(reg_idx), (rdba >> 32));
@@ -4064,7 +4092,11 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
- IXGBE_WRITE_FLUSH(hw);
+ /* disable all enabled rx queues */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ /* this call also flushes the previous write */
+ ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
+
msleep(10);
netif_tx_stop_all_queues(netdev);
@@ -4789,6 +4821,12 @@ static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+ if (adapter->flags & (IXGBE_FLAG_FDIR_HASH_CAPABLE |
+ IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+ e_err(probe,
+ "Flow Director is not supported while multiple "
+ "queues are disabled. Disabling Flow Director\n");
+ }
adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
adapter->atr_sample_rate = 0;
@@ -5094,16 +5132,11 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
- if (dev->features & NETIF_F_NTUPLE) {
- /* Flow Director perfect filter enabled */
- adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
- adapter->atr_sample_rate = 0;
- spin_lock_init(&adapter->fdir_perfect_lock);
- } else {
- /* Flow Director hash filters enabled */
- adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
- adapter->atr_sample_rate = 20;
- }
+ /* n-tuple support exists, always init our spinlock */
+ spin_lock_init(&adapter->fdir_perfect_lock);
+ /* Flow Director hash filters enabled */
+ adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+ adapter->atr_sample_rate = 20;
adapter->ring_feature[RING_F_FDIR].indices =
IXGBE_MAX_FDIR_INDICES;
adapter->fdir_pballoc = 0;
@@ -6474,38 +6507,92 @@ static void ixgbe_tx_queue(struct ixgbe_ring *tx_ring,
writel(i, tx_ring->tail);
}
-static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb,
- u8 queue, u32 tx_flags, __be16 protocol)
+static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb,
+ u32 tx_flags, __be16 protocol)
{
- struct ixgbe_atr_input atr_input;
- struct iphdr *iph = ip_hdr(skb);
- struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct ixgbe_q_vector *q_vector = ring->q_vector;
+ union ixgbe_atr_hash_dword input = { .dword = 0 };
+ union ixgbe_atr_hash_dword common = { .dword = 0 };
+ union {
+ unsigned char *network;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ } hdr;
struct tcphdr *th;
- u16 vlan_id;
+ __be16 vlan_id;
- /* Right now, we support IPv4 w/ TCP only */
- if (protocol != htons(ETH_P_IP) ||
- iph->protocol != IPPROTO_TCP)
+ /* if ring doesn't have a interrupt vector, cannot perform ATR */
+ if (!q_vector)
return;
- memset(&atr_input, 0, sizeof(struct ixgbe_atr_input));
+ /* do nothing if sampling is disabled */
+ if (!ring->atr_sample_rate)
+ return;
- vlan_id = (tx_flags & IXGBE_TX_FLAGS_VLAN_MASK) >>
- IXGBE_TX_FLAGS_VLAN_SHIFT;
+ ring->atr_count++;
+
+ /* snag network header to get L4 type and address */
+ hdr.network = skb_network_header(skb);
+
+ /* Currently only IPv4/IPv6 with TCP is supported */
+ if ((protocol != __constant_htons(ETH_P_IPV6) ||
+ hdr.ipv6->nexthdr != IPPROTO_TCP) &&
+ (protocol != __constant_htons(ETH_P_IP) ||
+ hdr.ipv4->protocol != IPPROTO_TCP))
+ return;
th = tcp_hdr(skb);
- ixgbe_atr_set_vlan_id_82599(&atr_input, vlan_id);
- ixgbe_atr_set_src_port_82599(&atr_input, th->dest);
- ixgbe_atr_set_dst_port_82599(&atr_input, th->source);
- ixgbe_atr_set_flex_byte_82599(&atr_input, eth->h_proto);
- ixgbe_atr_set_l4type_82599(&atr_input, IXGBE_ATR_L4TYPE_TCP);
- /* src and dst are inverted, think how the receiver sees them */
- ixgbe_atr_set_src_ipv4_82599(&atr_input, iph->daddr);
- ixgbe_atr_set_dst_ipv4_82599(&atr_input, iph->saddr);
+ /* skip this packet since the socket is closing */
+ if (th->fin)
+ return;
+
+ /* sample on all syn packets or once every atr sample count */
+ if (!th->syn && (ring->atr_count < ring->atr_sample_rate))
+ return;
+
+ /* reset sample count */
+ ring->atr_count = 0;
+
+ vlan_id = htons(tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT);
+
+ /*
+ * src and dst are inverted, think how the receiver sees them
+ *
+ * The input is broken into two sections, a non-compressed section
+ * containing vm_pool, vlan_id, and flow_type. The rest of the data
+ * is XORed together and stored in the compressed dword.
+ */
+ input.formatted.vlan_id = vlan_id;
+
+ /*
+ * since src port and flex bytes occupy the same word XOR them together
+ * and write the value to source port portion of compressed dword
+ */
+ if (vlan_id)
+ common.port.src ^= th->dest ^ __constant_htons(ETH_P_8021Q);
+ else
+ common.port.src ^= th->dest ^ protocol;
+ common.port.dst ^= th->source;
+
+ if (protocol == __constant_htons(ETH_P_IP)) {
+ input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+ common.ip ^= hdr.ipv4->saddr ^ hdr.ipv4->daddr;
+ } else {
+ input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV6;
+ common.ip ^= hdr.ipv6->saddr.s6_addr32[0] ^
+ hdr.ipv6->saddr.s6_addr32[1] ^
+ hdr.ipv6->saddr.s6_addr32[2] ^
+ hdr.ipv6->saddr.s6_addr32[3] ^
+ hdr.ipv6->daddr.s6_addr32[0] ^
+ hdr.ipv6->daddr.s6_addr32[1] ^
+ hdr.ipv6->daddr.s6_addr32[2] ^
+ hdr.ipv6->daddr.s6_addr32[3];
+ }
/* This assumes the Rx queue and Tx queue are bound to the same CPU */
- ixgbe_fdir_add_signature_filter_82599(&adapter->hw, &atr_input, queue);
+ ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
+ input, common, ring->queue_index);
}
static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, int size)
@@ -6676,16 +6763,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
count = ixgbe_tx_map(adapter, tx_ring, skb, tx_flags, first, hdr_len);
if (count) {
/* add the ATR filter if ATR is on */
- if (tx_ring->atr_sample_rate) {
- ++tx_ring->atr_count;
- if ((tx_ring->atr_count >= tx_ring->atr_sample_rate) &&
- test_bit(__IXGBE_TX_FDIR_INIT_DONE,
- &tx_ring->state)) {
- ixgbe_atr(adapter, skb, tx_ring->queue_index,
- tx_flags, protocol);
- tx_ring->atr_count = 0;
- }
- }
+ if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
+ ixgbe_atr(tx_ring, skb, tx_flags, protocol);
txq = netdev_get_tx_queue(netdev, tx_ring->queue_index);
txq->tx_bytes += skb->len;
txq->tx_packets++;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 446f3467..fd3358f 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -1947,10 +1947,9 @@ enum ixgbe_fdir_pballoc_type {
#define IXGBE_FDIRM_VLANID 0x00000001
#define IXGBE_FDIRM_VLANP 0x00000002
#define IXGBE_FDIRM_POOL 0x00000004
-#define IXGBE_FDIRM_L3P 0x00000008
-#define IXGBE_FDIRM_L4P 0x00000010
-#define IXGBE_FDIRM_FLEX 0x00000020
-#define IXGBE_FDIRM_DIPv6 0x00000040
+#define IXGBE_FDIRM_L4P 0x00000008
+#define IXGBE_FDIRM_FLEX 0x00000010
+#define IXGBE_FDIRM_DIPv6 0x00000020
#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF
#define IXGBE_FDIRFREE_FREE_SHIFT 0
@@ -1990,6 +1989,7 @@ enum ixgbe_fdir_pballoc_type {
#define IXGBE_FDIRCMD_LAST 0x00000800
#define IXGBE_FDIRCMD_COLLISION 0x00001000
#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000
+#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5
#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16
#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24
#define IXGBE_FDIR_INIT_DONE_POLL 10
@@ -2147,51 +2147,80 @@ typedef u32 ixgbe_physical_layer;
#define FC_LOW_WATER(MTU) (2 * (2 * PAUSE_MTU(MTU) + PAUSE_RTT))
/* Software ATR hash keys */
-#define IXGBE_ATR_BUCKET_HASH_KEY 0xE214AD3D
-#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x14364D17
-
-/* Software ATR input stream offsets and masks */
-#define IXGBE_ATR_VLAN_OFFSET 0
-#define IXGBE_ATR_SRC_IPV6_OFFSET 2
-#define IXGBE_ATR_SRC_IPV4_OFFSET 14
-#define IXGBE_ATR_DST_IPV6_OFFSET 18
-#define IXGBE_ATR_DST_IPV4_OFFSET 30
-#define IXGBE_ATR_SRC_PORT_OFFSET 34
-#define IXGBE_ATR_DST_PORT_OFFSET 36
-#define IXGBE_ATR_FLEX_BYTE_OFFSET 38
-#define IXGBE_ATR_VM_POOL_OFFSET 40
-#define IXGBE_ATR_L4TYPE_OFFSET 41
+#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2
+#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614
+/* Software ATR input stream values and masks */
+#define IXGBE_ATR_HASH_MASK 0x7fff
#define IXGBE_ATR_L4TYPE_MASK 0x3
-#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4
#define IXGBE_ATR_L4TYPE_UDP 0x1
#define IXGBE_ATR_L4TYPE_TCP 0x2
#define IXGBE_ATR_L4TYPE_SCTP 0x3
-#define IXGBE_ATR_HASH_MASK 0x7fff
+#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4
+enum ixgbe_atr_flow_type {
+ IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0,
+ IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1,
+ IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2,
+ IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3,
+ IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4,
+ IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5,
+ IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6,
+ IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7,
+};
/* Flow Director ATR input struct. */
-struct ixgbe_atr_input {
- /* Byte layout in order, all values with MSB first:
+union ixgbe_atr_input {
+ /*
+ * Byte layout in order, all values with MSB first:
*
+ * vm_pool - 1 byte
+ * flow_type - 1 byte
* vlan_id - 2 bytes
* src_ip - 16 bytes
* dst_ip - 16 bytes
* src_port - 2 bytes
* dst_port - 2 bytes
* flex_bytes - 2 bytes
- * vm_pool - 1 byte
- * l4type - 1 byte
+ * rsvd0 - 2 bytes - space reserved must be 0.
*/
- u8 byte_stream[42];
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ __be32 dst_ip[4];
+ __be32 src_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+ __be16 flex_bytes;
+ __be16 rsvd0;
+ } formatted;
+ __be32 dword_stream[11];
+};
+
+/* Flow Director compressed ATR hash input struct */
+union ixgbe_atr_hash_dword {
+ struct {
+ u8 vm_pool;
+ u8 flow_type;
+ __be16 vlan_id;
+ } formatted;
+ __be32 ip;
+ struct {
+ __be16 src;
+ __be16 dst;
+ } port;
+ __be16 flex_bytes;
+ __be32 dword;
};
struct ixgbe_atr_input_masks {
- u32 src_ip_mask;
- u32 dst_ip_mask;
- u16 src_port_mask;
- u16 dst_port_mask;
- u16 vlan_id_mask;
- u16 data_mask;
+ __be16 rsvd0;
+ __be16 vlan_id_mask;
+ __be32 dst_ip_mask[4];
+ __be32 src_ip_mask[4];
+ __be16 src_port_mask;
+ __be16 dst_port_mask;
+ __be16 flex_mask;
};
enum ixgbe_eeprom_type {
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 8f4bf1f..3a4277f 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -178,6 +178,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
} else {
int i;
+ buf->direct.buf = NULL;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
@@ -229,7 +230,7 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
buf->direct.map);
else {
- if (BITS_PER_LONG == 64)
+ if (BITS_PER_LONG == 64 && buf->direct.buf)
vunmap(buf->direct.buf);
for (i = 0; i < buf->nbufs; ++i)
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 6d6806b..897f576 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -972,7 +972,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
int i;
int err;
- dev = alloc_etherdev_mq(sizeof(struct mlx4_en_priv), prof->tx_ring_num);
+ dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
+ prof->tx_ring_num, prof->rx_ring_num);
if (dev == NULL) {
mlx4_err(mdev, "Net device allocation failed\n");
return -ENOMEM;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 7a7e18b..5de1db8 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -289,10 +289,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
dev_cap->bf_reg_size = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
- if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) {
- mlx4_warn(dev, "firmware bug: log2 # of blue flame regs is invalid (%d), forcing 3\n", field & 0x1f);
+ if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
field = 3;
- }
dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index 2c15891..e953793 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -1536,6 +1536,7 @@ static struct pcmcia_device_id pcnet_ids[] = {
PCMCIA_DEVICE_PROD_ID12("CONTEC", "C-NET(PC)C-10L", 0x21cab552, 0xf6f90722),
PCMCIA_DEVICE_PROD_ID12("corega", "FEther PCC-TXF", 0x0a21501a, 0xa51564a2),
PCMCIA_DEVICE_PROD_ID12("corega", "Ether CF-TD", 0x0a21501a, 0x6589340a),
+ PCMCIA_DEVICE_PROD_ID12("corega K.K.", "corega Ether CF-TD LAN Card", 0x5261440f, 0x8797663b),
PCMCIA_DEVICE_PROD_ID12("corega K.K.", "corega EtherII PCC-T", 0x5261440f, 0xfa9d85bd),
PCMCIA_DEVICE_PROD_ID12("corega K.K.", "corega EtherII PCC-TD", 0x5261440f, 0xc49bd73d),
PCMCIA_DEVICE_PROD_ID12("Corega K.K.", "corega EtherII PCC-TD", 0xd4fdcbd8, 0xc49bd73d),
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index 78d70a6..a1b82c9 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -32,6 +32,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include <asm/uaccess.h>
#include <asm/string.h>
@@ -542,7 +543,7 @@ ppp_async_encode(struct asyncppp *ap)
data = ap->tpkt->data;
count = ap->tpkt->len;
fcs = ap->tfcs;
- proto = (data[0] << 8) + data[1];
+ proto = get_unaligned_be16(data);
/*
* LCP packets with code values between 1 (configure-reqest)
@@ -963,7 +964,7 @@ static void async_lcp_peek(struct asyncppp *ap, unsigned char *data,
code = data[0];
if (code != CONFACK && code != CONFREQ)
return;
- dlen = (data[2] << 8) + data[3];
+ dlen = get_unaligned_be16(data + 2);
if (len < dlen)
return; /* packet got truncated or length is bogus */
@@ -997,15 +998,14 @@ static void async_lcp_peek(struct asyncppp *ap, unsigned char *data,
while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) {
switch (data[0]) {
case LCP_MRU:
- val = (data[2] << 8) + data[3];
+ val = get_unaligned_be16(data + 2);
if (inbound)
ap->mru = val;
else
ap->chan.mtu = val;
break;
case LCP_ASYNCMAP:
- val = (data[2] << 24) + (data[3] << 16)
- + (data[4] << 8) + data[5];
+ val = get_unaligned_be32(data + 2);
if (inbound)
ap->raccm = val;
else
diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c
index 695bc83..4358330 100644
--- a/drivers/net/ppp_deflate.c
+++ b/drivers/net/ppp_deflate.c
@@ -41,6 +41,7 @@
#include <linux/ppp-comp.h>
#include <linux/zlib.h>
+#include <asm/unaligned.h>
/*
* State for a Deflate (de)compressor.
@@ -232,11 +233,9 @@ static int z_compress(void *arg, unsigned char *rptr, unsigned char *obuf,
*/
wptr[0] = PPP_ADDRESS(rptr);
wptr[1] = PPP_CONTROL(rptr);
- wptr[2] = PPP_COMP >> 8;
- wptr[3] = PPP_COMP;
+ put_unaligned_be16(PPP_COMP, wptr + 2);
wptr += PPP_HDRLEN;
- wptr[0] = state->seqno >> 8;
- wptr[1] = state->seqno;
+ put_unaligned_be16(state->seqno, wptr);
wptr += DEFLATE_OVHD;
olen = PPP_HDRLEN + DEFLATE_OVHD;
state->strm.next_out = wptr;
@@ -451,7 +450,7 @@ static int z_decompress(void *arg, unsigned char *ibuf, int isize,
}
/* Check the sequence number. */
- seq = (ibuf[PPP_HDRLEN] << 8) + ibuf[PPP_HDRLEN+1];
+ seq = get_unaligned_be16(ibuf + PPP_HDRLEN);
if (seq != (state->seqno & 0xffff)) {
if (state->debug)
printk(KERN_DEBUG "z_decompress%d: bad seq # %d, expected %d\n",
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 6456484..c7a6c44 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -46,6 +46,7 @@
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include <net/slhc_vj.h>
#include <asm/atomic.h>
@@ -210,7 +211,7 @@ struct ppp_net {
};
/* Get the PPP protocol number from a skb */
-#define PPP_PROTO(skb) (((skb)->data[0] << 8) + (skb)->data[1])
+#define PPP_PROTO(skb) get_unaligned_be16((skb)->data)
/* We limit the length of ppp->file.rq to this (arbitrary) value */
#define PPP_MAX_RQLEN 32
@@ -964,8 +965,7 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
pp = skb_push(skb, 2);
proto = npindex_to_proto[npi];
- pp[0] = proto >> 8;
- pp[1] = proto;
+ put_unaligned_be16(proto, pp);
netif_stop_queue(dev);
skb_queue_tail(&ppp->file.xq, skb);
@@ -1473,8 +1473,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
q = skb_put(frag, flen + hdrlen);
/* make the MP header */
- q[0] = PPP_MP >> 8;
- q[1] = PPP_MP;
+ put_unaligned_be16(PPP_MP, q);
if (ppp->flags & SC_MP_XSHORTSEQ) {
q[2] = bits + ((ppp->nxseq >> 8) & 0xf);
q[3] = ppp->nxseq;
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index 6d1a1b8..9a1849a 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -55,6 +55,7 @@
#include <linux/ppp_defs.h>
#include <linux/ppp-comp.h>
#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
#include "ppp_mppe.h"
@@ -395,16 +396,14 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
*/
obuf[0] = PPP_ADDRESS(ibuf);
obuf[1] = PPP_CONTROL(ibuf);
- obuf[2] = PPP_COMP >> 8; /* isize + MPPE_OVHD + 1 */
- obuf[3] = PPP_COMP; /* isize + MPPE_OVHD + 2 */
+ put_unaligned_be16(PPP_COMP, obuf + 2);
obuf += PPP_HDRLEN;
state->ccount = (state->ccount + 1) % MPPE_CCOUNT_SPACE;
if (state->debug >= 7)
printk(KERN_DEBUG "mppe_compress[%d]: ccount %d\n", state->unit,
state->ccount);
- obuf[0] = state->ccount >> 8;
- obuf[1] = state->ccount & 0xff;
+ put_unaligned_be16(state->ccount, obuf);
if (!state->stateful || /* stateless mode */
((state->ccount & 0xff) == 0xff) || /* "flag" packet */
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index 4c95ec3..4e6b72f 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include <asm/uaccess.h>
#define PPP_VERSION "2.4.2"
@@ -563,7 +564,7 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
int islcp;
data = skb->data;
- proto = (data[0] << 8) + data[1];
+ proto = get_unaligned_be16(data);
/* LCP packets with codes between 1 (configure-request)
* and 7 (code-reject) must be sent as though no options
diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 9c2a02d..44e316f 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -34,8 +34,8 @@
#define _QLCNIC_LINUX_MAJOR 5
#define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 14
-#define QLCNIC_LINUX_VERSIONID "5.0.14"
+#define _QLCNIC_LINUX_SUBVERSION 15
+#define QLCNIC_LINUX_VERSIONID "5.0.15"
#define QLCNIC_DRV_IDC_VER 0x01
#define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\
(_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
@@ -289,6 +289,26 @@ struct uni_data_desc{
u32 reserved[5];
};
+/* Flash Defines and Structures */
+#define QLCNIC_FLT_LOCATION 0x3F1000
+#define QLCNIC_FW_IMAGE_REGION 0x74
+struct qlcnic_flt_header {
+ u16 version;
+ u16 len;
+ u16 checksum;
+ u16 reserved;
+};
+
+struct qlcnic_flt_entry {
+ u8 region;
+ u8 reserved0;
+ u8 attrib;
+ u8 reserved1;
+ u32 size;
+ u32 start_addr;
+ u32 end_add;
+};
+
/* Magic number to let user know flash is programmed */
#define QLCNIC_BDINFO_MAGIC 0x12345678
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index 1e7af70..4c14510 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -672,7 +672,7 @@ qlcnic_diag_test(struct net_device *dev, struct ethtool_test *eth_test,
if (data[1])
eth_test->flags |= ETH_TEST_FL_FAILED;
- if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ if (eth_test->flags & ETH_TEST_FL_OFFLINE) {
data[2] = qlcnic_irq_test(dev);
if (data[2])
eth_test->flags |= ETH_TEST_FL_FAILED;
diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 9b9c7c3..a7f1d5b 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -627,12 +627,73 @@ qlcnic_setup_idc_param(struct qlcnic_adapter *adapter) {
return 0;
}
+static int qlcnic_get_flt_entry(struct qlcnic_adapter *adapter, u8 region,
+ struct qlcnic_flt_entry *region_entry)
+{
+ struct qlcnic_flt_header flt_hdr;
+ struct qlcnic_flt_entry *flt_entry;
+ int i = 0, ret;
+ u32 entry_size;
+
+ memset(region_entry, 0, sizeof(struct qlcnic_flt_entry));
+ ret = qlcnic_rom_fast_read_words(adapter, QLCNIC_FLT_LOCATION,
+ (u8 *)&flt_hdr,
+ sizeof(struct qlcnic_flt_header));
+ if (ret) {
+ dev_warn(&adapter->pdev->dev,
+ "error reading flash layout header\n");
+ return -EIO;
+ }
+
+ entry_size = flt_hdr.len - sizeof(struct qlcnic_flt_header);
+ flt_entry = (struct qlcnic_flt_entry *)vzalloc(entry_size);
+ if (flt_entry == NULL) {
+ dev_warn(&adapter->pdev->dev, "error allocating memory\n");
+ return -EIO;
+ }
+
+ ret = qlcnic_rom_fast_read_words(adapter, QLCNIC_FLT_LOCATION +
+ sizeof(struct qlcnic_flt_header),
+ (u8 *)flt_entry, entry_size);
+ if (ret) {
+ dev_warn(&adapter->pdev->dev,
+ "error reading flash layout entries\n");
+ goto err_out;
+ }
+
+ while (i < (entry_size/sizeof(struct qlcnic_flt_entry))) {
+ if (flt_entry[i].region == region)
+ break;
+ i++;
+ }
+ if (i >= (entry_size/sizeof(struct qlcnic_flt_entry))) {
+ dev_warn(&adapter->pdev->dev,
+ "region=%x not found in %d regions\n", region, i);
+ ret = -EIO;
+ goto err_out;
+ }
+ memcpy(region_entry, &flt_entry[i], sizeof(struct qlcnic_flt_entry));
+
+err_out:
+ vfree(flt_entry);
+ return ret;
+}
+
int
qlcnic_check_flash_fw_ver(struct qlcnic_adapter *adapter)
{
+ struct qlcnic_flt_entry fw_entry;
u32 ver = -1, min_ver;
+ int ret;
- qlcnic_rom_fast_read(adapter, QLCNIC_FW_VERSION_OFFSET, (int *)&ver);
+ ret = qlcnic_get_flt_entry(adapter, QLCNIC_FW_IMAGE_REGION, &fw_entry);
+ if (!ret)
+ /* 0-4:-signature, 4-8:-fw version */
+ qlcnic_rom_fast_read(adapter, fw_entry.start_addr + 4,
+ (int *)&ver);
+ else
+ qlcnic_rom_fast_read(adapter, QLCNIC_FW_VERSION_OFFSET,
+ (int *)&ver);
ver = QLCNIC_DECODE_VERSION(ver);
min_ver = QLCNIC_MIN_FW_VERSION;
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 11e3a46..37c04b4 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -31,15 +31,15 @@ static const char qlcnic_driver_string[] = "QLogic 1/10 GbE "
static struct workqueue_struct *qlcnic_wq;
static int qlcnic_mac_learn;
-module_param(qlcnic_mac_learn, int, 0644);
+module_param(qlcnic_mac_learn, int, 0444);
MODULE_PARM_DESC(qlcnic_mac_learn, "Mac Filter (0=disabled, 1=enabled)");
static int use_msi = 1;
-module_param(use_msi, int, 0644);
+module_param(use_msi, int, 0444);
MODULE_PARM_DESC(use_msi, "MSI interrupt (0=disabled, 1=enabled");
static int use_msi_x = 1;
-module_param(use_msi_x, int, 0644);
+module_param(use_msi_x, int, 0444);
MODULE_PARM_DESC(use_msi_x, "MSI-X interrupt (0=disabled, 1=enabled");
static int auto_fw_reset = AUTO_FW_RESET_ENABLED;
@@ -47,11 +47,11 @@ module_param(auto_fw_reset, int, 0644);
MODULE_PARM_DESC(auto_fw_reset, "Auto firmware reset (0=disabled, 1=enabled");
static int load_fw_file;
-module_param(load_fw_file, int, 0644);
+module_param(load_fw_file, int, 0444);
MODULE_PARM_DESC(load_fw_file, "Load firmware from (0=flash, 1=file");
static int qlcnic_config_npars;
-module_param(qlcnic_config_npars, int, 0644);
+module_param(qlcnic_config_npars, int, 0444);
MODULE_PARM_DESC(qlcnic_config_npars, "Configure NPARs (0=disabled, 1=enabled");
static int __devinit qlcnic_probe(struct pci_dev *pdev,
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 27a7c20..bb8645a 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1632,36 +1632,134 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
{
__le32 *phytable = (__le32 *)fw->data;
struct net_device *dev = tp->dev;
- size_t i;
+ size_t index, fw_size = fw->size / sizeof(*phytable);
+ u32 predata, count;
if (fw->size % sizeof(*phytable)) {
netif_err(tp, probe, dev, "odd sized firmware %zd\n", fw->size);
return;
}
- for (i = 0; i < fw->size / sizeof(*phytable); i++) {
- u32 action = le32_to_cpu(phytable[i]);
+ for (index = 0; index < fw_size; index++) {
+ u32 action = le32_to_cpu(phytable[index]);
+ u32 regno = (action & 0x0fff0000) >> 16;
- if (!action)
+ switch(action & 0xf0000000) {
+ case PHY_READ:
+ case PHY_DATA_OR:
+ case PHY_DATA_AND:
+ case PHY_READ_EFUSE:
+ case PHY_CLEAR_READCOUNT:
+ case PHY_WRITE:
+ case PHY_WRITE_PREVIOUS:
+ case PHY_DELAY_MS:
+ break;
+
+ case PHY_BJMPN:
+ if (regno > index) {
+ netif_err(tp, probe, tp->dev,
+ "Out of range of firmware\n");
+ return;
+ }
+ break;
+ case PHY_READCOUNT_EQ_SKIP:
+ if (index + 2 >= fw_size) {
+ netif_err(tp, probe, tp->dev,
+ "Out of range of firmware\n");
+ return;
+ }
+ break;
+ case PHY_COMP_EQ_SKIPN:
+ case PHY_COMP_NEQ_SKIPN:
+ case PHY_SKIPN:
+ if (index + 1 + regno >= fw_size) {
+ netif_err(tp, probe, tp->dev,
+ "Out of range of firmware\n");
+ return;
+ }
break;
- if ((action & 0xf0000000) != PHY_WRITE) {
- netif_err(tp, probe, dev,
- "unknown action 0x%08x\n", action);
+ case PHY_READ_MAC_BYTE:
+ case PHY_WRITE_MAC_BYTE:
+ case PHY_WRITE_ERI_WORD:
+ default:
+ netif_err(tp, probe, tp->dev,
+ "Invalid action 0x%08x\n", action);
return;
}
}
- while (i-- != 0) {
- u32 action = le32_to_cpu(*phytable);
+ predata = 0;
+ count = 0;
+
+ for (index = 0; index < fw_size; ) {
+ u32 action = le32_to_cpu(phytable[index]);
u32 data = action & 0x0000ffff;
- u32 reg = (action & 0x0fff0000) >> 16;
+ u32 regno = (action & 0x0fff0000) >> 16;
+
+ if (!action)
+ break;
switch(action & 0xf0000000) {
+ case PHY_READ:
+ predata = rtl_readphy(tp, regno);
+ count++;
+ index++;
+ break;
+ case PHY_DATA_OR:
+ predata |= data;
+ index++;
+ break;
+ case PHY_DATA_AND:
+ predata &= data;
+ index++;
+ break;
+ case PHY_BJMPN:
+ index -= regno;
+ break;
+ case PHY_READ_EFUSE:
+ predata = rtl8168d_efuse_read(tp->mmio_addr, regno);
+ index++;
+ break;
+ case PHY_CLEAR_READCOUNT:
+ count = 0;
+ index++;
+ break;
case PHY_WRITE:
- rtl_writephy(tp, reg, data);
- phytable++;
+ rtl_writephy(tp, regno, data);
+ index++;
+ break;
+ case PHY_READCOUNT_EQ_SKIP:
+ if (count == data)
+ index += 2;
+ else
+ index += 1;
+ break;
+ case PHY_COMP_EQ_SKIPN:
+ if (predata == data)
+ index += regno;
+ index++;
break;
+ case PHY_COMP_NEQ_SKIPN:
+ if (predata != data)
+ index += regno;
+ index++;
+ break;
+ case PHY_WRITE_PREVIOUS:
+ rtl_writephy(tp, regno, predata);
+ index++;
+ break;
+ case PHY_SKIPN:
+ index += regno + 1;
+ break;
+ case PHY_DELAY_MS:
+ mdelay(data);
+ index++;
+ break;
+
+ case PHY_READ_MAC_BYTE:
+ case PHY_WRITE_MAC_BYTE:
+ case PHY_WRITE_ERI_WORD:
default:
BUG();
}
@@ -3069,15 +3167,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl8168_driver_start(tp);
}
- rtl8169_init_phy(dev, tp);
-
- /*
- * Pretend we are using VLANs; This bypasses a nasty bug where
- * Interrupts stop flowing on high load on 8110SCd controllers.
- */
- if (tp->mac_version == RTL_GIGA_MAC_VER_05)
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | RxVlan);
-
device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL);
if (pci_dev_run_wake(pdev))
@@ -3127,6 +3216,7 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
static int rtl8169_open(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
+ void __iomem *ioaddr = tp->mmio_addr;
struct pci_dev *pdev = tp->pci_dev;
int retval = -ENOMEM;
@@ -3162,6 +3252,15 @@ static int rtl8169_open(struct net_device *dev)
napi_enable(&tp->napi);
+ rtl8169_init_phy(dev, tp);
+
+ /*
+ * Pretend we are using VLANs; This bypasses a nasty bug where
+ * Interrupts stop flowing on high load on 8110SCd controllers.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+ RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | RxVlan);
+
rtl_pll_power_up(tp);
rtl_hw_start(dev);
@@ -3171,7 +3270,7 @@ static int rtl8169_open(struct net_device *dev)
tp->saved_wolopts = 0;
pm_runtime_put_noidle(&pdev->dev);
- rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+ rtl8169_check_link_status(dev, tp, ioaddr);
out:
return retval;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 39996bf..7d85a38 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -46,10 +46,6 @@
#include <asm/irq.h>
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-#define SKY2_VLAN_TAG_USED 1
-#endif
-
#include "sky2.h"
#define DRV_NAME "sky2"
@@ -1326,40 +1322,35 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return err;
}
-#ifdef SKY2_VLAN_TAG_USED
-static void sky2_set_vlan_mode(struct sky2_hw *hw, u16 port, bool onoff)
+#define NETIF_F_ALL_VLAN (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)
+
+static void sky2_vlan_mode(struct net_device *dev)
{
- if (onoff) {
+ struct sky2_port *sky2 = netdev_priv(dev);
+ struct sky2_hw *hw = sky2->hw;
+ u16 port = sky2->port;
+
+ if (dev->features & NETIF_F_HW_VLAN_RX)
sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T),
RX_VLAN_STRIP_ON);
- sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T),
- TX_VLAN_TAG_ON);
- } else {
+ else
sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T),
RX_VLAN_STRIP_OFF);
+
+ dev->vlan_features = dev->features &~ NETIF_F_ALL_VLAN;
+ if (dev->features & NETIF_F_HW_VLAN_TX)
+ sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T),
+ TX_VLAN_TAG_ON);
+ else {
sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T),
TX_VLAN_TAG_OFF);
+
+ /* Can't do transmit offload of vlan without hw vlan */
+ dev->vlan_features &= ~(NETIF_F_TSO | NETIF_F_SG
+ | NETIF_F_ALL_CSUM);
}
}
-static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
-{
- struct sky2_port *sky2 = netdev_priv(dev);
- struct sky2_hw *hw = sky2->hw;
- u16 port = sky2->port;
-
- netif_tx_lock_bh(dev);
- napi_disable(&hw->napi);
-
- sky2->vlgrp = grp;
- sky2_set_vlan_mode(hw, port, grp != NULL);
-
- sky2_read32(hw, B0_Y2_SP_LISR);
- napi_enable(&hw->napi);
- netif_tx_unlock_bh(dev);
-}
-#endif
-
/* Amount of required worst case padding in rx buffer */
static inline unsigned sky2_rx_pad(const struct sky2_hw *hw)
{
@@ -1635,9 +1626,7 @@ static void sky2_hw_up(struct sky2_port *sky2)
sky2_prefetch_init(hw, txqaddr[port], sky2->tx_le_map,
sky2->tx_ring_size - 1);
-#ifdef SKY2_VLAN_TAG_USED
- sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL);
-#endif
+ sky2_vlan_mode(sky2->netdev);
sky2_rx_start(sky2);
}
@@ -1780,7 +1769,7 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
}
ctrl = 0;
-#ifdef SKY2_VLAN_TAG_USED
+
/* Add VLAN tag, can piggyback on LRGLEN or ADDR64 */
if (vlan_tx_tag_present(skb)) {
if (!le) {
@@ -1792,7 +1781,6 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
le->length = cpu_to_be16(vlan_tx_tag_get(skb));
ctrl |= INS_VLAN;
}
-#endif
/* Handle TCP checksum offload */
if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -2432,11 +2420,8 @@ static struct sk_buff *sky2_receive(struct net_device *dev,
struct sk_buff *skb = NULL;
u16 count = (status & GMR_FS_LEN) >> 16;
-#ifdef SKY2_VLAN_TAG_USED
- /* Account for vlan tag */
- if (sky2->vlgrp && (status & GMR_FS_VLAN))
- count -= VLAN_HLEN;
-#endif
+ if (status & GMR_FS_VLAN)
+ count -= VLAN_HLEN; /* Account for vlan tag */
netif_printk(sky2, rx_status, KERN_DEBUG, dev,
"rx slot %u status 0x%x len %d\n",
@@ -2504,17 +2489,9 @@ static inline void sky2_tx_done(struct net_device *dev, u16 last)
static inline void sky2_skb_rx(const struct sky2_port *sky2,
u32 status, struct sk_buff *skb)
{
-#ifdef SKY2_VLAN_TAG_USED
- u16 vlan_tag = be16_to_cpu(sky2->rx_tag);
- if (sky2->vlgrp && (status & GMR_FS_VLAN)) {
- if (skb->ip_summed == CHECKSUM_NONE)
- vlan_hwaccel_receive_skb(skb, sky2->vlgrp, vlan_tag);
- else
- vlan_gro_receive(&sky2->hw->napi, sky2->vlgrp,
- vlan_tag, skb);
- return;
- }
-#endif
+ if (status & GMR_FS_VLAN)
+ __vlan_hwaccel_put_tag(skb, be16_to_cpu(sky2->rx_tag));
+
if (skb->ip_summed == CHECKSUM_NONE)
netif_receive_skb(skb);
else
@@ -2631,7 +2608,6 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do, u16 idx)
goto exit_loop;
break;
-#ifdef SKY2_VLAN_TAG_USED
case OP_RXVLAN:
sky2->rx_tag = length;
break;
@@ -2639,7 +2615,6 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do, u16 idx)
case OP_RXCHKSVLAN:
sky2->rx_tag = length;
/* fall through */
-#endif
case OP_RXCHKS:
if (likely(sky2->flags & SKY2_FLAG_RX_CHECKSUM))
sky2_rx_checksum(sky2, status);
@@ -3042,6 +3017,10 @@ static int __devinit sky2_init(struct sky2_hw *hw)
| SKY2_HW_NEW_LE
| SKY2_HW_AUTO_TX_SUM
| SKY2_HW_ADV_POWER_CTL;
+
+ /* The workaround for status conflicts VLAN tag detection. */
+ if (hw->chip_rev == CHIP_REV_YU_FE2_A0)
+ hw->flags |= SKY2_HW_VLAN_BROKEN;
break;
case CHIP_ID_YUKON_SUPR:
@@ -3411,18 +3390,15 @@ static u32 sky2_supported_modes(const struct sky2_hw *hw)
u32 modes = SUPPORTED_10baseT_Half
| SUPPORTED_10baseT_Full
| SUPPORTED_100baseT_Half
- | SUPPORTED_100baseT_Full
- | SUPPORTED_Autoneg | SUPPORTED_TP;
+ | SUPPORTED_100baseT_Full;
if (hw->flags & SKY2_HW_GIGABIT)
modes |= SUPPORTED_1000baseT_Half
| SUPPORTED_1000baseT_Full;
return modes;
} else
- return SUPPORTED_1000baseT_Half
- | SUPPORTED_1000baseT_Full
- | SUPPORTED_Autoneg
- | SUPPORTED_FIBRE;
+ return SUPPORTED_1000baseT_Half
+ | SUPPORTED_1000baseT_Full;
}
static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
@@ -3436,9 +3412,11 @@ static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
if (sky2_is_copper(hw)) {
ecmd->port = PORT_TP;
ecmd->speed = sky2->speed;
+ ecmd->supported |= SUPPORTED_Autoneg | SUPPORTED_TP;
} else {
ecmd->speed = SPEED_1000;
ecmd->port = PORT_FIBRE;
+ ecmd->supported |= SUPPORTED_Autoneg | SUPPORTED_FIBRE;
}
ecmd->advertising = sky2->advertising;
@@ -3455,8 +3433,19 @@ static int sky2_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
u32 supported = sky2_supported_modes(hw);
if (ecmd->autoneg == AUTONEG_ENABLE) {
+ if (ecmd->advertising & ~supported)
+ return -EINVAL;
+
+ if (sky2_is_copper(hw))
+ sky2->advertising = ecmd->advertising |
+ ADVERTISED_TP |
+ ADVERTISED_Autoneg;
+ else
+ sky2->advertising = ecmd->advertising |
+ ADVERTISED_FIBRE |
+ ADVERTISED_Autoneg;
+
sky2->flags |= SKY2_FLAG_AUTO_SPEED;
- ecmd->advertising = supported;
sky2->duplex = -1;
sky2->speed = -1;
} else {
@@ -3500,8 +3489,6 @@ static int sky2_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
sky2->flags &= ~SKY2_FLAG_AUTO_SPEED;
}
- sky2->advertising = ecmd->advertising;
-
if (netif_running(dev)) {
sky2_phy_reinit(sky2);
sky2_set_multicast(dev);
@@ -4229,15 +4216,28 @@ static int sky2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom
static int sky2_set_flags(struct net_device *dev, u32 data)
{
struct sky2_port *sky2 = netdev_priv(dev);
- u32 supported =
- (sky2->hw->flags & SKY2_HW_RSS_BROKEN) ? 0 : ETH_FLAG_RXHASH;
+ unsigned long old_feat = dev->features;
+ u32 supported = 0;
int rc;
+ if (!(sky2->hw->flags & SKY2_HW_RSS_BROKEN))
+ supported |= ETH_FLAG_RXHASH;
+
+ if (!(sky2->hw->flags & SKY2_HW_VLAN_BROKEN))
+ supported |= ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN;
+
+ printk(KERN_DEBUG "sky2 set_flags: supported %x data %x\n",
+ supported, data);
+
rc = ethtool_op_set_flags(dev, data, supported);
if (rc)
return rc;
- rx_set_rss(dev);
+ if ((old_feat ^ dev->features) & NETIF_F_RXHASH)
+ rx_set_rss(dev);
+
+ if ((old_feat ^ dev->features) & NETIF_F_ALL_VLAN)
+ sky2_vlan_mode(dev);
return 0;
}
@@ -4273,6 +4273,7 @@ static const struct ethtool_ops sky2_ethtool_ops = {
.get_sset_count = sky2_get_sset_count,
.get_ethtool_stats = sky2_get_ethtool_stats,
.set_flags = sky2_set_flags,
+ .get_flags = ethtool_op_get_flags,
};
#ifdef CONFIG_SKY2_DEBUG
@@ -4554,9 +4555,6 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
.ndo_change_mtu = sky2_change_mtu,
.ndo_tx_timeout = sky2_tx_timeout,
.ndo_get_stats64 = sky2_get_stats,
-#ifdef SKY2_VLAN_TAG_USED
- .ndo_vlan_rx_register = sky2_vlan_rx_register,
-#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = sky2_netpoll,
#endif
@@ -4572,9 +4570,6 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
.ndo_change_mtu = sky2_change_mtu,
.ndo_tx_timeout = sky2_tx_timeout,
.ndo_get_stats64 = sky2_get_stats,
-#ifdef SKY2_VLAN_TAG_USED
- .ndo_vlan_rx_register = sky2_vlan_rx_register,
-#endif
},
};
@@ -4625,7 +4620,8 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
sky2->port = port;
dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG
- | NETIF_F_TSO | NETIF_F_GRO;
+ | NETIF_F_TSO | NETIF_F_GRO;
+
if (highmem)
dev->features |= NETIF_F_HIGHDMA;
@@ -4633,13 +4629,8 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
if (!(hw->flags & SKY2_HW_RSS_BROKEN))
dev->features |= NETIF_F_RXHASH;
-#ifdef SKY2_VLAN_TAG_USED
- /* The workaround for FE+ status conflicts with VLAN tag detection. */
- if (!(sky2->hw->chip_id == CHIP_ID_YUKON_FE_P &&
- sky2->hw->chip_rev == CHIP_REV_YU_FE2_A0)) {
+ if (!(hw->flags & SKY2_HW_VLAN_BROKEN))
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
- }
-#endif
/* read the mac address */
memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN);
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h
index 80bdc40..6861b0e 100644
--- a/drivers/net/sky2.h
+++ b/drivers/net/sky2.h
@@ -2236,11 +2236,8 @@ struct sky2_port {
u16 rx_pending;
u16 rx_data_size;
u16 rx_nfrags;
-
-#ifdef SKY2_VLAN_TAG_USED
u16 rx_tag;
- struct vlan_group *vlgrp;
-#endif
+
struct {
unsigned long last;
u32 mac_rp;
@@ -2284,6 +2281,7 @@ struct sky2_hw {
#define SKY2_HW_AUTO_TX_SUM 0x00000040 /* new IP decode for Tx */
#define SKY2_HW_ADV_POWER_CTL 0x00000080 /* additional PHY power regs */
#define SKY2_HW_RSS_BROKEN 0x00000100
+#define SKY2_HW_VLAN_BROKEN 0x00000200
u8 chip_id;
u8 chip_rev;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cdbeec9..546de57 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -488,7 +488,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!netif_carrier_ok(dev) ||
(frags > 1 && !xennet_can_sg(dev)) ||
- netif_needs_gso(dev, skb))) {
+ netif_needs_gso(skb, netif_skb_features(skb)))) {
spin_unlock_irq(&np->tx_lock);
goto drop;
}
diff --git a/drivers/ps3/Makefile b/drivers/ps3/Makefile
index ccea15c..50cb1e1 100644
--- a/drivers/ps3/Makefile
+++ b/drivers/ps3/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_PS3_VUART) += ps3-vuart.o
obj-$(CONFIG_PS3_PS3AV) += ps3av_mod.o
-ps3av_mod-objs += ps3av.o ps3av_cmd.o
+ps3av_mod-y := ps3av.o ps3av_cmd.o
obj-$(CONFIG_PPC_PS3) += sys-manager-core.o
obj-$(CONFIG_PS3_SYS_MANAGER) += ps3-sys-manager.o
obj-$(CONFIG_PS3_STORAGE) += ps3stor_lib.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index e6539cb..9583cbc 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -16,6 +16,7 @@
#include <linux/kdev_t.h>
#include <linux/idr.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include "rtc-core.h"
@@ -152,6 +153,18 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
spin_lock_init(&rtc->irq_task_lock);
init_waitqueue_head(&rtc->irq_queue);
+ /* Init timerqueue */
+ timerqueue_init_head(&rtc->timerqueue);
+ INIT_WORK(&rtc->irqwork, rtc_timer_do_work);
+ /* Init aie timer */
+ rtc_timer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
+ /* Init uie timer */
+ rtc_timer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
+ /* Init pie timer */
+ hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rtc->pie_timer.function = rtc_pie_update_irq;
+ rtc->pie_enabled = 0;
+
strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
dev_set_name(&rtc->dev, "rtc%d", id);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a0c8162..90384b9 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -14,15 +14,11 @@
#include <linux/rtc.h>
#include <linux/sched.h>
#include <linux/log2.h>
+#include <linux/workqueue.h>
-int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
+static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
{
int err;
-
- err = mutex_lock_interruptible(&rtc->ops_lock);
- if (err)
- return err;
-
if (!rtc->ops)
err = -ENODEV;
else if (!rtc->ops->read_time)
@@ -31,7 +27,18 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
memset(tm, 0, sizeof(struct rtc_time));
err = rtc->ops->read_time(rtc->dev.parent, tm);
}
+ return err;
+}
+
+int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
+{
+ int err;
+ err = mutex_lock_interruptible(&rtc->ops_lock);
+ if (err)
+ return err;
+
+ err = __rtc_read_time(rtc, tm);
mutex_unlock(&rtc->ops_lock);
return err;
}
@@ -106,188 +113,54 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
}
EXPORT_SYMBOL_GPL(rtc_set_mmss);
-static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
int err;
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
return err;
-
- if (rtc->ops == NULL)
- err = -ENODEV;
- else if (!rtc->ops->read_alarm)
- err = -EINVAL;
- else {
- memset(alarm, 0, sizeof(struct rtc_wkalrm));
- err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
- }
-
+ alarm->enabled = rtc->aie_timer.enabled;
+ if (alarm->enabled)
+ alarm->time = rtc_ktime_to_tm(rtc->aie_timer.node.expires);
mutex_unlock(&rtc->ops_lock);
- return err;
+
+ return 0;
}
+EXPORT_SYMBOL_GPL(rtc_read_alarm);
-int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
+ struct rtc_time tm;
+ long now, scheduled;
int err;
- struct rtc_time before, now;
- int first_time = 1;
- unsigned long t_now, t_alm;
- enum { none, day, month, year } missing = none;
- unsigned days;
-
- /* The lower level RTC driver may return -1 in some fields,
- * creating invalid alarm->time values, for reasons like:
- *
- * - The hardware may not be capable of filling them in;
- * many alarms match only on time-of-day fields, not
- * day/month/year calendar data.
- *
- * - Some hardware uses illegal values as "wildcard" match
- * values, which non-Linux firmware (like a BIOS) may try
- * to set up as e.g. "alarm 15 minutes after each hour".
- * Linux uses only oneshot alarms.
- *
- * When we see that here, we deal with it by using values from
- * a current RTC timestamp for any missing (-1) values. The
- * RTC driver prevents "periodic alarm" modes.
- *
- * But this can be racey, because some fields of the RTC timestamp
- * may have wrapped in the interval since we read the RTC alarm,
- * which would lead to us inserting inconsistent values in place
- * of the -1 fields.
- *
- * Reading the alarm and timestamp in the reverse sequence
- * would have the same race condition, and not solve the issue.
- *
- * So, we must first read the RTC timestamp,
- * then read the RTC alarm value,
- * and then read a second RTC timestamp.
- *
- * If any fields of the second timestamp have changed
- * when compared with the first timestamp, then we know
- * our timestamp may be inconsistent with that used by
- * the low-level rtc_read_alarm_internal() function.
- *
- * So, when the two timestamps disagree, we just loop and do
- * the process again to get a fully consistent set of values.
- *
- * This could all instead be done in the lower level driver,
- * but since more than one lower level RTC implementation needs it,
- * then it's probably best best to do it here instead of there..
- */
- /* Get the "before" timestamp */
- err = rtc_read_time(rtc, &before);
- if (err < 0)
+ err = rtc_valid_tm(&alarm->time);
+ if (err)
return err;
- do {
- if (!first_time)
- memcpy(&before, &now, sizeof(struct rtc_time));
- first_time = 0;
-
- /* get the RTC alarm values, which may be incomplete */
- err = rtc_read_alarm_internal(rtc, alarm);
- if (err)
- return err;
- if (!alarm->enabled)
- return 0;
-
- /* full-function RTCs won't have such missing fields */
- if (rtc_valid_tm(&alarm->time) == 0)
- return 0;
-
- /* get the "after" timestamp, to detect wrapped fields */
- err = rtc_read_time(rtc, &now);
- if (err < 0)
- return err;
-
- /* note that tm_sec is a "don't care" value here: */
- } while ( before.tm_min != now.tm_min
- || before.tm_hour != now.tm_hour
- || before.tm_mon != now.tm_mon
- || before.tm_year != now.tm_year);
-
- /* Fill in the missing alarm fields using the timestamp; we
- * know there's at least one since alarm->time is invalid.
- */
- if (alarm->time.tm_sec == -1)
- alarm->time.tm_sec = now.tm_sec;
- if (alarm->time.tm_min == -1)
- alarm->time.tm_min = now.tm_min;
- if (alarm->time.tm_hour == -1)
- alarm->time.tm_hour = now.tm_hour;
-
- /* For simplicity, only support date rollover for now */
- if (alarm->time.tm_mday == -1) {
- alarm->time.tm_mday = now.tm_mday;
- missing = day;
- }
- if (alarm->time.tm_mon == -1) {
- alarm->time.tm_mon = now.tm_mon;
- if (missing == none)
- missing = month;
- }
- if (alarm->time.tm_year == -1) {
- alarm->time.tm_year = now.tm_year;
- if (missing == none)
- missing = year;
- }
-
- /* with luck, no rollover is needed */
- rtc_tm_to_time(&now, &t_now);
- rtc_tm_to_time(&alarm->time, &t_alm);
- if (t_now < t_alm)
- goto done;
-
- switch (missing) {
+ rtc_tm_to_time(&alarm->time, &scheduled);
- /* 24 hour rollover ... if it's now 10am Monday, an alarm that
- * that will trigger at 5am will do so at 5am Tuesday, which
- * could also be in the next month or year. This is a common
- * case, especially for PCs.
- */
- case day:
- dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
- t_alm += 24 * 60 * 60;
- rtc_time_to_tm(t_alm, &alarm->time);
- break;
-
- /* Month rollover ... if it's the 31th, an alarm on the 3rd will
- * be next month. An alarm matching on the 30th, 29th, or 28th
- * may end up in the month after that! Many newer PCs support
- * this type of alarm.
+ /* Make sure we're not setting alarms in the past */
+ err = __rtc_read_time(rtc, &tm);
+ rtc_tm_to_time(&tm, &now);
+ if (scheduled <= now)
+ return -ETIME;
+ /*
+ * XXX - We just checked to make sure the alarm time is not
+ * in the past, but there is still a race window where if
+ * the is alarm set for the next second and the second ticks
+ * over right here, before we set the alarm.
*/
- case month:
- dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
- do {
- if (alarm->time.tm_mon < 11)
- alarm->time.tm_mon++;
- else {
- alarm->time.tm_mon = 0;
- alarm->time.tm_year++;
- }
- days = rtc_month_days(alarm->time.tm_mon,
- alarm->time.tm_year);
- } while (days < alarm->time.tm_mday);
- break;
-
- /* Year rollover ... easy except for leap years! */
- case year:
- dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
- do {
- alarm->time.tm_year++;
- } while (rtc_valid_tm(&alarm->time) != 0);
- break;
-
- default:
- dev_warn(&rtc->dev, "alarm rollover not handled\n");
- }
-done:
- return 0;
+ if (!rtc->ops)
+ err = -ENODEV;
+ else if (!rtc->ops->set_alarm)
+ err = -EINVAL;
+ else
+ err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
+
+ return err;
}
-EXPORT_SYMBOL_GPL(rtc_read_alarm);
int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
@@ -300,16 +173,18 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
return err;
-
- if (!rtc->ops)
- err = -ENODEV;
- else if (!rtc->ops->set_alarm)
- err = -EINVAL;
- else
- err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
-
+ if (rtc->aie_timer.enabled) {
+ rtc_timer_remove(rtc, &rtc->aie_timer);
+ rtc->aie_timer.enabled = 0;
+ }
+ rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
+ rtc->aie_timer.period = ktime_set(0, 0);
+ if (alarm->enabled) {
+ rtc->aie_timer.enabled = 1;
+ rtc_timer_enqueue(rtc, &rtc->aie_timer);
+ }
mutex_unlock(&rtc->ops_lock);
- return err;
+ return 0;
}
EXPORT_SYMBOL_GPL(rtc_set_alarm);
@@ -319,6 +194,16 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
if (err)
return err;
+ if (rtc->aie_timer.enabled != enabled) {
+ if (enabled) {
+ rtc->aie_timer.enabled = 1;
+ rtc_timer_enqueue(rtc, &rtc->aie_timer);
+ } else {
+ rtc_timer_remove(rtc, &rtc->aie_timer);
+ rtc->aie_timer.enabled = 0;
+ }
+ }
+
if (!rtc->ops)
err = -ENODEV;
else if (!rtc->ops->alarm_irq_enable)
@@ -337,52 +222,53 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
if (err)
return err;
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
- if (enabled == 0 && rtc->uie_irq_active) {
- mutex_unlock(&rtc->ops_lock);
- return rtc_dev_update_irq_enable_emul(rtc, enabled);
+ /* make sure we're changing state */
+ if (rtc->uie_rtctimer.enabled == enabled)
+ goto out;
+
+ if (enabled) {
+ struct rtc_time tm;
+ ktime_t now, onesec;
+
+ __rtc_read_time(rtc, &tm);
+ onesec = ktime_set(1, 0);
+ now = rtc_tm_to_ktime(tm);
+ rtc->uie_rtctimer.node.expires = ktime_add(now, onesec);
+ rtc->uie_rtctimer.period = ktime_set(1, 0);
+ rtc->uie_rtctimer.enabled = 1;
+ rtc_timer_enqueue(rtc, &rtc->uie_rtctimer);
+ } else {
+ rtc_timer_remove(rtc, &rtc->uie_rtctimer);
+ rtc->uie_rtctimer.enabled = 0;
}
-#endif
-
- if (!rtc->ops)
- err = -ENODEV;
- else if (!rtc->ops->update_irq_enable)
- err = -EINVAL;
- else
- err = rtc->ops->update_irq_enable(rtc->dev.parent, enabled);
+out:
mutex_unlock(&rtc->ops_lock);
-
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
- /*
- * Enable emulation if the driver did not provide
- * the update_irq_enable function pointer or if returned
- * -EINVAL to signal that it has been configured without
- * interrupts or that are not available at the moment.
- */
- if (err == -EINVAL)
- err = rtc_dev_update_irq_enable_emul(rtc, enabled);
-#endif
return err;
+
}
EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
+
/**
- * rtc_update_irq - report RTC periodic, alarm, and/or update irqs
- * @rtc: the rtc device
- * @num: how many irqs are being reported (usually one)
- * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF
- * Context: any
+ * rtc_handle_legacy_irq - AIE, UIE and PIE event hook
+ * @rtc: pointer to the rtc device
+ *
+ * This function is called when an AIE, UIE or PIE mode interrupt
+ * has occured (or been emulated).
+ *
+ * Triggers the registered irq_task function callback.
*/
-void rtc_update_irq(struct rtc_device *rtc,
- unsigned long num, unsigned long events)
+static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
{
unsigned long flags;
+ /* mark one irq of the appropriate mode */
spin_lock_irqsave(&rtc->irq_lock, flags);
- rtc->irq_data = (rtc->irq_data + (num << 8)) | events;
+ rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF|mode);
spin_unlock_irqrestore(&rtc->irq_lock, flags);
+ /* call the task func */
spin_lock_irqsave(&rtc->irq_task_lock, flags);
if (rtc->irq_task)
rtc->irq_task->func(rtc->irq_task->private_data);
@@ -391,6 +277,69 @@ void rtc_update_irq(struct rtc_device *rtc,
wake_up_interruptible(&rtc->irq_queue);
kill_fasync(&rtc->async_queue, SIGIO, POLL_IN);
}
+
+
+/**
+ * rtc_aie_update_irq - AIE mode rtctimer hook
+ * @private: pointer to the rtc_device
+ *
+ * This functions is called when the aie_timer expires.
+ */
+void rtc_aie_update_irq(void *private)
+{
+ struct rtc_device *rtc = (struct rtc_device *)private;
+ rtc_handle_legacy_irq(rtc, 1, RTC_AF);
+}
+
+
+/**
+ * rtc_uie_update_irq - UIE mode rtctimer hook
+ * @private: pointer to the rtc_device
+ *
+ * This functions is called when the uie_timer expires.
+ */
+void rtc_uie_update_irq(void *private)
+{
+ struct rtc_device *rtc = (struct rtc_device *)private;
+ rtc_handle_legacy_irq(rtc, 1, RTC_UF);
+}
+
+
+/**
+ * rtc_pie_update_irq - PIE mode hrtimer hook
+ * @timer: pointer to the pie mode hrtimer
+ *
+ * This function is used to emulate PIE mode interrupts
+ * using an hrtimer. This function is called when the periodic
+ * hrtimer expires.
+ */
+enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer)
+{
+ struct rtc_device *rtc;
+ ktime_t period;
+ int count;
+ rtc = container_of(timer, struct rtc_device, pie_timer);
+
+ period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+ count = hrtimer_forward_now(timer, period);
+
+ rtc_handle_legacy_irq(rtc, count, RTC_PF);
+
+ return HRTIMER_RESTART;
+}
+
+/**
+ * rtc_update_irq - Triggered when a RTC interrupt occurs.
+ * @rtc: the rtc device
+ * @num: how many irqs are being reported (usually one)
+ * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF
+ * Context: any
+ */
+void rtc_update_irq(struct rtc_device *rtc,
+ unsigned long num, unsigned long events)
+{
+ schedule_work(&rtc->irqwork);
+}
EXPORT_SYMBOL_GPL(rtc_update_irq);
static int __rtc_match(struct device *dev, void *data)
@@ -477,18 +426,20 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled
int err = 0;
unsigned long flags;
- if (rtc->ops->irq_set_state == NULL)
- return -ENXIO;
-
spin_lock_irqsave(&rtc->irq_task_lock, flags);
if (rtc->irq_task != NULL && task == NULL)
err = -EBUSY;
if (rtc->irq_task != task)
err = -EACCES;
- spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
- if (err == 0)
- err = rtc->ops->irq_set_state(rtc->dev.parent, enabled);
+ if (enabled) {
+ ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+ hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL);
+ } else {
+ hrtimer_cancel(&rtc->pie_timer);
+ }
+ rtc->pie_enabled = enabled;
+ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
return err;
}
@@ -509,21 +460,194 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
int err = 0;
unsigned long flags;
- if (rtc->ops->irq_set_freq == NULL)
- return -ENXIO;
-
spin_lock_irqsave(&rtc->irq_task_lock, flags);
if (rtc->irq_task != NULL && task == NULL)
err = -EBUSY;
if (rtc->irq_task != task)
err = -EACCES;
- spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-
if (err == 0) {
- err = rtc->ops->irq_set_freq(rtc->dev.parent, freq);
- if (err == 0)
- rtc->irq_freq = freq;
+ rtc->irq_freq = freq;
+ if (rtc->pie_enabled) {
+ ktime_t period;
+ hrtimer_cancel(&rtc->pie_timer);
+ period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+ hrtimer_start(&rtc->pie_timer, period,
+ HRTIMER_MODE_REL);
+ }
}
+ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
return err;
}
EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
+
+/**
+ * rtc_timer_enqueue - Adds a rtc_timer to the rtc_device timerqueue
+ * @rtc rtc device
+ * @timer timer being added.
+ *
+ * Enqueues a timer onto the rtc devices timerqueue and sets
+ * the next alarm event appropriately.
+ *
+ * Must hold ops_lock for proper serialization of timerqueue
+ */
+void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
+{
+ timerqueue_add(&rtc->timerqueue, &timer->node);
+ if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) {
+ struct rtc_wkalrm alarm;
+ int err;
+ alarm.time = rtc_ktime_to_tm(timer->node.expires);
+ alarm.enabled = 1;
+ err = __rtc_set_alarm(rtc, &alarm);
+ if (err == -ETIME)
+ schedule_work(&rtc->irqwork);
+ }
+}
+
+/**
+ * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue
+ * @rtc rtc device
+ * @timer timer being removed.
+ *
+ * Removes a timer onto the rtc devices timerqueue and sets
+ * the next alarm event appropriately.
+ *
+ * Must hold ops_lock for proper serialization of timerqueue
+ */
+void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
+{
+ struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
+ timerqueue_del(&rtc->timerqueue, &timer->node);
+
+ if (next == &timer->node) {
+ struct rtc_wkalrm alarm;
+ int err;
+ next = timerqueue_getnext(&rtc->timerqueue);
+ if (!next)
+ return;
+ alarm.time = rtc_ktime_to_tm(next->expires);
+ alarm.enabled = 1;
+ err = __rtc_set_alarm(rtc, &alarm);
+ if (err == -ETIME)
+ schedule_work(&rtc->irqwork);
+ }
+}
+
+/**
+ * rtc_timer_do_work - Expires rtc timers
+ * @rtc rtc device
+ * @timer timer being removed.
+ *
+ * Expires rtc timers. Reprograms next alarm event if needed.
+ * Called via worktask.
+ *
+ * Serializes access to timerqueue via ops_lock mutex
+ */
+void rtc_timer_do_work(struct work_struct *work)
+{
+ struct rtc_timer *timer;
+ struct timerqueue_node *next;
+ ktime_t now;
+ struct rtc_time tm;
+
+ struct rtc_device *rtc =
+ container_of(work, struct rtc_device, irqwork);
+
+ mutex_lock(&rtc->ops_lock);
+again:
+ __rtc_read_time(rtc, &tm);
+ now = rtc_tm_to_ktime(tm);
+ while ((next = timerqueue_getnext(&rtc->timerqueue))) {
+ if (next->expires.tv64 > now.tv64)
+ break;
+
+ /* expire timer */
+ timer = container_of(next, struct rtc_timer, node);
+ timerqueue_del(&rtc->timerqueue, &timer->node);
+ timer->enabled = 0;
+ if (timer->task.func)
+ timer->task.func(timer->task.private_data);
+
+ /* Re-add/fwd periodic timers */
+ if (ktime_to_ns(timer->period)) {
+ timer->node.expires = ktime_add(timer->node.expires,
+ timer->period);
+ timer->enabled = 1;
+ timerqueue_add(&rtc->timerqueue, &timer->node);
+ }
+ }
+
+ /* Set next alarm */
+ if (next) {
+ struct rtc_wkalrm alarm;
+ int err;
+ alarm.time = rtc_ktime_to_tm(next->expires);
+ alarm.enabled = 1;
+ err = __rtc_set_alarm(rtc, &alarm);
+ if (err == -ETIME)
+ goto again;
+ }
+
+ mutex_unlock(&rtc->ops_lock);
+}
+
+
+/* rtc_timer_init - Initializes an rtc_timer
+ * @timer: timer to be intiialized
+ * @f: function pointer to be called when timer fires
+ * @data: private data passed to function pointer
+ *
+ * Kernel interface to initializing an rtc_timer.
+ */
+void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
+{
+ timerqueue_init(&timer->node);
+ timer->enabled = 0;
+ timer->task.func = f;
+ timer->task.private_data = data;
+}
+
+/* rtc_timer_start - Sets an rtc_timer to fire in the future
+ * @ rtc: rtc device to be used
+ * @ timer: timer being set
+ * @ expires: time at which to expire the timer
+ * @ period: period that the timer will recur
+ *
+ * Kernel interface to set an rtc_timer
+ */
+int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+ ktime_t expires, ktime_t period)
+{
+ int ret = 0;
+ mutex_lock(&rtc->ops_lock);
+ if (timer->enabled)
+ rtc_timer_remove(rtc, timer);
+
+ timer->node.expires = expires;
+ timer->period = period;
+
+ timer->enabled = 1;
+ rtc_timer_enqueue(rtc, timer);
+
+ mutex_unlock(&rtc->ops_lock);
+ return ret;
+}
+
+/* rtc_timer_cancel - Stops an rtc_timer
+ * @ rtc: rtc device to be used
+ * @ timer: timer being set
+ *
+ * Kernel interface to cancel an rtc_timer
+ */
+int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer)
+{
+ int ret = 0;
+ mutex_lock(&rtc->ops_lock);
+ if (timer->enabled)
+ rtc_timer_remove(rtc, timer);
+ timer->enabled = 0;
+ mutex_unlock(&rtc->ops_lock);
+ return ret;
+}
+
+
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 5856167..7e6ce62 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -687,7 +687,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
#if defined(CONFIG_ATARI)
address_space = 64;
#elif defined(__i386__) || defined(__x86_64__) || defined(__arm__) \
- || defined(__sparc__) || defined(__mips__)
+ || defined(__sparc__) || defined(__mips__) \
+ || defined(__powerpc__)
address_space = 128;
#else
#warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 0cc0984..212b16e 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -46,105 +46,6 @@ static int rtc_dev_open(struct inode *inode, struct file *file)
return err;
}
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-/*
- * Routine to poll RTC seconds field for change as often as possible,
- * after first RTC_UIE use timer to reduce polling
- */
-static void rtc_uie_task(struct work_struct *work)
-{
- struct rtc_device *rtc =
- container_of(work, struct rtc_device, uie_task);
- struct rtc_time tm;
- int num = 0;
- int err;
-
- err = rtc_read_time(rtc, &tm);
-
- spin_lock_irq(&rtc->irq_lock);
- if (rtc->stop_uie_polling || err) {
- rtc->uie_task_active = 0;
- } else if (rtc->oldsecs != tm.tm_sec) {
- num = (tm.tm_sec + 60 - rtc->oldsecs) % 60;
- rtc->oldsecs = tm.tm_sec;
- rtc->uie_timer.expires = jiffies + HZ - (HZ/10);
- rtc->uie_timer_active = 1;
- rtc->uie_task_active = 0;
- add_timer(&rtc->uie_timer);
- } else if (schedule_work(&rtc->uie_task) == 0) {
- rtc->uie_task_active = 0;
- }
- spin_unlock_irq(&rtc->irq_lock);
- if (num)
- rtc_update_irq(rtc, num, RTC_UF | RTC_IRQF);
-}
-static void rtc_uie_timer(unsigned long data)
-{
- struct rtc_device *rtc = (struct rtc_device *)data;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc->irq_lock, flags);
- rtc->uie_timer_active = 0;
- rtc->uie_task_active = 1;
- if ((schedule_work(&rtc->uie_task) == 0))
- rtc->uie_task_active = 0;
- spin_unlock_irqrestore(&rtc->irq_lock, flags);
-}
-
-static int clear_uie(struct rtc_device *rtc)
-{
- spin_lock_irq(&rtc->irq_lock);
- if (rtc->uie_irq_active) {
- rtc->stop_uie_polling = 1;
- if (rtc->uie_timer_active) {
- spin_unlock_irq(&rtc->irq_lock);
- del_timer_sync(&rtc->uie_timer);
- spin_lock_irq(&rtc->irq_lock);
- rtc->uie_timer_active = 0;
- }
- if (rtc->uie_task_active) {
- spin_unlock_irq(&rtc->irq_lock);
- flush_work_sync(&rtc->uie_task);
- spin_lock_irq(&rtc->irq_lock);
- }
- rtc->uie_irq_active = 0;
- }
- spin_unlock_irq(&rtc->irq_lock);
- return 0;
-}
-
-static int set_uie(struct rtc_device *rtc)
-{
- struct rtc_time tm;
- int err;
-
- err = rtc_read_time(rtc, &tm);
- if (err)
- return err;
- spin_lock_irq(&rtc->irq_lock);
- if (!rtc->uie_irq_active) {
- rtc->uie_irq_active = 1;
- rtc->stop_uie_polling = 0;
- rtc->oldsecs = tm.tm_sec;
- rtc->uie_task_active = 1;
- if (schedule_work(&rtc->uie_task) == 0)
- rtc->uie_task_active = 0;
- }
- rtc->irq_data = 0;
- spin_unlock_irq(&rtc->irq_lock);
- return 0;
-}
-
-int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
-{
- if (enabled)
- return set_uie(rtc);
- else
- return clear_uie(rtc);
-}
-EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
-
-#endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
static ssize_t
rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
@@ -493,11 +394,6 @@ void rtc_dev_prepare(struct rtc_device *rtc)
rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id);
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
- INIT_WORK(&rtc->uie_task, rtc_uie_task);
- setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
-#endif
-
cdev_init(&rtc->char_dev, &rtc_dev_fops);
rtc->char_dev.owner = rtc->owner;
}
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
index 773851f..075f170 100644
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -117,4 +117,32 @@ int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
}
EXPORT_SYMBOL(rtc_tm_to_time);
+/*
+ * Convert rtc_time to ktime
+ */
+ktime_t rtc_tm_to_ktime(struct rtc_time tm)
+{
+ time_t time;
+ rtc_tm_to_time(&tm, &time);
+ return ktime_set(time, 0);
+}
+EXPORT_SYMBOL_GPL(rtc_tm_to_ktime);
+
+/*
+ * Convert ktime to rtc_time
+ */
+struct rtc_time rtc_ktime_to_tm(ktime_t kt)
+{
+ struct timespec ts;
+ struct rtc_time ret;
+
+ ts = ktime_to_timespec(kt);
+ /* Round up any ns */
+ if (ts.tv_nsec)
+ ts.tv_sec++;
+ rtc_time_to_tm(ts.tv_sec, &ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rtc_ktime_to_tm);
+
MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index dea7b5b..24b966d 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
unsigned long rom_pl;
static int die_nmi_called;
- if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
+ if (ulReason != DIE_NMIUNKNOWN)
goto out;
if (!hpwdt_nmi_decoding)
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 7e05114..814ac4e 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -9,6 +9,8 @@ config 9P_FS
If unsure, say N.
+if 9P_FS
+
config 9P_FSCACHE
bool "Enable 9P client caching support (EXPERIMENTAL)"
depends on EXPERIMENTAL
@@ -20,7 +22,6 @@ config 9P_FSCACHE
config 9P_FS_POSIX_ACL
bool "9P POSIX Access Control Lists"
- depends on 9P_FS
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -30,3 +31,5 @@ config 9P_FS_POSIX_ACL
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
+
+endif
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index f8ba37e..ab8c127 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_9P_FS) := 9p.o
9p-objs := \
vfs_super.o \
vfs_inode.o \
+ vfs_inode_dotl.o \
vfs_addr.o \
vfs_file.o \
vfs_dir.o \
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 6e58c4c..02a2cf6 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -28,7 +28,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
{
ssize_t size;
void *value = NULL;
- struct posix_acl *acl = NULL;;
+ struct posix_acl *acl = NULL;
size = v9fs_fid_xattr_get(fid, name, NULL, 0);
if (size > 0) {
@@ -365,7 +365,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
case ACL_TYPE_DEFAULT:
name = POSIX_ACL_XATTR_DEFAULT;
if (!S_ISDIR(inode->i_mode)) {
- retval = -EINVAL;
+ retval = acl ? -EINVAL : 0;
goto err_out;
}
break;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index cb63968..c4b5d88 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -113,9 +113,27 @@ struct v9fs_session_info {
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
-void v9fs_session_close(struct v9fs_session_info *v9ses);
-void v9fs_session_cancel(struct v9fs_session_info *v9ses);
-void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_close(struct v9fs_session_info *v9ses);
+extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
+extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nameidata);
+extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
+extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
+extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *p);
+extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
+
+extern const struct inode_operations v9fs_dir_inode_operations_dotl;
+extern const struct inode_operations v9fs_file_inode_operations_dotl;
+extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
+extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
/* other default globals */
#define V9FS_PORT 564
@@ -138,3 +156,21 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
{
return v9ses->flags & V9FS_PROTO_2000L;
}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_dotl(v9ses, fid, sb);
+ else
+ return v9fs_inode(v9ses, fid, sb);
+}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5978298..5076eeb 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -49,15 +49,8 @@
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
-static const struct inode_operations v9fs_dir_inode_operations_dotl;
static const struct inode_operations v9fs_file_inode_operations;
-static const struct inode_operations v9fs_file_inode_operations_dotl;
static const struct inode_operations v9fs_symlink_inode_operations;
-static const struct inode_operations v9fs_symlink_inode_operations_dotl;
-
-static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
- dev_t rdev);
/**
* unixmode2p9mode - convert unix mode bits to plan 9
@@ -251,41 +244,6 @@ void v9fs_destroy_inode(struct inode *inode)
#endif
/**
- * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
- * new file system object. This checks the S_ISGID to determine the owning
- * group of the new file system object.
- */
-
-static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
-{
- BUG_ON(dir_inode == NULL);
-
- if (dir_inode->i_mode & S_ISGID) {
- /* set_gid bit is set.*/
- return dir_inode->i_gid;
- }
- return current_fsgid();
-}
-
-/**
- * v9fs_dentry_from_dir_inode - helper function to get the dentry from
- * dir inode.
- *
- */
-
-static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
-{
- struct dentry *dentry;
-
- spin_lock(&inode->i_lock);
- /* Directory should have only one entry. */
- BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
- dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- spin_unlock(&inode->i_lock);
- return dentry;
-}
-
-/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
* @mode: mode to setup inode with
@@ -454,7 +412,7 @@ void v9fs_evict_inode(struct inode *inode)
#endif
}
-static struct inode *
+struct inode *
v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
@@ -489,60 +447,6 @@ error:
return ERR_PTR(err);
}
-static struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
-{
- struct inode *ret = NULL;
- int err;
- struct p9_stat_dotl *st;
-
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return ERR_CAST(st);
-
- ret = v9fs_get_inode(sb, st->st_mode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
- goto error;
- }
-
- v9fs_stat2inode_dotl(st, ret);
- ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
-#endif
- err = v9fs_get_acl(ret, fid);
- if (err) {
- iput(ret);
- goto error;
- }
- kfree(st);
- return ret;
-error:
- kfree(st);
- return ERR_PTR(err);
-}
-
-/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
- * issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
-{
- if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_dotl(v9ses, fid, sb);
- else
- return v9fs_inode(v9ses, fid, sb);
-}
-
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
@@ -633,12 +537,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
-
- if (v9ses->cache)
- d_set_d_op(dentry, &v9fs_cached_dentry_operations);
- else
- d_set_d_op(dentry, &v9fs_dentry_operations);
-
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -657,144 +555,6 @@ error:
}
/**
- * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
- * @dir: directory inode that is being created
- * @dentry: dentry that is being deleted
- * @mode: create permissions
- * @nd: path information
- *
- */
-
-static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
- struct nameidata *nd)
-{
- int err = 0;
- char *name = NULL;
- gid_t gid;
- int flags;
- mode_t mode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL;
- struct p9_fid *dfid, *ofid;
- struct file *filp;
- struct p9_qid qid;
- struct inode *inode;
- struct posix_acl *pacl = NULL, *dacl = NULL;
-
- v9ses = v9fs_inode2v9ses(dir);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
- else {
- /*
- * create call without LOOKUP_OPEN is due
- * to mknod of regular files. So use mknod
- * operation.
- */
- return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
- }
-
- name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
- "mode:0x%x\n", name, flags, omode);
-
- dfid = v9fs_fid_lookup(dentry->d_parent);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- return err;
- }
-
- /* clone a fid to use for creation */
- ofid = p9_client_walk(dfid, 0, NULL, 1);
- if (IS_ERR(ofid)) {
- err = PTR_ERR(ofid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- return err;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
-
- mode = omode;
- /* Update mode based on ACL value */
- err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
- if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in creat %d\n", err);
- goto error;
- }
- err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "p9_client_open_dotl failed in creat %d\n",
- err);
- goto error;
- }
- /* instantiate inode and assign the unopened fid to the dentry */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
- (nd && nd->flags & LOOKUP_OPEN)) {
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_cached_dentry_operations);
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- /* The fid would get clunked via a dput */
- fid = NULL;
- } else {
- /*
- * Not in cached mode. No need to populate
- * inode with stat. We need to get an inode
- * so that we can set the acl with dentry
- */
- inode = v9fs_get_inode(dir->i_sb, mode);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_dentry_operations);
- d_instantiate(dentry, inode);
- }
- /* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
-
- /* if we are opening a file, assign the open fid to the file */
- if (nd && nd->flags & LOOKUP_OPEN) {
- filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
- if (IS_ERR(filp)) {
- p9_client_clunk(ofid);
- return PTR_ERR(filp);
- }
- filp->private_data = ofid;
- } else
- p9_client_clunk(ofid);
-
- return 0;
-
-error:
- if (ofid)
- p9_client_clunk(ofid);
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
-/**
* v9fs_vfs_create - VFS hook to create files
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
@@ -884,107 +644,6 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}
-
-/**
- * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
- * @dir: inode that is being unlinked
- * @dentry: dentry that is being unlinked
- * @mode: mode for new directory
- *
- */
-
-static int v9fs_vfs_mkdir_dotl(struct inode *dir,
- struct dentry *dentry, int omode)
-{
- int err;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL, *dfid = NULL;
- gid_t gid;
- char *name;
- mode_t mode;
- struct inode *inode;
- struct p9_qid qid;
- struct dentry *dir_dentry;
- struct posix_acl *dacl = NULL, *pacl = NULL;
-
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
- err = 0;
- v9ses = v9fs_inode2v9ses(dir);
-
- omode |= S_IFDIR;
- if (dir->i_mode & S_ISGID)
- omode |= S_ISGID;
-
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- dfid = NULL;
- goto error;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
- mode = omode;
- /* Update mode based on ACL value */
- err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
- if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in mkdir %d\n", err);
- goto error;
- }
- name = (char *) dentry->d_name.name;
- err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
- if (err < 0)
- goto error;
-
- /* instantiate inode and assign the unopened fid to the dentry */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_cached_dentry_operations);
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- } else {
- /*
- * Not in cached mode. No need to populate
- * inode with stat. We need to get an inode
- * so that we can set the acl with dentry
- */
- inode = v9fs_get_inode(dir->i_sb, mode);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_dentry_operations);
- d_instantiate(dentry, inode);
- }
- /* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
-
-error:
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
/**
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
@@ -993,7 +652,7 @@ error:
*
*/
-static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nameidata)
{
struct super_block *sb;
@@ -1063,7 +722,7 @@ error:
*
*/
-static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
{
return v9fs_remove(i, d, 0);
}
@@ -1075,7 +734,7 @@ static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
*
*/
-static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
return v9fs_remove(i, d, 1);
}
@@ -1089,7 +748,7 @@ static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
*
*/
-static int
+int
v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -1196,42 +855,6 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
-static int
-v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- int err;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
- P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
- err = -EPERM;
- v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- /* Ask for all the fields in stat structure. Server will return
- * whatever it supports
- */
-
- st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, dentry->d_inode);
- generic_fillattr(dentry->d_inode, stat);
- /* Change block size to what the server returned */
- stat->blksize = st->st_blksize;
-
- kfree(st);
- return 0;
-}
-
/**
* v9fs_vfs_setattr - set file metadata
* @dentry: file whose metadata to set
@@ -1291,64 +914,6 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
}
/**
- * v9fs_vfs_setattr_dotl - set file metadata
- * @dentry: file whose metadata to set
- * @iattr: metadata assignment structure
- *
- */
-
-int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
-{
- int retval;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
- struct p9_iattr_dotl p9attr;
-
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
-
- retval = inode_change_ok(dentry->d_inode, iattr);
- if (retval)
- return retval;
-
- p9attr.valid = iattr->ia_valid;
- p9attr.mode = iattr->ia_mode;
- p9attr.uid = iattr->ia_uid;
- p9attr.gid = iattr->ia_gid;
- p9attr.size = iattr->ia_size;
- p9attr.atime_sec = iattr->ia_atime.tv_sec;
- p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
- p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
- p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
-
- retval = -EPERM;
- v9ses = v9fs_inode2v9ses(dentry->d_inode);
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
- return retval;
-
- if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(dentry->d_inode)) {
- retval = vmtruncate(dentry->d_inode, iattr->ia_size);
- if (retval)
- return retval;
- }
-
- setattr_copy(dentry->d_inode, iattr);
- mark_inode_dirty(dentry->d_inode);
- if (iattr->ia_valid & ATTR_MODE) {
- /* We also want to update ACL when we update mode bits */
- retval = v9fs_acl_chmod(dentry);
- if (retval < 0)
- return retval;
- }
- return 0;
-}
-
-/**
* v9fs_stat2inode - populate an inode structure with mistat info
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
@@ -1426,77 +991,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
}
/**
- * v9fs_stat2inode_dotl - populate an inode structure with stat info
- * @stat: stat structure
- * @inode: inode to populate
- * @sb: superblock of filesystem
- *
- */
-
-void
-v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
-{
-
- if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
- inode->i_atime.tv_sec = stat->st_atime_sec;
- inode->i_atime.tv_nsec = stat->st_atime_nsec;
- inode->i_mtime.tv_sec = stat->st_mtime_sec;
- inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
- inode->i_uid = stat->st_uid;
- inode->i_gid = stat->st_gid;
- inode->i_nlink = stat->st_nlink;
- inode->i_mode = stat->st_mode;
- inode->i_rdev = new_decode_dev(stat->st_rdev);
-
- if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
-
- i_size_write(inode, stat->st_size);
- inode->i_blocks = stat->st_blocks;
- } else {
- if (stat->st_result_mask & P9_STATS_ATIME) {
- inode->i_atime.tv_sec = stat->st_atime_sec;
- inode->i_atime.tv_nsec = stat->st_atime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_MTIME) {
- inode->i_mtime.tv_sec = stat->st_mtime_sec;
- inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_CTIME) {
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
- }
- if (stat->st_result_mask & P9_STATS_UID)
- inode->i_uid = stat->st_uid;
- if (stat->st_result_mask & P9_STATS_GID)
- inode->i_gid = stat->st_gid;
- if (stat->st_result_mask & P9_STATS_NLINK)
- inode->i_nlink = stat->st_nlink;
- if (stat->st_result_mask & P9_STATS_MODE) {
- inode->i_mode = stat->st_mode;
- if ((S_ISBLK(inode->i_mode)) ||
- (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- }
- if (stat->st_result_mask & P9_STATS_RDEV)
- inode->i_rdev = new_decode_dev(stat->st_rdev);
- if (stat->st_result_mask & P9_STATS_SIZE)
- i_size_write(inode, stat->st_size);
- if (stat->st_result_mask & P9_STATS_BLOCKS)
- inode->i_blocks = stat->st_blocks;
- }
- if (stat->st_result_mask & P9_STATS_GEN)
- inode->i_generation = stat->st_gen;
-
- /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
- * because the inode structure does not have fields for them.
- */
-}
-
-/**
* v9fs_qid2ino - convert qid into inode number
* @qid: qid to hash
*
@@ -1602,7 +1096,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
*
*/
-static void
+void
v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
char *s = nd_get_link(nd);
@@ -1646,94 +1140,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
}
/**
- * v9fs_vfs_symlink_dotl - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See Also: 9P2000.L RFC for more information
- *
- */
-
-static int
-v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- struct v9fs_session_info *v9ses;
- struct p9_fid *dfid;
- struct p9_fid *fid = NULL;
- struct inode *inode;
- struct p9_qid qid;
- char *name;
- int err;
- gid_t gid;
-
- name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
- dir->i_ino, name, symname);
- v9ses = v9fs_inode2v9ses(dir);
-
- dfid = v9fs_fid_lookup(dentry->d_parent);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- return err;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
-
- /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
- err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
-
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
- goto error;
- }
-
- if (v9ses->cache) {
- /* Now walk from the parent so we can get an unopened fid. */
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- /* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_cached_dentry_operations);
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- } else {
- /* Not in cached mode. No need to populate inode with stat */
- inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_dentry_operations);
- d_instantiate(dentry, inode);
- }
-
-error:
- if (fid)
- p9_client_clunk(fid);
-
- return err;
-}
-
-/**
* v9fs_vfs_symlink - helper function to create symlinks
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
@@ -1792,77 +1198,6 @@ clunk_fid:
}
/**
- * v9fs_vfs_link_dotl - create a hardlink for dotl
- * @old_dentry: dentry for file to link to
- * @dir: inode destination for new link
- * @dentry: dentry for link
- *
- */
-
-static int
-v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- int err;
- struct p9_fid *dfid, *oldfid;
- char *name;
- struct v9fs_session_info *v9ses;
- struct dentry *dir_dentry;
-
- P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
- dir->i_ino, old_dentry->d_name.name,
- dentry->d_name.name);
-
- v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid))
- return PTR_ERR(dfid);
-
- oldfid = v9fs_fid_lookup(old_dentry);
- if (IS_ERR(oldfid))
- return PTR_ERR(oldfid);
-
- name = (char *) dentry->d_name.name;
-
- err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
-
- if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
- return err;
- }
-
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- /* Get the latest stat info from server. */
- struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
- fid = v9fs_fid_lookup(old_dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
- kfree(st);
- } else {
- /* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just hold the
- * inode
- */
- ihold(old_dentry->d_inode);
- }
-
- d_set_d_op(dentry, old_dentry->d_op);
- d_instantiate(dentry, old_dentry->d_inode);
-
- return err;
-}
-
-/**
* v9fs_vfs_mknod - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
@@ -1907,160 +1242,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
-/**
- * v9fs_vfs_mknod_dotl - create a special file
- * @dir: inode destination for new link
- * @dentry: dentry for file
- * @mode: mode for creation
- * @rdev: device associated with special file
- *
- */
-static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
- dev_t rdev)
-{
- int err;
- char *name;
- mode_t mode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL, *dfid = NULL;
- struct inode *inode;
- gid_t gid;
- struct p9_qid qid;
- struct dentry *dir_dentry;
- struct posix_acl *dacl = NULL, *pacl = NULL;
-
- P9_DPRINTK(P9_DEBUG_VFS,
- " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
- dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
-
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
- v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
- dfid = v9fs_fid_lookup(dir_dentry);
- if (IS_ERR(dfid)) {
- err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
- dfid = NULL;
- goto error;
- }
-
- gid = v9fs_get_fsgid_for_create(dir);
- mode = omode;
- /* Update mode based on ACL value */
- err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
- if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in mknod %d\n", err);
- goto error;
- }
- name = (char *) dentry->d_name.name;
-
- err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
- if (err < 0)
- goto error;
-
- /* instantiate inode and assign the unopened fid to the dentry */
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
- fid = NULL;
- goto error;
- }
-
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_cached_dentry_operations);
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- fid = NULL;
- } else {
- /*
- * Not in cached mode. No need to populate inode with stat.
- * socket syscall returns a fd, so we need instantiate
- */
- inode = v9fs_get_inode(dir->i_sb, mode);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto error;
- }
- d_set_d_op(dentry, &v9fs_dentry_operations);
- d_instantiate(dentry, inode);
- }
- /* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
-error:
- if (fid)
- p9_client_clunk(fid);
- return err;
-}
-
-static int
-v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
-{
- int retval;
- struct p9_fid *fid;
- char *target = NULL;
-
- P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
- retval = -EPERM;
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
-
- retval = p9_client_readlink(fid, &target);
- if (retval < 0)
- return retval;
-
- strncpy(buffer, target, buflen);
- P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
-
- retval = strnlen(buffer, buflen);
- return retval;
-}
-
-/**
- * v9fs_vfs_follow_link_dotl - follow a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- *
- */
-
-static void *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
-{
- int len = 0;
- char *link = __getname();
-
- P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
-
- if (!link)
- link = ERR_PTR(-ENOMEM);
- else {
- len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
- if (len < 0) {
- __putname(link);
- link = ERR_PTR(len);
- } else
- link[min(len, PATH_MAX-1)] = 0;
- }
- nd_set_link(nd, link);
-
- return NULL;
-}
-
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -2075,25 +1256,6 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_dir_inode_operations_dotl = {
- .create = v9fs_vfs_create_dotl,
- .lookup = v9fs_vfs_lookup,
- .link = v9fs_vfs_link_dotl,
- .symlink = v9fs_vfs_symlink_dotl,
- .unlink = v9fs_vfs_unlink,
- .mkdir = v9fs_vfs_mkdir_dotl,
- .rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod_dotl,
- .rename = v9fs_vfs_rename,
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
- .check_acl = v9fs_check_acl,
-};
-
static const struct inode_operations v9fs_dir_inode_operations = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -2111,16 +1273,6 @@ static const struct inode_operations v9fs_file_inode_operations = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_file_inode_operations_dotl = {
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
- .check_acl = v9fs_check_acl,
-};
-
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
@@ -2129,14 +1281,3 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
.setattr = v9fs_vfs_setattr,
};
-static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
- .readlink = v9fs_vfs_readlink_dotl,
- .follow_link = v9fs_vfs_follow_link_dotl,
- .put_link = v9fs_vfs_put_link,
- .getattr = v9fs_vfs_getattr_dotl,
- .setattr = v9fs_vfs_setattr_dotl,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = v9fs_listxattr,
-};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
new file mode 100644
index 0000000..fe3ffa9
--- /dev/null
+++ b/fs/9p/vfs_inode_dotl.c
@@ -0,0 +1,824 @@
+/*
+ * linux/fs/9p/vfs_inode_dotl.c
+ *
+ * This file contains vfs inode ops for the 9P2000.L protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+
+#include "v9fs.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+#include "cache.h"
+#include "xattr.h"
+#include "acl.h"
+
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ dev_t rdev);
+
+/**
+ * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
+ * new file system object. This checks the S_ISGID to determine the owning
+ * group of the new file system object.
+ */
+
+static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
+{
+ BUG_ON(dir_inode == NULL);
+
+ if (dir_inode->i_mode & S_ISGID) {
+ /* set_gid bit is set.*/
+ return dir_inode->i_gid;
+ }
+ return current_fsgid();
+}
+
+/**
+ * v9fs_dentry_from_dir_inode - helper function to get the dentry from
+ * dir inode.
+ *
+ */
+
+static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&inode->i_lock);
+ /* Directory should have only one entry. */
+ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ spin_unlock(&inode->i_lock);
+ return dentry;
+}
+
+struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct inode *ret = NULL;
+ int err;
+ struct p9_stat_dotl *st;
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ ret = v9fs_get_inode(sb, st->st_mode);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ goto error;
+ }
+
+ v9fs_stat2inode_dotl(st, ret);
+ ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
+ err = v9fs_get_acl(ret, fid);
+ if (err) {
+ iput(ret);
+ goto error;
+ }
+ kfree(st);
+ return ret;
+error:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: directory inode that is being created
+ * @dentry: dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ struct nameidata *nd)
+{
+ int err = 0;
+ char *name = NULL;
+ gid_t gid;
+ int flags;
+ mode_t mode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL;
+ struct p9_fid *dfid, *ofid;
+ struct file *filp;
+ struct p9_qid qid;
+ struct inode *inode;
+ struct posix_acl *pacl = NULL, *dacl = NULL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ if (nd && nd->flags & LOOKUP_OPEN)
+ flags = nd->intent.open.flags - 1;
+ else {
+ /*
+ * create call without LOOKUP_OPEN is due
+ * to mknod of regular files. So use mknod
+ * operation.
+ */
+ return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+ }
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+ "mode:0x%x\n", name, flags, omode);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ /* clone a fid to use for creation */
+ ofid = p9_client_walk(dfid, 0, NULL, 1);
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in creat %d\n", err);
+ goto error;
+ }
+ err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_open_dotl failed in creat %d\n",
+ err);
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+
+ /* Since we are opening a file, assign the open fid to the file */
+ filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
+ if (IS_ERR(filp)) {
+ p9_client_clunk(ofid);
+ return PTR_ERR(filp);
+ }
+ filp->private_data = ofid;
+ return 0;
+
+error:
+ if (ofid)
+ p9_client_clunk(ofid);
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir,
+ struct dentry *dentry, int omode)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ gid_t gid;
+ char *name;
+ mode_t mode;
+ struct inode *inode;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ err = 0;
+ v9ses = v9fs_inode2v9ses(dir);
+
+ omode |= S_IFDIR;
+ if (dir->i_mode & S_ISGID)
+ omode |= S_ISGID;
+
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mkdir %d\n", err);
+ goto error;
+ }
+ name = (char *) dentry->d_name.name;
+ err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate
+ * inode with stat. We need to get an inode
+ * so that we can set the acl with dentry
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ err = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return simple_getattr(mnt, dentry, stat);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ /* Ask for all the fields in stat structure. Server will return
+ * whatever it supports
+ */
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, dentry->d_inode);
+ generic_fillattr(dentry->d_inode, stat);
+ /* Change block size to what the server returned */
+ stat->blksize = st->st_blksize;
+
+ kfree(st);
+ return 0;
+}
+
+/**
+ * v9fs_vfs_setattr_dotl - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+ int retval;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_iattr_dotl p9attr;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ retval = inode_change_ok(dentry->d_inode, iattr);
+ if (retval)
+ return retval;
+
+ p9attr.valid = iattr->ia_valid;
+ p9attr.mode = iattr->ia_mode;
+ p9attr.uid = iattr->ia_uid;
+ p9attr.gid = iattr->ia_gid;
+ p9attr.size = iattr->ia_size;
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+
+ retval = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(dentry->d_inode)) {
+ retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+ if (retval)
+ return retval;
+ }
+
+ setattr_copy(dentry->d_inode, iattr);
+ mark_inode_dirty(dentry->d_inode);
+ if (iattr->ia_valid & ATTR_MODE) {
+ /* We also want to update ACL when we update mode bits */
+ retval = v9fs_acl_chmod(dentry);
+ if (retval < 0)
+ return retval;
+ }
+ return 0;
+}
+
+/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+ if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode->i_uid = stat->st_uid;
+ inode->i_gid = stat->st_gid;
+ inode->i_nlink = stat->st_nlink;
+ inode->i_mode = stat->st_mode;
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+ i_size_write(inode, stat->st_size);
+ inode->i_blocks = stat->st_blocks;
+ } else {
+ if (stat->st_result_mask & P9_STATS_ATIME) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_MTIME) {
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_CTIME) {
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_UID)
+ inode->i_uid = stat->st_uid;
+ if (stat->st_result_mask & P9_STATS_GID)
+ inode->i_gid = stat->st_gid;
+ if (stat->st_result_mask & P9_STATS_NLINK)
+ inode->i_nlink = stat->st_nlink;
+ if (stat->st_result_mask & P9_STATS_MODE) {
+ inode->i_mode = stat->st_mode;
+ if ((S_ISBLK(inode->i_mode)) ||
+ (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ }
+ if (stat->st_result_mask & P9_STATS_RDEV)
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+ if (stat->st_result_mask & P9_STATS_SIZE)
+ i_size_write(inode, stat->st_size);
+ if (stat->st_result_mask & P9_STATS_BLOCKS)
+ inode->i_blocks = stat->st_blocks;
+ }
+ if (stat->st_result_mask & P9_STATS_GEN)
+ inode->i_generation = stat->st_gen;
+
+ /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+ * because the inode structure does not have fields for them.
+ */
+}
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct inode *inode;
+ struct p9_qid qid;
+ char *name;
+ int err;
+ gid_t gid;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+ dir->i_ino, name, symname);
+ v9ses = v9fs_inode2v9ses(dir);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+ err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ goto error;
+ }
+
+ if (v9ses->cache) {
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /* Not in cached mode. No need to populate inode with stat */
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ return err;
+}
+
+/**
+ * v9fs_vfs_link_dotl - create a hardlink for dotl
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+static int
+v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *dfid, *oldfid;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name,
+ dentry->d_name.name);
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid))
+ return PTR_ERR(dfid);
+
+ oldfid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ return err;
+ }
+
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, old_dentry->d_inode);
+
+ kfree(st);
+ } else {
+ /* Caching disabled. No need to get upto date stat info.
+ * This dentry will be released immediately. So, just hold the
+ * inode
+ */
+ ihold(old_dentry->d_inode);
+ }
+ d_instantiate(dentry, old_dentry->d_inode);
+
+ return err;
+}
+
+/**
+ * v9fs_vfs_mknod_dotl - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ dev_t rdev)
+{
+ int err;
+ char *name;
+ mode_t mode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ struct inode *inode;
+ gid_t gid;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS,
+ " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mknod %d\n", err);
+ goto error;
+ }
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate inode with stat.
+ * socket syscall returns a fd, so we need instantiate
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ d_instantiate(dentry, inode);
+ }
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
+ * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+{
+ int retval;
+ struct p9_fid *fid;
+ char *link = __getname();
+ char *target;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ goto ndset;
+ }
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid)) {
+ __putname(link);
+ link = ERR_PTR(PTR_ERR(fid));
+ goto ndset;
+ }
+ retval = p9_client_readlink(fid, &target);
+ if (!retval) {
+ strcpy(link, target);
+ kfree(target);
+ goto ndset;
+ }
+ __putname(link);
+ link = ERR_PTR(retval);
+ndset:
+ nd_set_link(nd, link);
+ return NULL;
+}
+
+const struct inode_operations v9fs_dir_inode_operations_dotl = {
+ .create = v9fs_vfs_create_dotl,
+ .lookup = v9fs_vfs_lookup,
+ .link = v9fs_vfs_link_dotl,
+ .symlink = v9fs_vfs_symlink_dotl,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir_dotl,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod_dotl,
+ .rename = v9fs_vfs_rename,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+ .check_acl = v9fs_check_acl,
+};
+
+const struct inode_operations v9fs_file_inode_operations_dotl = {
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+ .check_acl = v9fs_check_acl,
+};
+
+const struct inode_operations v9fs_symlink_inode_operations_dotl = {
+ .readlink = generic_readlink,
+ .follow_link = v9fs_vfs_follow_link_dotl,
+ .put_link = v9fs_vfs_put_link,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 43ec7df..d288773 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -133,7 +133,7 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
"p9_client_xattrcreate failed %d\n", retval);
goto error;
}
- msize = fid->clnt->msize;;
+ msize = fid->clnt->msize;
while (value_len) {
if (value_len > (msize - P9_IOHDRSZ))
write_count = msize - P9_IOHDRSZ;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2709b34..47cda41 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -28,21 +28,30 @@
typedef struct ext2_dir_entry_2 ext2_dirent;
+/*
+ * Tests against MAX_REC_LEN etc were put in place for 64k block
+ * sizes; if that is not possible on this arch, we can skip
+ * those tests and speed things up.
+ */
static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == EXT2_MAX_REC_LEN)
return 1 << 16;
+#endif
return len;
}
static inline __le16 ext2_rec_len_to_disk(unsigned len)
{
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == (1 << 16))
return cpu_to_le16(EXT2_MAX_REC_LEN);
else
BUG_ON(len > (1 << 16));
+#endif
return cpu_to_le16(len);
}
@@ -129,15 +138,15 @@ static void ext2_check_page(struct page *page, int quiet)
p = (ext2_dirent *)(kaddr + offs);
rec_len = ext2_rec_len_from_disk(p->rec_len);
- if (rec_len < EXT2_DIR_REC_LEN(1))
+ if (unlikely(rec_len < EXT2_DIR_REC_LEN(1)))
goto Eshort;
- if (rec_len & 3)
+ if (unlikely(rec_len & 3))
goto Ealign;
- if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
+ if (unlikely(rec_len < EXT2_DIR_REC_LEN(p->name_len)))
goto Enamelen;
- if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+ if (unlikely(((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)))
goto Espan;
- if (le32_to_cpu(p->inode) > max_inumber)
+ if (unlikely(le32_to_cpu(p->inode) > max_inumber))
goto Einumber;
}
if (offs != limit)
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f8aecd2..2e1d834 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,7 +67,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
inode = NULL;
if (ino) {
inode = ext2_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
ext2_error(dir->i_sb, __func__,
"deleted inode referenced: %lu",
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e0c6380..7731695 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -43,9 +43,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext2_sync_fs(struct super_block *sb, int wait);
-void ext2_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext2_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = sbi->s_es;
@@ -59,9 +60,13 @@ void ext2_error (struct super_block * sb, const char * function,
}
va_start(args, fmt);
- printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
@@ -76,12 +81,16 @@ void ext2_error (struct super_block * sb, const char * function,
void ext2_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT2-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f84700b..c2e4dce 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -199,14 +199,6 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- /* Check the remaining name entries */
- while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next =
- EXT2_XATTR_NEXT(entry);
- if ((char *)next >= end)
- goto bad_block;
- entry = next;
- }
if (ext2_xattr_cache_insert(bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
@@ -355,7 +347,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
/*
* ext2_xattr_set()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b3db226..045995c 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,7 @@
#include <linux/ext3_jbd.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -39,6 +40,21 @@
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/*
+ * Calculate the block group number and offset, given a block number
+ */
+static void ext3_get_group_no_and_offset(struct super_block *sb,
+ ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
+{
+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+ blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
+ if (offsetp)
+ *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
+ if (blockgrpp)
+ *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
+}
+
/**
* ext3_get_group_desc() -- load group descriptor from disk
* @sb: super block
@@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
return ext3_bg_num_gdb_meta(sb,group);
}
+
+/**
+ * ext3_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @group: allocation group to trim
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @gdp: allocation group description structure
+ * @minblocks: minimum extent block count
+ *
+ * ext3_trim_all_free walks through group's block bitmap searching for free
+ * blocks. When the free block is found, it tries to allocate this block and
+ * consequent free block to get the biggest free extent possible, until it
+ * reaches any used block. Then issue a TRIM command on this extent and free
+ * the extent in the block bitmap. This is done until whole group is scanned.
+ */
+ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
+ ext3_grpblk_t start, ext3_grpblk_t max,
+ ext3_grpblk_t minblocks)
+{
+ handle_t *handle;
+ ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
+ ext3_fsblk_t discard_block;
+ struct ext3_sb_info *sbi;
+ struct buffer_head *gdp_bh, *bitmap_bh = NULL;
+ struct ext3_group_desc *gdp;
+ int err = 0, ret = 0;
+
+ /*
+ * We will update one block bitmap, and one group descriptor
+ */
+ handle = ext3_journal_start_sb(sb, 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ bitmap_bh = read_block_bitmap(sb, group);
+ if (!bitmap_bh) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "getting undo access");
+ err = ext3_journal_get_undo_access(handle, bitmap_bh);
+ if (err)
+ goto err_out;
+
+ gdp = ext3_get_group_desc(sb, group, &gdp_bh);
+ if (!gdp) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ BUFFER_TRACE(gdp_bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, gdp_bh);
+ if (err)
+ goto err_out;
+
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ sbi = EXT3_SB(sb);
+
+ /* Walk through the whole group */
+ while (start < max) {
+ start = bitmap_search_next_usable_block(start, bitmap_bh, max);
+ if (start < 0)
+ break;
+ next = start;
+
+ /*
+ * Allocate contiguous free extents by setting bits in the
+ * block bitmap
+ */
+ while (next < max
+ && claim_block(sb_bgl_lock(sbi, group),
+ next, bitmap_bh)) {
+ next++;
+ }
+
+ /* We did not claim any blocks */
+ if (next == start)
+ continue;
+
+ discard_block = (ext3_fsblk_t)start +
+ ext3_group_first_block_no(sb, group);
+
+ /* Update counters */
+ spin_lock(sb_bgl_lock(sbi, group));
+ le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
+
+ /* Do not issue a TRIM on extents smaller than minblocks */
+ if ((next - start) < minblocks)
+ goto free_extent;
+
+ /* Send the TRIM command down to the device */
+ err = sb_issue_discard(sb, discard_block, next - start,
+ GFP_NOFS, 0);
+ count += (next - start);
+free_extent:
+ freed = 0;
+
+ /*
+ * Clear bits in the bitmap
+ */
+ for (bit = start; bit < next; bit++) {
+ BUFFER_TRACE(bitmap_bh, "clear bit");
+ if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
+ bit, bitmap_bh->b_data)) {
+ ext3_error(sb, __func__,
+ "bit already cleared for block "E3FSBLK,
+ (unsigned long)bit);
+ BUFFER_TRACE(bitmap_bh, "bit already cleared");
+ } else {
+ freed++;
+ }
+ }
+
+ /* Update couters */
+ spin_lock(sb_bgl_lock(sbi, group));
+ le16_add_cpu(&gdp->bg_free_blocks_count, freed);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ percpu_counter_add(&sbi->s_freeblocks_counter, freed);
+
+ start = next;
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ ext3_warning(sb, __func__, "Discard command "
+ "returned error %d\n", err);
+ break;
+ }
+
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
+ cond_resched();
+
+ /* No more suitable extents */
+ if ((free_blocks - count) < minblocks)
+ break;
+ }
+
+ /* We dirtied the bitmap block */
+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
+ ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
+ if (!err)
+ err = ret;
+
+ /* And the group descriptor block */
+ BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
+ ret = ext3_journal_dirty_metadata(handle, gdp_bh);
+ if (!err)
+ err = ret;
+
+ ext3_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+err_out:
+ if (err)
+ count = err;
+ ext3_journal_stop(handle);
+ brelse(bitmap_bh);
+
+ return count;
+}
+
+/**
+ * ext3_trim_fs() -- trim ioctl handle function
+ * @sb: superblock for filesystem
+ * @start: First Byte to trim
+ * @len: number of Bytes to trim from start
+ * @minlen: minimum extent length in Bytes
+ *
+ * ext3_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext3_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ ext3_grpblk_t last_block, first_block, free_blocks;
+ unsigned long first_group, last_group;
+ unsigned long group, ngroups;
+ struct ext3_group_desc *gdp;
+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+ uint64_t start, len, minlen, trimmed;
+ ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+ if (start >= max_blks)
+ goto out;
+ if (start < le32_to_cpu(es->s_first_data_block)) {
+ len -= le32_to_cpu(es->s_first_data_block) - start;
+ start = le32_to_cpu(es->s_first_data_block);
+ }
+ if (start + len > max_blks)
+ len = max_blks - start;
+
+ ngroups = EXT3_SB(sb)->s_groups_count;
+ smp_rmb();
+
+ /* Determine first and last group to examine based on start and len */
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
+ &first_group, &first_block);
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT3_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ gdp = ext3_get_group_desc(sb, group, NULL);
+ if (!gdp)
+ break;
+
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free_blocks < minlen)
+ continue;
+
+ if (len >= EXT3_BLOCKS_PER_GROUP(sb))
+ len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = first_block + len;
+
+ ret = ext3_trim_all_free(sb, group, first_block,
+ last_block, minlen);
+ if (ret < 0)
+ break;
+
+ trimmed += ret;
+ first_block = 0;
+ }
+
+ if (ret >= 0)
+ ret = 0;
+
+out:
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+}
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e2e72c3..34f0a07 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
const char * error_msg = NULL;
const int rlen = ext3_rec_len_from_disk(de->rec_len);
- if (rlen < EXT3_DIR_REC_LEN(1))
+ if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
- else if (rlen % 4 != 0)
+ else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
+ else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
error_msg = "directory entry across blocks";
- else if (le32_to_cpu(de->inode) >
- le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+ else if (unlikely(le32_to_cpu(de->inode) >
+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
- if (error_msg != NULL)
+ if (unlikely(error_msg != NULL))
ext3_error (dir->i_sb, function,
"bad entry in directory #%lu: %s - "
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
dir->i_ino, error_msg, offset,
(unsigned long) le32_to_cpu(de->inode),
rlen, de->name_len);
+
return error_msg == NULL ? 1 : 0;
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a958061..ae94f6d 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2145,13 +2145,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, bh);
+ if (ext3_journal_dirty_metadata(handle, bh))
+ return;
}
ext3_mark_inode_dirty(handle, inode);
truncate_restart_transaction(handle, inode);
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
- ext3_journal_get_write_access(handle, bh);
+ if (ext3_journal_get_write_access(handle, bh))
+ return;
}
}
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 8897481..fc080dd 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -276,7 +276,29 @@ group_add_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
+ case FITRIM: {
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range range;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&range, (struct fstrim_range *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ ret = ext3_trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user((struct fstrim_range *)arg, &range,
+ sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -ENOTTY;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index bce9dce..b27ba71 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -858,6 +858,7 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
struct buffer_head * bh_use[NAMEI_RA_SIZE];
struct buffer_head * bh, *ret = NULL;
unsigned long start, block, b;
+ const u8 *name = entry->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
@@ -871,6 +872,16 @@ static struct buffer_head *ext3_find_entry(struct inode *dir,
namelen = entry->len;
if (namelen > EXT3_NAME_LEN)
return NULL;
+ if ((namelen <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == 0)) {
+ /*
+ * "." or ".." will only be in the first block
+ * NFS may look up ".."; "." should be handled by the VFS
+ */
+ block = start = 0;
+ nblocks = 1;
+ goto restart;
+ }
if (is_dx(dir)) {
bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
/*
@@ -961,55 +972,35 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
int *err)
{
- struct super_block * sb;
+ struct super_block *sb = dir->i_sb;
struct dx_hash_info hinfo;
- u32 hash;
struct dx_frame frames[2], *frame;
- struct ext3_dir_entry_2 *de, *top;
struct buffer_head *bh;
unsigned long block;
int retval;
- int namelen = entry->len;
- const u8 *name = entry->name;
- sb = dir->i_sb;
- /* NFS may look up ".." - look at dx_root directory block */
- if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
- if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
- return NULL;
- } else {
- frame = frames;
- frame->bh = NULL; /* for dx_release() */
- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
- dx_set_block(frame->at, 0); /* dx_root block is 0 */
- }
- hash = hinfo.hash;
+ if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
+ return NULL;
do {
block = dx_get_block(frame->at);
if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
goto errout;
- de = (struct ext3_dir_entry_2 *) bh->b_data;
- top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
- EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
- int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
- + ((char *) de - bh->b_data);
-
- if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
- if (ext3_match(namelen, name, de)) {
- *res_dir = de;
- dx_release(frames);
- return bh;
- }
+ retval = search_dirblock(bh, dir, entry,
+ block << EXT3_BLOCK_SIZE_BITS(sb),
+ res_dir);
+ if (retval == 1) {
+ dx_release(frames);
+ return bh;
}
- brelse (bh);
+ brelse(bh);
+ if (retval == -1) {
+ *err = ERR_BAD_DX_DIR;
+ goto errout;
+ }
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
+ retval = ext3_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext3_warning(sb, __func__,
@@ -1047,7 +1038,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
return ERR_PTR(-EIO);
}
inode = ext3_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
ext3_error(dir->i_sb, __func__,
"deleted inode referenced: %lu",
@@ -1607,7 +1598,9 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext3_journal_dirty_metadata(handle, frames[0].bh);
+ err = ext3_journal_dirty_metadata(handle, frames[0].bh);
+ if (err)
+ goto journal_error;
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1644,8 +1637,13 @@ static int ext3_delete_entry (handle_t *handle,
if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
return -EIO;
if (de == de_del) {
+ int err;
+
BUFFER_TRACE(bh, "get_write_access");
- ext3_journal_get_write_access(handle, bh);
+ err = ext3_journal_get_write_access(handle, bh);
+ if (err)
+ goto journal_error;
+
if (pde)
pde->rec_len = ext3_rec_len_to_disk(
ext3_rec_len_from_disk(pde->rec_len) +
@@ -1654,7 +1652,12 @@ static int ext3_delete_entry (handle_t *handle,
de->inode = 0;
dir->i_version++;
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err) {
+journal_error:
+ ext3_std_error(dir->i_sb, err);
+ return err;
+ }
return 0;
}
i += ext3_rec_len_from_disk(de->rec_len);
@@ -1762,7 +1765,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
handle_t *handle;
struct inode * inode;
- struct buffer_head * dir_block;
+ struct buffer_head * dir_block = NULL;
struct ext3_dir_entry_2 * de;
int err, retries = 0;
@@ -1790,15 +1793,14 @@ retry:
inode->i_fop = &ext3_dir_operations;
inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext3_bread (handle, inode, 0, 1, &err);
- if (!dir_block) {
- drop_nlink(inode); /* is this nlink == 0? */
- unlock_new_inode(inode);
- ext3_mark_inode_dirty(handle, inode);
- iput (inode);
- goto out_stop;
- }
+ if (!dir_block)
+ goto out_clear_inode;
+
BUFFER_TRACE(dir_block, "get_write_access");
- ext3_journal_get_write_access(handle, dir_block);
+ err = ext3_journal_get_write_access(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
+
de = (struct ext3_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
@@ -1814,11 +1816,16 @@ retry:
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, dir_block);
- brelse (dir_block);
- ext3_mark_inode_dirty(handle, inode);
- err = ext3_add_entry (handle, dentry, inode);
+ err = ext3_journal_dirty_metadata(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
+
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (!err)
+ err = ext3_add_entry (handle, dentry, inode);
+
if (err) {
+out_clear_inode:
inode->i_nlink = 0;
unlock_new_inode(inode);
ext3_mark_inode_dirty(handle, inode);
@@ -1827,10 +1834,14 @@ retry:
}
inc_nlink(dir);
ext3_update_dx_flag(dir);
- ext3_mark_inode_dirty(handle, dir);
+ err = ext3_mark_inode_dirty(handle, dir);
+ if (err)
+ goto out_clear_inode;
+
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
+ brelse(dir_block);
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
@@ -2353,7 +2364,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
goto end_rename;
} else {
BUFFER_TRACE(new_bh, "get write access");
- ext3_journal_get_write_access(handle, new_bh);
+ retval = ext3_journal_get_write_access(handle, new_bh);
+ if (retval)
+ goto journal_error;
new_de->inode = cpu_to_le32(old_inode->i_ino);
if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
EXT3_FEATURE_INCOMPAT_FILETYPE))
@@ -2362,7 +2375,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, new_dir);
BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, new_bh);
+ retval = ext3_journal_dirty_metadata(handle, new_bh);
+ if (retval)
+ goto journal_error;
brelse(new_bh);
new_bh = NULL;
}
@@ -2411,10 +2426,17 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
ext3_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
- ext3_journal_get_write_access(handle, dir_bh);
+ retval = ext3_journal_get_write_access(handle, dir_bh);
+ if (retval)
+ goto journal_error;
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, dir_bh);
+ retval = ext3_journal_dirty_metadata(handle, dir_bh);
+ if (retval) {
+journal_error:
+ ext3_std_error(new_dir->i_sb, retval);
+ goto end_rename;
+ }
drop_nlink(old_dir);
if (new_inode) {
drop_nlink(new_inode);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index e746d30..108b142 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -249,7 +249,11 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext3_journal_dirty_metadata(handle, gdb);
+ err = ext3_journal_dirty_metadata(handle, gdb);
+ if (err) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext3_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -269,7 +273,11 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(gdb);
goto exit_bh;
}
- ext3_journal_dirty_metadata(handle, gdb);
+ err = ext3_journal_dirty_metadata(handle, gdb);
+ if (err) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext3_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -295,7 +303,11 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(it);
goto exit_bh;
}
- ext3_journal_dirty_metadata(handle, it);
+ err = ext3_journal_dirty_metadata(handle, it);
+ if (err) {
+ brelse(it);
+ goto exit_bh;
+ }
brelse(it);
ext3_set_bit(bit, bh->b_data);
}
@@ -306,7 +318,9 @@ static int setup_new_group_blocks(struct super_block *sb,
mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err)
+ goto exit_bh;
brelse(bh);
/* Mark unused entries in inode bitmap used */
@@ -319,7 +333,7 @@ static int setup_new_group_blocks(struct super_block *sb,
mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
exit_bh:
brelse(bh);
@@ -503,12 +517,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
- ext3_journal_dirty_metadata(handle, dind);
+ err = ext3_journal_dirty_metadata(handle, dind);
+ if (err)
+ goto exit_group_desc;
brelse(dind);
+ dind = NULL;
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
- ext3_mark_iloc_dirty(handle, inode, &iloc);
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ if (err)
+ goto exit_group_desc;
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext3_journal_dirty_metadata(handle, *primary);
+ err = ext3_journal_dirty_metadata(handle, *primary);
+ if (err)
+ goto exit_group_desc;
o_group_desc = EXT3_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -519,10 +540,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto exit_inode;
return 0;
+exit_group_desc:
+ kfree(n_group_desc);
exit_inode:
//ext3_journal_release_buffer(handle, iloc.bh);
brelse(iloc.bh);
@@ -706,16 +731,20 @@ static void update_backups(struct super_block *sb,
}
ext3_debug("update metadata backup %#04lx\n",
(unsigned long)bh->b_blocknr);
- if ((err = ext3_journal_get_write_access(handle, bh)))
+ if ((err = ext3_journal_get_write_access(handle, bh))) {
+ brelse(bh);
break;
+ }
lock_buffer(bh);
memcpy(bh->b_data, data, size);
if (rest)
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext3_journal_dirty_metadata(handle, bh);
+ err = ext3_journal_dirty_metadata(handle, bh);
brelse(bh);
+ if (err)
+ break;
}
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
@@ -922,7 +951,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;
- ext3_journal_dirty_metadata(handle, primary);
+ err = ext3_journal_dirty_metadata(handle, primary);
+ if (err)
+ goto exit_journal;
/* Update the reserved block counts only once the new group is
* active. */
@@ -934,7 +965,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT3_INODES_PER_GROUP(sb));
- ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
exit_journal:
mutex_unlock(&sbi->s_resize_lock);
@@ -1064,8 +1095,14 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
goto exit_put;
}
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
- ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
+ if (err) {
+ ext3_warning(sb, __func__,
+ "error %d on journal dirty metadata", err);
+ ext3_journal_stop(handle);
+ goto exit_put;
+ }
ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
o_blocks_count, o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 77ce161..b7d0554 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -143,12 +143,16 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
void ext3_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
@@ -195,15 +199,20 @@ static void ext3_handle_error(struct super_block *sb)
sb->s_id);
}
-void ext3_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
ext3_handle_error(sb);
@@ -274,15 +283,20 @@ void __ext3_std_error (struct super_block * sb, const char * function,
* case we take the easy way out and panic immediately.
*/
-void ext3_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
@@ -300,16 +314,20 @@ void ext3_abort (struct super_block * sb, const char * function,
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
-void ext3_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
@@ -1848,13 +1866,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (generic_check_addressable(sb->s_blocksize_bits,
- le32_to_cpu(es->s_blocks_count))) {
+ err = generic_check_addressable(sb->s_blocksize_bits,
+ le32_to_cpu(es->s_blocks_count));
+ if (err) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
ext3_msg(sb, KERN_ERR,
"error: CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
@@ -2297,7 +2317,7 @@ static int ext3_load_journal(struct super_block *sb,
EXT3_SB(sb)->s_journal = journal;
ext3_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index e69dc6d..32e6cc2 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -925,7 +925,7 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
/*
* ext3_xattr_set_handle()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 14c3af2..adf96b8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -592,7 +592,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
* Account for the allocated meta blocks. We will never
* fail EDQUOT for metdata, but we do account for it.
*/
- if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
+ if (!(*errp) &&
+ ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ece76fb..164c560 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -60,9 +60,13 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext4_filetype_table[filetype]);
}
-
+/*
+ * Return 0 if the directory entry is OK, and 1 if there is a problem
+ *
+ * Note: this is the opposite of what ext2 and ext3 historically returned...
+ */
int __ext4_check_dir_entry(const char *function, unsigned int line,
- struct inode *dir,
+ struct inode *dir, struct file *filp,
struct ext4_dir_entry_2 *de,
struct buffer_head *bh,
unsigned int offset)
@@ -71,26 +75,37 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
- if (rlen < EXT4_DIR_REC_LEN(1))
+ if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
- else if (rlen % 4 != 0)
+ else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
+ else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ else if (unlikely(((char *) de - bh->b_data) + rlen >
+ dir->i_sb->s_blocksize))
error_msg = "directory entry across blocks";
- else if (le32_to_cpu(de->inode) >
- le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))
+ else if (unlikely(le32_to_cpu(de->inode) >
+ le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
+ else
+ return 0;
- if (error_msg != NULL)
- ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - "
- "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset%bh->b_size), offset,
- le32_to_cpu(de->inode),
- rlen, de->name_len);
- return error_msg == NULL ? 1 : 0;
+ if (filp)
+ ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
+ "bad entry in directory: %s - offset=%u(%u), "
+ "inode=%u, rec_len=%d, name_len=%d",
+ error_msg, (unsigned) (offset%bh->b_size),
+ offset, le32_to_cpu(de->inode),
+ rlen, de->name_len);
+ else
+ ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
+ "bad entry in directory: %s - offset=%u(%u), "
+ "inode=%u, rec_len=%d, name_len=%d",
+ error_msg, (unsigned) (offset%bh->b_size),
+ offset, le32_to_cpu(de->inode),
+ rlen, de->name_len);
+
+ return 1;
}
static int ext4_readdir(struct file *filp,
@@ -152,8 +167,9 @@ static int ext4_readdir(struct file *filp,
*/
if (!bh) {
if (!dir_has_error) {
- EXT4_ERROR_INODE(inode, "directory "
- "contains a hole at offset %Lu",
+ EXT4_ERROR_FILE(filp, 0,
+ "directory contains a "
+ "hole at offset %llu",
(unsigned long long) filp->f_pos);
dir_has_error = 1;
}
@@ -194,8 +210,8 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
- if (!ext4_check_dir_entry(inode, de,
- bh, offset)) {
+ if (ext4_check_dir_entry(inode, filp, de,
+ bh, offset)) {
/*
* On error, skip the f_pos to the next block
*/
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94ce3d7..bab2387 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -62,8 +62,8 @@
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
-#define EXT4_ERROR_FILE(file, fmt, a...) \
- ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
+#define EXT4_ERROR_FILE(file, block, fmt, a...) \
+ ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -561,22 +561,6 @@ struct ext4_new_group_data {
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif
-
-/*
- * Mount options
- */
-struct ext4_mount_options {
- unsigned long s_mount_opt;
- uid_t s_resuid;
- gid_t s_resgid;
- unsigned long s_commit_interval;
- u32 s_min_batch_time, s_max_batch_time;
-#ifdef CONFIG_QUOTA
- int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
-#endif
-};
-
/* Max physical block we can addres w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
@@ -709,6 +693,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
+ else \
+ (inode)->xtime.tv_nsec = 0; \
} while (0)
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
@@ -719,6 +705,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
+ else \
+ (einode)->xtime.tv_nsec = 0; \
} while (0)
#define i_disk_version osd1.linux1.l_i_version
@@ -750,12 +738,13 @@ do { \
/*
* storage for cached extent
+ * If ec_len == 0, then the cache is invalid.
+ * If ec_start == 0, then the cache represents a gap (null mapping)
*/
struct ext4_ext_cache {
ext4_fsblk_t ec_start;
ext4_lblk_t ec_block;
__u32 ec_len; /* must be 32bit to return holes */
- __u32 ec_type;
};
/*
@@ -774,10 +763,12 @@ struct ext4_inode_info {
* near to their parent directory's inode.
*/
ext4_group_t i_block_group;
+ ext4_lblk_t i_dir_start_lookup;
+#if (BITS_PER_LONG < 64)
unsigned long i_state_flags; /* Dynamic state flags */
+#endif
unsigned long i_flags;
- ext4_lblk_t i_dir_start_lookup;
#ifdef CONFIG_EXT4_FS_XATTR
/*
* Extended attributes can be read independently of the main file
@@ -820,7 +811,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
- struct jbd2_inode jinode;
+ struct jbd2_inode *jinode;
struct ext4_ext_cache i_cached_extent;
/*
@@ -840,14 +831,12 @@ struct ext4_inode_info {
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
- unsigned short i_delalloc_reserved_flag;
- sector_t i_da_metadata_calc_last_lblock;
+ ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
- spinlock_t i_block_reservation_lock;
#ifdef CONFIG_QUOTA
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
@@ -856,9 +845,11 @@ struct ext4_inode_info {
/* completed IOs that might need unwritten extents handling */
struct list_head i_completed_io_list;
spinlock_t i_completed_io_lock;
+ atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
- atomic_t i_ioend_count; /* Number of outstanding io_end structs */
+
+ spinlock_t i_block_reservation_lock;
/*
* Transactions that contain inode's metadata needed to complete
@@ -917,11 +908,20 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
-#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
-#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
+#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
+ ~EXT4_MOUNT_##opt
+#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
+ EXT4_MOUNT_##opt
#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
EXT4_MOUNT_##opt)
+#define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \
+ ~EXT4_MOUNT2_##opt
+#define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \
+ EXT4_MOUNT2_##opt
+#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
+ EXT4_MOUNT2_##opt)
+
#define ext4_set_bit ext2_set_bit
#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_clear_bit ext2_clear_bit
@@ -1087,6 +1087,7 @@ struct ext4_sb_info {
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
unsigned int s_mount_opt;
+ unsigned int s_mount_opt2;
unsigned int s_mount_flags;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
@@ -1237,24 +1238,39 @@ enum {
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
+ EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
};
-#define EXT4_INODE_BIT_FNS(name, field) \
+#define EXT4_INODE_BIT_FNS(name, field, offset) \
static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
{ \
- return test_bit(bit, &EXT4_I(inode)->i_##field); \
+ return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
} \
static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
{ \
- set_bit(bit, &EXT4_I(inode)->i_##field); \
+ set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
} \
static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
{ \
- clear_bit(bit, &EXT4_I(inode)->i_##field); \
+ clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
}
-EXT4_INODE_BIT_FNS(flag, flags)
-EXT4_INODE_BIT_FNS(state, state_flags)
+EXT4_INODE_BIT_FNS(flag, flags, 0)
+#if (BITS_PER_LONG < 64)
+EXT4_INODE_BIT_FNS(state, state_flags, 0)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+ (ei)->i_state_flags = 0;
+}
+#else
+EXT4_INODE_BIT_FNS(state, flags, 32)
+
+static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
+{
+ /* We depend on the fact that callers will set i_flags */
+}
+#endif
#else
/* Assume that user mode programs are passing in an ext4fs superblock, not
* a kernel struct super_block. This will allow us to call the feature-test
@@ -1642,10 +1658,12 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
/* dir.c */
extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
+ struct file *,
struct ext4_dir_entry_2 *,
struct buffer_head *, unsigned int);
-#define ext4_check_dir_entry(dir, de, bh, offset) \
- __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
+#define ext4_check_dir_entry(dir, filp, de, bh, offset) \
+ unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
+ (de), (bh), (offset)))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
struct ext4_dir_entry_2 *dirent);
@@ -1653,6 +1671,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
extern int ext4_sync_file(struct file *, int);
+extern int ext4_flush_completed_IO(struct inode *);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1752,8 +1771,8 @@ extern void ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, const char *, ...)
__attribute__ ((format (printf, 5, 6)));
extern void ext4_error_file(struct file *, const char *, unsigned int,
- const char *, ...)
- __attribute__ ((format (printf, 4, 5)));
+ ext4_fsblk_t, const char *, ...)
+ __attribute__ ((format (printf, 5, 6)));
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
extern void __ext4_abort(struct super_block *, const char *, unsigned int,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 28ce70f..2e29abb 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -119,10 +119,6 @@ struct ext4_ext_path {
* structure for external API
*/
-#define EXT4_EXT_CACHE_NO 0
-#define EXT4_EXT_CACHE_GAP 1
-#define EXT4_EXT_CACHE_EXTENT 2
-
/*
* to be called by ext4_ext_walk_space()
* negative retcode - error
@@ -197,7 +193,7 @@ static inline unsigned short ext_depth(struct inode *inode)
static inline void
ext4_ext_invalidate_cache(struct inode *inode)
{
- EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
+ EXT4_I(inode)->i_cached_extent.ec_len = 0;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
@@ -278,7 +274,7 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
- sector_t lblocks);
+ ext4_lblk_t lblocks);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b0bd792..d8b992e 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal)
static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0554c48..e910720 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_extent *ex;
depth = path->p_depth;
- /* try to predict block placement */
+ /*
+ * Try to predict block placement assuming that we are
+ * filling in a file which will eventually be
+ * non-sparse --- i.e., in the case of libbfd writing
+ * an ELF object sections out-of-order but in a way
+ * the eventually results in a contiguous object or
+ * executable file, or some database extending a table
+ * space file. However, this is actually somewhat
+ * non-ideal if we are writing a sparse file such as
+ * qemu or KVM writing a raw image file that is going
+ * to stay fairly sparse, since it will end up
+ * fragmenting the file system's free space. Maybe we
+ * should have some hueristics or some way to allow
+ * userspace to pass a hint to file system,
+ * especiially if the latter case turns out to be
+ * common.
+ */
ex = path[depth].p_ext;
- if (ex)
- return (ext4_ext_pblock(ex) +
- (block - le32_to_cpu(ex->ee_block)));
+ if (ex) {
+ ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
+ ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+
+ if (block > ext_block)
+ return ext_pblk + (block - ext_block);
+ else
+ return ext_pblk - (ext_block - block);
+ }
/* it looks like index is empty;
* try to find starting block from index itself */
@@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
+int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
struct ext4_inode_info *ei = EXT4_I(inode);
int idxs, num = 0;
@@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_block = start;
cbex.ec_len = end - start;
cbex.ec_start = 0;
- cbex.ec_type = EXT4_EXT_CACHE_GAP;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext4_ext_pblock(ex);
- cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
if (unlikely(cbex.ec_len == 0)) {
@@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
static void
ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
- __u32 len, ext4_fsblk_t start, int type)
+ __u32 len, ext4_fsblk_t start)
{
struct ext4_ext_cache *cex;
BUG_ON(len == 0);
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
- cex->ec_type = type;
cex->ec_block = block;
cex->ec_len = len;
cex->ec_start = start;
@@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}
ext_debug(" -> %u:%lu\n", lblock, len);
- ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+ ext4_ext_put_in_cache(inode, lblock, len, 0);
}
+/*
+ * Return 0 if cache is invalid; 1 if the cache is valid
+ */
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_extent *ex)
{
struct ext4_ext_cache *cex;
- int ret = EXT4_EXT_CACHE_NO;
+ int ret = 0;
/*
* We borrow i_block_reservation_lock to protect i_cached_extent
@@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
cex = &EXT4_I(inode)->i_cached_extent;
/* has cache valid data? */
- if (cex->ec_type == EXT4_EXT_CACHE_NO)
+ if (cex->ec_len == 0)
goto errout;
- BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
- cex->ec_type != EXT4_EXT_CACHE_EXTENT);
if (in_range(block, cex->ec_block, cex->ec_len)) {
ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start);
@@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
- ret = cex->ec_type;
+ ret = 1;
}
errout:
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
* Handle EOFBLOCKS_FL flag, clearing it if necessary
*/
static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
+ ext4_lblk_t lblk,
struct ext4_ext_path *path,
unsigned int len)
{
@@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* this turns out to be false, we can bail out from this
* function immediately.
*/
- if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+ if (lblk + len < le32_to_cpu(last_ex->ee_block) +
ext4_ext_get_actual_len(last_ex))
return 0;
/*
@@ -3168,8 +3188,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map, path,
- map->m_len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk,
+ path, map->m_len);
} else
err = ret;
goto out2;
@@ -3199,7 +3219,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+ map->m_len);
if (err < 0)
goto out2;
}
@@ -3276,7 +3297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
- int err = 0, depth, ret, cache_type;
+ int err = 0, depth, ret;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3285,9 +3306,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_lblk, map->m_len, inode->i_ino);
/* check in cache */
- cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
- if (cache_type) {
- if (cache_type == EXT4_EXT_CACHE_GAP) {
+ if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+ if (!newex.ee_start_lo && !newex.ee_start_hi) {
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/*
* block isn't allocated yet and
@@ -3296,7 +3316,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
goto out2;
}
/* we should allocate requested block */
- } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
+ } else {
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
@@ -3305,8 +3325,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
goto out;
- } else {
- BUG();
}
}
@@ -3357,8 +3375,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* Do not put uninitialized extent in the cache */
if (!ext4_ext_is_uninitialized(ex)) {
ext4_ext_put_in_cache(inode, ee_block,
- ee_len, ee_start,
- EXT4_EXT_CACHE_EXTENT);
+ ee_len, ee_start);
goto out;
}
ret = ext4_ext_handle_uninitialized_extents(handle,
@@ -3456,7 +3473,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags |= EXT4_MAP_UNINIT;
}
- err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
if (err)
goto out2;
@@ -3490,8 +3507,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
- EXT4_EXT_CACHE_EXTENT);
+ ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -3519,6 +3535,12 @@ void ext4_ext_truncate(struct inode *inode)
int err = 0;
/*
+ * finish any pending end_io work so we won't run the risk of
+ * converting any truncated blocks to initialized later
+ */
+ ext4_flush_completed_IO(inode);
+
+ /*
* probably first extent we're gonna free will be last in block
*/
err = ext4_writepage_trans_blocks(inode);
@@ -3767,7 +3789,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
logical = (__u64)newex->ec_block << blksize_bits;
- if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
+ if (newex->ec_start == 0) {
pgoff_t offset;
struct page *page;
struct buffer_head *bh = NULL;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55d..bb003dc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
{
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
@@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
ext4_mark_super_dirty(sb);
}
}
+ /*
+ * Set up the jbd2_inode if we are opening the inode for
+ * writing and the journal is present
+ */
+ if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
+ struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
+
+ spin_lock(&inode->i_lock);
+ if (!ei->jinode) {
+ if (!jinode) {
+ spin_unlock(&inode->i_lock);
+ return -ENOMEM;
+ }
+ ei->jinode = jinode;
+ jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jinode = NULL;
+ }
+ spin_unlock(&inode->i_lock);
+ if (unlikely(jinode != NULL))
+ jbd2_free_inode(jinode);
+ }
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index c1a7bc9..7829b28 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
* to written.
* The function return the number of pending IOs on success.
*/
-static int flush_completed_IO(struct inode *inode)
+extern int ext4_flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -169,7 +169,7 @@ int ext4_sync_file(struct file *file, int datasync)
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
- ret = flush_completed_IO(inode);
+ ret = ext4_flush_completed_IO(inode);
if (ret < 0)
return ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1ce240a..eb9097a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1027,7 +1027,7 @@ got:
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
- ei->i_state_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e659597..e80fc51 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -40,6 +40,7 @@
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -54,10 +55,17 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
trace_ext4_begin_ordered_truncate(inode, new_size);
- return jbd2_journal_begin_ordered_truncate(
- EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode,
- new_size);
+ /*
+ * If jinode is zero, then we never opened the file for
+ * writing, so there's no need to call
+ * jbd2_journal_begin_ordered_truncate() since there's no
+ * outstanding writes we need to flush.
+ */
+ if (!EXT4_I(inode)->jinode)
+ return 0;
+ return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode,
+ new_size);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -552,7 +560,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
}
/**
- * ext4_blks_to_allocate: Look up the block map and count the number
+ * ext4_blks_to_allocate - Look up the block map and count the number
* of direct blocks need to be allocated for the given branch.
*
* @branch: chain of indirect blocks
@@ -591,13 +599,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
/**
* ext4_alloc_blocks: multiple allocate blocks needed for a branch
+ * @handle: handle for this transaction
+ * @inode: inode which needs allocated blocks
+ * @iblock: the logical block to start allocated at
+ * @goal: preferred physical block of allocation
* @indirect_blks: the number of blocks need to allocate for indirect
* blocks
- *
+ * @blks: number of desired blocks
* @new_blocks: on return it will store the new block numbers for
* the indirect blocks(if needed) and the first direct block,
- * @blks: on return it will store the total number of allocated
- * direct blocks
+ * @err: on return it will store the error code
+ *
+ * This function will return the number of blocks allocated as
+ * requested by the passed-in parameters.
*/
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
@@ -711,9 +725,11 @@ failed_out:
/**
* ext4_alloc_branch - allocate and set up a chain of blocks.
+ * @handle: handle for this transaction
* @inode: owner
* @indirect_blks: number of allocated indirect blocks
* @blks: number of allocated direct blocks
+ * @goal: preferred place for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
@@ -826,6 +842,7 @@ failed:
/**
* ext4_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
* @inode: owner
* @block: (logical) number of block we are adding
* @chain: chain of indirect blocks (with a missing link - see
@@ -1081,7 +1098,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
* Calculate the number of metadata blocks need to reserve
* to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
+static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
return ext4_ext_calc_metadata_amount(inode, lblock);
@@ -1320,7 +1337,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* avoid double accounting
*/
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- EXT4_I(inode)->i_delalloc_reserved_flag = 1;
+ ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
/*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
@@ -1350,7 +1367,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
ext4_da_update_reserve_space(inode, retval, 1);
}
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+ ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -1878,7 +1895,7 @@ static int ext4_journalled_write_end(struct file *file,
/*
* Reserve a single block located at lblock
*/
-static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
+static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2239,7 +2256,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
* affects functions in many different parts of the allocation
* call path. This flag exists primarily because we don't
* want to change *many* call functions, so ext4_map_blocks()
- * will set the magic i_delalloc_reserved_flag once the
+ * will set the EXT4_STATE_DELALLOC_RESERVED flag once the
* inode's allocation semaphore is taken.
*
* If the blocks in questions were delalloc blocks, set
@@ -3720,8 +3737,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
retry:
io_end = ext4_init_io_end(inode, GFP_ATOMIC);
if (!io_end) {
- if (printk_ratelimit())
- printk(KERN_WARNING "%s: allocation fail\n", __func__);
+ pr_warning_ratelimited("%s: allocation fail\n", __func__);
schedule();
goto retry;
}
@@ -4045,7 +4061,7 @@ int ext4_block_truncate_page(handle_t *handle,
if (ext4_should_journal_data(inode)) {
err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
- if (ext4_should_order_data(inode))
+ if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
err = ext4_jbd2_file_inode(handle, inode);
mark_buffer_dirty(bh);
}
@@ -4169,6 +4185,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
{
__le32 *p;
int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
+ int err;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
flags |= EXT4_FREE_BLOCKS_METADATA;
@@ -4184,11 +4201,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, inode, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
+ }
+ }
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
+ }
+ err = ext4_truncate_restart_trans(handle, inode,
+ blocks_for_truncate(inode));
+ if (unlikely(err)) {
+ ext4_std_error(inode->i_sb, err);
+ return 1;
}
- ext4_mark_inode_dirty(handle, inode);
- ext4_truncate_restart_trans(handle, inode,
- blocks_for_truncate(inode));
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
ext4_journal_get_write_access(handle, bh);
@@ -4349,6 +4378,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
(__le32 *) bh->b_data,
(__le32 *) bh->b_data + addr_per_block,
depth);
+ brelse(bh);
/*
* Everything below this this pointer has been
@@ -4859,7 +4889,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
- ei->i_state_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
@@ -5118,7 +5148,7 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
- raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5b4d4e3..851f49b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2608,18 +2608,12 @@ int ext4_mb_release(struct super_block *sb)
static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
- int ret;
ext4_fsblk_t discard_block;
discard_block = block + ext4_group_first_block_no(sb, block_group);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
- if (ret == -EOPNOTSUPP) {
- ext4_warning(sb, "discard not supported, disabling");
- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
- }
- return ret;
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
/*
@@ -2631,7 +2625,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
struct super_block *sb = journal->j_private;
struct ext4_buddy e4b;
struct ext4_group_info *db;
- int err, count = 0, count2 = 0;
+ int err, ret, count = 0, count2 = 0;
struct ext4_free_data *entry;
struct list_head *l, *ltmp;
@@ -2641,9 +2635,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, entry->group,
+ if (test_opt(sb, DISCARD)) {
+ ret = ext4_issue_discard(sb, entry->group,
entry->start_blk, entry->count);
+ if (unlikely(ret == -EOPNOTSUPP)) {
+ ext4_warning(sb, "discard not supported, "
+ "disabling");
+ clear_opt(sb, DISCARD);
+ }
+ }
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
@@ -3881,19 +3881,6 @@ repeat:
}
}
-/*
- * finds all preallocated spaces and return blocks being freed to them
- * if preallocated space becomes full (no block is used from the space)
- * then the function frees space in buddy
- * XXX: at the moment, truncate (which is the only way to free blocks)
- * discards all preallocations
- */
-static void ext4_mb_return_to_preallocation(struct inode *inode,
- struct ext4_buddy *e4b,
- sector_t block, int count)
-{
- BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
-}
#ifdef CONFIG_EXT4_DEBUG
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
@@ -4283,7 +4270,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
* EDQUOT check, as blocks and quotas have been already
* reserved when data being copied into pagecache.
*/
- if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
ar->flags |= EXT4_MB_DELALLOC_RESERVED;
else {
/* Without delayed allocation we need to verify
@@ -4380,7 +4367,8 @@ out:
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len);
if (!ar->len) {
- if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ if (!ext4_test_inode_state(ar->inode,
+ EXT4_STATE_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
reserv_blks);
@@ -4626,7 +4614,11 @@ do_more:
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
*/
- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ if (!new_entry) {
+ err = -ENOMEM;
+ goto error_return;
+ }
new_entry->start_blk = bit;
new_entry->group = block_group;
new_entry->count = count;
@@ -4643,7 +4635,6 @@ do_more:
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
- ext4_mb_return_to_preallocation(inode, &e4b, block, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4718,8 +4709,6 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
ext4_unlock_group(sb, group);
ret = ext4_issue_discard(sb, group, start, count);
- if (ret)
- ext4_std_error(sb, ret);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
@@ -4819,6 +4808,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
ext4_group_t group, ngroups = ext4_get_groups_count(sb);
ext4_grpblk_t cnt = 0, first_block, last_block;
uint64_t start, len, minlen, trimmed;
+ ext4_fsblk_t first_data_blk =
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
int ret = 0;
start = range->start >> sb->s_blocksize_bits;
@@ -4828,6 +4819,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
+ if (start < first_data_blk) {
+ len -= first_data_blk - start;
+ start = first_data_blk;
+ }
/* Determine first and last group to examine based on start and len */
ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
@@ -4851,7 +4846,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (len >= EXT4_BLOCKS_PER_GROUP(sb))
len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
else
- last_block = len;
+ last_block = first_block + len;
if (e4b.bd_info->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, &e4b, first_block,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 25f3a97..b0a126f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -496,7 +496,7 @@ int ext4_ext_migrate(struct inode *inode)
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
- S_IFREG, 0, goal);
+ S_IFREG, NULL, goal);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index dc40e75..5485390 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -581,9 +581,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
- if (!ext4_check_dir_entry(dir, de, bh,
- (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
- +((char *)de - bh->b_data))) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+ + ((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
dir_file->f_pos = (dir_file->f_pos |
(dir->i_sb->s_blocksize - 1)) + 1;
@@ -820,7 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
if ((char *) de + namelen <= dlimit &&
ext4_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext4_check_dir_entry(dir, de, bh, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
return -1;
*res_dir = de;
return 1;
@@ -1036,7 +1036,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
return ERR_PTR(-EIO);
}
inode = ext4_iget(dir->i_sb, ino);
- if (unlikely(IS_ERR(inode))) {
+ if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ESTALE) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1269,7 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *)bh->b_data;
top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) {
- if (!ext4_check_dir_entry(dir, de, bh, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
return -EIO;
if (ext4_match(namelen, name, de))
return -EEXIST;
@@ -1602,7 +1602,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+ err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ goto cleanup;
+ }
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1630,17 +1634,21 @@ static int ext4_delete_entry(handle_t *handle,
{
struct ext4_dir_entry_2 *de, *pde;
unsigned int blocksize = dir->i_sb->s_blocksize;
- int i;
+ int i, err;
i = 0;
pde = NULL;
de = (struct ext4_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext4_check_dir_entry(dir, de, bh, i))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
- ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, bh);
+ if (unlikely(err)) {
+ ext4_std_error(dir->i_sb, err);
+ return err;
+ }
if (pde)
pde->rec_len = ext4_rec_len_to_disk(
ext4_rec_len_from_disk(pde->rec_len,
@@ -1652,7 +1660,11 @@ static int ext4_delete_entry(handle_t *handle,
de->inode = 0;
dir->i_version++;
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, dir, bh);
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
+ if (unlikely(err)) {
+ ext4_std_error(dir->i_sb, err);
+ return err;
+ }
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len, blocksize);
@@ -1789,7 +1801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
handle_t *handle;
struct inode *inode;
- struct buffer_head *dir_block;
+ struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
unsigned int blocksize = dir->i_sb->s_blocksize;
int err, retries = 0;
@@ -1822,7 +1834,9 @@ retry:
if (!dir_block)
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
- ext4_journal_get_write_access(handle, dir_block);
+ err = ext4_journal_get_write_access(handle, dir_block);
+ if (err)
+ goto out_clear_inode;
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
@@ -1839,10 +1853,12 @@ retry:
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, dir, dir_block);
- brelse(dir_block);
- ext4_mark_inode_dirty(handle, inode);
- err = ext4_add_entry(handle, dentry, inode);
+ err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+ if (err)
+ goto out_clear_inode;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (!err)
+ err = ext4_add_entry(handle, dentry, inode);
if (err) {
out_clear_inode:
clear_nlink(inode);
@@ -1853,10 +1869,13 @@ out_clear_inode:
}
ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir);
- ext4_mark_inode_dirty(handle, dir);
+ err = ext4_mark_inode_dirty(handle, dir);
+ if (err)
+ goto out_clear_inode;
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
+ brelse(dir_block);
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
@@ -1919,7 +1938,7 @@ static int empty_dir(struct inode *inode)
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
}
- if (!ext4_check_dir_entry(inode, de, bh, offset)) {
+ if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
de = (struct ext4_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -2407,7 +2426,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
ext4_current_time(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+ retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+ if (unlikely(retval)) {
+ ext4_std_error(new_dir->i_sb, retval);
+ goto end_rename;
+ }
brelse(new_bh);
new_bh = NULL;
}
@@ -2459,7 +2482,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
- ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+ retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+ if (retval) {
+ ext4_std_error(old_dir->i_sb, retval);
+ goto end_rename;
+ }
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index beacce1..7270dcf 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -44,7 +44,7 @@ int __init ext4_init_pageio(void)
if (io_page_cachep == NULL)
return -ENOMEM;
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL) {
+ if (io_end_cachep == NULL) {
kmem_cache_destroy(io_page_cachep);
return -ENOMEM;
}
@@ -158,11 +158,8 @@ static void ext4_end_io_work(struct work_struct *work)
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
- ext4_io_end_t *io = NULL;
-
- io = kmem_cache_alloc(io_end_cachep, flags);
+ ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
- memset(io, 0, sizeof(*io));
atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
INIT_WORK(&io->work, ext4_end_io_work);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 981c847..3ecc6e4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -220,7 +220,11 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext4_handle_dirty_metadata(handle, NULL, gdb);
+ err = ext4_handle_dirty_metadata(handle, NULL, gdb);
+ if (unlikely(err)) {
+ brelse(gdb);
+ goto exit_bh;
+ }
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -258,7 +262,11 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
bh->b_data);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_bh;
+ }
brelse(bh);
/* Mark unused entries in inode bitmap used */
ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
@@ -270,7 +278,9 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
exit_bh:
brelse(bh);
@@ -422,17 +432,21 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
goto exit_dind;
}
- if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh)))
+ err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+ if (unlikely(err))
goto exit_dind;
- if ((err = ext4_journal_get_write_access(handle, *primary)))
+ err = ext4_journal_get_write_access(handle, *primary);
+ if (unlikely(err))
goto exit_sbh;
- if ((err = ext4_journal_get_write_access(handle, dind)))
- goto exit_primary;
+ err = ext4_journal_get_write_access(handle, dind);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
/* ext4_reserve_inode_write() gets a reference on the iloc */
- if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (unlikely(err))
goto exit_dindj;
n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
@@ -454,12 +468,20 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- ext4_handle_dirty_metadata(handle, NULL, dind);
- brelse(dind);
+ err = ext4_handle_dirty_metadata(handle, NULL, dind);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_inode;
+ }
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext4_handle_dirty_metadata(handle, NULL, *primary);
+ err = ext4_handle_dirty_metadata(handle, NULL, *primary);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_inode;
+ }
+ brelse(dind);
o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -470,19 +492,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+ if (err)
+ ext4_std_error(sb, err);
- return 0;
+ return err;
exit_inode:
/* ext4_journal_release_buffer(handle, iloc.bh); */
brelse(iloc.bh);
exit_dindj:
/* ext4_journal_release_buffer(handle, dind); */
-exit_primary:
- /* ext4_journal_release_buffer(handle, *primary); */
exit_sbh:
- /* ext4_journal_release_buffer(handle, *primary); */
+ /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
exit_dind:
brelse(dind);
exit_bh:
@@ -665,7 +687,9 @@ static void update_backups(struct super_block *sb,
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext4_handle_dirty_metadata(handle, NULL, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err))
+ ext4_std_error(sb, err);
brelse(bh);
}
if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -883,7 +907,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;
- ext4_handle_dirty_metadata(handle, NULL, primary);
+ err = ext4_handle_dirty_metadata(handle, NULL, primary);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ goto exit_journal;
+ }
/* Update the reserved block counts only once the new group is
* active. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cd37f9d..29c80f6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -388,13 +388,14 @@ static void ext4_handle_error(struct super_block *sb)
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
- sb->s_id, function, line, current->comm);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
va_end(args);
ext4_handle_error(sb);
@@ -405,28 +406,31 @@ void ext4_error_inode(struct inode *inode, const char *function,
const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
- printk("block %llu: ", block);
- printk("comm %s: ", current->comm);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu: ", block);
+ printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
}
void ext4_error_file(struct file *file, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, ext4_fsblk_t block,
+ const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
@@ -434,17 +438,18 @@ void ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (!path)
+ if (IS_ERR(path))
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu "
- "(comm %s path %s): ",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path);
- vprintk(fmt, args);
- printk("\n");
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk(KERN_CONT "block %llu: ", block);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
@@ -543,28 +548,29 @@ void __ext4_abort(struct super_block *sb, const char *function,
panic("EXT4-fs panic from previous error\n");
}
-void ext4_msg (struct super_block * sb, const char *prefix,
- const char *fmt, ...)
+void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
va_end(args);
}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
- sb->s_id, function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
}
@@ -575,21 +581,25 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,
__releases(bitlock)
__acquires(bitlock)
{
+ struct va_format vaf;
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
+
va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
sb->s_id, function, line, grp);
if (ino)
- printk("inode %lu: ", ino);
+ printk(KERN_CONT "inode %lu: ", ino);
if (block)
- printk("block %llu:", (unsigned long long) block);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu:", (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
@@ -808,21 +818,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
- /*
- * Note: We can be called before EXT4_SB(sb)->s_journal is set,
- * therefore it can be null here. Don't check it, just initialize
- * jinode.
- */
- jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
- ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
#endif
+ ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->cur_aio_dio = NULL;
@@ -898,9 +902,12 @@ void ext4_clear_inode(struct inode *inode)
end_writeback(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
- if (EXT4_JOURNAL(inode))
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode);
+ if (EXT4_I(inode)->jinode) {
+ jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode);
+ jbd2_free_inode(EXT4_I(inode)->jinode);
+ EXT4_I(inode)->jinode = NULL;
+ }
}
static inline void ext4_show_quota_options(struct seq_file *seq,
@@ -1393,7 +1400,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
sbi->s_qf_names[qtype] = NULL;
return 0;
}
- set_opt(sbi->s_mount_opt, QUOTA);
+ set_opt(sb, QUOTA);
return 1;
}
@@ -1448,21 +1455,21 @@ static int parse_options(char *options, struct super_block *sb,
switch (token) {
case Opt_bsd_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sb, MINIX_DF);
break;
case Opt_minix_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, MINIX_DF);
+ set_opt(sb, MINIX_DF);
break;
case Opt_grpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
break;
case Opt_nogrpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, GRPID);
+ clear_opt(sb, GRPID);
break;
case Opt_resuid:
@@ -1480,38 +1487,38 @@ static int parse_options(char *options, struct super_block *sb,
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ set_opt(sb, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
break;
case Opt_debug:
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
break;
case Opt_oldalloc:
- set_opt(sbi->s_mount_opt, OLDALLOC);
+ set_opt(sb, OLDALLOC);
break;
case Opt_orlov:
- clear_opt(sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sb, OLDALLOC);
break;
#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt(sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sb, XATTR_USER);
break;
#else
case Opt_user_xattr:
@@ -1521,10 +1528,10 @@ static int parse_options(char *options, struct super_block *sb,
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
case Opt_acl:
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
break;
case Opt_noacl:
- clear_opt(sbi->s_mount_opt, POSIX_ACL);
+ clear_opt(sb, POSIX_ACL);
break;
#else
case Opt_acl:
@@ -1543,7 +1550,7 @@ static int parse_options(char *options, struct super_block *sb,
"Cannot specify journal on remount");
return 0;
}
- set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sb, UPDATE_JOURNAL);
break;
case Opt_journal_dev:
if (is_remount) {
@@ -1556,14 +1563,14 @@ static int parse_options(char *options, struct super_block *sb,
*journal_devnum = option;
break;
case Opt_journal_checksum:
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_journal_async_commit:
- set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_ASYNC_COMMIT);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt(sbi->s_mount_opt, NOLOAD);
+ set_opt(sb, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
@@ -1606,15 +1613,15 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ clear_opt(sb, DATA_FLAGS);
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
- set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ set_opt(sb, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
- clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ clear_opt(sb, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
@@ -1654,12 +1661,12 @@ set_qf_format:
break;
case Opt_quota:
case Opt_usrquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, USRQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, USRQUOTA);
break;
case Opt_grpquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, GRPQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_loaded(sb)) {
@@ -1667,9 +1674,9 @@ set_qf_format:
"options when quota turned on");
return 0;
}
- clear_opt(sbi->s_mount_opt, QUOTA);
- clear_opt(sbi->s_mount_opt, USRQUOTA);
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, QUOTA);
+ clear_opt(sb, USRQUOTA);
+ clear_opt(sb, GRPQUOTA);
break;
#else
case Opt_quota:
@@ -1695,7 +1702,7 @@ set_qf_format:
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_barrier:
if (args[0].from) {
@@ -1704,9 +1711,9 @@ set_qf_format:
} else
option = 1; /* No argument, default to 1 */
if (option)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
else
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_ignore:
break;
@@ -1730,17 +1737,17 @@ set_qf_format:
"Ignoring deprecated bh option");
break;
case Opt_i_version:
- set_opt(sbi->s_mount_opt, I_VERSION);
+ set_opt(sb, I_VERSION);
sb->s_flags |= MS_I_VERSION;
break;
case Opt_nodelalloc:
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
break;
case Opt_mblk_io_submit:
- set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+ set_opt(sb, MBLK_IO_SUBMIT);
break;
case Opt_nomblk_io_submit:
- clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+ clear_opt(sb, MBLK_IO_SUBMIT);
break;
case Opt_stripe:
if (match_int(&args[0], &option))
@@ -1750,13 +1757,13 @@ set_qf_format:
sbi->s_stripe = option;
break;
case Opt_delalloc:
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
break;
case Opt_block_validity:
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
break;
case Opt_noblock_validity:
- clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ clear_opt(sb, BLOCK_VALIDITY);
break;
case Opt_inode_readahead_blks:
if (match_int(&args[0], &option))
@@ -1780,7 +1787,7 @@ set_qf_format:
option);
break;
case Opt_noauto_da_alloc:
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_auto_da_alloc:
if (args[0].from) {
@@ -1789,24 +1796,24 @@ set_qf_format:
} else
option = 1; /* No argument, default to 1 */
if (option)
- clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+ clear_opt(sb, NO_AUTO_DA_ALLOC);
else
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb,NO_AUTO_DA_ALLOC);
break;
case Opt_discard:
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
break;
case Opt_nodiscard:
- clear_opt(sbi->s_mount_opt, DISCARD);
+ clear_opt(sb, DISCARD);
break;
case Opt_dioread_nolock:
- set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ set_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_dioread_lock:
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_init_inode_table:
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (args[0].from) {
if (match_int(&args[0], &option))
return 0;
@@ -1817,7 +1824,7 @@ set_qf_format:
sbi->s_li_wait_mult = option;
break;
case Opt_noinit_inode_table:
- clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ clear_opt(sb, INIT_INODE_TABLE);
break;
default:
ext4_msg(sb, KERN_ERR,
@@ -1829,10 +1836,10 @@ set_qf_format:
#ifdef CONFIG_QUOTA
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sbi->s_mount_opt, USRQUOTA);
+ clear_opt(sb, USRQUOTA);
if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, GRPQUOTA);
if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
ext4_msg(sb, KERN_ERR, "old and new quota "
@@ -1902,12 +1909,12 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04x]\n",
+ "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
- sbi->s_mount_opt);
+ sbi->s_mount_opt, sbi->s_mount_opt2);
return res;
}
@@ -1937,14 +1944,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
size = flex_group_count * sizeof(struct flex_groups);
sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- sbi->s_flex_groups = vmalloc(size);
- if (sbi->s_flex_groups)
- memset(sbi->s_flex_groups, 0, size);
- }
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for "
- "%u flex groups", flex_group_count);
- goto failed;
+ sbi->s_flex_groups = vzalloc(size);
+ if (sbi->s_flex_groups == NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %u flex groups",
+ flex_group_count);
+ goto failed;
+ }
}
for (i = 0; i < sbi->s_groups_count; i++) {
@@ -2923,7 +2929,7 @@ static int ext4_register_li_request(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
- int ret;
+ int ret = 0;
if (sbi->s_li_request != NULL)
return 0;
@@ -3078,41 +3084,41 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
"2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
}
if (def_mount_opts & EXT4_DEFM_UID16)
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
#ifdef CONFIG_EXT4_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
#endif
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ set_opt(sb, WRITEBACK_DATA);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sb, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ set_opt(sb, ERRORS_CONT);
else
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sb, ERRORS_RO);
if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@@ -3121,7 +3127,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
/*
* enable delayed allocation by default
@@ -3129,7 +3135,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
if (!IS_EXT3_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
&journal_devnum, &journal_ioprio, NULL, 0)) {
@@ -3432,8 +3438,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"suppressed and not mounted read-only");
goto failed_mount_wq;
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ clear_opt(sb, DATA_FLAGS);
+ set_opt(sb, WRITEBACK_DATA);
sbi->s_journal = NULL;
needs_recovery = 0;
goto no_journal;
@@ -3471,9 +3477,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
if (jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
break;
case EXT4_MOUNT_ORDERED_DATA:
@@ -3563,18 +3569,18 @@ no_journal:
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
"requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
if (sb->s_blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - block size is too small");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
}
@@ -4173,6 +4179,22 @@ static int ext4_unfreeze(struct super_block *sb)
return 0;
}
+/*
+ * Structure to save mount options for ext4_remount's benefit
+ */
+struct ext4_mount_options {
+ unsigned long s_mount_opt;
+ unsigned long s_mount_opt2;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned long s_commit_interval;
+ u32 s_min_batch_time, s_max_batch_time;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
@@ -4193,6 +4215,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
+ old_opts.s_mount_opt2 = sbi->s_mount_opt2;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
@@ -4346,6 +4369,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
restore_opts:
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sbi->s_commit_interval = old_opts.s_commit_interval;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fa4b899..fc32176 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -427,23 +427,23 @@ cleanup:
static int
ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
- int i_error, b_error;
+ int ret, ret2;
down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
- if (i_error < 0) {
- b_error = 0;
- } else {
- if (buffer) {
- buffer += i_error;
- buffer_size -= i_error;
- }
- b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
- if (b_error < 0)
- i_error = 0;
+ ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto errout;
+ if (buffer) {
+ buffer += ret;
+ buffer_size -= ret;
}
+ ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto errout;
+ ret += ret2;
+errout:
up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- return i_error + b_error;
+ return ret;
}
/*
@@ -947,7 +947,7 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
/*
* ext4_xattr_set_handle()
*
- * Create, replace or remove an extended attribute for this inode. Buffer
+ * Create, replace or remove an extended attribute for this inode. Value
* is NULL to remove an existing extended attribute, and non-NULL to
* either replace an existing extended attribute, or create a new extended
* attribute. The flags XATTR_REPLACE and XATTR_CREATE
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f837ba9..9e46869 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -43,6 +43,7 @@
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/bitops.h>
+#include <linux/ratelimit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
@@ -93,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
+EXPORT_SYMBOL(jbd2_inode_cache);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
@@ -827,7 +829,7 @@ static journal_t * journal_init_common (void)
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
- goto fail;
+ return NULL;
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_logspace);
@@ -852,14 +854,12 @@ static journal_t * journal_init_common (void)
err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
if (err) {
kfree(journal);
- goto fail;
+ return NULL;
}
spin_lock_init(&journal->j_history_lock);
return journal;
-fail:
- return NULL;
}
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
@@ -1982,7 +1982,6 @@ static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
static struct journal_head *journal_alloc_journal_head(void)
{
struct journal_head *ret;
- static unsigned long last_warning;
#ifdef CONFIG_JBD2_DEBUG
atomic_inc(&nr_journal_heads);
@@ -1990,11 +1989,7 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
if (!ret) {
jbd_debug(1, "out of memory for journal_head\n");
- if (time_after(jiffies, last_warning + 5*HZ)) {
- printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
- __func__);
- last_warning = jiffies;
- }
+ pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
while (!ret) {
yield();
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
@@ -2292,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
#endif
-struct kmem_cache *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
static int __init journal_init_handle_cache(void)
{
- jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
- sizeof(handle_t),
- 0, /* offset */
- SLAB_TEMPORARY, /* flags */
- NULL); /* ctor */
+ jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
if (jbd2_handle_cache == NULL) {
- printk(KERN_EMERG "JBD: failed to create handle cache\n");
+ printk(KERN_EMERG "JBD2: failed to create handle cache\n");
+ return -ENOMEM;
+ }
+ jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
+ if (jbd2_inode_cache == NULL) {
+ printk(KERN_EMERG "JBD2: failed to create inode cache\n");
+ kmem_cache_destroy(jbd2_handle_cache);
return -ENOMEM;
}
return 0;
@@ -2312,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void)
{
if (jbd2_handle_cache)
kmem_cache_destroy(jbd2_handle_cache);
+ if (jbd2_inode_cache)
+ kmem_cache_destroy(jbd2_inode_cache);
+
}
/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 2bc4d5f..1cad869 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -299,10 +299,10 @@ int jbd2_journal_skip_recovery(journal_t *journal)
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
-#endif
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
+#endif
journal->j_transaction_sequence = ++info.end_transaction;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6bf0a24..3948932 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -340,9 +340,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
jbd2_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
- goto out;
}
-out:
return handle;
}
EXPORT_SYMBOL(jbd2__journal_start);
@@ -589,7 +587,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
transaction = handle->h_transaction;
journal = transaction->t_journal;
- jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
+ jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
JBUFFER_TRACE(jh, "entry");
repeat:
@@ -774,7 +772,7 @@ done:
J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
"Possible IO failure.\n");
page = jh2bh(jh)->b_page;
- offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
+ offset = offset_in_page(jh2bh(jh)->b_data);
source = kmap_atomic(page, KM_USER0);
/* Fire data frozen trigger just before we copy the data */
jbd2_buffer_frozen_trigger(jh, source + offset,
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 97f6073..ca58d64 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_LOCKD) += lockd.o
-lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
- svcproc.o svcsubs.o mon.o xdr.o grace.o
-lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
+lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
+ svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
+lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
new file mode 100644
index 0000000..f848b52
--- /dev/null
+++ b/fs/lockd/clnt4xdr.c
@@ -0,0 +1,605 @@
+/*
+ * linux/fs/lockd/clnt4xdr.c
+ *
+ * XDR functions to encode/decode NLM version 4 RPC arguments and results.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
+# error "NLM host name cannot be larger than NLM's maximum string length!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM4_void_sz (0)
+#define NLM4_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
+#define NLM4_caller_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_owner_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_fhandle_sz (1+(NFS3_FHSIZE>>2))
+#define NLM4_lock_sz (5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz)
+#define NLM4_holder_sz (6+NLM4_owner_sz)
+
+#define NLM4_testargs_sz (NLM4_cookie_sz+1+NLM4_lock_sz)
+#define NLM4_lockargs_sz (NLM4_cookie_sz+4+NLM4_lock_sz)
+#define NLM4_cancargs_sz (NLM4_cookie_sz+2+NLM4_lock_sz)
+#define NLM4_unlockargs_sz (NLM4_cookie_sz+NLM4_lock_sz)
+
+#define NLM4_testres_sz (NLM4_cookie_sz+1+NLM4_holder_sz)
+#define NLM4_res_sz (NLM4_cookie_sz+1)
+#define NLM4_norep_sz (0)
+
+
+static s64 loff_t_to_s64(loff_t offset)
+{
+ s64 res;
+
+ if (offset >= NLM4_OFFSET_MAX)
+ res = NLM4_OFFSET_MAX;
+ else if (offset <= -NLM4_OFFSET_MAX)
+ res = -NLM4_OFFSET_MAX;
+ else
+ res = offset;
+ return res;
+}
+
+static void nlm4_compute_offsets(const struct nlm_lock *lock,
+ u64 *l_offset, u64 *l_len)
+{
+ const struct file_lock *fl = &lock->fl;
+
+ BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
+ BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
+ fl->fl_end != OFFSET_MAX);
+
+ *l_offset = loff_t_to_s64(fl->fl_start);
+ if (fl->fl_end == OFFSET_MAX)
+ *l_len = 0;
+ else
+ *l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv4 basic data types
+ *
+ * Basic NLMv4 data types are defined in Appendix II, section 6.1.4
+ * of RFC 1813: "NFS Version 3 Protocol Specification" and in Chapter
+ * 10 of X/Open's "Protocols for Interworking: XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
+}
+
+/*
+ * typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+ const u8 *data, const unsigned int length)
+{
+ __be32 *p;
+
+ BUG_ON(length > XDR_MAX_NETOBJ);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, data, length);
+}
+
+static int decode_netobj(struct xdr_stream *xdr,
+ struct xdr_netobj *obj)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > XDR_MAX_NETOBJ))
+ goto out_size;
+ obj->len = length;
+ obj->data = (u8 *)p;
+ return 0;
+out_size:
+ dprintk("NFS: returned netobj was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+ const struct nlm_cookie *cookie)
+{
+ BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+ encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+ struct nlm_cookie *cookie)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ /* apparently HPUX can return empty cookies */
+ if (length == 0)
+ goto out_hpux;
+ if (length > NLM_MAXCOOKIELEN)
+ goto out_size;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ cookie->len = length;
+ memcpy(cookie->data, p, length);
+ return 0;
+out_hpux:
+ cookie->len = 4;
+ memset(cookie->data, 0, 4);
+ return 0;
+out_size:
+ dprintk("NFS: returned cookie was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ BUG_ON(fh->size > NFS3_FHSIZE);
+ encode_netobj(xdr, (u8 *)&fh->data, fh->size);
+}
+
+/*
+ * enum nlm4_stats {
+ * NLM4_GRANTED = 0,
+ * NLM4_DENIED = 1,
+ * NLM4_DENIED_NOLOCKS = 2,
+ * NLM4_BLOCKED = 3,
+ * NLM4_DENIED_GRACE_PERIOD = 4,
+ * NLM4_DEADLCK = 5,
+ * NLM4_ROFS = 6,
+ * NLM4_STALE_FH = 7,
+ * NLM4_FBIG = 8,
+ * NLM4_FAILED = 9
+ * };
+ *
+ * struct nlm4_stat {
+ * nlm4_stats stat;
+ * };
+ *
+ * NB: we don't swap bytes for the NLM status values. The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+static void encode_nlm4_stat(struct xdr_stream *xdr,
+ const __be32 stat)
+{
+ __be32 *p;
+
+ BUG_ON(be32_to_cpu(stat) > NLM_FAILED);
+ p = xdr_reserve_space(xdr, 4);
+ *p = stat;
+}
+
+static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (unlikely(*p > nlm4_failed))
+ goto out_bad_xdr;
+ *stat = *p;
+ return 0;
+out_bad_xdr:
+ dprintk("%s: server returned invalid nlm4_stats value: %u\n",
+ __func__, be32_to_cpup(p));
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * struct nlm4_holder {
+ * bool exclusive;
+ * int32 svid;
+ * netobj oh;
+ * uint64 l_offset;
+ * uint64 l_len;
+ * };
+ */
+static void encode_nlm4_holder(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ const struct nlm_lock *lock = &result->lock;
+ u64 l_offset, l_len;
+ __be32 *p;
+
+ encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_int32(xdr, lock->svid);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4);
+ nlm4_compute_offsets(lock, &l_offset, &l_len);
+ p = xdr_encode_hyper(p, l_offset);
+ xdr_encode_hyper(p, l_len);
+}
+
+static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+ struct nlm_lock *lock = &result->lock;
+ struct file_lock *fl = &lock->fl;
+ u64 l_offset, l_len;
+ u32 exclusive;
+ int error;
+ __be32 *p;
+ s32 end;
+
+ memset(lock, 0, sizeof(*lock));
+ locks_init_lock(fl);
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ exclusive = be32_to_cpup(p++);
+ lock->svid = be32_to_cpup(p);
+ fl->fl_pid = (pid_t)lock->svid;
+
+ error = decode_netobj(xdr, &lock->oh);
+ if (unlikely(error))
+ goto out;
+
+ p = xdr_inline_decode(xdr, 8 + 8);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ fl->fl_flags = FL_POSIX;
+ fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ p = xdr_decode_hyper(p, &l_offset);
+ xdr_decode_hyper(p, &l_len);
+ end = l_offset + l_len - 1;
+
+ fl->fl_start = (loff_t)l_offset;
+ if (l_len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = (loff_t)end;
+ error = 0;
+out:
+ return error;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+ /* NB: client-side does not set lock->len */
+ u32 length = strlen(name);
+ __be32 *p;
+
+ BUG_ON(length > NLM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+/*
+ * struct nlm4_lock {
+ * string caller_name<LM_MAXSTRLEN>;
+ * netobj fh;
+ * netobj oh;
+ * int32 svid;
+ * uint64 l_offset;
+ * uint64 l_len;
+ * };
+ */
+static void encode_nlm4_lock(struct xdr_stream *xdr,
+ const struct nlm_lock *lock)
+{
+ u64 l_offset, l_len;
+ __be32 *p;
+
+ encode_caller_name(xdr, lock->caller);
+ encode_fh(xdr, &lock->fh);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 8 + 8);
+ *p++ = cpu_to_be32(lock->svid);
+
+ nlm4_compute_offsets(lock, &l_offset, &l_len);
+ p = xdr_encode_hyper(p, l_offset);
+ xdr_encode_hyper(p, l_len);
+}
+
+
+/*
+ * NLMv4 XDR encode functions
+ *
+ * NLMv4 argument types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * struct nlm4_testargs {
+ * netobj cookie;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_lockargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * bool reclaim;
+ * int state;
+ * };
+ */
+static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+ encode_bool(xdr, args->reclaim);
+ encode_int32(xdr, args->state);
+}
+
+/*
+ * struct nlm4_cancargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_unlockargs {
+ * netobj cookie;
+ * struct nlm4_lock alock;
+ * };
+ */
+static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ * struct nlm4_res {
+ * netobj cookie;
+ * nlm4_stat stat;
+ * };
+ */
+static void nlm4_xdr_enc_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm4_stat(xdr, result->status);
+}
+
+/*
+ * union nlm4_testrply switch (nlm4_stats stat) {
+ * case NLM4_DENIED:
+ * struct nlm4_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm4_testres {
+ * netobj cookie;
+ * nlm4_testrply test_stat;
+ * };
+ */
+static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm4_stat(xdr, result->status);
+ if (result->status == nlm_lck_denied)
+ encode_nlm4_holder(xdr, result);
+}
+
+
+/*
+ * NLMv4 XDR decode functions
+ *
+ * NLMv4 argument types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * union nlm4_testrply switch (nlm4_stats stat) {
+ * case NLM4_DENIED:
+ * struct nlm4_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm4_testres {
+ * netobj cookie;
+ * nlm4_testrply test_stat;
+ * };
+ */
+static int decode_nlm4_testrply(struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_nlm4_stat(xdr, &result->status);
+ if (unlikely(error))
+ goto out;
+ if (result->status == nlm_lck_denied)
+ error = decode_nlm4_holder(xdr, result);
+out:
+ return error;
+}
+
+static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm4_testrply(xdr, result);
+out:
+ return error;
+}
+
+/*
+ * struct nlm4_res {
+ * netobj cookie;
+ * nlm4_stat stat;
+ * };
+ */
+static int nlm4_xdr_dec_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm4_stat(xdr, &result->status);
+out:
+ return error;
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm4_xdr_dec_norep NULL
+
+#define PROC(proc, argtype, restype) \
+[NLMPROC_##proc] = { \
+ .p_proc = NLMPROC_##proc, \
+ .p_encode = (kxdreproc_t)nlm4_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nlm4_xdr_dec_##restype, \
+ .p_arglen = NLM4_##argtype##_sz, \
+ .p_replen = NLM4_##restype##_sz, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
+ }
+
+static struct rpc_procinfo nlm4_procedures[] = {
+ PROC(TEST, testargs, testres),
+ PROC(LOCK, lockargs, res),
+ PROC(CANCEL, cancargs, res),
+ PROC(UNLOCK, unlockargs, res),
+ PROC(GRANTED, testargs, res),
+ PROC(TEST_MSG, testargs, norep),
+ PROC(LOCK_MSG, lockargs, norep),
+ PROC(CANCEL_MSG, cancargs, norep),
+ PROC(UNLOCK_MSG, unlockargs, norep),
+ PROC(GRANTED_MSG, testargs, norep),
+ PROC(TEST_RES, testres, norep),
+ PROC(LOCK_RES, res, norep),
+ PROC(CANCEL_RES, res, norep),
+ PROC(UNLOCK_RES, res, norep),
+ PROC(GRANTED_RES, res, norep),
+};
+
+struct rpc_version nlm_version4 = {
+ .number = 4,
+ .nrprocs = ARRAY_SIZE(nlm4_procedures),
+ .procs = nlm4_procedures,
+};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 25509eb..8d4ea83 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
*/
void nlmclnt_done(struct nlm_host *host)
{
- nlm_release_host(host);
+ nlmclnt_release_host(host);
lockd_down();
}
EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -273,7 +273,7 @@ restart:
spin_unlock(&nlm_blocked_lock);
/* Release host handle after use */
- nlm_release_host(host);
+ nlmclnt_release_host(host);
lockd_down();
return 0;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 332c54c..adb45ec 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -58,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
return;
list_del(&lockowner->list);
spin_unlock(&lockowner->host->h_lock);
- nlm_release_host(lockowner->host);
+ nlmclnt_release_host(lockowner->host);
kfree(lockowner);
}
@@ -207,22 +207,22 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
printk("nlm_alloc_call: failed, waiting for memory\n");
schedule_timeout_interruptible(5*HZ);
}
- nlm_release_host(host);
+ nlmclnt_release_host(host);
return NULL;
}
-void nlm_release_call(struct nlm_rqst *call)
+void nlmclnt_release_call(struct nlm_rqst *call)
{
if (!atomic_dec_and_test(&call->a_count))
return;
- nlm_release_host(call->a_host);
+ nlmclnt_release_host(call->a_host);
nlmclnt_release_lockargs(call);
kfree(call);
}
static void nlmclnt_rpc_release(void *data)
{
- nlm_release_call(data);
+ nlmclnt_release_call(data);
}
static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -436,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
status = nlm_stat_to_errno(req->a_res.status);
}
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
@@ -593,7 +593,7 @@ again:
out_unblock:
nlmclnt_finish_block(block);
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
out_unlock:
/* Fatal error: ensure that we remove the lock altogether */
@@ -694,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
@@ -755,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
NLMPROC_CANCEL, &nlmclnt_cancel_ops);
if (status == 0 && req->a_res.status == nlm_lck_denied)
status = -ENOLCK;
- nlm_release_call(req);
+ nlmclnt_release_call(req);
return status;
}
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
new file mode 100644
index 0000000..180ac34
--- /dev/null
+++ b/fs/lockd/clntxdr.c
@@ -0,0 +1,627 @@
+/*
+ * linux/fs/lockd/clntxdr.c
+ *
+ * XDR functions to encode/decode NLM version 3 RPC arguments and results.
+ * NLM version 3 is backwards compatible with NLM versions 1 and 2.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM_cookie_sz (1+(NLM_MAXCOOKIELEN>>2))
+#define NLM_caller_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM_owner_sz (1+(NLMCLNT_OHSIZE>>2))
+#define NLM_fhandle_sz (1+(NFS2_FHSIZE>>2))
+#define NLM_lock_sz (3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz)
+#define NLM_holder_sz (4+NLM_owner_sz)
+
+#define NLM_testargs_sz (NLM_cookie_sz+1+NLM_lock_sz)
+#define NLM_lockargs_sz (NLM_cookie_sz+4+NLM_lock_sz)
+#define NLM_cancargs_sz (NLM_cookie_sz+2+NLM_lock_sz)
+#define NLM_unlockargs_sz (NLM_cookie_sz+NLM_lock_sz)
+
+#define NLM_testres_sz (NLM_cookie_sz+1+NLM_holder_sz)
+#define NLM_res_sz (NLM_cookie_sz+1)
+#define NLM_norep_sz (0)
+
+
+static s32 loff_t_to_s32(loff_t offset)
+{
+ s32 res;
+
+ if (offset >= NLM_OFFSET_MAX)
+ res = NLM_OFFSET_MAX;
+ else if (offset <= -NLM_OFFSET_MAX)
+ res = -NLM_OFFSET_MAX;
+ else
+ res = offset;
+ return res;
+}
+
+static void nlm_compute_offsets(const struct nlm_lock *lock,
+ u32 *l_offset, u32 *l_len)
+{
+ const struct file_lock *fl = &lock->fl;
+
+ BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
+ BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
+ fl->fl_end != OFFSET_MAX);
+
+ *l_offset = loff_t_to_s32(fl->fl_start);
+ if (fl->fl_end == OFFSET_MAX)
+ *l_len = 0;
+ else
+ *l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv3 basic data types
+ *
+ * Basic NLMv3 data types are not defined in an IETF standards
+ * document. X/Open has a description of these data types that
+ * is useful. See Chapter 10 of "Protocols for Interworking:
+ * XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
+}
+
+/*
+ * typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+ const u8 *data, const unsigned int length)
+{
+ __be32 *p;
+
+ BUG_ON(length > XDR_MAX_NETOBJ);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, data, length);
+}
+
+static int decode_netobj(struct xdr_stream *xdr,
+ struct xdr_netobj *obj)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > XDR_MAX_NETOBJ))
+ goto out_size;
+ obj->len = length;
+ obj->data = (u8 *)p;
+ return 0;
+out_size:
+ dprintk("NFS: returned netobj was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+ const struct nlm_cookie *cookie)
+{
+ BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+ encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+ struct nlm_cookie *cookie)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ /* apparently HPUX can return empty cookies */
+ if (length == 0)
+ goto out_hpux;
+ if (length > NLM_MAXCOOKIELEN)
+ goto out_size;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ cookie->len = length;
+ memcpy(cookie->data, p, length);
+ return 0;
+out_hpux:
+ cookie->len = 4;
+ memset(cookie->data, 0, 4);
+ return 0;
+out_size:
+ dprintk("NFS: returned cookie was too long: %u\n", length);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ BUG_ON(fh->size != NFS2_FHSIZE);
+ encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
+}
+
+/*
+ * enum nlm_stats {
+ * LCK_GRANTED = 0,
+ * LCK_DENIED = 1,
+ * LCK_DENIED_NOLOCKS = 2,
+ * LCK_BLOCKED = 3,
+ * LCK_DENIED_GRACE_PERIOD = 4
+ * };
+ *
+ *
+ * struct nlm_stat {
+ * nlm_stats stat;
+ * };
+ *
+ * NB: we don't swap bytes for the NLM status values. The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+
+static void encode_nlm_stat(struct xdr_stream *xdr,
+ const __be32 stat)
+{
+ __be32 *p;
+
+ BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+ p = xdr_reserve_space(xdr, 4);
+ *p = stat;
+}
+
+static int decode_nlm_stat(struct xdr_stream *xdr,
+ __be32 *stat)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (unlikely(*p > nlm_lck_denied_grace_period))
+ goto out_enum;
+ *stat = *p;
+ return 0;
+out_enum:
+ dprintk("%s: server returned invalid nlm_stats value: %u\n",
+ __func__, be32_to_cpup(p));
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * struct nlm_holder {
+ * bool exclusive;
+ * int uppid;
+ * netobj oh;
+ * unsigned l_offset;
+ * unsigned l_len;
+ * };
+ */
+static void encode_nlm_holder(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ const struct nlm_lock *lock = &result->lock;
+ u32 l_offset, l_len;
+ __be32 *p;
+
+ encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+ encode_int32(xdr, lock->svid);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4);
+ nlm_compute_offsets(lock, &l_offset, &l_len);
+ *p++ = cpu_to_be32(l_offset);
+ *p = cpu_to_be32(l_len);
+}
+
+static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+ struct nlm_lock *lock = &result->lock;
+ struct file_lock *fl = &lock->fl;
+ u32 exclusive, l_offset, l_len;
+ int error;
+ __be32 *p;
+ s32 end;
+
+ memset(lock, 0, sizeof(*lock));
+ locks_init_lock(fl);
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ exclusive = be32_to_cpup(p++);
+ lock->svid = be32_to_cpup(p);
+ fl->fl_pid = (pid_t)lock->svid;
+
+ error = decode_netobj(xdr, &lock->oh);
+ if (unlikely(error))
+ goto out;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ fl->fl_flags = FL_POSIX;
+ fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ l_offset = be32_to_cpup(p++);
+ l_len = be32_to_cpup(p);
+ end = l_offset + l_len - 1;
+
+ fl->fl_start = (loff_t)l_offset;
+ if (l_len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = (loff_t)end;
+ error = 0;
+out:
+ return error;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+ /* NB: client-side does not set lock->len */
+ u32 length = strlen(name);
+ __be32 *p;
+
+ BUG_ON(length > NLM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+/*
+ * struct nlm_lock {
+ * string caller_name<LM_MAXSTRLEN>;
+ * netobj fh;
+ * netobj oh;
+ * int uppid;
+ * unsigned l_offset;
+ * unsigned l_len;
+ * };
+ */
+static void encode_nlm_lock(struct xdr_stream *xdr,
+ const struct nlm_lock *lock)
+{
+ u32 l_offset, l_len;
+ __be32 *p;
+
+ encode_caller_name(xdr, lock->caller);
+ encode_fh(xdr, &lock->fh);
+ encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(lock->svid);
+
+ nlm_compute_offsets(lock, &l_offset, &l_len);
+ *p++ = cpu_to_be32(l_offset);
+ *p = cpu_to_be32(l_len);
+}
+
+
+/*
+ * NLMv3 XDR encode functions
+ *
+ * NLMv3 argument types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * struct nlm_testargs {
+ * netobj cookie;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_lockargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * bool reclaim;
+ * int state;
+ * };
+ */
+static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+ encode_bool(xdr, args->reclaim);
+ encode_int32(xdr, args->state);
+}
+
+/*
+ * struct nlm_cancargs {
+ * netobj cookie;
+ * bool block;
+ * bool exclusive;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_bool(xdr, args->block);
+ encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_unlockargs {
+ * netobj cookie;
+ * struct nlm_lock alock;
+ * };
+ */
+static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_args *args)
+{
+ const struct nlm_lock *lock = &args->lock;
+
+ encode_cookie(xdr, &args->cookie);
+ encode_nlm_lock(xdr, lock);
+}
+
+/*
+ * struct nlm_res {
+ * netobj cookie;
+ * nlm_stat stat;
+ * };
+ */
+static void nlm_xdr_enc_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm_stat(xdr, result->status);
+}
+
+/*
+ * union nlm_testrply switch (nlm_stats stat) {
+ * case LCK_DENIED:
+ * struct nlm_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm_testres {
+ * netobj cookie;
+ * nlm_testrply test_stat;
+ * };
+ */
+static void encode_nlm_testrply(struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ if (result->status == nlm_lck_denied)
+ encode_nlm_holder(xdr, result);
+}
+
+static void nlm_xdr_enc_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nlm_res *result)
+{
+ encode_cookie(xdr, &result->cookie);
+ encode_nlm_stat(xdr, result->status);
+ encode_nlm_testrply(xdr, result);
+}
+
+
+/*
+ * NLMv3 XDR decode functions
+ *
+ * NLMv3 result types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ * union nlm_testrply switch (nlm_stats stat) {
+ * case LCK_DENIED:
+ * struct nlm_holder holder;
+ * default:
+ * void;
+ * };
+ *
+ * struct nlm_testres {
+ * netobj cookie;
+ * nlm_testrply test_stat;
+ * };
+ */
+static int decode_nlm_testrply(struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_nlm_stat(xdr, &result->status);
+ if (unlikely(error))
+ goto out;
+ if (result->status == nlm_lck_denied)
+ error = decode_nlm_holder(xdr, result);
+out:
+ return error;
+}
+
+static int nlm_xdr_dec_testres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm_testrply(xdr, result);
+out:
+ return error;
+}
+
+/*
+ * struct nlm_res {
+ * netobj cookie;
+ * nlm_stat stat;
+ * };
+ */
+static int nlm_xdr_dec_res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nlm_res *result)
+{
+ int error;
+
+ error = decode_cookie(xdr, &result->cookie);
+ if (unlikely(error))
+ goto out;
+ error = decode_nlm_stat(xdr, &result->status);
+out:
+ return error;
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm_xdr_dec_norep NULL
+
+#define PROC(proc, argtype, restype) \
+[NLMPROC_##proc] = { \
+ .p_proc = NLMPROC_##proc, \
+ .p_encode = (kxdreproc_t)nlm_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nlm_xdr_dec_##restype, \
+ .p_arglen = NLM_##argtype##_sz, \
+ .p_replen = NLM_##restype##_sz, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
+ }
+
+static struct rpc_procinfo nlm_procedures[] = {
+ PROC(TEST, testargs, testres),
+ PROC(LOCK, lockargs, res),
+ PROC(CANCEL, cancargs, res),
+ PROC(UNLOCK, unlockargs, res),
+ PROC(GRANTED, testargs, res),
+ PROC(TEST_MSG, testargs, norep),
+ PROC(LOCK_MSG, lockargs, norep),
+ PROC(CANCEL_MSG, cancargs, norep),
+ PROC(UNLOCK_MSG, unlockargs, norep),
+ PROC(GRANTED_MSG, testargs, norep),
+ PROC(TEST_RES, testres, norep),
+ PROC(LOCK_RES, res, norep),
+ PROC(CANCEL_RES, res, norep),
+ PROC(UNLOCK_RES, res, norep),
+ PROC(GRANTED_RES, res, norep),
+};
+
+static struct rpc_version nlm_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nlm_procedures),
+ .procs = nlm_procedures,
+};
+
+static struct rpc_version nlm_version3 = {
+ .number = 3,
+ .nrprocs = ARRAY_SIZE(nlm_procedures),
+ .procs = nlm_procedures,
+};
+
+static struct rpc_version *nlm_versions[] = {
+ [1] = &nlm_version1,
+ [3] = &nlm_version3,
+#ifdef CONFIG_LOCKD_V4
+ [4] = &nlm_version4,
+#endif
+};
+
+static struct rpc_stat nlm_rpc_stats;
+
+struct rpc_program nlm_program = {
+ .name = "lockd",
+ .number = NLM_PROGRAM,
+ .nrvers = ARRAY_SIZE(nlm_versions),
+ .version = nlm_versions,
+ .stats = &nlm_rpc_stats,
+};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ed0c59f..5f1bcb2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -25,9 +25,22 @@
#define NLM_HOST_EXPIRE (300 * HZ)
#define NLM_HOST_COLLECT (120 * HZ)
-static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
+static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
+static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
+
+#define for_each_host(host, pos, chain, table) \
+ for ((chain) = (table); \
+ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+ hlist_for_each_entry((host), (pos), (chain), h_hash)
+
+#define for_each_host_safe(host, pos, next, chain, table) \
+ for ((chain) = (table); \
+ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+ hlist_for_each_entry_safe((host), (pos), (next), \
+ (chain), h_hash)
+
static unsigned long next_gc;
-static int nrhosts;
+static unsigned long nrhosts;
static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
@@ -40,8 +53,6 @@ struct nlm_lookup_host_info {
const u32 version; /* NLM version to search for */
const char *hostname; /* remote's hostname */
const size_t hostname_len; /* it's length */
- const struct sockaddr *src_sap; /* our address (optional) */
- const size_t src_len; /* it's length */
const int noresvport; /* use non-priv port */
};
@@ -88,127 +99,83 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
}
/*
- * Common host lookup routine for server & client
+ * Allocate and initialize an nlm_host. Common to both client and server.
*/
-static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
+static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
+ struct nsm_handle *nsm)
{
- struct hlist_head *chain;
- struct hlist_node *pos;
- struct nlm_host *host;
- struct nsm_handle *nsm = NULL;
-
- mutex_lock(&nlm_host_mutex);
+ struct nlm_host *host = NULL;
+ unsigned long now = jiffies;
- if (time_after_eq(jiffies, next_gc))
- nlm_gc_hosts();
-
- /* We may keep several nlm_host objects for a peer, because each
- * nlm_host is identified by
- * (address, protocol, version, server/client)
- * We could probably simplify this a little by putting all those
- * different NLM rpc_clients into one single nlm_host object.
- * This would allow us to have one nlm_host per address.
- */
- chain = &nlm_hosts[nlm_hash_address(ni->sap)];
- hlist_for_each_entry(host, pos, chain, h_hash) {
- if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
- continue;
-
- /* See if we have an NSM handle for this client */
- if (!nsm)
- nsm = host->h_nsmhandle;
-
- if (host->h_proto != ni->protocol)
- continue;
- if (host->h_version != ni->version)
- continue;
- if (host->h_server != ni->server)
- continue;
- if (ni->server && ni->src_len != 0 &&
- !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
- continue;
-
- /* Move to head of hash chain. */
- hlist_del(&host->h_hash);
- hlist_add_head(&host->h_hash, chain);
-
- nlm_get_host(host);
- dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
- host->h_name, host->h_addrbuf);
- goto out;
- }
-
- /*
- * The host wasn't in our hash table. If we don't
- * have an NSM handle for it yet, create one.
- */
- if (nsm)
+ if (nsm != NULL)
atomic_inc(&nsm->sm_count);
else {
host = NULL;
nsm = nsm_get_handle(ni->sap, ni->salen,
ni->hostname, ni->hostname_len);
- if (!nsm) {
- dprintk("lockd: nlm_lookup_host failed; "
- "no nsm handle\n");
+ if (unlikely(nsm == NULL)) {
+ dprintk("lockd: %s failed; no nsm handle\n",
+ __func__);
goto out;
}
}
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host) {
+ host = kmalloc(sizeof(*host), GFP_KERNEL);
+ if (unlikely(host == NULL)) {
+ dprintk("lockd: %s failed; no memory\n", __func__);
nsm_release(nsm);
- dprintk("lockd: nlm_lookup_host failed; no memory\n");
goto out;
}
- host->h_name = nsm->sm_name;
- host->h_addrbuf = nsm->sm_addrbuf;
+
memcpy(nlm_addr(host), ni->sap, ni->salen);
- host->h_addrlen = ni->salen;
+ host->h_addrlen = ni->salen;
rpc_set_port(nlm_addr(host), 0);
- memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
- host->h_srcaddrlen = ni->src_len;
+ host->h_srcaddrlen = 0;
+
+ host->h_rpcclnt = NULL;
+ host->h_name = nsm->sm_name;
host->h_version = ni->version;
host->h_proto = ni->protocol;
- host->h_rpcclnt = NULL;
- mutex_init(&host->h_mutex);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- host->h_expires = jiffies + NLM_HOST_EXPIRE;
- atomic_set(&host->h_count, 1);
+ host->h_reclaiming = 0;
+ host->h_server = ni->server;
+ host->h_noresvport = ni->noresvport;
+ host->h_inuse = 0;
init_waitqueue_head(&host->h_gracewait);
init_rwsem(&host->h_rwsem);
- host->h_state = 0; /* pseudo NSM state */
- host->h_nsmstate = 0; /* real NSM state */
- host->h_nsmhandle = nsm;
- host->h_server = ni->server;
- host->h_noresvport = ni->noresvport;
- hlist_add_head(&host->h_hash, chain);
+ host->h_state = 0;
+ host->h_nsmstate = 0;
+ host->h_pidcount = 0;
+ atomic_set(&host->h_count, 1);
+ mutex_init(&host->h_mutex);
+ host->h_nextrebind = now + NLM_HOST_REBIND;
+ host->h_expires = now + NLM_HOST_EXPIRE;
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
INIT_LIST_HEAD(&host->h_granted);
INIT_LIST_HEAD(&host->h_reclaim);
-
- nrhosts++;
-
- dprintk("lockd: nlm_lookup_host created host %s\n",
- host->h_name);
+ host->h_nsmhandle = nsm;
+ host->h_addrbuf = nsm->sm_addrbuf;
out:
- mutex_unlock(&nlm_host_mutex);
return host;
}
/*
- * Destroy a host
+ * Destroy an nlm_host and free associated resources
+ *
+ * Caller must hold nlm_host_mutex.
*/
-static void
-nlm_destroy_host(struct nlm_host *host)
+static void nlm_destroy_host_locked(struct nlm_host *host)
{
struct rpc_clnt *clnt;
+ dprintk("lockd: destroy host %s\n", host->h_name);
+
BUG_ON(!list_empty(&host->h_lockowners));
BUG_ON(atomic_read(&host->h_count));
+ hlist_del_init(&host->h_hash);
+
nsm_unmonitor(host);
nsm_release(host->h_nsmhandle);
@@ -216,6 +183,8 @@ nlm_destroy_host(struct nlm_host *host)
if (clnt != NULL)
rpc_shutdown_client(clnt);
kfree(host);
+
+ nrhosts--;
}
/**
@@ -249,12 +218,76 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
.hostname_len = strlen(hostname),
.noresvport = noresvport,
};
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host;
+ struct nsm_handle *nsm = NULL;
dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
(hostname ? hostname : "<none>"), version,
(protocol == IPPROTO_UDP ? "udp" : "tcp"));
- return nlm_lookup_host(&ni);
+ mutex_lock(&nlm_host_mutex);
+
+ chain = &nlm_client_hosts[nlm_hash_address(sap)];
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (!rpc_cmp_addr(nlm_addr(host), sap))
+ continue;
+
+ /* Same address. Share an NSM handle if we already have one */
+ if (nsm == NULL)
+ nsm = host->h_nsmhandle;
+
+ if (host->h_proto != protocol)
+ continue;
+ if (host->h_version != version)
+ continue;
+
+ nlm_get_host(host);
+ dprintk("lockd: %s found host %s (%s)\n", __func__,
+ host->h_name, host->h_addrbuf);
+ goto out;
+ }
+
+ host = nlm_alloc_host(&ni, nsm);
+ if (unlikely(host == NULL))
+ goto out;
+
+ hlist_add_head(&host->h_hash, chain);
+ nrhosts++;
+
+ dprintk("lockd: %s created host %s (%s)\n", __func__,
+ host->h_name, host->h_addrbuf);
+
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
+}
+
+/**
+ * nlmclnt_release_host - release client nlm_host
+ * @host: nlm_host to release
+ *
+ */
+void nlmclnt_release_host(struct nlm_host *host)
+{
+ if (host == NULL)
+ return;
+
+ dprintk("lockd: release client host %s\n", host->h_name);
+
+ BUG_ON(atomic_read(&host->h_count) < 0);
+ BUG_ON(host->h_server);
+
+ if (atomic_dec_and_test(&host->h_count)) {
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(!list_empty(&host->h_granted));
+ BUG_ON(!list_empty(&host->h_reclaim));
+
+ mutex_lock(&nlm_host_mutex);
+ nlm_destroy_host_locked(host);
+ mutex_unlock(&nlm_host_mutex);
+ }
}
/**
@@ -279,12 +312,18 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const char *hostname,
const size_t hostname_len)
{
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host = NULL;
+ struct nsm_handle *nsm = NULL;
struct sockaddr_in sin = {
.sin_family = AF_INET,
};
struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
};
+ struct sockaddr *src_sap;
+ size_t src_len = rqstp->rq_addrlen;
struct nlm_lookup_host_info ni = {
.server = 1,
.sap = svc_addr(rqstp),
@@ -293,27 +332,91 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
.version = rqstp->rq_vers,
.hostname = hostname,
.hostname_len = hostname_len,
- .src_len = rqstp->rq_addrlen,
};
dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
(int)hostname_len, hostname, rqstp->rq_vers,
(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
+ mutex_lock(&nlm_host_mutex);
+
switch (ni.sap->sa_family) {
case AF_INET:
sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
- ni.src_sap = (struct sockaddr *)&sin;
+ src_sap = (struct sockaddr *)&sin;
break;
case AF_INET6:
ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
- ni.src_sap = (struct sockaddr *)&sin6;
+ src_sap = (struct sockaddr *)&sin6;
break;
default:
- return NULL;
+ dprintk("lockd: %s failed; unrecognized address family\n",
+ __func__);
+ goto out;
+ }
+
+ if (time_after_eq(jiffies, next_gc))
+ nlm_gc_hosts();
+
+ chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
+ continue;
+
+ /* Same address. Share an NSM handle if we already have one */
+ if (nsm == NULL)
+ nsm = host->h_nsmhandle;
+
+ if (host->h_proto != ni.protocol)
+ continue;
+ if (host->h_version != ni.version)
+ continue;
+ if (!rpc_cmp_addr(nlm_srcaddr(host), src_sap))
+ continue;
+
+ /* Move to head of hash chain. */
+ hlist_del(&host->h_hash);
+ hlist_add_head(&host->h_hash, chain);
+
+ nlm_get_host(host);
+ dprintk("lockd: %s found host %s (%s)\n",
+ __func__, host->h_name, host->h_addrbuf);
+ goto out;
}
- return nlm_lookup_host(&ni);
+ host = nlm_alloc_host(&ni, nsm);
+ if (unlikely(host == NULL))
+ goto out;
+
+ memcpy(nlm_srcaddr(host), src_sap, src_len);
+ host->h_srcaddrlen = src_len;
+ hlist_add_head(&host->h_hash, chain);
+ nrhosts++;
+
+ dprintk("lockd: %s created host %s (%s)\n",
+ __func__, host->h_name, host->h_addrbuf);
+
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
+}
+
+/**
+ * nlmsvc_release_host - release server nlm_host
+ * @host: nlm_host to release
+ *
+ * Host is destroyed later in nlm_gc_host().
+ */
+void nlmsvc_release_host(struct nlm_host *host)
+{
+ if (host == NULL)
+ return;
+
+ dprintk("lockd: release server host %s\n", host->h_name);
+
+ BUG_ON(atomic_read(&host->h_count) < 0);
+ BUG_ON(!host->h_server);
+ atomic_dec(&host->h_count);
}
/*
@@ -413,20 +516,28 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
return host;
}
-/*
- * Release NLM host after use
- */
-void nlm_release_host(struct nlm_host *host)
+static struct nlm_host *next_host_state(struct hlist_head *cache,
+ struct nsm_handle *nsm,
+ const struct nlm_reboot *info)
{
- if (host != NULL) {
- dprintk("lockd: release host %s\n", host->h_name);
- BUG_ON(atomic_read(&host->h_count) < 0);
- if (atomic_dec_and_test(&host->h_count)) {
- BUG_ON(!list_empty(&host->h_lockowners));
- BUG_ON(!list_empty(&host->h_granted));
- BUG_ON(!list_empty(&host->h_reclaim));
+ struct nlm_host *host = NULL;
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+
+ mutex_lock(&nlm_host_mutex);
+ for_each_host(host, pos, chain, cache) {
+ if (host->h_nsmhandle == nsm
+ && host->h_nsmstate != info->state) {
+ host->h_nsmstate = info->state;
+ host->h_state++;
+
+ nlm_get_host(host);
+ goto out;
}
}
+out:
+ mutex_unlock(&nlm_host_mutex);
+ return host;
}
/**
@@ -438,8 +549,6 @@ void nlm_release_host(struct nlm_host *host)
*/
void nlm_host_rebooted(const struct nlm_reboot *info)
{
- struct hlist_head *chain;
- struct hlist_node *pos;
struct nsm_handle *nsm;
struct nlm_host *host;
@@ -452,32 +561,15 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
* lock for this.
* To avoid processing a host several times, we match the nsmstate.
*/
-again: mutex_lock(&nlm_host_mutex);
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- if (host->h_nsmhandle == nsm
- && host->h_nsmstate != info->state) {
- host->h_nsmstate = info->state;
- host->h_state++;
-
- nlm_get_host(host);
- mutex_unlock(&nlm_host_mutex);
-
- if (host->h_server) {
- /* We're server for this guy, just ditch
- * all the locks he held. */
- nlmsvc_free_host_resources(host);
- } else {
- /* He's the server, initiate lock recovery. */
- nlmclnt_recovery(host);
- }
-
- nlm_release_host(host);
- goto again;
- }
- }
+ while ((host = next_host_state(nlm_server_hosts, nsm, info)) != NULL) {
+ nlmsvc_free_host_resources(host);
+ nlmsvc_release_host(host);
}
- mutex_unlock(&nlm_host_mutex);
+ while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
+ nlmclnt_recovery(host);
+ nlmclnt_release_host(host);
+ }
+
nsm_release(nsm);
}
@@ -497,13 +589,11 @@ nlm_shutdown_hosts(void)
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n");
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- host->h_expires = jiffies - 1;
- if (host->h_rpcclnt) {
- rpc_shutdown_client(host->h_rpcclnt);
- host->h_rpcclnt = NULL;
- }
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ host->h_expires = jiffies - 1;
+ if (host->h_rpcclnt) {
+ rpc_shutdown_client(host->h_rpcclnt);
+ host->h_rpcclnt = NULL;
}
}
@@ -512,15 +602,13 @@ nlm_shutdown_hosts(void)
mutex_unlock(&nlm_host_mutex);
/* complain if any hosts are left */
- if (nrhosts) {
+ if (nrhosts != 0) {
printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
- dprintk("lockd: %d hosts left:\n", nrhosts);
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash) {
- dprintk(" %s (cnt %d use %d exp %ld)\n",
- host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires);
- }
+ dprintk("lockd: %lu hosts left:\n", nrhosts);
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ dprintk(" %s (cnt %d use %d exp %ld)\n",
+ host->h_name, atomic_read(&host->h_count),
+ host->h_inuse, host->h_expires);
}
}
}
@@ -538,29 +626,22 @@ nlm_gc_hosts(void)
struct nlm_host *host;
dprintk("lockd: host garbage collection\n");
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry(host, pos, chain, h_hash)
- host->h_inuse = 0;
- }
+ for_each_host(host, pos, chain, nlm_server_hosts)
+ host->h_inuse = 0;
/* Mark all hosts that hold locks, blocks or shares */
nlmsvc_mark_resources();
- for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
- hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
- if (atomic_read(&host->h_count) || host->h_inuse
- || time_before(jiffies, host->h_expires)) {
- dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
- host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires);
- continue;
- }
- dprintk("lockd: delete host %s\n", host->h_name);
- hlist_del_init(&host->h_hash);
-
- nlm_destroy_host(host);
- nrhosts--;
+ for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+ if (atomic_read(&host->h_count) || host->h_inuse
+ || time_before(jiffies, host->h_expires)) {
+ dprintk("nlm_gc_hosts skipping %s "
+ "(cnt %d use %d exp %ld)\n",
+ host->h_name, atomic_read(&host->h_count),
+ host->h_inuse, host->h_expires);
+ continue;
}
+ nlm_destroy_host_locked(host);
}
next_gc = jiffies + NLM_HOST_COLLECT;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e0c9189..23d7451 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -401,26 +401,22 @@ void nsm_release(struct nsm_handle *nsm)
* Status Monitor wire protocol.
*/
-static int encode_nsm_string(struct xdr_stream *xdr, const char *string)
+static void encode_nsm_string(struct xdr_stream *xdr, const char *string)
{
const u32 len = strlen(string);
__be32 *p;
- if (unlikely(len > SM_MAXSTRLEN))
- return -EIO;
- p = xdr_reserve_space(xdr, sizeof(u32) + len);
- if (unlikely(p == NULL))
- return -EIO;
+ BUG_ON(len > SM_MAXSTRLEN);
+ p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, string, len);
- return 0;
}
/*
* "mon_name" specifies the host to be monitored.
*/
-static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- return encode_nsm_string(xdr, argp->mon_name);
+ encode_nsm_string(xdr, argp->mon_name);
}
/*
@@ -429,35 +425,25 @@ static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
* (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
* has changed.
*/
-static int encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- int status;
__be32 *p;
- status = encode_nsm_string(xdr, utsname()->nodename);
- if (unlikely(status != 0))
- return status;
- p = xdr_reserve_space(xdr, 3 * sizeof(u32));
- if (unlikely(p == NULL))
- return -EIO;
- *p++ = htonl(argp->prog);
- *p++ = htonl(argp->vers);
- *p++ = htonl(argp->proc);
- return 0;
+ encode_nsm_string(xdr, utsname()->nodename);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(argp->prog);
+ *p++ = cpu_to_be32(argp->vers);
+ *p = cpu_to_be32(argp->proc);
}
/*
* The "mon_id" argument specifies the non-private arguments
* of an NSMPROC_MON or NSMPROC_UNMON call.
*/
-static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
{
- int status;
-
- status = encode_mon_name(xdr, argp);
- if (unlikely(status != 0))
- return status;
- return encode_my_id(xdr, argp);
+ encode_mon_name(xdr, argp);
+ encode_my_id(xdr, argp);
}
/*
@@ -465,68 +451,56 @@ static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
* by the NSMPROC_MON call. This information will be supplied in the
* NLMPROC_SM_NOTIFY call.
*/
-static int encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
{
__be32 *p;
p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
- if (unlikely(p == NULL))
- return -EIO;
xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
- return 0;
}
-static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p,
- const struct nsm_args *argp)
+static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nsm_args *argp)
{
- struct xdr_stream xdr;
- int status;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- status = encode_mon_id(&xdr, argp);
- if (unlikely(status))
- return status;
- return encode_priv(&xdr, argp);
+ encode_mon_id(xdr, argp);
+ encode_priv(xdr, argp);
}
-static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p,
- const struct nsm_args *argp)
+static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nsm_args *argp)
{
- struct xdr_stream xdr;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- return encode_mon_id(&xdr, argp);
+ encode_mon_id(xdr, argp);
}
-static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
- struct nsm_res *resp)
+static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nsm_res *resp)
{
- struct xdr_stream xdr;
+ __be32 *p;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- p = xdr_inline_decode(&xdr, 2 * sizeof(u32));
+ p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
return -EIO;
- resp->status = ntohl(*p++);
- resp->state = ntohl(*p);
+ resp->status = be32_to_cpup(p++);
+ resp->state = be32_to_cpup(p);
- dprintk("lockd: xdr_dec_stat_res status %d state %d\n",
- resp->status, resp->state);
+ dprintk("lockd: %s status %d state %d\n",
+ __func__, resp->status, resp->state);
return 0;
}
-static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
- struct nsm_res *resp)
+static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nsm_res *resp)
{
- struct xdr_stream xdr;
+ __be32 *p;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- p = xdr_inline_decode(&xdr, sizeof(u32));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- resp->state = ntohl(*p);
+ resp->state = be32_to_cpup(p);
- dprintk("lockd: xdr_dec_stat state %d\n", resp->state);
+ dprintk("lockd: %s state %d\n", __func__, resp->state);
return 0;
}
@@ -542,8 +516,8 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
static struct rpc_procinfo nsm_procedures[] = {
[NSMPROC_MON] = {
.p_proc = NSMPROC_MON,
- .p_encode = (kxdrproc_t)xdr_enc_mon,
- .p_decode = (kxdrproc_t)xdr_dec_stat_res,
+ .p_encode = (kxdreproc_t)nsm_xdr_enc_mon,
+ .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat_res,
.p_arglen = SM_mon_sz,
.p_replen = SM_monres_sz,
.p_statidx = NSMPROC_MON,
@@ -551,8 +525,8 @@ static struct rpc_procinfo nsm_procedures[] = {
},
[NSMPROC_UNMON] = {
.p_proc = NSMPROC_UNMON,
- .p_encode = (kxdrproc_t)xdr_enc_unmon,
- .p_decode = (kxdrproc_t)xdr_dec_stat,
+ .p_encode = (kxdreproc_t)nsm_xdr_enc_unmon,
+ .p_decode = (kxdrdproc_t)nsm_xdr_dec_stat,
.p_arglen = SM_mon_id_sz,
.p_replen = SM_unmonres_sz,
.p_statidx = NSMPROC_UNMON,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 38d2611..9a41fdc 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -51,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- nlm_release_host(host);
+ nlmsvc_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -92,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -134,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -164,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -197,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_unlock(file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -229,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
static void nlm4svc_callback_release(void *data)
{
- nlm_release_call(data);
+ nlmsvc_release_call(data);
}
static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -261,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
stat = func(rqstp, argp, &call->a_res);
if (stat != 0) {
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return stat;
}
@@ -334,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -367,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -399,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
return rpc_success;
nlmsvc_free_host_resources(host);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
return rpc_success;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ef5659b..6e31695 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,6 +46,7 @@ static void nlmsvc_remove_block(struct nlm_block *block);
static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
static void nlmsvc_freegrantargs(struct nlm_rqst *call);
static const struct rpc_call_ops nlmsvc_grant_ops;
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
/*
* The list of blocked locks to retry
@@ -233,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
failed_free:
kfree(block);
failed:
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return NULL;
}
@@ -266,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
mutex_unlock(&file->f_mutex);
nlmsvc_freegrantargs(block->b_call);
- nlm_release_call(block->b_call);
+ nlmsvc_release_call(block->b_call);
nlm_release_file(block->b_file);
kfree(block->b_fl);
kfree(block);
@@ -934,3 +935,32 @@ nlmsvc_retry_blocked(void)
return timeout;
}
+
+#ifdef RPC_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+ /*
+ * We can get away with a static buffer because we're only
+ * called with BKL held.
+ */
+ static char buf[2*NLM_MAXCOOKIELEN+1];
+ unsigned int i, len = sizeof(buf);
+ char *p = buf;
+
+ len--; /* allow for trailing \0 */
+ if (len < 3)
+ return "???";
+ for (i = 0 ; i < cookie->len ; i++) {
+ if (len < 2) {
+ strcpy(p-3, "...");
+ break;
+ }
+ sprintf(p, "%02x", cookie->data[i]);
+ p += 2;
+ len -= 2;
+ }
+ *p = '\0';
+
+ return buf;
+}
+#endif
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0caea53..d27aab1 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -80,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return 0;
no_locks:
- nlm_release_host(host);
+ nlmsvc_release_host(host);
if (error)
return error;
return nlm_lck_denied_nolocks;
@@ -122,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -164,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
}
@@ -194,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -227,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -257,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-task->tk_status);
}
+void nlmsvc_release_call(struct nlm_rqst *call)
+{
+ if (!atomic_dec_and_test(&call->a_count))
+ return;
+ nlmsvc_release_host(call->a_host);
+ kfree(call);
+}
+
static void nlmsvc_callback_release(void *data)
{
- nlm_release_call(data);
+ nlmsvc_release_call(data);
}
static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -291,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
stat = func(rqstp, argp, &call->a_res);
if (stat != 0) {
- nlm_release_call(call);
+ nlmsvc_release_call(call);
return stat;
}
@@ -366,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -399,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
- nlm_release_host(host);
+ nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
}
@@ -431,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
return rpc_success;
nlmsvc_free_host_resources(host);
- nlm_release_host(host);
+ nlmsvc_release_host(host);
return rpc_success;
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index b583ab0..964666c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -149,37 +149,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
}
/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
- struct file_lock *fl = &lock->fl;
- __s32 start, len;
-
- if (!(p = xdr_encode_string(p, lock->caller))
- || !(p = nlm_encode_fh(p, &lock->fh))
- || !(p = nlm_encode_oh(p, &lock->oh)))
- return NULL;
-
- if (fl->fl_start > NLM_OFFSET_MAX
- || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
- return NULL;
-
- start = loff_t_to_s32(fl->fl_start);
- if (fl->fl_end == OFFSET_MAX)
- len = 0;
- else
- len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
-
- *p++ = htonl(lock->svid);
- *p++ = htonl(start);
- *p++ = htonl(len);
-
- return p;
-}
-
-/*
* Encode result of a TEST/TEST_MSG call
*/
static __be32 *
@@ -372,259 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
{
return xdr_ressize_check(rqstp, p);
}
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
-{
- return 0;
-}
-#endif
-
-static int
-nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- if (resp->status == nlm_lck_denied) {
- struct file_lock *fl = &resp->lock.fl;
- u32 excl;
- s32 start, len, end;
-
- memset(&resp->lock, 0, sizeof(resp->lock));
- locks_init_lock(fl);
- excl = ntohl(*p++);
- resp->lock.svid = ntohl(*p++);
- fl->fl_pid = (pid_t)resp->lock.svid;
- if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
- return -EIO;
-
- fl->fl_flags = FL_POSIX;
- fl->fl_type = excl? F_WRLCK : F_RDLCK;
- start = ntohl(*p++);
- len = ntohl(*p++);
- end = start + len - 1;
-
- fl->fl_start = s32_to_loff_t(start);
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = s32_to_loff_t(end);
- }
- return 0;
-}
-
-
-static int
-nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- *p++ = argp->reclaim? xdr_one : xdr_zero;
- *p++ = htonl(argp->state);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm_encode_cookie(p, &argp->cookie)))
- return -EIO;
- if (!(p = nlm_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_encode_cookie(p, &resp->cookie)))
- return -EIO;
- *p++ = resp->status;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_encode_testres(p, resp)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM_void_sz 0
-#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
-#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
-#define NLM_holder_sz 4+NLM_owner_sz
-
-#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz
-#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz
-#define NLM_cancargs_sz NLM_cookie_sz+2+NLM_lock_sz
-#define NLM_unlockargs_sz NLM_cookie_sz+NLM_lock_sz
-
-#define NLM_testres_sz NLM_cookie_sz+1+NLM_holder_sz
-#define NLM_res_sz NLM_cookie_sz+1
-#define NLM_norep_sz 0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlmclt_decode_norep NULL
-
-#define PROC(proc, argtype, restype) \
-[NLMPROC_##proc] = { \
- .p_proc = NLMPROC_##proc, \
- .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
- .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
- .p_arglen = NLM_##argtype##_sz, \
- .p_replen = NLM_##restype##_sz, \
- .p_statidx = NLMPROC_##proc, \
- .p_name = #proc, \
- }
-
-static struct rpc_procinfo nlm_procedures[] = {
- PROC(TEST, testargs, testres),
- PROC(LOCK, lockargs, res),
- PROC(CANCEL, cancargs, res),
- PROC(UNLOCK, unlockargs, res),
- PROC(GRANTED, testargs, res),
- PROC(TEST_MSG, testargs, norep),
- PROC(LOCK_MSG, lockargs, norep),
- PROC(CANCEL_MSG, cancargs, norep),
- PROC(UNLOCK_MSG, unlockargs, norep),
- PROC(GRANTED_MSG, testargs, norep),
- PROC(TEST_RES, testres, norep),
- PROC(LOCK_RES, res, norep),
- PROC(CANCEL_RES, res, norep),
- PROC(UNLOCK_RES, res, norep),
- PROC(GRANTED_RES, res, norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
- PROC(SHARE, shareargs, shareres),
- PROC(UNSHARE, shareargs, shareres),
- PROC(NM_LOCK, lockargs, res),
- PROC(FREE_ALL, notify, void),
-#endif
-};
-
-static struct rpc_version nlm_version1 = {
- .number = 1,
- .nrprocs = 16,
- .procs = nlm_procedures,
-};
-
-static struct rpc_version nlm_version3 = {
- .number = 3,
- .nrprocs = 24,
- .procs = nlm_procedures,
-};
-
-static struct rpc_version * nlm_versions[] = {
- [1] = &nlm_version1,
- [3] = &nlm_version3,
-#ifdef CONFIG_LOCKD_V4
- [4] = &nlm_version4,
-#endif
-};
-
-static struct rpc_stat nlm_stats;
-
-struct rpc_program nlm_program = {
- .name = "lockd",
- .number = NLM_PROGRAM,
- .nrvers = ARRAY_SIZE(nlm_versions),
- .version = nlm_versions,
- .stats = &nlm_stats,
-};
-
-#ifdef RPC_DEBUG
-const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
- /*
- * We can get away with a static buffer because we're only
- * called with BKL held.
- */
- static char buf[2*NLM_MAXCOOKIELEN+1];
- unsigned int i, len = sizeof(buf);
- char *p = buf;
-
- len--; /* allow for trailing \0 */
- if (len < 3)
- return "???";
- for (i = 0 ; i < cookie->len ; i++) {
- if (len < 2) {
- strcpy(p-3, "...");
- break;
- }
- sprintf(p, "%02x", cookie->data[i]);
- p += 2;
- len -= 2;
- }
- *p = '\0';
-
- return buf;
-}
-#endif
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ad9dbbc..dfa4789 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -93,15 +93,6 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
return p + XDR_QUADLEN(f->size);
}
-static __be32 *
-nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
-{
- *p++ = htonl(f->size);
- if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
- memcpy(p, f->data, f->size);
- return p + XDR_QUADLEN(f->size);
-}
-
/*
* Encode and decode owner handle
*/
@@ -112,12 +103,6 @@ nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
}
static __be32 *
-nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
-{
- return xdr_encode_netobj(p, oh);
-}
-
-static __be32 *
nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
{
struct file_lock *fl = &lock->fl;
@@ -150,38 +135,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
}
/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
- struct file_lock *fl = &lock->fl;
- __s64 start, len;
-
- if (!(p = xdr_encode_string(p, lock->caller))
- || !(p = nlm4_encode_fh(p, &lock->fh))
- || !(p = nlm4_encode_oh(p, &lock->oh)))
- return NULL;
-
- if (fl->fl_start > NLM4_OFFSET_MAX
- || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
- return NULL;
-
- *p++ = htonl(lock->svid);
-
- start = loff_t_to_s64(fl->fl_start);
- if (fl->fl_end == OFFSET_MAX)
- len = 0;
- else
- len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
-
- p = xdr_encode_hyper(p, start);
- p = xdr_encode_hyper(p, len);
-
- return p;
-}
-
-/*
* Encode result of a TEST/TEST_MSG call
*/
static __be32 *
@@ -379,211 +332,3 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
{
return xdr_ressize_check(rqstp, p);
}
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
-{
- return 0;
-}
-#endif
-
-static int
-nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- if (resp->status == nlm_lck_denied) {
- struct file_lock *fl = &resp->lock.fl;
- u32 excl;
- __u64 start, len;
- __s64 end;
-
- memset(&resp->lock, 0, sizeof(resp->lock));
- locks_init_lock(fl);
- excl = ntohl(*p++);
- resp->lock.svid = ntohl(*p++);
- fl->fl_pid = (pid_t)resp->lock.svid;
- if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
- return -EIO;
-
- fl->fl_flags = FL_POSIX;
- fl->fl_type = excl? F_WRLCK : F_RDLCK;
- p = xdr_decode_hyper(p, &start);
- p = xdr_decode_hyper(p, &len);
- end = start + len - 1;
-
- fl->fl_start = s64_to_loff_t(start);
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = s64_to_loff_t(end);
- }
- return 0;
-}
-
-
-static int
-nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- *p++ = argp->reclaim? xdr_one : xdr_zero;
- *p++ = htonl(argp->state);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- *p++ = argp->block? xdr_one : xdr_zero;
- *p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
- struct nlm_lock *lock = &argp->lock;
-
- if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
- return -EIO;
- if (!(p = nlm4_encode_lock(p, lock)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
- return -EIO;
- *p++ = resp->status;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_encode_testres(p, resp)))
- return -EIO;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int
-nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
- if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
- return -EIO;
- resp->status = *p++;
- return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
-# error "NLM host name cannot be larger than NLM's maximum string length!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM4_void_sz 0
-#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE)
-#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
-#define NLM4_holder_sz 6+NLM4_owner_sz
-
-#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz
-#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz
-#define NLM4_cancargs_sz NLM4_cookie_sz+2+NLM4_lock_sz
-#define NLM4_unlockargs_sz NLM4_cookie_sz+NLM4_lock_sz
-
-#define NLM4_testres_sz NLM4_cookie_sz+1+NLM4_holder_sz
-#define NLM4_res_sz NLM4_cookie_sz+1
-#define NLM4_norep_sz 0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlm4clt_decode_norep NULL
-
-#define PROC(proc, argtype, restype) \
-[NLMPROC_##proc] = { \
- .p_proc = NLMPROC_##proc, \
- .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
- .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
- .p_arglen = NLM4_##argtype##_sz, \
- .p_replen = NLM4_##restype##_sz, \
- .p_statidx = NLMPROC_##proc, \
- .p_name = #proc, \
- }
-
-static struct rpc_procinfo nlm4_procedures[] = {
- PROC(TEST, testargs, testres),
- PROC(LOCK, lockargs, res),
- PROC(CANCEL, cancargs, res),
- PROC(UNLOCK, unlockargs, res),
- PROC(GRANTED, testargs, res),
- PROC(TEST_MSG, testargs, norep),
- PROC(LOCK_MSG, lockargs, norep),
- PROC(CANCEL_MSG, cancargs, norep),
- PROC(UNLOCK_MSG, unlockargs, norep),
- PROC(GRANTED_MSG, testargs, norep),
- PROC(TEST_RES, testres, norep),
- PROC(LOCK_RES, res, norep),
- PROC(CANCEL_RES, res, norep),
- PROC(UNLOCK_RES, res, norep),
- PROC(GRANTED_RES, res, norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
- PROC(SHARE, shareargs, shareres),
- PROC(UNSHARE, shareargs, shareres),
- PROC(NM_LOCK, lockargs, res),
- PROC(FREE_ALL, notify, void),
-#endif
-};
-
-struct rpc_version nlm_version4 = {
- .number = 4,
- .nrprocs = 24,
- .procs = nlm4_procedures,
-};
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 9344474..a25444ab 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -76,18 +76,6 @@ EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif
-struct mb_cache {
- struct list_head c_cache_list;
- const char *c_name;
- atomic_t c_entry_count;
- int c_max_entries;
- int c_bucket_bits;
- struct kmem_cache *c_entry_cache;
- struct list_head *c_block_hash;
- struct list_head *c_index_hash;
-};
-
-
/*
* Global data: list of all mbcache's, lru list, and a spinlock for
* accessing cache data structures on SMP machines. The lru list is
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 93a8b3b..1990165 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,9 +16,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
-#if defined(CONFIG_NFS_V4_1)
#include <linux/sunrpc/bc_xprt.h>
-#endif
#include <net/inet_sock.h>
@@ -137,6 +135,33 @@ out_err:
#if defined(CONFIG_NFS_V4_1)
/*
+ * * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ * */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+ struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+ struct nfs4_sessionid *bc_sid;
+
+ if (!serv->sv_bc_xprt)
+ return -EINVAL;
+
+ /* on success freed in xprt_free */
+ bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+ if (!bc_sid)
+ return -ENOMEM;
+ memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+ NFS4_MAX_SESSIONID_LEN);
+ spin_lock_bh(&serv->sv_cb_lock);
+ serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
+ spin_unlock_bh(&serv->sv_cb_lock);
+ dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
+ ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+ ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+ serv->sv_bc_xprt);
+ return 0;
+}
+
+/*
* The callback service for NFSv4.1 callbacks
*/
static int
@@ -177,30 +202,38 @@ nfs41_callback_svc(void *vrqstp)
struct svc_rqst *
nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
{
- struct svc_xprt *bc_xprt;
- struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
+ struct svc_rqst *rqstp;
+ int ret;
- dprintk("--> %s\n", __func__);
- /* Create a svc_sock for the service */
- bc_xprt = svc_sock_create(serv, xprt->prot);
- if (!bc_xprt)
+ /*
+ * Create an svc_sock for the back channel service that shares the
+ * fore channel connection.
+ * Returns the input port (0) and sets the svc_serv bc_xprt on success
+ */
+ ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
+ SVC_SOCK_ANONYMOUS);
+ if (ret < 0) {
+ rqstp = ERR_PTR(ret);
goto out;
+ }
/*
* Save the svc_serv in the transport so that it can
* be referenced when the session backchannel is initialized
*/
- serv->bc_xprt = bc_xprt;
xprt->bc_serv = serv;
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
init_waitqueue_head(&serv->sv_cb_waitq);
rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
- if (IS_ERR(rqstp))
- svc_sock_destroy(bc_xprt);
+ if (IS_ERR(rqstp)) {
+ svc_xprt_put(serv->sv_bc_xprt);
+ serv->sv_bc_xprt = NULL;
+ }
out:
- dprintk("--> %s return %p\n", __func__, rqstp);
+ dprintk("--> %s return %ld\n", __func__,
+ IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
return rqstp;
}
@@ -233,6 +266,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct nfs_callback_data *cb_info)
{
}
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+ return 0;
+}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -328,6 +365,9 @@ static int check_gss_callback_principal(struct nfs_client *clp,
struct rpc_clnt *r = clp->cl_rpcclient;
char *p = svc_gss_principal(rqstp);
+ /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
+ if (clp->cl_minorversion != 0)
+ return SVC_DROP;
/*
* It might just be a normal user principal, in which case
* userspace won't bother to tell us the name at all.
@@ -345,6 +385,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
return SVC_OK;
}
+/* pg_authenticate method helper */
+static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+{
+ struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+ int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+
+ dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+ if (svc_is_backchannel(rqstp))
+ /* Sessionid (usually) set after CB_NULL ping */
+ return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
+ is_cb_compound);
+ else
+ /* No callback identifier in pg_authenticate */
+ return nfs4_find_client_no_ident(svc_addr(rqstp));
+}
+
+/* pg_authenticate method for nfsv4 callback threads. */
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
struct nfs_client *clp;
@@ -352,7 +409,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
int ret = SVC_OK;
/* Don't talk to strangers */
- clp = nfs_find_client(svc_addr(rqstp), 4);
+ clp = nfs_cb_find_client(rqstp);
if (clp == NULL)
return SVC_DROP;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd..d3b44f9 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
OP_CB_ILLEGAL = 10044,
};
+struct cb_process_state {
+ __be32 drc_status;
+ struct nfs_client *clp;
+ struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
+};
+
struct cb_compound_hdr_arg {
unsigned int taglen;
const char *tag;
unsigned int minorversion;
+ unsigned int cb_ident; /* v4.0 callback identifier */
unsigned nops;
};
@@ -103,14 +110,23 @@ struct cb_sequenceres {
uint32_t csr_target_highestslotid;
};
-extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res);
+extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps);
extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
+#define RCA4_TYPE_MASK_DIR_DLG 2
+#define RCA4_TYPE_MASK_FILE_LAYOUT 3
+#define RCA4_TYPE_MASK_BLK_LAYOUT 4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+#define RCA4_TYPE_MASK_ALL 0xf31f
struct cb_recallanyargs {
struct sockaddr *craa_addr;
@@ -118,25 +134,52 @@ struct cb_recallanyargs {
uint32_t craa_type_mask;
};
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
+ void *dummy,
+ struct cb_process_state *cps);
struct cb_recallslotargs {
struct sockaddr *crsa_addr;
uint32_t crsa_target_max_slots;
};
-extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
- void *dummy);
+extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ void *dummy,
+ struct cb_process_state *cps);
+
+struct cb_layoutrecallargs {
+ struct sockaddr *cbl_addr;
+ uint32_t cbl_recall_type;
+ uint32_t cbl_layout_type;
+ uint32_t cbl_layoutchanged;
+ union {
+ struct {
+ struct nfs_fh cbl_fh;
+ struct pnfs_layout_range cbl_range;
+ nfs4_stateid cbl_stateid;
+ };
+ struct nfs_fsid cbl_fsid;
+ };
+};
-#endif /* CONFIG_NFS_V4_1 */
+extern unsigned nfs4_callback_layoutrecall(
+ struct cb_layoutrecallargs *args,
+ void *dummy, struct cb_process_state *cps);
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+extern void nfs4_cb_take_slot(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps);
#ifdef CONFIG_NFS_V4
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
#endif /* CONFIG_NFS_V4 */
/*
* nfs41: Callbacks are expected to not cause substantial latency,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2950fca..4bb91cb 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,30 +12,33 @@
#include "callback.h"
#include "delegation.h"
#include "internal.h"
+#include "pnfs.h"
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
-
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
+ res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
+ goto out;
+
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
dprintk("NFS: GETATTR callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
- inode = nfs_delegation_find_inode(clp, &args->fh);
+ inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL)
- goto out_putclient;
+ goto out;
nfsi = NFS_I(inode);
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
out_iput:
rcu_read_unlock();
iput(inode);
-out_putclient:
- nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
return res->status;
}
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct inode *inode;
__be32 res;
- res = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
+ res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
dprintk("NFS: RECALL callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-
- do {
- struct nfs_client *prev = clp;
-
- inode = nfs_delegation_find_inode(clp, &args->fh);
- if (inode != NULL) {
- /* Set up a helper thread to actually return the delegation */
- switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
- case 0:
- res = 0;
- break;
- case -ENOENT:
- if (res != 0)
- res = htonl(NFS4ERR_BAD_STATEID);
- break;
- default:
- res = htonl(NFS4ERR_RESOURCE);
- }
- iput(inode);
- }
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+ res = htonl(NFS4ERR_BADHANDLE);
+ inode = nfs_delegation_find_inode(cps->clp, &args->fh);
+ if (inode == NULL)
+ goto out;
+ /* Set up a helper thread to actually return the delegation */
+ switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ case 0:
+ res = 0;
+ break;
+ case -ENOENT:
+ if (res != 0)
+ res = htonl(NFS4ERR_BAD_STATEID);
+ break;
+ default:
+ res = htonl(NFS4ERR_RESOURCE);
+ }
+ iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
@@ -113,6 +108,139 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1)
+static u32 initiate_file_draining(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_layout_hdr *lo;
+ struct inode *ino;
+ bool found = false;
+ u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+ LIST_HEAD(free_me_list);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ if (!found)
+ return NFS4ERR_NOMATCHING_LAYOUT;
+
+ spin_lock(&ino->i_lock);
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ mark_matching_lsegs_invalid(lo, &free_me_list,
+ args->cbl_range.iomode))
+ rv = NFS4ERR_DELAY;
+ else
+ rv = NFS4ERR_NOMATCHING_LAYOUT;
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me_list);
+ put_layout_hdr(lo);
+ iput(ino);
+ return rv;
+}
+
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_layout_hdr *lo;
+ struct inode *ino;
+ u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+ struct pnfs_layout_hdr *tmp;
+ LIST_HEAD(recall_list);
+ LIST_HEAD(free_me_list);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ if ((args->cbl_recall_type == RETURN_FSID) &&
+ memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+ &args->cbl_fsid, sizeof(struct nfs_fsid)))
+ continue;
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
+ spin_unlock(&clp->cl_lock);
+ list_for_each_entry_safe(lo, tmp,
+ &recall_list, plh_bulk_recall) {
+ ino = lo->plh_inode;
+ spin_lock(&ino->i_lock);
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+ rv = NFS4ERR_DELAY;
+ list_del_init(&lo->plh_bulk_recall);
+ spin_unlock(&ino->i_lock);
+ put_layout_hdr(lo);
+ iput(ino);
+ }
+ pnfs_free_lseg_list(&free_me_list);
+ return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ u32 res = NFS4ERR_DELAY;
+
+ dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+ if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
+ goto out;
+ if (args->cbl_recall_type == RETURN_FILE)
+ res = initiate_file_draining(clp, args);
+ else
+ res = initiate_bulk_draining(clp, args);
+ clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+out:
+ dprintk("%s returning %i\n", __func__, res);
+ return res;
+
+}
+
+__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
+ void *dummy, struct cb_process_state *cps)
+{
+ u32 res;
+
+ dprintk("%s: -->\n", __func__);
+
+ if (cps->clp)
+ res = do_callback_layoutrecall(cps->clp, args);
+ else
+ res = NFS4ERR_OP_NOT_IN_SESSION;
+
+ dprintk("%s: exit with status = %d\n", __func__, res);
+ return cpu_to_be32(res);
+}
+
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_layoutrecallargs args;
+
+ /* Pretend we got a CB_LAYOUTRECALL(ALL) */
+ memset(&args, 0, sizeof(args));
+ args.cbl_recall_type = RETURN_ALL;
+ /* FIXME we ignore errors, what should we do? */
+ do_callback_layoutrecall(clp, &args);
+}
+
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
{
if (delegation == NULL)
@@ -185,42 +313,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
}
/*
- * Returns a pointer to a held 'struct nfs_client' that matches the server's
- * address, major version number, and session ID. It is the caller's
- * responsibility to release the returned reference.
- *
- * Returns NULL if there are no connections with sessions, or if no session
- * matches the one of interest.
- */
- static struct nfs_client *find_client_with_session(
- const struct sockaddr *addr, u32 nfsversion,
- struct nfs4_sessionid *sessionid)
-{
- struct nfs_client *clp;
-
- clp = nfs_find_client(addr, 4);
- if (clp == NULL)
- return NULL;
-
- do {
- struct nfs_client *prev = clp;
-
- if (clp->cl_session != NULL) {
- if (memcmp(clp->cl_session->sess_id.data,
- sessionid->data,
- NFS4_MAX_SESSIONID_LEN) == 0) {
- /* Returns a held reference to clp */
- return clp;
- }
- }
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
-
- return NULL;
-}
-
-/*
* For each referring call triple, check the session's slot table for
* a match. If the slot is in use and the sequence numbers match, the
* client is still waiting for a response to the original request.
@@ -276,20 +368,34 @@ out:
}
__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res)
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
int i;
__be32 status;
+ cps->clp = NULL;
+
status = htonl(NFS4ERR_BADSESSION);
- clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+ /* Incoming session must match the callback session */
+ if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
+ goto out;
+
+ clp = nfs4_find_client_sessionid(args->csa_addr,
+ &args->csa_sessionid, 1);
if (clp == NULL)
goto out;
+ /* state manager is resetting the session */
+ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+ status = NFS4ERR_DELAY;
+ goto out;
+ }
+
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
if (status)
- goto out_putclient;
+ goto out;
/*
* Check for pending referring calls. If a match is found, a
@@ -298,7 +404,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
- goto out_putclient;
+ goto out;
}
memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,83 +413,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ nfs4_cb_take_slot(clp);
+ cps->clp = clp; /* put in nfs4_callback_compound */
-out_putclient:
- nfs_put_client(clp);
out:
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
- res->csr_status = 0;
- else
+ if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+ cps->drc_status = status;
+ status = 0;
+ } else
res->csr_status = status;
+
dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
ntohl(status), ntohl(res->csr_status));
return status;
}
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+static bool
+validate_bitmap_values(unsigned long mask)
+{
+ return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
+}
+
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
__be32 status;
fmode_t flags = 0;
- status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->craa_addr, 4);
- if (clp == NULL)
+ status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk("NFS: RECALL_ANY callback request from %s\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+ status = cpu_to_be32(NFS4ERR_INVAL);
+ if (!validate_bitmap_values(args->craa_type_mask))
+ goto out;
+ status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags = FMODE_READ;
if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags |= FMODE_WRITE;
-
+ if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+ &args->craa_type_mask))
+ pnfs_recall_all_layouts(cps->clp);
if (flags)
- nfs_expire_all_delegation_types(clp, flags);
- status = htonl(NFS4_OK);
+ nfs_expire_all_delegation_types(cps->clp, flags);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
/* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+ struct cb_process_state *cps)
{
- struct nfs_client *clp;
struct nfs4_slot_table *fc_tbl;
__be32 status;
status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->crsa_addr, 4);
- if (clp == NULL)
+ if (!cps->clp) /* set in cb_sequence */
goto out;
dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
args->crsa_target_max_slots);
- fc_tbl = &clp->cl_session->fc_slot_table;
+ fc_tbl = &cps->clp->cl_session->fc_slot_table;
status = htonl(NFS4ERR_BAD_HIGH_SLOT);
if (args->crsa_target_max_slots > fc_tbl->max_slots ||
args->crsa_target_max_slots < 1)
- goto out_putclient;
+ goto out;
status = htonl(NFS4_OK);
if (args->crsa_target_max_slots == fc_tbl->max_slots)
- goto out_putclient;
+ goto out;
fc_tbl->target_max_slots = args->crsa_target_max_slots;
- nfs41_handle_recall_slot(clp);
-out_putclient:
- nfs_put_client(clp); /* balance nfs_find_client */
+ nfs41_handle_recall_slot(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05af212..23112c2 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,8 +10,10 @@
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/slab.h>
+#include <linux/sunrpc/bc_xprt.h>
#include "nfs4_fs.h"
#include "callback.h"
+#include "internal.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
@@ -22,6 +24,7 @@
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
+#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
4 + 1 + 3)
#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
@@ -33,7 +36,8 @@
/* Internal error code */
#define NFS4ERR_RESOURCE_HDR 11050
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+ struct cb_process_state *);
typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
@@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
hdr->minorversion = ntohl(*p++);
/* Check minor version is zero or one. */
if (hdr->minorversion <= 1) {
- p++; /* skip callback_ident */
+ hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
} else {
printk(KERN_WARNING "%s: NFSv4 server callback with "
"illegal minor version %u!\n",
@@ -220,6 +224,66 @@ out:
#if defined(CONFIG_NFS_V4_1)
+static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct cb_layoutrecallargs *args)
+{
+ __be32 *p;
+ __be32 status = 0;
+ uint32_t iomode;
+
+ args->cbl_addr = svc_addr(rqstp);
+ p = read_buf(xdr, 4 * sizeof(uint32_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+
+ args->cbl_layout_type = ntohl(*p++);
+ /* Depite the spec's xdr, iomode really belongs in the FILE switch,
+ * as it is unuseable and ignored with the other types.
+ */
+ iomode = ntohl(*p++);
+ args->cbl_layoutchanged = ntohl(*p++);
+ args->cbl_recall_type = ntohl(*p++);
+
+ if (args->cbl_recall_type == RETURN_FILE) {
+ args->cbl_range.iomode = iomode;
+ status = decode_fh(xdr, &args->cbl_fh);
+ if (unlikely(status != 0))
+ goto out;
+
+ p = read_buf(xdr, 2 * sizeof(uint64_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ p = xdr_decode_hyper(p, &args->cbl_range.offset);
+ p = xdr_decode_hyper(p, &args->cbl_range.length);
+ status = decode_stateid(xdr, &args->cbl_stateid);
+ if (unlikely(status != 0))
+ goto out;
+ } else if (args->cbl_recall_type == RETURN_FSID) {
+ p = read_buf(xdr, 2 * sizeof(uint64_t));
+ if (unlikely(p == NULL)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ p = xdr_decode_hyper(p, &args->cbl_fsid.major);
+ p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
+ } else if (args->cbl_recall_type != RETURN_ALL) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
+ dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
+ __func__,
+ args->cbl_layout_type, iomode,
+ args->cbl_layoutchanged, args->cbl_recall_type);
+out:
+ dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ return status;
+}
+
static __be32 decode_sessionid(struct xdr_stream *xdr,
struct nfs4_sessionid *sid)
{
@@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
case OP_CB_SEQUENCE:
case OP_CB_RECALL_ANY:
case OP_CB_RECALL_SLOT:
+ case OP_CB_LAYOUTRECALL:
*op = &callback_ops[op_nr];
break;
- case OP_CB_LAYOUTRECALL:
case OP_CB_NOTIFY_DEVICEID:
case OP_CB_NOTIFY:
case OP_CB_PUSH_DELEG:
@@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS_OK);
}
+static void nfs4_callback_free_slot(struct nfs4_session *session)
+{
+ struct nfs4_slot_table *tbl = &session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ /*
+ * Let the state manager know callback processing done.
+ * A single slot, so highest used slotid is either 0 or -1
+ */
+ tbl->highest_used_slotid--;
+ nfs4_check_drain_bc_complete(session);
+ spin_unlock(&tbl->slot_tbl_lock);
+}
+
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+ if (clp && clp->cl_session)
+ nfs4_callback_free_slot(clp->cl_session);
+}
+
+/* A single slot, so highest used slotid is either 0 or -1 */
+void nfs4_cb_take_slot(struct nfs_client *clp)
+{
+ struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ tbl->highest_used_slotid++;
+ BUG_ON(tbl->highest_used_slotid != 0);
+ spin_unlock(&tbl->slot_tbl_lock);
+}
+
#else /* CONFIG_NFS_V4_1 */
static __be32
@@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
static __be32
@@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
static __be32 process_op(uint32_t minorversion, int nop,
struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
- struct xdr_stream *xdr_out, void *resp, int* drc_status)
+ struct xdr_stream *xdr_out, void *resp,
+ struct cb_process_state *cps)
{
struct callback_op *op = &callback_ops[0];
unsigned int op_nr;
@@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (status)
goto encode_hdr;
- if (*drc_status) {
- status = *drc_status;
+ if (cps->drc_status) {
+ status = cps->drc_status;
goto encode_hdr;
}
@@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (maxlen > 0 && maxlen < PAGE_SIZE) {
status = op->decode_args(rqstp, xdr_in, argp);
if (likely(status == 0))
- status = op->process_op(argp, resp);
+ status = op->process_op(argp, resp, cps);
} else
status = htonl(NFS4ERR_RESOURCE);
- /* Only set by OP_CB_SEQUENCE processing */
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
- *drc_status = status;
- status = 0;
- }
-
encode_hdr:
res = encode_op_hdr(xdr_out, op_nr, status);
if (unlikely(res))
@@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_compound_hdr_arg hdr_arg = { 0 };
struct cb_compound_hdr_res hdr_res = { NULL };
struct xdr_stream xdr_in, xdr_out;
- __be32 *p;
- __be32 status, drc_status = 0;
+ __be32 *p, status;
+ struct cb_process_state cps = {
+ .drc_status = 0,
+ .clp = NULL,
+ };
unsigned int nops = 0;
dprintk("%s: start\n", __func__);
@@ -696,6 +792,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (status == __constant_htonl(NFS4ERR_RESOURCE))
return rpc_garbage_args;
+ if (hdr_arg.minorversion == 0) {
+ cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+ if (!cps.clp)
+ return rpc_drop_reply;
+ } else
+ cps.svc_sid = bc_xprt_sid(rqstp);
+
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +806,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
while (status == 0 && nops != hdr_arg.nops) {
status = process_op(hdr_arg.minorversion, nops, rqstp,
- &xdr_in, argp, &xdr_out, resp, &drc_status);
+ &xdr_in, argp, &xdr_out, resp, &cps);
nops++;
}
@@ -716,6 +819,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
+ nfs4_cb_free_slot(cps.clp);
+ nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
}
@@ -739,6 +844,12 @@ static struct callback_op callback_ops[] = {
.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
},
#if defined(CONFIG_NFS_V4_1)
+ [OP_CB_LAYOUTRECALL] = {
+ .process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
+ .decode_args =
+ (callback_decode_arg_t)decode_layoutrecall_args,
+ .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
+ },
[OP_CB_SEQUENCE] = {
.process_op = (callback_process_op_t)nfs4_callback_sequence,
.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0870d0d..192f2f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
static LIST_HEAD(nfs_client_list);
static LIST_HEAD(nfs_volume_list);
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+#ifdef CONFIG_NFS_V4
+static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
+
+/*
+ * Get a unique NFSv4.0 callback identifier which will be used
+ * by the V4.0 callback service to lookup the nfs_client struct
+ */
+static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
+{
+ int ret = 0;
+
+ if (clp->rpc_ops->version != 4 || minorversion != 0)
+ return ret;
+retry:
+ if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+ return -ENOMEM;
+ spin_lock(&nfs_client_lock);
+ ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
+ spin_unlock(&nfs_client_lock);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
+}
+#endif /* CONFIG_NFS_V4 */
/*
* RPC cruft for NFS
@@ -144,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_proto = cl_init->proto;
#ifdef CONFIG_NFS_V4
- INIT_LIST_HEAD(&clp->cl_delegations);
+ err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
+ if (err)
+ goto error_cleanup;
+
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -170,21 +197,17 @@ error_0:
}
#ifdef CONFIG_NFS_V4
-/*
- * Clears/puts all minor version specific parts from an nfs_client struct
- * reverting it to minorversion 0.
- */
-static void nfs4_clear_client_minor_version(struct nfs_client *clp)
-{
#ifdef CONFIG_NFS_V4_1
- if (nfs4_has_session(clp)) {
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+ if (nfs4_has_session(clp))
nfs4_destroy_session(clp->cl_session);
- clp->cl_session = NULL;
- }
-
- clp->cl_mvops = nfs_v4_minor_ops[0];
-#endif /* CONFIG_NFS_V4_1 */
}
+#else /* CONFIG_NFS_V4_1 */
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
/*
* Destroy the NFS4 callback service
@@ -199,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
nfs4_kill_renewd(clp);
- nfs4_clear_client_minor_version(clp);
+ nfs4_shutdown_session(clp);
nfs4_destroy_callback(clp);
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
}
+
+/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
+void nfs_cleanup_cb_ident_idr(void)
+{
+ idr_destroy(&cb_ident_idr);
+}
+
+/* nfs_client_lock held */
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+ if (clp->cl_cb_ident)
+ idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+ rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+}
+
#else
static void nfs4_shutdown_client(struct nfs_client *clp)
{
}
+
+void nfs_cleanup_cb_ident_idr(void)
+{
+}
+
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+}
+
#endif /* CONFIG_NFS_V4 */
/*
@@ -248,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp)
if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
list_del(&clp->cl_share_link);
+ nfs_cb_idr_remove_locked(clp);
spin_unlock(&nfs_client_lock);
BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -363,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
return 0;
}
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
-{
- struct nfs_client *clp;
-
- spin_lock(&nfs_client_lock);
- list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
- struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
-
- /* Don't match clients that failed to initialise properly */
- if (!(clp->cl_cons_state == NFS_CS_READY ||
- clp->cl_cons_state == NFS_CS_SESSION_INITING))
- continue;
-
- /* Different NFS versions cannot share the same nfs_client */
- if (clp->rpc_ops->version != nfsversion)
- continue;
-
- /* Match only the IP address, not the port number */
- if (!nfs_sockaddr_match_ipaddr(addr, clap))
- continue;
-
- atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
- return clp;
- }
- spin_unlock(&nfs_client_lock);
- return NULL;
-}
-
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
+/* Common match routine for v4.0 and v4.1 callback services */
+bool
+nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
+ u32 minorversion)
{
- struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
- u32 nfsvers = clp->rpc_ops->version;
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
- spin_lock(&nfs_client_lock);
- list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
- struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+ /* Don't match clients that failed to initialise */
+ if (!(clp->cl_cons_state == NFS_CS_READY ||
+ clp->cl_cons_state == NFS_CS_SESSION_INITING))
+ return false;
- /* Don't match clients that failed to initialise properly */
- if (clp->cl_cons_state != NFS_CS_READY)
- continue;
+ /* Match the version and minorversion */
+ if (clp->rpc_ops->version != 4 ||
+ clp->cl_minorversion != minorversion)
+ return false;
- /* Different NFS versions cannot share the same nfs_client */
- if (clp->rpc_ops->version != nfsvers)
- continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(addr, clap))
+ return false;
- /* Match only the IP address, not the port number */
- if (!nfs_sockaddr_match_ipaddr(sap, clap))
- continue;
-
- atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
- return clp;
- }
- spin_unlock(&nfs_client_lock);
- return NULL;
+ return true;
}
/*
@@ -988,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
target->options = source->options;
}
+static void nfs_server_insert_lists(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+}
+
+static void nfs_server_remove_lists(struct nfs_server *server)
+{
+ spin_lock(&nfs_client_lock);
+ list_del_rcu(&server->client_link);
+ list_del(&server->master_link);
+ spin_unlock(&nfs_client_lock);
+
+ synchronize_rcu();
+}
+
/*
* Allocate and initialise a server record
*/
@@ -1004,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
/* Zero out the NFS state stuff */
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
+ INIT_LIST_HEAD(&server->delegations);
atomic_set(&server->active, 0);
@@ -1019,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void)
return NULL;
}
+ pnfs_init_server(server);
+
return server;
}
@@ -1029,11 +1067,8 @@ void nfs_free_server(struct nfs_server *server)
{
dprintk("--> nfs_free_server()\n");
+ nfs_server_remove_lists(server);
unset_pnfs_layoutdriver(server);
- spin_lock(&nfs_client_lock);
- list_del(&server->client_link);
- list_del(&server->master_link);
- spin_unlock(&nfs_client_lock);
if (server->destroy != NULL)
server->destroy(server);
@@ -1108,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
nfs_free_fattr(fattr);
return server;
@@ -1125,6 +1156,101 @@ error:
#ifdef CONFIG_NFS_V4
/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by IP address, protocol version, and minorversion
+ *
+ * Called from the pg_authenticate method. The callback identifier
+ * is not used as it has not been decoded.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_no_ident(const struct sockaddr *addr)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ if (nfs4_cb_match_client(addr, clp, 0) == false)
+ continue;
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nfs_client_lock);
+ return NULL;
+}
+
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by callback identifier
+ */
+struct nfs_client *
+nfs4_find_client_ident(int cb_ident)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ clp = idr_find(&cb_ident_idr, cb_ident);
+ if (clp)
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 callback thread helper
+ * For CB_COMPOUND calls, find a client by IP address, protocol version,
+ * minorversion, and sessionID
+ *
+ * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
+ * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
+ * can arrive before the callback sessionid is set. For CB_NULL calls,
+ * find a client by IP address protocol version, and minorversion.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+ struct nfs4_sessionid *sid, int is_cb_compound)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ if (nfs4_cb_match_client(addr, clp, 1) == false)
+ continue;
+
+ if (!nfs4_has_session(clp))
+ continue;
+
+ /* Match sessionid unless cb_null call*/
+ if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
+ sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nfs_client_lock);
+ return NULL;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+ struct nfs4_sessionid *sid, int is_cb_compound)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
* Initialize the NFS4 callback service
*/
static int nfs4_init_callback(struct nfs_client *clp)
@@ -1342,11 +1468,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
out:
nfs_free_fattr(fattr);
@@ -1551,11 +1673,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (error < 0)
goto out_free_server;
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
+ nfs_server_insert_lists(server);
server->mount_time = jiffies;
nfs_free_fattr(fattr_fsinfo);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1fd62fc..364e432 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -40,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
}
+/**
+ * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
+ * @delegation: delegation to process
+ *
+ */
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
+/**
+ * nfs_have_delegation - check if inode has a delegation
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
int nfs_have_delegation(struct inode *inode, fmode_t flags)
{
struct nfs_delegation *delegation;
@@ -119,10 +131,15 @@ again:
return 0;
}
-/*
- * Set up a delegation on an inode
+/**
+ * nfs_inode_reclaim_delegation - process a delegation reclaim request
+ * @inode: inode to process
+ * @cred: credential to use for request
+ * @res: new delegation state from server
+ *
*/
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+ struct nfs_openres *res)
{
struct nfs_delegation *delegation;
struct rpc_cred *oldcred = NULL;
@@ -175,38 +192,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
return inode;
}
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
- const nfs4_stateid *stateid,
- struct nfs_client *clp)
+static struct nfs_delegation *
+nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+ struct nfs_server *server)
{
struct nfs_delegation *delegation =
rcu_dereference_protected(nfsi->delegation,
- lockdep_is_held(&clp->cl_lock));
+ lockdep_is_held(&server->nfs_client->cl_lock));
if (delegation == NULL)
goto nomatch;
+
spin_lock(&delegation->lock);
- if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
- goto nomatch_unlock;
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
nfsi->delegation_state = 0;
rcu_assign_pointer(nfsi->delegation, NULL);
spin_unlock(&delegation->lock);
return delegation;
-nomatch_unlock:
- spin_unlock(&delegation->lock);
nomatch:
return NULL;
}
-/*
- * Set up a delegation on an inode
+static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
+ struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs_delegation *delegation;
+
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(nfsi, server);
+ spin_unlock(&clp->cl_lock);
+ return delegation;
+}
+
+/**
+ * nfs_inode_set_delegation - set up a delegation on an inode
+ * @inode: inode to which delegation applies
+ * @cred: cred to use for subsequent delegation processing
+ * @res: new delegation state from server
+ *
+ * Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation, *old_delegation;
struct nfs_delegation *freeme = NULL;
@@ -227,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
spin_lock(&clp->cl_lock);
old_delegation = rcu_dereference_protected(nfsi->delegation,
- lockdep_is_held(&clp->cl_lock));
+ lockdep_is_held(&clp->cl_lock));
if (old_delegation != NULL) {
if (memcmp(&delegation->stateid, &old_delegation->stateid,
sizeof(old_delegation->stateid)) == 0 &&
@@ -246,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
+ freeme = nfs_detach_delegation_locked(nfsi, server);
}
- list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+ list_add_rcu(&delegation->super_list, &server->delegations);
nfsi->delegation_state = delegation->type;
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
@@ -290,73 +321,85 @@ out:
return err;
}
-/*
- * Return all delegations that have been marked for return
+/**
+ * nfs_client_return_marked_delegations - return previously marked delegations
+ * @clp: nfs_client to process
+ *
+ * Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
+ struct nfs_server *server;
struct inode *inode;
int err = 0;
restart:
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
- if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
- continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
- continue;
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
- spin_unlock(&clp->cl_lock);
- rcu_read_unlock();
- if (delegation != NULL) {
- filemap_flush(inode->i_mapping);
- err = __nfs_inode_return_delegation(inode, delegation, 0);
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry_rcu(delegation, &server->delegations,
+ super_list) {
+ if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
+ &delegation->flags))
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ server);
+ rcu_read_unlock();
+
+ if (delegation != NULL) {
+ filemap_flush(inode->i_mapping);
+ err = __nfs_inode_return_delegation(inode,
+ delegation, 0);
+ }
+ iput(inode);
+ if (!err)
+ goto restart;
+ set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+ return err;
}
- iput(inode);
- if (!err)
- goto restart;
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
- return err;
}
rcu_read_unlock();
return 0;
}
-/*
- * This function returns the delegation without reclaiming opens
- * or protecting against delegation reclaims.
- * It is therefore really only safe to be called from
- * nfs4_clear_inode()
+/**
+ * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * @inode: inode to process
+ *
+ * Does not protect against delegation reclaims, therefore really only safe
+ * to be called from nfs4_clear_inode().
*/
void nfs_inode_return_delegation_noreclaim(struct inode *inode)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
if (rcu_access_pointer(nfsi->delegation) != NULL) {
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
- spin_unlock(&clp->cl_lock);
+ delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation, 0);
}
}
+/**
+ * nfs_inode_return_delegation - synchronously return a delegation
+ * @inode: inode to process
+ *
+ * Returns zero on success, or a negative errno value.
+ */
int nfs_inode_return_delegation(struct inode *inode)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int err = 0;
if (rcu_access_pointer(nfsi->delegation) != NULL) {
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
- spin_unlock(&clp->cl_lock);
+ delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL) {
nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -365,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
return err;
}
-static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
{
+ struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
+
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
}
-/*
- * Return all delegations associated to a super block
+/**
+ * nfs_super_return_all_delegations - return delegations for one superblock
+ * @sb: sb to process
+ *
*/
void nfs_super_return_all_delegations(struct super_block *sb)
{
- struct nfs_client *clp = NFS_SB(sb)->nfs_client;
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
if (clp == NULL)
return;
+
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
- if (delegation->inode != NULL && delegation->inode->i_sb == sb)
- set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
spin_unlock(&delegation->lock);
}
rcu_read_unlock();
+
if (nfs_client_return_marked_delegations(clp) != 0)
nfs4_schedule_state_manager(clp);
}
-static
-void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+ fmode_t flags)
{
struct nfs_delegation *delegation;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
}
+}
+
+static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+ fmode_t flags)
+{
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_mark_return_all_delegation_types(server, flags);
rcu_read_unlock();
}
@@ -419,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
+/**
+ * nfs_expire_all_delegation_types
+ * @clp: client to process
+ * @flags: delegation types to expire
+ *
+ */
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
{
nfs_client_mark_return_all_delegation_types(clp, flags);
nfs_delegation_run_state_manager(clp);
}
+/**
+ * nfs_expire_all_delegations
+ * @clp: client to process
+ *
+ */
void nfs_expire_all_delegations(struct nfs_client *clp)
{
nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
}
-/*
- * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+/**
+ * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
*/
void nfs_handle_cb_pathdown(struct nfs_client *clp)
{
@@ -440,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
nfs_client_mark_return_all_delegations(clp);
}
-static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
+static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
}
- rcu_read_unlock();
}
+/**
+ * nfs_expire_unreferenced_delegations - Eliminate unused delegations
+ * @clp: nfs_client to process
+ *
+ */
void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
{
- nfs_client_mark_return_unreferenced_delegations(clp);
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_mark_return_unreferenced_delegations(server);
+ rcu_read_unlock();
+
nfs_delegation_run_state_manager(clp);
}
-/*
- * Asynchronous delegation recall!
+/**
+ * nfs_async_inode_return_delegation - asynchronously return a delegation
+ * @inode: inode to process
+ * @stateid: state ID information from CB_RECALL arguments
+ *
+ * Returns zero on success, or a negative errno value.
*/
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+int nfs_async_inode_return_delegation(struct inode *inode,
+ const nfs4_stateid *stateid)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_delegation *delegation;
@@ -474,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
rcu_read_unlock();
return -ENOENT;
}
-
- nfs_mark_return_delegation(clp, delegation);
+ nfs_mark_return_delegation(delegation);
rcu_read_unlock();
+
nfs_delegation_run_state_manager(clp);
return 0;
}
-/*
- * Retrieve the inode associated with a delegation
- */
-struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
+static struct inode *
+nfs_delegation_find_inode_server(struct nfs_server *server,
+ const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -499,49 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
if (res != NULL)
break;
}
+ return res;
+}
+
+/**
+ * nfs_delegation_find_inode - retrieve the inode associated with a delegation
+ * @clp: client state handle
+ * @fhandle: filehandle from a delegation recall
+ *
+ * Returns pointer to inode matching "fhandle," or NULL if a matching inode
+ * cannot be found.
+ */
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
+ const struct nfs_fh *fhandle)
+{
+ struct nfs_server *server;
+ struct inode *res = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ res = nfs_delegation_find_inode_server(server, fhandle);
+ if (res != NULL)
+ break;
+ }
rcu_read_unlock();
return res;
}
-/*
- * Mark all delegations as needing to be reclaimed
+static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
+{
+ struct nfs_delegation *delegation;
+
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list)
+ set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+}
+
+/**
+ * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
+ * @clp: nfs_client to process
+ *
*/
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
- struct nfs_delegation *delegation;
+ struct nfs_server *server;
+
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
- set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_delegation_mark_reclaim_server(server);
rcu_read_unlock();
}
-/*
- * Reap all unclaimed delegations after reboot recovery is done
+/**
+ * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
+ * @clp: nfs_client to process
+ *
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
+ struct nfs_server *server;
struct inode *inode;
+
restart:
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
- if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
- continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
- continue;
- spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
- spin_unlock(&clp->cl_lock);
- rcu_read_unlock();
- if (delegation != NULL)
- nfs_free_delegation(delegation);
- iput(inode);
- goto restart;
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry_rcu(delegation, &server->delegations,
+ super_list) {
+ if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+ &delegation->flags) == 0)
+ continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ server);
+ rcu_read_unlock();
+
+ if (delegation != NULL)
+ nfs_free_delegation(delegation);
+ iput(inode);
+ goto restart;
+ }
}
rcu_read_unlock();
}
+/**
+ * nfs_delegations_present - check for existence of delegations
+ * @clp: client state handle
+ *
+ * Returns one if there are any nfs_delegation structures attached
+ * to this nfs_client.
+ */
+int nfs_delegations_present(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ int ret = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ if (!list_empty(&server->delegations)) {
+ ret = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * nfs4_copy_delegation_stateid - Copy inode's state ID information
+ * @dst: stateid data structure to fill in
+ * @inode: inode to check
+ *
+ * Returns one and fills in "dst->data" * if inode had a delegation,
+ * otherwise zero is returned.
+ */
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2026304..d9322e4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
void nfs_handle_cb_pathdown(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
+int nfs_delegations_present(struct nfs_client *clp);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d33da53..abe4f0c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,8 +33,8 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/sched.h>
-#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
+#include <linux/xattr.h>
#include "delegation.h"
#include "iostat.h"
@@ -125,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
- .getxattr = nfs4_getxattr,
- .setxattr = nfs4_setxattr,
- .listxattr = nfs4_listxattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .listxattr = generic_listxattr,
+ .removexattr = generic_removexattr,
};
#endif /* CONFIG_NFS_V4 */
@@ -172,7 +173,7 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
-typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
typedef struct {
struct file *file;
struct page *page;
@@ -378,14 +379,14 @@ error:
return error;
}
-/* Fill in an entry based on the xdr code stored in desc->page */
-static
-int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
+static int xdr_decode(nfs_readdir_descriptor_t *desc,
+ struct nfs_entry *entry, struct xdr_stream *xdr)
{
- __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
- if (IS_ERR(p))
- return PTR_ERR(p);
+ int error;
+ error = desc->decode(xdr, entry, desc->plus);
+ if (error)
+ return error;
entry->fattr->time_start = desc->timestamp;
entry->fattr->gencount = desc->gencount;
return 0;
@@ -459,25 +460,26 @@ out:
/* Perform conversion from xdr to cache array */
static
int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
- void *xdr_page, struct page *page, unsigned int buflen)
+ struct page **xdr_pages, struct page *page, unsigned int buflen)
{
struct xdr_stream stream;
- struct xdr_buf buf;
- __be32 *ptr = xdr_page;
+ struct xdr_buf buf = {
+ .pages = xdr_pages,
+ .page_len = buflen,
+ .buflen = buflen,
+ .len = buflen,
+ };
+ struct page *scratch;
struct nfs_cache_array *array;
unsigned int count = 0;
int status;
- buf.head->iov_base = xdr_page;
- buf.head->iov_len = buflen;
- buf.tail->iov_len = 0;
- buf.page_base = 0;
- buf.page_len = 0;
- buf.buflen = buf.head->iov_len;
- buf.len = buf.head->iov_len;
-
- xdr_init_decode(&stream, &buf, ptr);
+ scratch = alloc_page(GFP_KERNEL);
+ if (scratch == NULL)
+ return -ENOMEM;
+ xdr_init_decode(&stream, &buf, NULL);
+ xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
status = xdr_decode(desc, entry, &stream);
@@ -506,6 +508,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
} else
status = PTR_ERR(array);
}
+
+ put_page(scratch);
return status;
}
@@ -521,7 +525,6 @@ static
void nfs_readdir_free_large_page(void *ptr, struct page **pages,
unsigned int npages)
{
- vm_unmap_ram(ptr, npages);
nfs_readdir_free_pagearray(pages, npages);
}
@@ -530,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
* to nfs_readdir_free_large_page
*/
static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
{
- void *ptr;
unsigned int i;
for (i = 0; i < npages; i++) {
@@ -541,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
goto out_freepages;
pages[i] = page;
}
+ return 0;
- ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
- if (!IS_ERR_OR_NULL(ptr))
- return ptr;
out_freepages:
nfs_readdir_free_pagearray(pages, i);
- return NULL;
+ return -ENOMEM;
}
static
@@ -566,6 +566,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
entry.eof = 0;
entry.fh = nfs_alloc_fhandle();
entry.fattr = nfs_alloc_fattr();
+ entry.server = NFS_SERVER(inode);
if (entry.fh == NULL || entry.fattr == NULL)
goto out;
@@ -577,8 +578,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
memset(array, 0, sizeof(struct nfs_cache_array));
array->eof_index = -1;
- pages_ptr = nfs_readdir_large_page(pages, array_size);
- if (!pages_ptr)
+ status = nfs_readdir_large_page(pages, array_size);
+ if (status < 0)
goto out_release_array;
do {
unsigned int pglen;
@@ -587,7 +588,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
if (status < 0)
break;
pglen = status;
- status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+ status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
if (status < 0) {
if (status == -ENOSPC)
status = 0;
@@ -1221,7 +1222,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
goto out_unblock_sillyrename;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- res = (struct dentry *)inode;
+ res = ERR_CAST(inode);
if (IS_ERR(res))
goto out_unblock_sillyrename;
@@ -1355,8 +1356,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
attr.ia_valid = ATTR_MODE;
- if (!IS_POSIXACL(dir))
- attr.ia_mode &= ~current_umask();
+ attr.ia_mode &= ~current_umask();
} else {
open_flags &= ~(O_EXCL | O_CREAT);
attr.ia_valid = 0;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 4e2d9b6..1869688 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -238,7 +238,7 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
return nfs_idmap_lookup_name(gid, "group", buf, buflen);
}
-#else /* CONFIG_NFS_USE_IDMAPPER not defined */
+#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
#include <linux/module.h>
#include <linux/mutex.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 017daa3..ce00b70 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1410,9 +1410,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
void nfs4_evict_inode(struct inode *inode)
{
+ pnfs_destroy_layout(NFS_I(inode));
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
- pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
@@ -1619,6 +1619,7 @@ static void __exit exit_nfs_fs(void)
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
+ nfs_cleanup_cb_ident_idr();
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e6356b7..bfa3a34 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -128,9 +128,13 @@ extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern struct rpc_program nfs_program;
+extern void nfs_cleanup_cb_ident_idr(void);
extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
-extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
+extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
+extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+ int);
extern struct nfs_server *nfs_create_server(
const struct nfs_parsed_mount_data *,
struct nfs_fh *);
@@ -185,17 +189,20 @@ extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
/* nfs2xdr.c */
-extern int nfs_stat_to_errno(int);
+extern int nfs_stat_to_errno(enum nfs_stat);
extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs2_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
/* nfs3xdr.c */
extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs3_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
/* nfs4xdr.c */
#ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs4_decode_dirent(struct xdr_stream *,
+ struct nfs_entry *, int);
#endif
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 4f981f1..d4c2d6b 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -236,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info)
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
};
- struct mountres result;
struct rpc_message msg = {
.rpc_argp = info->dirpath,
- .rpc_resp = &result,
};
struct rpc_clnt *clnt;
int status;
@@ -248,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
clnt = rpc_create(&args);
- if (unlikely(IS_ERR(clnt)))
+ if (IS_ERR(clnt))
goto out_clnt_err;
dprintk("NFS: sending UMNT request for %s:%s\n",
@@ -280,29 +278,20 @@ out_call_err:
* XDR encode/decode functions for MOUNT
*/
-static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
+static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
{
const u32 pathname_len = strlen(pathname);
__be32 *p;
- if (unlikely(pathname_len > MNTPATHLEN))
- return -EIO;
-
- p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
- if (unlikely(p == NULL))
- return -EIO;
+ BUG_ON(pathname_len > MNTPATHLEN);
+ p = xdr_reserve_space(xdr, 4 + pathname_len);
xdr_encode_opaque(p, pathname, pathname_len);
-
- return 0;
}
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
- const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const char *dirpath)
{
- struct xdr_stream xdr;
-
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- return encode_mntdirpath(&xdr, dirpath);
+ encode_mntdirpath(xdr, dirpath);
}
/*
@@ -320,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
u32 status;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(status));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- status = ntohl(*p);
+ status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
if (mnt_errtbl[i].status == status) {
@@ -351,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
return 0;
}
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
- struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct mountres *res)
{
- struct xdr_stream xdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
- status = decode_status(&xdr, res);
+ status = decode_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
- return decode_fhandle(&xdr, res);
+ return decode_fhandle(xdr, res);
}
static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -371,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
u32 status;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(status));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- status = ntohl(*p);
+ status = be32_to_cpup(p);
for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
if (mnt3_errtbl[i].status == status) {
@@ -394,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
u32 size;
__be32 *p;
- p = xdr_inline_decode(xdr, sizeof(size));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- size = ntohl(*p++);
+ size = be32_to_cpup(p);
if (size > NFS3_FHSIZE || size == 0)
return -EIO;
@@ -421,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
if (*count == 0)
return 0;
- p = xdr_inline_decode(xdr, sizeof(entries));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- entries = ntohl(*p);
+ entries = be32_to_cpup(p);
dprintk("NFS: received %u auth flavors\n", entries);
if (entries > NFS_MAX_SECFLAVORS)
entries = NFS_MAX_SECFLAVORS;
- p = xdr_inline_decode(xdr, sizeof(u32) * entries);
+ p = xdr_inline_decode(xdr, 4 * entries);
if (unlikely(p == NULL))
return -EIO;
@@ -437,7 +424,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
entries = *count;
for (i = 0; i < entries; i++) {
- flavors[i] = ntohl(*p++);
+ flavors[i] = be32_to_cpup(p++);
dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
}
*count = i;
@@ -445,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
return 0;
}
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
- struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct mountres *res)
{
- struct xdr_stream xdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
- status = decode_fhs_status(&xdr, res);
+ status = decode_fhs_status(xdr, res);
if (unlikely(status != 0 || res->errno != 0))
return status;
- status = decode_fhandle3(&xdr, res);
+ status = decode_fhandle3(xdr, res);
if (unlikely(status != 0)) {
res->errno = -EBADHANDLE;
return 0;
}
- return decode_auth_flavors(&xdr, res);
+ return decode_auth_flavors(xdr, res);
}
static struct rpc_procinfo mnt_procedures[] = {
[MOUNTPROC_MNT] = {
.p_proc = MOUNTPROC_MNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
- .p_decode = (kxdrproc_t)mnt_dec_mountres,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
+ .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres_sz,
.p_statidx = MOUNTPROC_MNT,
@@ -476,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = {
},
[MOUNTPROC_UMNT] = {
.p_proc = MOUNTPROC_UMNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC_UMNT,
.p_name = "UMOUNT",
@@ -486,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = {
static struct rpc_procinfo mnt3_procedures[] = {
[MOUNTPROC3_MNT] = {
.p_proc = MOUNTPROC3_MNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
- .p_decode = (kxdrproc_t)mnt_dec_mountres3,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
+ .p_decode = (kxdrdproc_t)mnt_xdr_dec_mountres3,
.p_arglen = MNT_enc_dirpath_sz,
.p_replen = MNT_dec_mountres3_sz,
.p_statidx = MOUNTPROC3_MNT,
@@ -495,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
},
[MOUNTPROC3_UMNT] = {
.p_proc = MOUNTPROC3_UMNT,
- .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_encode = (kxdreproc_t)mnt_xdr_enc_dirpath,
.p_arglen = MNT_enc_dirpath_sz,
.p_statidx = MOUNTPROC3_UMNT,
.p_name = "UMOUNT",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5914a19..792cb13 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,584 +61,1008 @@
#define NFS_readdirres_sz (1)
#define NFS_statfsres_sz (1+NFS_info_sz)
+
/*
- * Common NFS XDR functions as inlines
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
*/
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fhandle)
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int bufsize)
{
- memcpy(p, fhandle->data, NFS2_FHSIZE);
- return p + XDR_QUADLEN(NFS2_FHSIZE);
+ struct rpc_auth *auth = req->rq_cred->cr_auth;
+ unsigned int replen;
+
+ replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
}
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fhandle)
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
- /* NFSv2 handles have a fixed length */
- fhandle->size = NFS2_FHSIZE;
- memcpy(fhandle->data, p, NFS2_FHSIZE);
- return p + XDR_QUADLEN(NFS2_FHSIZE);
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NFSv2 basic data types
+ *
+ * Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+/*
+ * typedef opaque nfsdata<>;
+ */
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+{
+ u32 recvd, count;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, count);
+ result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. */
+ result->count = count;
+ return count;
+out_cheating:
+ dprintk("NFS: server cheating in read result: "
+ "count %u > recvd %u\n", count, recvd);
+ count = recvd;
+ goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * enum stat {
+ * NFS_OK = 0,
+ * NFSERR_PERM = 1,
+ * NFSERR_NOENT = 2,
+ * NFSERR_IO = 5,
+ * NFSERR_NXIO = 6,
+ * NFSERR_ACCES = 13,
+ * NFSERR_EXIST = 17,
+ * NFSERR_NODEV = 19,
+ * NFSERR_NOTDIR = 20,
+ * NFSERR_ISDIR = 21,
+ * NFSERR_FBIG = 27,
+ * NFSERR_NOSPC = 28,
+ * NFSERR_ROFS = 30,
+ * NFSERR_NAMETOOLONG = 63,
+ * NFSERR_NOTEMPTY = 66,
+ * NFSERR_DQUOT = 69,
+ * NFSERR_STALE = 70,
+ * NFSERR_WFLUSH = 99
+ * };
+ */
+static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *status = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static inline __be32*
-xdr_encode_time(__be32 *p, struct timespec *timep)
+/*
+ * 2.3.2. ftype
+ *
+ * enum ftype {
+ * NFNON = 0,
+ * NFREG = 1,
+ * NFDIR = 2,
+ * NFBLK = 3,
+ * NFCHR = 4,
+ * NFLNK = 5
+ * };
+ *
+ */
+static __be32 *xdr_decode_ftype(__be32 *p, u32 *type)
{
- *p++ = htonl(timep->tv_sec);
- /* Convert nanoseconds into microseconds */
- *p++ = htonl(timep->tv_nsec ? timep->tv_nsec / 1000 : 0);
+ *type = be32_to_cpup(p++);
+ if (unlikely(*type > NF2FIFO))
+ *type = NFBAD;
return p;
}
-static inline __be32*
-xdr_encode_current_server_time(__be32 *p, struct timespec *timep)
+/*
+ * 2.3.3. fhandle
+ *
+ * typedef opaque fhandle[FHSIZE];
+ */
+static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh)
{
- /*
- * Passing the invalid value useconds=1000000 is a
- * Sun convention for "set to current server time".
- * It's needed to make permissions checks for the
- * "touch" program across v2 mounts to Solaris and
- * Irix boxes work correctly. See description of
- * sattr in section 6.1 of "NFS Illustrated" by
- * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
- */
- *p++ = htonl(timep->tv_sec);
- *p++ = htonl(1000000);
+ __be32 *p;
+
+ BUG_ON(fh->size != NFS2_FHSIZE);
+ p = xdr_reserve_space(xdr, NFS2_FHSIZE);
+ memcpy(p, fh->data, NFS2_FHSIZE);
+}
+
+static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS2_FHSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ fh->size = NFS2_FHSIZE;
+ memcpy(fh->data, p, NFS2_FHSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * 2.3.4. timeval
+ *
+ * struct timeval {
+ * unsigned int seconds;
+ * unsigned int useconds;
+ * };
+ */
+static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep)
+{
+ *p++ = cpu_to_be32(timep->tv_sec);
+ if (timep->tv_nsec != 0)
+ *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC);
+ else
+ *p++ = cpu_to_be32(0);
return p;
}
-static inline __be32*
-xdr_decode_time(__be32 *p, struct timespec *timep)
+/*
+ * Passing the invalid value useconds=1000000 is a Sun convention for
+ * "set to current server time". It's needed to make permissions checks
+ * for the "touch" program across v2 mounts to Solaris and Irix servers
+ * work correctly. See description of sattr in section 6.1 of "NFS
+ * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
+ */
+static __be32 *xdr_encode_current_server_time(__be32 *p,
+ const struct timespec *timep)
{
- timep->tv_sec = ntohl(*p++);
- /* Convert microseconds into nanoseconds */
- timep->tv_nsec = ntohl(*p++) * 1000;
+ *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32(1000000);
return p;
}
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
+{
+ timep->tv_sec = be32_to_cpup(p++);
+ timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC;
+ return p;
+}
+
+/*
+ * 2.3.5. fattr
+ *
+ * struct fattr {
+ * ftype type;
+ * unsigned int mode;
+ * unsigned int nlink;
+ * unsigned int uid;
+ * unsigned int gid;
+ * unsigned int size;
+ * unsigned int blocksize;
+ * unsigned int rdev;
+ * unsigned int blocks;
+ * unsigned int fsid;
+ * unsigned int fileid;
+ * timeval atime;
+ * timeval mtime;
+ * timeval ctime;
+ * };
+ *
+ */
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
u32 rdev, type;
- type = ntohl(*p++);
- fattr->mode = ntohl(*p++);
- fattr->nlink = ntohl(*p++);
- fattr->uid = ntohl(*p++);
- fattr->gid = ntohl(*p++);
- fattr->size = ntohl(*p++);
- fattr->du.nfs2.blocksize = ntohl(*p++);
- rdev = ntohl(*p++);
- fattr->du.nfs2.blocks = ntohl(*p++);
- fattr->fsid.major = ntohl(*p++);
- fattr->fsid.minor = 0;
- fattr->fileid = ntohl(*p++);
- p = xdr_decode_time(p, &fattr->atime);
- p = xdr_decode_time(p, &fattr->mtime);
- p = xdr_decode_time(p, &fattr->ctime);
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
fattr->valid |= NFS_ATTR_FATTR_V2;
+
+ p = xdr_decode_ftype(p, &type);
+
+ fattr->mode = be32_to_cpup(p++);
+ fattr->nlink = be32_to_cpup(p++);
+ fattr->uid = be32_to_cpup(p++);
+ fattr->gid = be32_to_cpup(p++);
+ fattr->size = be32_to_cpup(p++);
+ fattr->du.nfs2.blocksize = be32_to_cpup(p++);
+
+ rdev = be32_to_cpup(p++);
fattr->rdev = new_decode_dev(rdev);
- if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
+ if (type == (u32)NFCHR && rdev == (u32)NFS2_FIFO_DEV) {
fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
fattr->rdev = 0;
}
+
+ fattr->du.nfs2.blocks = be32_to_cpup(p++);
+ fattr->fsid.major = be32_to_cpup(p++);
+ fattr->fsid.minor = 0;
+ fattr->fileid = be32_to_cpup(p++);
+
+ p = xdr_decode_time(p, &fattr->atime);
+ p = xdr_decode_time(p, &fattr->mtime);
+ xdr_decode_time(p, &fattr->ctime);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * 2.3.6. sattr
+ *
+ * struct sattr {
+ * unsigned int mode;
+ * unsigned int uid;
+ * unsigned int gid;
+ * unsigned int size;
+ * timeval atime;
+ * timeval mtime;
+ * };
+ */
+
+#define NFS2_SATTR_NOT_SET (0xffffffff)
+
+static __be32 *xdr_time_not_set(__be32 *p)
+{
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
return p;
}
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
{
- const __be32 not_set = __constant_htonl(0xFFFFFFFF);
+ __be32 *p;
- *p++ = (attr->ia_valid & ATTR_MODE) ? htonl(attr->ia_mode) : not_set;
- *p++ = (attr->ia_valid & ATTR_UID) ? htonl(attr->ia_uid) : not_set;
- *p++ = (attr->ia_valid & ATTR_GID) ? htonl(attr->ia_gid) : not_set;
- *p++ = (attr->ia_valid & ATTR_SIZE) ? htonl(attr->ia_size) : not_set;
+ p = xdr_reserve_space(xdr, NFS_sattr_sz << 2);
- if (attr->ia_valid & ATTR_ATIME_SET) {
+ if (attr->ia_valid & ATTR_MODE)
+ *p++ = cpu_to_be32(attr->ia_mode);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_UID)
+ *p++ = cpu_to_be32(attr->ia_uid);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_GID)
+ *p++ = cpu_to_be32(attr->ia_gid);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+ if (attr->ia_valid & ATTR_SIZE)
+ *p++ = cpu_to_be32((u32)attr->ia_size);
+ else
+ *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
+
+ if (attr->ia_valid & ATTR_ATIME_SET)
p = xdr_encode_time(p, &attr->ia_atime);
- } else if (attr->ia_valid & ATTR_ATIME) {
+ else if (attr->ia_valid & ATTR_ATIME)
p = xdr_encode_current_server_time(p, &attr->ia_atime);
- } else {
- *p++ = not_set;
- *p++ = not_set;
- }
-
- if (attr->ia_valid & ATTR_MTIME_SET) {
- p = xdr_encode_time(p, &attr->ia_mtime);
- } else if (attr->ia_valid & ATTR_MTIME) {
- p = xdr_encode_current_server_time(p, &attr->ia_mtime);
- } else {
- *p++ = not_set;
- *p++ = not_set;
- }
- return p;
+ else
+ p = xdr_time_not_set(p);
+ if (attr->ia_valid & ATTR_MTIME_SET)
+ xdr_encode_time(p, &attr->ia_mtime);
+ else if (attr->ia_valid & ATTR_MTIME)
+ xdr_encode_current_server_time(p, &attr->ia_mtime);
+ else
+ xdr_time_not_set(p);
}
/*
- * NFS encode functions
+ * 2.3.7. filename
+ *
+ * typedef string filename<MAXNAMLEN>;
*/
+static void encode_filename(struct xdr_stream *xdr,
+ const char *name, u32 length)
+{
+ __be32 *p;
+
+ BUG_ON(length > NFS2_MAXNAMLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
+}
+
+static int decode_filename_inline(struct xdr_stream *xdr,
+ const char **name, u32 *length)
+{
+ __be32 *p;
+ u32 count;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (count > NFS3_MAXNAMLEN)
+ goto out_nametoolong;
+ p = xdr_inline_decode(xdr, count);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *name = (const char *)p;
+ *length = count;
+ return 0;
+out_nametoolong:
+ dprintk("NFS: returned filename too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
/*
- * Encode file handle argument
- * GETATTR, READLINK, STATFS
+ * 2.3.8. path
+ *
+ * typedef string path<MAXPATHLEN>;
*/
-static int
-nfs_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length)
{
- p = xdr_encode_fhandle(p, fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ __be32 *p;
+
+ BUG_ON(length > NFS2_MAXPATHLEN);
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(length);
+ xdr_write_pages(xdr, pages, 0, length);
+}
+
+static int decode_path(struct xdr_stream *xdr)
+{
+ u32 length, recvd;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p);
+ if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
+ goto out_size;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(length > recvd))
+ goto out_cheating;
+
+ xdr_read_pages(xdr, length);
+ xdr_terminate_string(xdr->buf, length);
return 0;
+out_size:
+ dprintk("NFS: returned pathname too long: %u\n", length);
+ return -ENAMETOOLONG;
+out_cheating:
+ dprintk("NFS: server cheating in pathname result: "
+ "length %u > received %u\n", length, recvd);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
/*
- * Encode SETATTR arguments
+ * 2.3.9. attrstat
+ *
+ * union attrstat switch (stat status) {
+ * case NFS_OK:
+ * fattr attributes;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs_sattrargs *args)
+static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_fattr(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Encode directory ops argument
- * LOOKUP, RMDIR
+ * 2.3.10. diropargs
+ *
+ * struct diropargs {
+ * fhandle dir;
+ * filename name;
+ * };
*/
-static int
-nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
+static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
+ const char *name, u32 length)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_fhandle(xdr, fh);
+ encode_filename(xdr, name, length);
}
/*
- * Encode REMOVE argument
+ * 2.3.11. diropres
+ *
+ * union diropres switch (stat status) {
+ * case NFS_OK:
+ * struct {
+ * fhandle file;
+ * fattr attributes;
+ * } diropok;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name.name, args->name.len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ int error;
+
+ error = decode_fhandle(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_fattr(xdr, result->fattr);
+out:
+ return error;
+}
+
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_diropok(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
+
/*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * NFSv2 XDR encode functions
+ *
+ * NFSv2 argument types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
*/
-static int
-nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+
+static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_fh *fh)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 offset = (u32)args->offset;
+ encode_fhandle(xdr, fh);
+}
+
+/*
+ * 2.2.3. sattrargs
+ *
+ * struct sattrargs {
+ * fhandle file;
+ * sattr attributes;
+ * };
+ */
+static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_sattrargs *args)
+{
+ encode_fhandle(xdr, args->fh);
+ encode_sattr(xdr, args->sattr);
+}
+
+static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_diropargs *args)
+{
+ encode_diropargs(xdr, args->fh, args->name, args->len);
+}
+
+static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readlinkargs *args)
+{
+ encode_fhandle(xdr, args->fh);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->pglen, NFS_readlinkres_sz);
+}
+
+/*
+ * 2.2.7. readargs
+ *
+ * struct readargs {
+ * fhandle file;
+ * unsigned offset;
+ * unsigned count;
+ * unsigned totalcount;
+ * };
+ */
+static void encode_readargs(struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ u32 offset = args->offset;
u32 count = args->count;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(offset);
- *p++ = htonl(count);
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ encode_fhandle(xdr, args->fh);
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen,
- args->pages, args->pgbase, count);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(count);
+ *p = cpu_to_be32(count);
+}
+
+static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ encode_readargs(xdr, args);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->count, NFS_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
- return 0;
}
/*
- * Decode READ reply
+ * 2.2.9. writeargs
+ *
+ * struct writeargs {
+ * fhandle file;
+ * unsigned beginoffset;
+ * unsigned offset;
+ * unsigned totalcount;
+ * nfsdata data;
+ * };
*/
-static int
-nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
+static void encode_writeargs(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct kvec *iov = req->rq_rcv_buf.head;
- size_t hdrlen;
- u32 count, recvd;
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- p = xdr_decode_fattr(p, res->fattr);
-
- count = ntohl(*p++);
- res->eof = 0;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READ reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READ header is short. iovec will be shifted.\n");
- xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
- }
+ u32 offset = args->offset;
+ u32 count = args->count;
+ __be32 *p;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (count > recvd) {
- dprintk("NFS: server cheating in read reply: "
- "count %u > recvd %u\n", count, recvd);
- count = recvd;
- }
+ encode_fhandle(xdr, args->fh);
- dprintk("RPC: readres OK count %u\n", count);
- if (count < res->count)
- res->count = count;
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(offset);
+ *p++ = cpu_to_be32(count);
- return count;
+ /* nfsdata */
+ *p = cpu_to_be32(count);
+ xdr_write_pages(xdr, args->pages, args->pgbase, count);
}
+static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
+{
+ encode_writeargs(xdr, args);
+ xdr->buf->flags |= XDRBUF_WRITE;
+}
/*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 2.2.10. createargs
+ *
+ * struct createargs {
+ * diropargs where;
+ * sattr attributes;
+ * };
*/
-static int
-nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_createargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- u32 offset = (u32)args->offset;
- u32 count = args->count;
-
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(offset);
- *p++ = htonl(offset);
- *p++ = htonl(count);
- *p++ = htonl(count);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+ encode_diropargs(xdr, args->fh, args->name, args->len);
+ encode_sattr(xdr, args->sattr);
+}
- /* Copy the page array */
- xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
- sndbuf->flags |= XDRBUF_WRITE;
- return 0;
+static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
+{
+ encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
}
/*
- * Encode create arguments
- * CREATE, MKDIR
+ * 2.2.12. renameargs
+ *
+ * struct renameargs {
+ * diropargs from;
+ * diropargs to;
+ * };
*/
-static int
-nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
+static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ const struct qstr *old = args->old_name;
+ const struct qstr *new = args->new_name;
+
+ encode_diropargs(xdr, args->old_dir, old->name, old->len);
+ encode_diropargs(xdr, args->new_dir, new->name, new->len);
}
/*
- * Encode RENAME arguments
+ * 2.2.13. linkargs
+ *
+ * struct linkargs {
+ * fhandle from;
+ * diropargs to;
+ * };
*/
-static int
-nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
+static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_linkargs *args)
{
- p = xdr_encode_fhandle(p, args->old_dir);
- p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
- p = xdr_encode_fhandle(p, args->new_dir);
- p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_fhandle(xdr, args->fromfh);
+ encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
}
/*
- * Encode LINK arguments
+ * 2.2.14. symlinkargs
+ *
+ * struct symlinkargs {
+ * diropargs from;
+ * path to;
+ * sattr attributes;
+ * };
*/
-static int
-nfs_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs_linkargs *args)
+static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_symlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_fhandle(p, args->tofh);
- p = xdr_encode_array(p, args->toname, args->tolen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
+ encode_path(xdr, args->pages, args->pathlen);
+ encode_sattr(xdr, args->sattr);
}
/*
- * Encode SYMLINK arguments
+ * 2.2.17. readdirargs
+ *
+ * struct readdirargs {
+ * fhandle dir;
+ * nfscookie cookie;
+ * unsigned count;
+ * };
*/
-static int
-nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *args)
+static void encode_readdirargs(struct xdr_stream *xdr,
+ const struct nfs_readdirargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- size_t pad;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_array(p, args->fromname, args->fromlen);
- *p++ = htonl(args->pathlen);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+ encode_fhandle(xdr, args->fh);
- xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+ p = xdr_reserve_space(xdr, 4 + 4);
+ *p++ = cpu_to_be32(args->cookie);
+ *p = cpu_to_be32(args->count);
+}
- /*
- * xdr_encode_pages may have added a few bytes to ensure the
- * pathname ends on a 4-byte boundary. Start encoding the
- * attributes after the pad bytes.
- */
- pad = sndbuf->tail->iov_len;
- if (pad > 0)
- p++;
- p = xdr_encode_sattr(p, args->sattr);
- sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
- return 0;
+static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readdirargs *args)
+{
+ encode_readdirargs(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS_readdirres_sz);
}
/*
- * Encode arguments to readdir call
+ * NFSv2 XDR decode functions
+ *
+ * NFSv2 result types are defined in section 2.2 of RFC 1094:
+ * "NFS: Network File System Protocol Specification".
*/
-static int
-nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
+
+static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->cookie);
- *p++ = htonl(count); /* see above */
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_fattr *result)
+{
+ return decode_attrstat(xdr, result);
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
- return 0;
+static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_diropok *result)
+{
+ return decode_diropres(xdr, result);
}
/*
- * Decode the result of a readdir call.
- * We're not really decoding anymore, we just leave the buffer untouched
- * and only check that it is syntactically correct.
- * The real decoding happens in nfs_decode_entry below, called directly
- * from nfs_readdir for each entry.
+ * 2.2.6. readlinkres
+ *
+ * union readlinkres switch (stat status) {
+ * case NFS_OK:
+ * path data;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *__unused)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- struct page **page;
- size_t hdrlen;
- unsigned int pglen, recvd;
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
-
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READDIR reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_path(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- pglen = rcvbuf->page_len;
- recvd = rcvbuf->len - hdrlen;
- if (pglen > recvd)
- pglen = recvd;
- page = rcvbuf->pages;
- return pglen;
+/*
+ * 2.2.7. readres
+ *
+ * union readres switch (stat status) {
+ * case NFS_OK:
+ * fattr attributes;
+ * nfsdata data;
+ * default:
+ * void;
+ * };
+ */
+static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_fattr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_nfsdata(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- dprintk("nfs: %s: prematurely hit end of receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
+ /* All NFSv2 writes are "file sync" writes */
+ result->verf->committed = NFS_FILE_SYNC;
+ return decode_attrstat(xdr, result->fattr);
}
-__be32 *
-nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/**
+ * nfs2_decode_dirent - Decode a single NFSv2 directory entry stored in
+ * the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 2.2.17. entry
+ *
+ * struct entry {
+ * unsigned fileid;
+ * filename name;
+ * nfscookie cookie;
+ * entry *nextentry;
+ * };
+ */
+int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
{
__be32 *p;
+ int error;
+
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (!ntohl(*p++)) {
+ if (*p++ == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
+ if (*p++ == xdr_zero)
+ return -EAGAIN;
entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
+ return -EBADCOOKIE;
}
- p = xdr_inline_decode(xdr, 8);
- if (unlikely(!p))
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
goto out_overflow;
+ entry->ino = be32_to_cpup(p);
- entry->ino = ntohl(*p++);
- entry->len = ntohl(*p++);
+ error = decode_filename_inline(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+ return error;
- p = xdr_inline_decode(xdr, entry->len + 4);
- if (unlikely(!p))
+ /*
+ * The type (size and byte order) of nfscookie isn't defined in
+ * RFC 1094. This implementation assumes that it's an XDR uint32.
+ */
+ entry->prev_cookie = entry->cookie;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
goto out_overflow;
- entry->name = (const char *) p;
- p += XDR_QUADLEN(entry->len);
- entry->prev_cookie = entry->cookie;
- entry->cookie = ntohl(*p++);
+ entry->cookie = be32_to_cpup(p);
entry->d_type = DT_UNKNOWN;
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
- return p;
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EAGAIN);
-}
-
-/*
- * NFS XDR decode functions
- */
-/*
- * Decode simple status reply
- */
-static int
-nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
-{
- int status;
-
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- return status;
+ return -EAGAIN;
}
/*
- * Decode attrstat reply
- * GETATTR, SETATTR, WRITE
- */
-static int
-nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
-{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- xdr_decode_fattr(p, fattr);
- return 0;
-}
-
-/*
- * Decode diropres reply
- * LOOKUP, CREATE, MKDIR
+ * 2.2.17. readdirres
+ *
+ * union readdirres switch (stat status) {
+ * case NFS_OK:
+ * struct {
+ * entry *entries;
+ * bool eof;
+ * } readdirok;
+ * default:
+ * void;
+ * };
+ *
+ * Read the directory contents into the page cache, but don't
+ * touch them. The actual decoding is done by nfs2_decode_dirent()
+ * during subsequent nfs_readdir() calls.
*/
-static int
-nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
+static int decode_readdirok(struct xdr_stream *xdr)
{
- int status;
+ u32 recvd, pglen;
+ size_t hdrlen;
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- p = xdr_decode_fhandle(p, res->fh);
- xdr_decode_fattr(p, res->fattr);
- return 0;
+ pglen = xdr->buf->page_len;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(pglen > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, pglen);
+ return pglen;
+out_cheating:
+ dprintk("NFS: server cheating in readdir result: "
+ "pglen %u > recvd %u\n", pglen, recvd);
+ pglen = recvd;
+ goto out;
}
-/*
- * Encode READLINK args
- */
-static int
-nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
+static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *__unused)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
-
- p = xdr_encode_fhandle(p, args->fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_readdirok(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READLINK reply
+ * 2.2.18. statfsres
+ *
+ * union statfsres (stat status) {
+ * case NFS_OK:
+ * struct {
+ * unsigned tsize;
+ * unsigned bsize;
+ * unsigned blocks;
+ * unsigned bfree;
+ * unsigned bavail;
+ * } info;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
+static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
- u32 len, recvd;
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- /* Convert length of symlink */
- len = ntohl(*p++);
- if (len >= rcvbuf->page_len) {
- dprintk("nfs: server returned giant symlink!\n");
- return -ENAMETOOLONG;
- }
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READLINK reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (recvd < len) {
- dprintk("NFS: server cheating in readlink reply: "
- "count %u > recvd %u\n", len, recvd);
- return -EIO;
- }
+ __be32 *p;
- xdr_terminate_string(rcvbuf, len);
+ p = xdr_inline_decode(xdr, NFS_info_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->tsize = be32_to_cpup(p++);
+ result->bsize = be32_to_cpup(p++);
+ result->blocks = be32_to_cpup(p++);
+ result->bfree = be32_to_cpup(p++);
+ result->bavail = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-/*
- * Decode WRITE reply
- */
-static int
-nfs_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
+static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs2_fsstat *result)
{
- res->verf->committed = NFS_FILE_SYNC;
- return nfs_xdr_attrstat(req, p, res->fattr);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_stat(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS_OK)
+ goto out_default;
+ error = decode_info(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-/*
- * Decode STATFS reply
- */
-static int
-nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
-{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
-
- res->tsize = ntohl(*p++);
- res->bsize = ntohl(*p++);
- res->blocks = ntohl(*p++);
- res->bfree = ntohl(*p++);
- res->bavail = ntohl(*p++);
- return 0;
-}
/*
* We need to translate between nfs status return values and
* the local errno values which may not be the same.
*/
-static struct {
+static const struct {
int stat;
int errno;
} nfs_errtbl[] = {
@@ -678,28 +1102,30 @@ static struct {
{ -1, -EIO }
};
-/*
- * Convert an NFS error code to a local one.
- * This one is used jointly by NFSv2 and NFSv3.
+/**
+ * nfs_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized. This function is used jointly by NFSv2 and NFSv3.
*/
-int
-nfs_stat_to_errno(int stat)
+int nfs_stat_to_errno(enum nfs_stat status)
{
int i;
for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == stat)
+ if (nfs_errtbl[i].stat == (int)status)
return nfs_errtbl[i].errno;
}
- dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+ dprintk("NFS: Unrecognized nfs status value: %u\n", status);
return nfs_errtbl[i].errno;
}
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
- .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
+ .p_encode = (kxdreproc_t)nfs2_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs2_xdr_dec_##restype, \
.p_arglen = NFS_##argtype##_sz, \
.p_replen = NFS_##restype##_sz, \
.p_timer = timer, \
@@ -707,21 +1133,21 @@ nfs_stat_to_errno(int stat)
.p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
- PROC(GETATTR, fhandle, attrstat, 1),
- PROC(SETATTR, sattrargs, attrstat, 0),
- PROC(LOOKUP, diropargs, diropres, 2),
- PROC(READLINK, readlinkargs, readlinkres, 3),
- PROC(READ, readargs, readres, 3),
- PROC(WRITE, writeargs, writeres, 4),
- PROC(CREATE, createargs, diropres, 0),
- PROC(REMOVE, removeargs, stat, 0),
- PROC(RENAME, renameargs, stat, 0),
- PROC(LINK, linkargs, stat, 0),
- PROC(SYMLINK, symlinkargs, stat, 0),
- PROC(MKDIR, createargs, diropres, 0),
- PROC(RMDIR, diropargs, stat, 0),
- PROC(READDIR, readdirargs, readdirres, 3),
- PROC(STATFS, fhandle, statfsres, 0),
+ PROC(GETATTR, fhandle, attrstat, 1),
+ PROC(SETATTR, sattrargs, attrstat, 0),
+ PROC(LOOKUP, diropargs, diropres, 2),
+ PROC(READLINK, readlinkargs, readlinkres, 3),
+ PROC(READ, readargs, readres, 3),
+ PROC(WRITE, writeargs, writeres, 4),
+ PROC(CREATE, createargs, diropres, 0),
+ PROC(REMOVE, removeargs, stat, 0),
+ PROC(RENAME, renameargs, stat, 0),
+ PROC(LINK, linkargs, stat, 0),
+ PROC(SYMLINK, symlinkargs, stat, 0),
+ PROC(MKDIR, createargs, diropres, 0),
+ PROC(RMDIR, diropargs, stat, 0),
+ PROC(READDIR, readdirargs, readdirres, 3),
+ PROC(STATFS, fhandle, statfsres, 0),
};
struct rpc_version nfs_version2 = {
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f..01c5e8b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -37,18 +37,16 @@
#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
#define NFS3_fattr_sz (21)
-#define NFS3_wcc_attr_sz (6)
+#define NFS3_cookieverf_sz (NFS3_COOKIEVERFSIZE>>2)
+#define NFS3_wcc_attr_sz (6)
#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz)
#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz)
-#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
-#define NFS3_fsstat_sz
-#define NFS3_fsinfo_sz
-#define NFS3_pathconf_sz
-#define NFS3_entry_sz (NFS3_filename_sz+3)
-
-#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz)
-#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz)
+
+#define NFS3_getattrargs_sz (NFS3_fh_sz)
+#define NFS3_setattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_accessargs_sz (NFS3_fh_sz+1)
#define NFS3_readlinkargs_sz (NFS3_fh_sz)
#define NFS3_readargs_sz (NFS3_fh_sz+3)
@@ -57,14 +55,16 @@
#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
+#define NFS3_removeargs_sz (NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
-#define NFS3_readdirargs_sz (NFS3_fh_sz+2)
+#define NFS3_readdirargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+3)
+#define NFS3_readdirplusargs_sz (NFS3_fh_sz+NFS3_cookieverf_sz+4)
#define NFS3_commitargs_sz (NFS3_fh_sz+3)
-#define NFS3_attrstat_sz (1+NFS3_fattr_sz)
-#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz)
-#define NFS3_removeres_sz (NFS3_wccstat_sz)
+#define NFS3_getattrres_sz (1+NFS3_fattr_sz)
+#define NFS3_setattrres_sz (1+NFS3_wcc_data_sz)
+#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
@@ -100,1079 +100,2362 @@ static const umode_t nfs_type2fmt[] = {
[NF3FIFO] = S_IFIFO,
};
+/*
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
+ */
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int bufsize)
+{
+ struct rpc_auth *auth = req->rq_cred->cr_auth;
+ unsigned int replen;
+
+ replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
{
- dprintk("nfs: %s: prematurely hit end of receive buffer. "
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
"Remaining buffer length is %tu words.\n",
func, xdr->end - xdr->p);
}
+
/*
- * Common NFS XDR functions as inlines
+ * Encode/decode NFSv3 basic data types
+ *
+ * Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions. For run-time efficiency, some data types are encoded
+ * or decoded inline.
*/
-static inline __be32 *
-xdr_encode_fhandle(__be32 *p, const struct nfs_fh *fh)
+
+static void encode_uint32(struct xdr_stream *xdr, u32 value)
{
- return xdr_encode_array(p, fh->data, fh->size);
+ __be32 *p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(value);
}
-static inline __be32 *
-xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
+static int decode_uint32(struct xdr_stream *xdr, u32 *value)
{
- if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) {
- memcpy(fh->data, p, fh->size);
- return p + XDR_QUADLEN(fh->size);
- }
- return NULL;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *value = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_uint64(struct xdr_stream *xdr, u64 *value)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ xdr_decode_hyper(p, value);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * fileid3
+ *
+ * typedef uint64 fileid3;
+ */
+static __be32 *xdr_decode_fileid3(__be32 *p, u64 *fileid)
+{
+ return xdr_decode_hyper(p, fileid);
+}
+
+static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid)
+{
+ return decode_uint64(xdr, fileid);
+}
+
+/*
+ * filename3
+ *
+ * typedef string filename3<>;
+ */
+static void encode_filename3(struct xdr_stream *xdr,
+ const char *name, u32 length)
+{
+ __be32 *p;
+
+ BUG_ON(length > NFS3_MAXNAMLEN);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, name, length);
}
-static inline __be32 *
-xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
+static int decode_inline_filename3(struct xdr_stream *xdr,
+ const char **name, u32 *length)
{
__be32 *p;
+ u32 count;
+
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (count > NFS3_MAXNAMLEN)
+ goto out_nametoolong;
+ p = xdr_inline_decode(xdr, count);
+ if (unlikely(p == NULL))
goto out_overflow;
- fh->size = ntohl(*p++);
+ *name = (const char *)p;
+ *length = count;
+ return 0;
- if (fh->size <= NFS3_FHSIZE) {
- p = xdr_inline_decode(xdr, fh->size);
- if (unlikely(!p))
- goto out_overflow;
- memcpy(fh->data, p, fh->size);
- return p + XDR_QUADLEN(fh->size);
- }
- return NULL;
+out_nametoolong:
+ dprintk("NFS: returned filename too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * nfspath3
+ *
+ * typedef string nfspath3<>;
+ */
+static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
+ const u32 length)
+{
+ BUG_ON(length > NFS3_MAXPATHLEN);
+ encode_uint32(xdr, length);
+ xdr_write_pages(xdr, pages, 0, length);
+}
+static int decode_nfspath3(struct xdr_stream *xdr)
+{
+ u32 recvd, count;
+ size_t hdrlen;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p);
+ if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
+ goto out_nametoolong;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+
+ xdr_read_pages(xdr, count);
+ xdr_terminate_string(xdr->buf, count);
+ return 0;
+
+out_nametoolong:
+ dprintk("NFS: returned pathname too long: %u\n", count);
+ return -ENAMETOOLONG;
+out_cheating:
+ dprintk("NFS: server cheating in pathname result: "
+ "count %u > recvd %u\n", count, recvd);
+ return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
+ return -EIO;
}
/*
- * Encode/decode time.
+ * cookie3
+ *
+ * typedef uint64 cookie3
*/
-static inline __be32 *
-xdr_encode_time3(__be32 *p, struct timespec *timep)
+static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
{
- *p++ = htonl(timep->tv_sec);
- *p++ = htonl(timep->tv_nsec);
- return p;
+ return xdr_encode_hyper(p, cookie);
}
-static inline __be32 *
-xdr_decode_time3(__be32 *p, struct timespec *timep)
+static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie)
{
- timep->tv_sec = ntohl(*p++);
- timep->tv_nsec = ntohl(*p++);
- return p;
+ return decode_uint64(xdr, cookie);
+}
+
+/*
+ * cookieverf3
+ *
+ * typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE];
+ */
+static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
+{
+ memcpy(p, verifier, NFS3_COOKIEVERFSIZE);
+ return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
+}
+
+static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * createverf3
+ *
+ * typedef opaque createverf3[NFS3_CREATEVERFSIZE];
+ */
+static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE);
+ memcpy(p, verifier, NFS3_CREATEVERFSIZE);
+}
+
+static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ memcpy(verifier, p, NFS3_WRITEVERFSIZE);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * size3
+ *
+ * typedef uint64 size3;
+ */
+static __be32 *xdr_decode_size3(__be32 *p, u64 *size)
+{
+ return xdr_decode_hyper(p, size);
+}
+
+/*
+ * nfsstat3
+ *
+ * enum nfsstat3 {
+ * NFS3_OK = 0,
+ * ...
+ * }
+ */
+#define NFS3_OK NFS_OK
+
+static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ *status = be32_to_cpup(p);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * ftype3
+ *
+ * enum ftype3 {
+ * NF3REG = 1,
+ * NF3DIR = 2,
+ * NF3BLK = 3,
+ * NF3CHR = 4,
+ * NF3LNK = 5,
+ * NF3SOCK = 6,
+ * NF3FIFO = 7
+ * };
+ */
+static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
+{
+ BUG_ON(type > NF3FIFO);
+ encode_uint32(xdr, type);
}
-static __be32 *
-xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
+static __be32 *xdr_decode_ftype3(__be32 *p, umode_t *mode)
{
- unsigned int type, major, minor;
- umode_t fmode;
+ u32 type;
- type = ntohl(*p++);
+ type = be32_to_cpup(p++);
if (type > NF3FIFO)
type = NF3NON;
- fmode = nfs_type2fmt[type];
- fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
- fattr->nlink = ntohl(*p++);
- fattr->uid = ntohl(*p++);
- fattr->gid = ntohl(*p++);
- p = xdr_decode_hyper(p, &fattr->size);
- p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
-
- /* Turn remote device info into Linux-specific dev_t */
- major = ntohl(*p++);
- minor = ntohl(*p++);
- fattr->rdev = MKDEV(major, minor);
- if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor)
- fattr->rdev = 0;
+ *mode = nfs_type2fmt[type];
+ return p;
+}
- p = xdr_decode_hyper(p, &fattr->fsid.major);
- fattr->fsid.minor = 0;
- p = xdr_decode_hyper(p, &fattr->fileid);
- p = xdr_decode_time3(p, &fattr->atime);
- p = xdr_decode_time3(p, &fattr->mtime);
- p = xdr_decode_time3(p, &fattr->ctime);
+/*
+ * specdata3
+ *
+ * struct specdata3 {
+ * uint32 specdata1;
+ * uint32 specdata2;
+ * };
+ */
+static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev)
+{
+ __be32 *p;
- /* Update the mode bits */
- fattr->valid |= NFS_ATTR_FATTR_V3;
+ p = xdr_reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(MAJOR(rdev));
+ *p = cpu_to_be32(MINOR(rdev));
+}
+
+static __be32 *xdr_decode_specdata3(__be32 *p, dev_t *rdev)
+{
+ unsigned int major, minor;
+
+ major = be32_to_cpup(p++);
+ minor = be32_to_cpup(p++);
+ *rdev = MKDEV(major, minor);
+ if (MAJOR(*rdev) != major || MINOR(*rdev) != minor)
+ *rdev = 0;
+ return p;
+}
+
+/*
+ * nfs_fh3
+ *
+ * struct nfs_fh3 {
+ * opaque data<NFS3_FHSIZE>;
+ * };
+ */
+static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+ __be32 *p;
+
+ BUG_ON(fh->size > NFS3_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + fh->size);
+ xdr_encode_opaque(p, fh->data, fh->size);
+}
+
+static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ length = be32_to_cpup(p++);
+ if (unlikely(length > NFS3_FHSIZE))
+ goto out_toobig;
+ p = xdr_inline_decode(xdr, length);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ fh->size = length;
+ memcpy(fh->data, p, length);
+ return 0;
+out_toobig:
+ dprintk("NFS: file handle size (%u) too big\n", length);
+ return -E2BIG;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static void zero_nfs_fh3(struct nfs_fh *fh)
+{
+ memset(fh, 0, sizeof(*fh));
+}
+
+/*
+ * nfstime3
+ *
+ * struct nfstime3 {
+ * uint32 seconds;
+ * uint32 nseconds;
+ * };
+ */
+static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep)
+{
+ *p++ = cpu_to_be32(timep->tv_sec);
+ *p++ = cpu_to_be32(timep->tv_nsec);
return p;
}
-static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
{
+ timep->tv_sec = be32_to_cpup(p++);
+ timep->tv_nsec = be32_to_cpup(p++);
+ return p;
+}
+
+/*
+ * sattr3
+ *
+ * enum time_how {
+ * DONT_CHANGE = 0,
+ * SET_TO_SERVER_TIME = 1,
+ * SET_TO_CLIENT_TIME = 2
+ * };
+ *
+ * union set_mode3 switch (bool set_it) {
+ * case TRUE:
+ * mode3 mode;
+ * default:
+ * void;
+ * };
+ *
+ * union set_uid3 switch (bool set_it) {
+ * case TRUE:
+ * uid3 uid;
+ * default:
+ * void;
+ * };
+ *
+ * union set_gid3 switch (bool set_it) {
+ * case TRUE:
+ * gid3 gid;
+ * default:
+ * void;
+ * };
+ *
+ * union set_size3 switch (bool set_it) {
+ * case TRUE:
+ * size3 size;
+ * default:
+ * void;
+ * };
+ *
+ * union set_atime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ * nfstime3 atime;
+ * default:
+ * void;
+ * };
+ *
+ * union set_mtime switch (time_how set_it) {
+ * case SET_TO_CLIENT_TIME:
+ * nfstime3 mtime;
+ * default:
+ * void;
+ * };
+ *
+ * struct sattr3 {
+ * set_mode3 mode;
+ * set_uid3 uid;
+ * set_gid3 gid;
+ * set_size3 size;
+ * set_atime atime;
+ * set_mtime mtime;
+ * };
+ */
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+{
+ u32 nbytes;
+ __be32 *p;
+
+ /*
+ * In order to make only a single xdr_reserve_space() call,
+ * pre-compute the total number of bytes to be reserved.
+ * Six boolean values, one for each set_foo field, are always
+ * present in the encoded result, so start there.
+ */
+ nbytes = 6 * 4;
+ if (attr->ia_valid & ATTR_MODE)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_UID)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_GID)
+ nbytes += 4;
+ if (attr->ia_valid & ATTR_SIZE)
+ nbytes += 8;
+ if (attr->ia_valid & ATTR_ATIME_SET)
+ nbytes += 8;
+ if (attr->ia_valid & ATTR_MTIME_SET)
+ nbytes += 8;
+ p = xdr_reserve_space(xdr, nbytes);
+
if (attr->ia_valid & ATTR_MODE) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_mode & S_IALLUGO);
- } else {
+ *p++ = cpu_to_be32(attr->ia_mode & S_IALLUGO);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_UID) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_uid);
- } else {
+ *p++ = cpu_to_be32(attr->ia_uid);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_GID) {
*p++ = xdr_one;
- *p++ = htonl(attr->ia_gid);
- } else {
+ *p++ = cpu_to_be32(attr->ia_gid);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_SIZE) {
*p++ = xdr_one;
- p = xdr_encode_hyper(p, (__u64) attr->ia_size);
- } else {
+ p = xdr_encode_hyper(p, (u64)attr->ia_size);
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_ATIME_SET) {
*p++ = xdr_two;
- p = xdr_encode_time3(p, &attr->ia_atime);
+ p = xdr_encode_nfstime3(p, &attr->ia_atime);
} else if (attr->ia_valid & ATTR_ATIME) {
*p++ = xdr_one;
- } else {
+ } else
*p++ = xdr_zero;
- }
+
if (attr->ia_valid & ATTR_MTIME_SET) {
*p++ = xdr_two;
- p = xdr_encode_time3(p, &attr->ia_mtime);
+ xdr_encode_nfstime3(p, &attr->ia_mtime);
} else if (attr->ia_valid & ATTR_MTIME) {
- *p++ = xdr_one;
- } else {
- *p++ = xdr_zero;
- }
- return p;
+ *p = xdr_one;
+ } else
+ *p = xdr_zero;
+}
+
+/*
+ * fattr3
+ *
+ * struct fattr3 {
+ * ftype3 type;
+ * mode3 mode;
+ * uint32 nlink;
+ * uid3 uid;
+ * gid3 gid;
+ * size3 size;
+ * size3 used;
+ * specdata3 rdev;
+ * uint64 fsid;
+ * fileid3 fileid;
+ * nfstime3 atime;
+ * nfstime3 mtime;
+ * nfstime3 ctime;
+ * };
+ */
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+ umode_t fmode;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
+ p = xdr_decode_ftype3(p, &fmode);
+
+ fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
+ fattr->nlink = be32_to_cpup(p++);
+ fattr->uid = be32_to_cpup(p++);
+ fattr->gid = be32_to_cpup(p++);
+
+ p = xdr_decode_size3(p, &fattr->size);
+ p = xdr_decode_size3(p, &fattr->du.nfs3.used);
+ p = xdr_decode_specdata3(p, &fattr->rdev);
+
+ p = xdr_decode_hyper(p, &fattr->fsid.major);
+ fattr->fsid.minor = 0;
+
+ p = xdr_decode_fileid3(p, &fattr->fileid);
+ p = xdr_decode_nfstime3(p, &fattr->atime);
+ p = xdr_decode_nfstime3(p, &fattr->mtime);
+ xdr_decode_nfstime3(p, &fattr->ctime);
+
+ fattr->valid |= NFS_ATTR_FATTR_V3;
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static inline __be32 *
-xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * post_op_attr
+ *
+ * union post_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ * fattr3 attributes;
+ * case FALSE:
+ * void;
+ * };
+ */
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
- p = xdr_decode_hyper(p, &fattr->pre_size);
- p = xdr_decode_time3(p, &fattr->pre_mtime);
- p = xdr_decode_time3(p, &fattr->pre_ctime);
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero)
+ return decode_fattr3(xdr, fattr);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * wcc_attr
+ * struct wcc_attr {
+ * size3 size;
+ * nfstime3 mtime;
+ * nfstime3 ctime;
+ * };
+ */
+static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME;
- return p;
-}
-static inline __be32 *
-xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
-{
- if (*p++)
- p = xdr_decode_fattr(p, fattr);
- return p;
+ p = xdr_decode_size3(p, &fattr->pre_size);
+ p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
+ xdr_decode_nfstime3(p, &fattr->pre_ctime);
+
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static inline __be32 *
-xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+/*
+ * pre_op_attr
+ * union pre_op_attr switch (bool attributes_follow) {
+ * case TRUE:
+ * wcc_attr attributes;
+ * case FALSE:
+ * void;
+ * };
+ *
+ * wcc_data
+ *
+ * struct wcc_data {
+ * pre_op_attr before;
+ * post_op_attr after;
+ * };
+ */
+static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
+ if (unlikely(p == NULL))
goto out_overflow;
- if (ntohl(*p++)) {
- p = xdr_inline_decode(xdr, 84);
- if (unlikely(!p))
- goto out_overflow;
- p = xdr_decode_fattr(p, fattr);
- }
- return p;
+ if (*p != xdr_zero)
+ return decode_wcc_attr(xdr, fattr);
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EIO);
+ return -EIO;
}
-static inline __be32 *
-xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
{
- if (*p++)
- return xdr_decode_wcc_attr(p, fattr);
- return p;
+ int error;
+
+ error = decode_pre_op_attr(xdr, fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, fattr);
+out:
+ return error;
}
+/*
+ * post_op_fh3
+ *
+ * union post_op_fh3 switch (bool handle_follows) {
+ * case TRUE:
+ * nfs_fh3 handle;
+ * case FALSE:
+ * void;
+ * };
+ */
+static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+ __be32 *p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero)
+ return decode_nfs_fh3(xdr, fh);
+ zero_nfs_fh3(fh);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
-static inline __be32 *
-xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr)
+/*
+ * diropargs3
+ *
+ * struct diropargs3 {
+ * nfs_fh3 dir;
+ * filename3 name;
+ * };
+ */
+static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
+ const char *name, u32 length)
{
- p = xdr_decode_pre_op_attr(p, fattr);
- return xdr_decode_post_op_attr(p, fattr);
+ encode_nfs_fh3(xdr, fh);
+ encode_filename3(xdr, name, length);
}
+
/*
- * NFS encode functions
+ * NFSv3 XDR encode functions
+ *
+ * NFSv3 argument types are defined in section 3.3 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
*/
/*
- * Encode file handle argument
+ * 3.3.1 GETATTR3args
+ *
+ * struct GETATTR3args {
+ * nfs_fh3 object;
+ * };
*/
-static int
-nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
+static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_fh *fh)
{
- p = xdr_encode_fhandle(p, fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, fh);
}
/*
- * Encode SETATTR arguments
+ * 3.3.2 SETATTR3args
+ *
+ * union sattrguard3 switch (bool check) {
+ * case TRUE:
+ * nfstime3 obj_ctime;
+ * case FALSE:
+ * void;
+ * };
+ *
+ * struct SETATTR3args {
+ * nfs_fh3 object;
+ * sattr3 new_attributes;
+ * sattrguard3 guard;
+ * };
*/
-static int
-nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
-{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_sattr(p, args->sattr);
- *p++ = htonl(args->guard);
- if (args->guard)
- p = xdr_encode_time3(p, &args->guardtime);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void encode_sattrguard3(struct xdr_stream *xdr,
+ const struct nfs3_sattrargs *args)
+{
+ __be32 *p;
+
+ if (args->guard) {
+ p = xdr_reserve_space(xdr, 4 + 8);
+ *p++ = xdr_one;
+ xdr_encode_nfstime3(p, &args->guardtime);
+ } else {
+ p = xdr_reserve_space(xdr, 4);
+ *p = xdr_zero;
+ }
+}
+
+static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_sattrargs *args)
+{
+ encode_nfs_fh3(xdr, args->fh);
+ encode_sattr3(xdr, args->sattr);
+ encode_sattrguard3(xdr, args);
}
/*
- * Encode directory ops argument
+ * 3.3.3 LOOKUP3args
+ *
+ * struct LOOKUP3args {
+ * diropargs3 what;
+ * };
*/
-static int
-nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args)
+static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_diropargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
}
/*
- * Encode REMOVE argument
+ * 3.3.4 ACCESS3args
+ *
+ * struct ACCESS3args {
+ * nfs_fh3 object;
+ * uint32 access;
+ * };
*/
-static int
-nfs3_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void encode_access3args(struct xdr_stream *xdr,
+ const struct nfs3_accessargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name.name, args->name.len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, args->fh);
+ encode_uint32(xdr, args->access);
+}
+
+static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_accessargs *args)
+{
+ encode_access3args(xdr, args);
}
/*
- * Encode access() argument
+ * 3.3.5 READLINK3args
+ *
+ * struct READLINK3args {
+ * nfs_fh3 symlink;
+ * };
*/
-static int
-nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *args)
+static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->access);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_nfs_fh3(xdr, args->fh);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->pglen, NFS3_readlinkres_sz);
}
/*
- * Arguments to a READ call. Since we read data directly into the page
- * cache, we also set up the reply iovec here so that iov[1] points
- * exactly to the page we want to fetch.
+ * 3.3.6 READ3args
+ *
+ * struct READ3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void encode_read3args(struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_encode_fhandle(p, args->fh);
+ p = xdr_reserve_space(xdr, 8 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ *p = cpu_to_be32(args->count);
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen,
- args->pages, args->pgbase, count);
+static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_readargs *args)
+{
+ encode_read3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, args->pgbase,
+ args->count, NFS3_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
- return 0;
}
/*
- * Write arguments. Splice the buffer to be written into the iovec.
+ * 3.3.7 WRITE3args
+ *
+ * enum stable_how {
+ * UNSTABLE = 0,
+ * DATA_SYNC = 1,
+ * FILE_SYNC = 2
+ * };
+ *
+ * struct WRITE3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * stable_how stable;
+ * opaque data<>;
+ * };
*/
-static int
-nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_write3args(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- u32 count = args->count;
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_encode_fhandle(p, args->fh);
+ p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(count);
- *p++ = htonl(args->stable);
- *p++ = htonl(count);
- sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
-
- /* Copy the page array */
- xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
- sndbuf->flags |= XDRBUF_WRITE;
- return 0;
+ *p++ = cpu_to_be32(args->count);
+ *p++ = cpu_to_be32(args->stable);
+ *p = cpu_to_be32(args->count);
+ xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+}
+
+static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
+{
+ encode_write3args(xdr, args);
+ xdr->buf->flags |= XDRBUF_WRITE;
}
/*
- * Encode CREATE arguments
+ * 3.3.8 CREATE3args
+ *
+ * enum createmode3 {
+ * UNCHECKED = 0,
+ * GUARDED = 1,
+ * EXCLUSIVE = 2
+ * };
+ *
+ * union createhow3 switch (createmode3 mode) {
+ * case UNCHECKED:
+ * case GUARDED:
+ * sattr3 obj_attributes;
+ * case EXCLUSIVE:
+ * createverf3 verf;
+ * };
+ *
+ * struct CREATE3args {
+ * diropargs3 where;
+ * createhow3 how;
+ * };
*/
-static int
-nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *args)
+static void encode_createhow3(struct xdr_stream *xdr,
+ const struct nfs3_createargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
-
- *p++ = htonl(args->createmode);
- if (args->createmode == NFS3_CREATE_EXCLUSIVE) {
- *p++ = args->verifier[0];
- *p++ = args->verifier[1];
- } else
- p = xdr_encode_sattr(p, args->sattr);
+ encode_uint32(xdr, args->createmode);
+ switch (args->createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ case NFS3_CREATE_GUARDED:
+ encode_sattr3(xdr, args->sattr);
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ encode_createverf3(xdr, args->verifier);
+ break;
+ default:
+ BUG();
+ }
+}
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_createargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_createhow3(xdr, args);
}
/*
- * Encode MKDIR arguments
+ * 3.3.9 MKDIR3args
+ *
+ * struct MKDIR3args {
+ * diropargs3 where;
+ * sattr3 attributes;
+ * };
*/
-static int
-nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args)
+static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_mkdirargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_sattr3(xdr, args->sattr);
}
/*
- * Encode SYMLINK arguments
+ * 3.3.10 SYMLINK3args
+ *
+ * struct symlinkdata3 {
+ * sattr3 symlink_attributes;
+ * nfspath3 symlink_data;
+ * };
+ *
+ * struct SYMLINK3args {
+ * diropargs3 where;
+ * symlinkdata3 symlink;
+ * };
*/
-static int
-nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *args)
+static void encode_symlinkdata3(struct xdr_stream *xdr,
+ const struct nfs3_symlinkargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_array(p, args->fromname, args->fromlen);
- p = xdr_encode_sattr(p, args->sattr);
- *p++ = htonl(args->pathlen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ encode_sattr3(xdr, args->sattr);
+ encode_nfspath3(xdr, args->pages, args->pathlen);
+}
- /* Copy the page */
- xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
- return 0;
+static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_symlinkargs *args)
+{
+ encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
+ encode_symlinkdata3(xdr, args);
}
/*
- * Encode MKNOD arguments
+ * 3.3.11 MKNOD3args
+ *
+ * struct devicedata3 {
+ * sattr3 dev_attributes;
+ * specdata3 spec;
+ * };
+ *
+ * union mknoddata3 switch (ftype3 type) {
+ * case NF3CHR:
+ * case NF3BLK:
+ * devicedata3 device;
+ * case NF3SOCK:
+ * case NF3FIFO:
+ * sattr3 pipe_attributes;
+ * default:
+ * void;
+ * };
+ *
+ * struct MKNOD3args {
+ * diropargs3 where;
+ * mknoddata3 what;
+ * };
*/
-static int
-nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
-{
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_array(p, args->name, args->len);
- *p++ = htonl(args->type);
- p = xdr_encode_sattr(p, args->sattr);
- if (args->type == NF3CHR || args->type == NF3BLK) {
- *p++ = htonl(MAJOR(args->rdev));
- *p++ = htonl(MINOR(args->rdev));
+static void encode_devicedata3(struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_sattr3(xdr, args->sattr);
+ encode_specdata3(xdr, args->rdev);
+}
+
+static void encode_mknoddata3(struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_ftype3(xdr, args->type);
+ switch (args->type) {
+ case NF3CHR:
+ case NF3BLK:
+ encode_devicedata3(xdr, args);
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ encode_sattr3(xdr, args->sattr);
+ break;
+ case NF3REG:
+ case NF3DIR:
+ break;
+ default:
+ BUG();
}
+}
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_mknodargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name, args->len);
+ encode_mknoddata3(xdr, args);
}
/*
- * Encode RENAME arguments
+ * 3.3.12 REMOVE3args
+ *
+ * struct REMOVE3args {
+ * diropargs3 object;
+ * };
*/
-static int
-nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
-{
- p = xdr_encode_fhandle(p, args->old_dir);
- p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
- p = xdr_encode_fhandle(p, args->new_dir);
- p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
+{
+ encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
}
/*
- * Encode LINK arguments
+ * 3.3.14 RENAME3args
+ *
+ * struct RENAME3args {
+ * diropargs3 from;
+ * diropargs3 to;
+ * };
*/
-static int
-nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
+static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- p = xdr_encode_fhandle(p, args->fromfh);
- p = xdr_encode_fhandle(p, args->tofh);
- p = xdr_encode_array(p, args->toname, args->tolen);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ const struct qstr *old = args->old_name;
+ const struct qstr *new = args->new_name;
+
+ encode_diropargs3(xdr, args->old_dir, old->name, old->len);
+ encode_diropargs3(xdr, args->new_dir, new->name, new->len);
}
/*
- * Encode arguments to readdir call
+ * 3.3.15 LINK3args
+ *
+ * struct LINK3args {
+ * nfs_fh3 file;
+ * diropargs3 link;
+ * };
*/
-static int
-nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_linkargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
- u32 count = args->count;
-
- p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_hyper(p, args->cookie);
- *p++ = args->verf[0];
- *p++ = args->verf[1];
- if (args->plus) {
- /* readdirplus: need dircount + buffer size.
- * We just make sure we make dircount big enough */
- *p++ = htonl(count >> 3);
- }
- *p++ = htonl(count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, count);
- return 0;
+ encode_nfs_fh3(xdr, args->fromfh);
+ encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
}
/*
- * Decode the result of a readdir call.
- * We just check for syntactical correctness.
+ * 3.3.16 READDIR3args
+ *
+ * struct READDIR3args {
+ * nfs_fh3 dir;
+ * cookie3 cookie;
+ * cookieverf3 cookieverf;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res)
+static void encode_readdir3args(struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- struct page **page;
- size_t hdrlen;
- u32 recvd, pglen;
- int status;
-
- status = ntohl(*p++);
- /* Decode post_op_attrs */
- p = xdr_decode_post_op_attr(p, res->dir_attr);
- if (status)
- return nfs_stat_to_errno(status);
- /* Decode verifier cookie */
- if (res->verf) {
- res->verf[0] = *p++;
- res->verf[1] = *p++;
- } else {
- p += 2;
- }
+ __be32 *p;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READDIR reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
+ encode_nfs_fh3(xdr, args->fh);
- pglen = rcvbuf->page_len;
- recvd = rcvbuf->len - hdrlen;
- if (pglen > recvd)
- pglen = recvd;
- page = rcvbuf->pages;
+ p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4);
+ p = xdr_encode_cookie3(p, args->cookie);
+ p = xdr_encode_cookieverf3(p, args->verf);
+ *p = cpu_to_be32(args->count);
+}
- return pglen;
+static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
+{
+ encode_readdir3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS3_readdirres_sz);
}
-__be32 *
-nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
+/*
+ * 3.3.17 READDIRPLUS3args
+ *
+ * struct READDIRPLUS3args {
+ * nfs_fh3 dir;
+ * cookie3 cookie;
+ * cookieverf3 cookieverf;
+ * count3 dircount;
+ * count3 maxcount;
+ * };
+ */
+static void encode_readdirplus3args(struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
{
__be32 *p;
- struct nfs_entry old = *entry;
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (!ntohl(*p++)) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
- entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
- }
- p = xdr_inline_decode(xdr, 12);
- if (unlikely(!p))
- goto out_overflow;
- p = xdr_decode_hyper(p, &entry->ino);
- entry->len = ntohl(*p++);
+ encode_nfs_fh3(xdr, args->fh);
- p = xdr_inline_decode(xdr, entry->len + 8);
- if (unlikely(!p))
- goto out_overflow;
- entry->name = (const char *) p;
- p += XDR_QUADLEN(entry->len);
- entry->prev_cookie = entry->cookie;
- p = xdr_decode_hyper(p, &entry->cookie);
-
- entry->d_type = DT_UNKNOWN;
- if (plus) {
- entry->fattr->valid = 0;
- p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
- if (IS_ERR(p))
- goto out_overflow_exit;
- entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
- /* In fact, a post_op_fh3: */
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- if (*p++) {
- p = xdr_decode_fhandle_stream(xdr, entry->fh);
- if (IS_ERR(p))
- goto out_overflow_exit;
- /* Ugh -- server reply was truncated */
- if (p == NULL) {
- dprintk("NFS: FH truncated\n");
- *entry = old;
- return ERR_PTR(-EAGAIN);
- }
- } else
- memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
- }
+ p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4);
+ p = xdr_encode_cookie3(p, args->cookie);
+ p = xdr_encode_cookieverf3(p, args->verf);
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
+ /*
+ * readdirplus: need dircount + buffer size.
+ * We just make sure we make dircount big enough
+ */
+ *p++ = cpu_to_be32(args->count >> 3);
- return p;
+ *p = cpu_to_be32(args->count);
+}
-out_overflow:
- print_overflow_msg(__func__, xdr);
-out_overflow_exit:
- return ERR_PTR(-EAGAIN);
+static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_readdirargs *args)
+{
+ encode_readdirplus3args(xdr, args);
+ prepare_reply_buffer(req, args->pages, 0,
+ args->count, NFS3_readdirres_sz);
}
/*
- * Encode COMMIT arguments
+ * 3.3.21 COMMIT3args
+ *
+ * struct COMMIT3args {
+ * nfs_fh3 file;
+ * offset3 offset;
+ * count3 count;
+ * };
*/
-static int
-nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void encode_commit3args(struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- p = xdr_encode_fhandle(p, args->fh);
+ __be32 *p;
+
+ encode_nfs_fh3(xdr, args->fh);
+
+ p = xdr_reserve_space(xdr, 8 + 4);
p = xdr_encode_hyper(p, args->offset);
- *p++ = htonl(args->count);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
+ *p = cpu_to_be32(args->count);
}
-#ifdef CONFIG_NFS_V3_ACL
-/*
- * Encode GETACL arguments
- */
-static int
-nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
- struct nfs3_getaclargs *args)
+static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs_writeargs *args)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
+ encode_commit3args(xdr, args);
+}
- p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->mask);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+#ifdef CONFIG_NFS_V3_ACL
- if (args->mask & (NFS_ACL | NFS_DFACL)) {
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack +
- ACL3_getaclres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
- NFSACL_MAXPAGES << PAGE_SHIFT);
- }
- return 0;
+static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_getaclargs *args)
+{
+ encode_nfs_fh3(xdr, args->fh);
+ encode_uint32(xdr, args->mask);
+ if (args->mask & (NFS_ACL | NFS_DFACL))
+ prepare_reply_buffer(req, args->pages, 0,
+ NFSACL_MAXPAGES << PAGE_SHIFT,
+ ACL3_getaclres_sz);
}
-/*
- * Encode SETACL arguments
- */
-static int
-nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
- struct nfs3_setaclargs *args)
+static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs3_setaclargs *args)
{
- struct xdr_buf *buf = &req->rq_snd_buf;
unsigned int base;
- int err;
-
- p = xdr_encode_fhandle(p, NFS_FH(args->inode));
- *p++ = htonl(args->mask);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- base = req->rq_slen;
+ int error;
+ encode_nfs_fh3(xdr, NFS_FH(args->inode));
+ encode_uint32(xdr, args->mask);
if (args->npages != 0)
- xdr_encode_pages(buf, args->pages, 0, args->len);
- else
- req->rq_slen = xdr_adjust_iovec(req->rq_svec,
- p + XDR_QUADLEN(args->len));
+ xdr_write_pages(xdr, args->pages, 0, args->len);
- err = nfsacl_encode(buf, base, args->inode,
+ base = req->rq_slen;
+ error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
args->acl_access : NULL, 1, 0);
- if (err > 0)
- err = nfsacl_encode(buf, base + err, args->inode,
- (args->mask & NFS_DFACL) ?
- args->acl_default : NULL, 1,
- NFS_ACL_DEFAULT);
- return (err > 0) ? 0 : err;
+ BUG_ON(error < 0);
+ error = nfsacl_encode(xdr->buf, base + error, args->inode,
+ (args->mask & NFS_DFACL) ?
+ args->acl_default : NULL, 1,
+ NFS_ACL_DEFAULT);
+ BUG_ON(error < 0);
}
+
#endif /* CONFIG_NFS_V3_ACL */
/*
- * NFS XDR decode functions
+ * NFSv3 XDR decode functions
+ *
+ * NFSv3 result types are defined in section 3.3 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
*/
/*
- * Decode attrstat reply.
+ * 3.3.1 GETATTR3res
+ *
+ * struct GETATTR3resok {
+ * fattr3 obj_attributes;
+ * };
+ *
+ * union GETATTR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * GETATTR3resok resok;
+ * default:
+ * void;
+ * };
*/
-static int
-nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status;
-
- if ((status = ntohl(*p++)))
- return nfs_stat_to_errno(status);
- xdr_decode_fattr(p, fattr);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_fattr3(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode status+wcc_data reply
- * SATTR, REMOVE, RMDIR
+ * 3.3.2 SETATTR3res
+ *
+ * struct SETATTR3resok {
+ * wcc_data obj_wcc;
+ * };
+ *
+ * struct SETATTR3resfail {
+ * wcc_data obj_wcc;
+ * };
+ *
+ * union SETATTR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * SETATTR3resok resok;
+ * default:
+ * SETATTR3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status;
-
- if ((status = ntohl(*p++)))
- status = nfs_stat_to_errno(status);
- xdr_decode_wcc_data(p, fattr);
- return status;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
-static int
-nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
+/*
+ * 3.3.3 LOOKUP3res
+ *
+ * struct LOOKUP3resok {
+ * nfs_fh3 object;
+ * post_op_attr obj_attributes;
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * struct LOOKUP3resfail {
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * union LOOKUP3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * LOOKUP3resok resok;
+ * default:
+ * LOOKUP3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
{
- return nfs3_xdr_wccstat(req, p, res->dir_attr);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_nfs_fh3(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->dir_attr);
+out:
+ return error;
+out_default:
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
}
/*
- * Decode LOOKUP reply
+ * 3.3.4 ACCESS3res
+ *
+ * struct ACCESS3resok {
+ * post_op_attr obj_attributes;
+ * uint32 access;
+ * };
+ *
+ * struct ACCESS3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union ACCESS3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * ACCESS3resok resok;
+ * default:
+ * ACCESS3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
+static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_accessres *result)
{
- int status;
-
- if ((status = ntohl(*p++))) {
- status = nfs_stat_to_errno(status);
- } else {
- if (!(p = xdr_decode_fhandle(p, res->fh)))
- return -errno_NFSERR_IO;
- p = xdr_decode_post_op_attr(p, res->fattr);
- }
- xdr_decode_post_op_attr(p, res->dir_attr);
- return status;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_uint32(xdr, &result->access);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode ACCESS reply
+ * 3.3.5 READLINK3res
+ *
+ * struct READLINK3resok {
+ * post_op_attr symlink_attributes;
+ * nfspath3 data;
+ * };
+ *
+ * struct READLINK3resfail {
+ * post_op_attr symlink_attributes;
+ * };
+ *
+ * union READLINK3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READLINK3resok resok;
+ * default:
+ * READLINK3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
+static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
{
- int status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status)
- return nfs_stat_to_errno(status);
- res->access = ntohl(*p++);
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_nfspath3(xdr);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
-static int
-nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
+/*
+ * 3.3.6 READ3res
+ *
+ * struct READ3resok {
+ * post_op_attr file_attributes;
+ * count3 count;
+ * bool eof;
+ * opaque data<>;
+ * };
+ *
+ * struct READ3resfail {
+ * post_op_attr file_attributes;
+ * };
+ *
+ * union READ3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READ3resok resok;
+ * default:
+ * READ3resfail resfail;
+ * };
+ */
+static int decode_read3resok(struct xdr_stream *xdr,
+ struct nfs_readres *result)
{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
+ u32 eof, count, ocount, recvd;
+ size_t hdrlen;
+ __be32 *p;
- p = xdr_encode_fhandle(p, args->fh);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ p = xdr_inline_decode(xdr, 4 + 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ count = be32_to_cpup(p++);
+ eof = be32_to_cpup(p++);
+ ocount = be32_to_cpup(p++);
+ if (unlikely(ocount != count))
+ goto out_mismatch;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(count > recvd))
+ goto out_cheating;
+
+out:
+ xdr_read_pages(xdr, count);
+ result->eof = eof;
+ result->count = count;
+ return count;
+out_mismatch:
+ dprintk("NFS: READ count doesn't match length of opaque: "
+ "count %u != ocount %u\n", count, ocount);
+ return -EIO;
+out_cheating:
+ dprintk("NFS: server cheating in read result: "
+ "count %u > recvd %u\n", count, recvd);
+ count = recvd;
+ eof = 0;
+ goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
- /* Inline the page array */
- replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2;
- xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen);
- return 0;
+static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_read3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READLINK reply
+ * 3.3.7 WRITE3res
+ *
+ * enum stable_how {
+ * UNSTABLE = 0,
+ * DATA_SYNC = 1,
+ * FILE_SYNC = 2
+ * };
+ *
+ * struct WRITE3resok {
+ * wcc_data file_wcc;
+ * count3 count;
+ * stable_how committed;
+ * writeverf3 verf;
+ * };
+ *
+ * struct WRITE3resfail {
+ * wcc_data file_wcc;
+ * };
+ *
+ * union WRITE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * WRITE3resok resok;
+ * default:
+ * WRITE3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int decode_write3resok(struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
- u32 len, recvd;
- int status;
-
- status = ntohl(*p++);
- p = xdr_decode_post_op_attr(p, fattr);
-
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- /* Convert length of symlink */
- len = ntohl(*p++);
- if (len >= rcvbuf->page_len) {
- dprintk("nfs: server returned giant symlink!\n");
- return -ENAMETOOLONG;
- }
+ __be32 *p;
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READLINK reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READLINK header is short. "
- "iovec will be shifted.\n");
- xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
- }
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (recvd < len) {
- dprintk("NFS: server cheating in readlink reply: "
- "count %u > recvd %u\n", len, recvd);
- return -EIO;
- }
+ p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->count = be32_to_cpup(p++);
+ result->verf->committed = be32_to_cpup(p++);
+ if (unlikely(result->verf->committed > NFS_FILE_SYNC))
+ goto out_badvalue;
+ memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
+ return result->count;
+out_badvalue:
+ dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
+ return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
- xdr_terminate_string(rcvbuf, len);
- return 0;
+static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_write3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode READ reply
+ * 3.3.8 CREATE3res
+ *
+ * struct CREATE3resok {
+ * post_op_fh3 obj;
+ * post_op_attr obj_attributes;
+ * wcc_data dir_wcc;
+ * };
+ *
+ * struct CREATE3resfail {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * union CREATE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * CREATE3resok resok;
+ * default:
+ * CREATE3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
+static int decode_create3resok(struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
{
- struct kvec *iov = req->rq_rcv_buf.head;
- size_t hdrlen;
- u32 count, ocount, recvd;
- int status;
+ int error;
+
+ error = decode_post_op_fh3(xdr, result->fh);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ /* The server isn't required to return a file handle.
+ * If it didn't, force the client to perform a LOOKUP
+ * to determine the correct file handle and attribute
+ * values for the new object. */
+ if (result->fh->size == 0)
+ result->fattr->valid = 0;
+ error = decode_wcc_data(xdr, result->dir_attr);
+out:
+ return error;
+}
- status = ntohl(*p++);
- p = xdr_decode_post_op_attr(p, res->fattr);
+static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_diropres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_create3resok(xdr, result);
+out:
+ return error;
+out_default:
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
+}
- if (status != 0)
- return nfs_stat_to_errno(status);
+/*
+ * 3.3.12 REMOVE3res
+ *
+ * struct REMOVE3resok {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * struct REMOVE3resfail {
+ * wcc_data dir_wcc;
+ * };
+ *
+ * union REMOVE3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * REMOVE3resok resok;
+ * default:
+ * REMOVE3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_removeres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- /* Decode reply count and EOF flag. NFSv3 is somewhat redundant
- * in that it puts the count both in the res struct and in the
- * opaque data count. */
- count = ntohl(*p++);
- res->eof = ntohl(*p++);
- ocount = ntohl(*p++);
+/*
+ * 3.3.14 RENAME3res
+ *
+ * struct RENAME3resok {
+ * wcc_data fromdir_wcc;
+ * wcc_data todir_wcc;
+ * };
+ *
+ * struct RENAME3resfail {
+ * wcc_data fromdir_wcc;
+ * wcc_data todir_wcc;
+ * };
+ *
+ * union RENAME3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * RENAME3resok resok;
+ * default:
+ * RENAME3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_renameres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->old_fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->new_fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- if (ocount != count) {
- dprintk("NFS: READ count doesn't match RPC opaque count.\n");
- return -errno_NFSERR_IO;
- }
+/*
+ * 3.3.15 LINK3res
+ *
+ * struct LINK3resok {
+ * post_op_attr file_attributes;
+ * wcc_data linkdir_wcc;
+ * };
+ *
+ * struct LINK3resfail {
+ * post_op_attr file_attributes;
+ * wcc_data linkdir_wcc;
+ * };
+ *
+ * union LINK3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * LINK3resok resok;
+ * default:
+ * LINK3resfail resfail;
+ * };
+ */
+static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs3_linkres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
+}
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
- if (iov->iov_len < hdrlen) {
- dprintk("NFS: READ reply header overflowed:"
- "length %Zu > %Zu\n", hdrlen, iov->iov_len);
- return -errno_NFSERR_IO;
- } else if (iov->iov_len != hdrlen) {
- dprintk("NFS: READ header is short. iovec will be shifted.\n");
- xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
- }
+/**
+ * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in
+ * the local page cache
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 3.3.16 entry3
+ *
+ * struct entry3 {
+ * fileid3 fileid;
+ * filename3 name;
+ * cookie3 cookie;
+ * fhandle3 filehandle;
+ * post_op_attr3 attributes;
+ * entry3 *nextentry;
+ * };
+ *
+ * 3.3.17 entryplus3
+ * struct entryplus3 {
+ * fileid3 fileid;
+ * filename3 name;
+ * cookie3 cookie;
+ * post_op_attr name_attributes;
+ * post_op_fh3 name_handle;
+ * entryplus3 *nextentry;
+ * };
+ */
+int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
+{
+ struct nfs_entry old = *entry;
+ __be32 *p;
+ int error;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (count > recvd) {
- dprintk("NFS: server cheating in read reply: "
- "count %u > recvd %u\n", count, recvd);
- count = recvd;
- res->eof = 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p == xdr_zero) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p == xdr_zero)
+ return -EAGAIN;
+ entry->eof = 1;
+ return -EBADCOOKIE;
}
- if (count < res->count)
- res->count = count;
+ error = decode_fileid3(xdr, &entry->ino);
+ if (unlikely(error))
+ return error;
- return count;
-}
+ error = decode_inline_filename3(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+ return error;
-/*
- * Decode WRITE response
- */
-static int
-nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
-{
- int status;
+ entry->prev_cookie = entry->cookie;
+ error = decode_cookie3(xdr, &entry->cookie);
+ if (unlikely(error))
+ return error;
- status = ntohl(*p++);
- p = xdr_decode_wcc_data(p, res->fattr);
+ entry->d_type = DT_UNKNOWN;
- if (status != 0)
- return nfs_stat_to_errno(status);
+ if (plus) {
+ entry->fattr->valid = 0;
+ error = decode_post_op_attr(xdr, entry->fattr);
+ if (unlikely(error))
+ return error;
+ if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
+ entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
- res->count = ntohl(*p++);
- res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
- res->verf->verifier[0] = *p++;
- res->verf->verifier[1] = *p++;
+ /* In fact, a post_op_fh3: */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ if (*p != xdr_zero) {
+ error = decode_nfs_fh3(xdr, entry->fh);
+ if (unlikely(error)) {
+ if (error == -E2BIG)
+ goto out_truncated;
+ return error;
+ }
+ } else
+ zero_nfs_fh3(entry->fh);
+ }
- return res->count;
-}
+ return 0;
-/*
- * Decode a CREATE response
- */
-static int
-nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
-{
- int status;
-
- status = ntohl(*p++);
- if (status == 0) {
- if (*p++) {
- if (!(p = xdr_decode_fhandle(p, res->fh)))
- return -errno_NFSERR_IO;
- p = xdr_decode_post_op_attr(p, res->fattr);
- } else {
- memset(res->fh, 0, sizeof(*res->fh));
- /* Do decode post_op_attr but set it to NULL */
- p = xdr_decode_post_op_attr(p, res->fattr);
- res->fattr->valid = 0;
- }
- } else {
- status = nfs_stat_to_errno(status);
- }
- p = xdr_decode_wcc_data(p, res->dir_attr);
- return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EAGAIN;
+out_truncated:
+ dprintk("NFS: directory entry contains invalid file handle\n");
+ *entry = old;
+ return -EAGAIN;
}
/*
- * Decode RENAME reply
+ * 3.3.16 READDIR3res
+ *
+ * struct dirlist3 {
+ * entry3 *entries;
+ * bool eof;
+ * };
+ *
+ * struct READDIR3resok {
+ * post_op_attr dir_attributes;
+ * cookieverf3 cookieverf;
+ * dirlist3 reply;
+ * };
+ *
+ * struct READDIR3resfail {
+ * post_op_attr dir_attributes;
+ * };
+ *
+ * union READDIR3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * READDIR3resok resok;
+ * default:
+ * READDIR3resfail resfail;
+ * };
+ *
+ * Read the directory contents into the page cache, but otherwise
+ * don't touch them. The actual decoding is done by nfs3_decode_entry()
+ * during subsequent nfs_readdir() calls.
*/
-static int
-nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
+static int decode_dirlist3(struct xdr_stream *xdr)
{
- int status;
+ u32 recvd, pglen;
+ size_t hdrlen;
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- p = xdr_decode_wcc_data(p, res->old_fattr);
- p = xdr_decode_wcc_data(p, res->new_fattr);
- return status;
+ pglen = xdr->buf->page_len;
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ recvd = xdr->buf->len - hdrlen;
+ if (unlikely(pglen > recvd))
+ goto out_cheating;
+out:
+ xdr_read_pages(xdr, pglen);
+ return pglen;
+out_cheating:
+ dprintk("NFS: server cheating in readdir result: "
+ "pglen %u > recvd %u\n", pglen, recvd);
+ pglen = recvd;
+ goto out;
}
-/*
- * Decode LINK reply
- */
-static int
-nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
+static int decode_readdir3resok(struct xdr_stream *xdr,
+ struct nfs3_readdirres *result)
{
- int status;
+ int error;
+
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ /* XXX: do we need to check if result->verf != NULL ? */
+ error = decode_cookieverf3(xdr, result->verf);
+ if (unlikely(error))
+ goto out;
+ error = decode_dirlist3(xdr);
+out:
+ return error;
+}
- if ((status = ntohl(*p++)) != 0)
- status = nfs_stat_to_errno(status);
- p = xdr_decode_post_op_attr(p, res->fattr);
- p = xdr_decode_wcc_data(p, res->dir_attr);
- return status;
+static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_readdirres *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_readdir3resok(xdr, result);
+out:
+ return error;
+out_default:
+ error = decode_post_op_attr(xdr, result->dir_attr);
+ if (unlikely(error))
+ goto out;
+ return nfs_stat_to_errno(status);
}
/*
- * Decode FSSTAT reply
+ * 3.3.18 FSSTAT3res
+ *
+ * struct FSSTAT3resok {
+ * post_op_attr obj_attributes;
+ * size3 tbytes;
+ * size3 fbytes;
+ * size3 abytes;
+ * size3 tfiles;
+ * size3 ffiles;
+ * size3 afiles;
+ * uint32 invarsec;
+ * };
+ *
+ * struct FSSTAT3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union FSSTAT3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * FSSTAT3resok resok;
+ * default:
+ * FSSTAT3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
+static int decode_fsstat3resok(struct xdr_stream *xdr,
+ struct nfs_fsstat *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- p = xdr_decode_hyper(p, &res->tbytes);
- p = xdr_decode_hyper(p, &res->fbytes);
- p = xdr_decode_hyper(p, &res->abytes);
- p = xdr_decode_hyper(p, &res->tfiles);
- p = xdr_decode_hyper(p, &res->ffiles);
- p = xdr_decode_hyper(p, &res->afiles);
+ __be32 *p;
+ p = xdr_inline_decode(xdr, 8 * 6 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ p = xdr_decode_size3(p, &result->tbytes);
+ p = xdr_decode_size3(p, &result->fbytes);
+ p = xdr_decode_size3(p, &result->abytes);
+ p = xdr_decode_size3(p, &result->tfiles);
+ p = xdr_decode_size3(p, &result->ffiles);
+ xdr_decode_size3(p, &result->afiles);
/* ignore invarsec */
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsstat *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_fsstat3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode FSINFO reply
+ * 3.3.19 FSINFO3res
+ *
+ * struct FSINFO3resok {
+ * post_op_attr obj_attributes;
+ * uint32 rtmax;
+ * uint32 rtpref;
+ * uint32 rtmult;
+ * uint32 wtmax;
+ * uint32 wtpref;
+ * uint32 wtmult;
+ * uint32 dtpref;
+ * size3 maxfilesize;
+ * nfstime3 time_delta;
+ * uint32 properties;
+ * };
+ *
+ * struct FSINFO3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union FSINFO3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * FSINFO3resok resok;
+ * default:
+ * FSINFO3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
+static int decode_fsinfo3resok(struct xdr_stream *xdr,
+ struct nfs_fsinfo *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
+ __be32 *p;
- res->rtmax = ntohl(*p++);
- res->rtpref = ntohl(*p++);
- res->rtmult = ntohl(*p++);
- res->wtmax = ntohl(*p++);
- res->wtpref = ntohl(*p++);
- res->wtmult = ntohl(*p++);
- res->dtpref = ntohl(*p++);
- p = xdr_decode_hyper(p, &res->maxfilesize);
- p = xdr_decode_time3(p, &res->time_delta);
+ p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->rtmax = be32_to_cpup(p++);
+ result->rtpref = be32_to_cpup(p++);
+ result->rtmult = be32_to_cpup(p++);
+ result->wtmax = be32_to_cpup(p++);
+ result->wtpref = be32_to_cpup(p++);
+ result->wtmult = be32_to_cpup(p++);
+ result->dtpref = be32_to_cpup(p++);
+ p = xdr_decode_size3(p, &result->maxfilesize);
+ xdr_decode_nfstime3(p, &result->time_delta);
/* ignore properties */
- res->lease_time = 0;
+ result->lease_time = 0;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsinfo *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_fsinfo3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode PATHCONF reply
+ * 3.3.20 PATHCONF3res
+ *
+ * struct PATHCONF3resok {
+ * post_op_attr obj_attributes;
+ * uint32 linkmax;
+ * uint32 name_max;
+ * bool no_trunc;
+ * bool chown_restricted;
+ * bool case_insensitive;
+ * bool case_preserving;
+ * };
+ *
+ * struct PATHCONF3resfail {
+ * post_op_attr obj_attributes;
+ * };
+ *
+ * union PATHCONF3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * PATHCONF3resok resok;
+ * default:
+ * PATHCONF3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
+static int decode_pathconf3resok(struct xdr_stream *xdr,
+ struct nfs_pathconf *result)
{
- int status;
-
- status = ntohl(*p++);
-
- p = xdr_decode_post_op_attr(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
- res->max_link = ntohl(*p++);
- res->max_namelen = ntohl(*p++);
+ __be32 *p;
+ p = xdr_inline_decode(xdr, 4 * 6);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ result->max_link = be32_to_cpup(p++);
+ result->max_namelen = be32_to_cpup(p);
/* ignore remaining fields */
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_pathconf *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_pathconf3resok(xdr, result);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
/*
- * Decode COMMIT reply
+ * 3.3.21 COMMIT3res
+ *
+ * struct COMMIT3resok {
+ * wcc_data file_wcc;
+ * writeverf3 verf;
+ * };
+ *
+ * struct COMMIT3resfail {
+ * wcc_data file_wcc;
+ * };
+ *
+ * union COMMIT3res switch (nfsstat3 status) {
+ * case NFS3_OK:
+ * COMMIT3resok resok;
+ * default:
+ * COMMIT3resfail resfail;
+ * };
*/
-static int
-nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
+static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_writeres *result)
{
- int status;
-
- status = ntohl(*p++);
- p = xdr_decode_wcc_data(p, res->fattr);
- if (status != 0)
- return nfs_stat_to_errno(status);
-
- res->verf->verifier[0] = *p++;
- res->verf->verifier[1] = *p++;
- return 0;
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ error = decode_wcc_data(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_status;
+ error = decode_writeverf3(xdr, result->verf->verifier);
+out:
+ return error;
+out_status:
+ return nfs_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
-/*
- * Decode GETACL reply
- */
-static int
-nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
- struct nfs3_getaclres *res)
+
+static inline int decode_getacl3resok(struct xdr_stream *xdr,
+ struct nfs3_getaclres *result)
{
- struct xdr_buf *buf = &req->rq_rcv_buf;
- int status = ntohl(*p++);
struct posix_acl **acl;
unsigned int *aclcnt;
- int err, base;
-
- if (status != 0)
- return nfs_stat_to_errno(status);
- p = xdr_decode_post_op_attr(p, res->fattr);
- res->mask = ntohl(*p++);
- if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
- return -EINVAL;
- base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base;
-
- acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL;
- aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL;
- err = nfsacl_decode(buf, base, aclcnt, acl);
-
- acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL;
- aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL;
- if (err > 0)
- err = nfsacl_decode(buf, base + err, aclcnt, acl);
- return (err > 0) ? 0 : err;
+ size_t hdrlen;
+ int error;
+
+ error = decode_post_op_attr(xdr, result->fattr);
+ if (unlikely(error))
+ goto out;
+ error = decode_uint32(xdr, &result->mask);
+ if (unlikely(error))
+ goto out;
+ error = -EINVAL;
+ if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+ goto out;
+
+ hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+
+ acl = NULL;
+ if (result->mask & NFS_ACL)
+ acl = &result->acl_access;
+ aclcnt = NULL;
+ if (result->mask & NFS_ACLCNT)
+ aclcnt = &result->acl_access_count;
+ error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);
+ if (unlikely(error <= 0))
+ goto out;
+
+ acl = NULL;
+ if (result->mask & NFS_DFACL)
+ acl = &result->acl_default;
+ aclcnt = NULL;
+ if (result->mask & NFS_DFACLCNT)
+ aclcnt = &result->acl_default_count;
+ error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl);
+ if (unlikely(error <= 0))
+ return error;
+ error = 0;
+out:
+ return error;
}
-/*
- * Decode setacl reply.
- */
-static int
-nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs3_getaclres *result)
{
- int status = ntohl(*p++);
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_getacl3resok(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
+}
- if (status)
- return nfs_stat_to_errno(status);
- xdr_decode_post_op_attr(p, fattr);
- return 0;
+static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fattr *result)
+{
+ enum nfs_stat status;
+ int error;
+
+ error = decode_nfsstat3(xdr, &status);
+ if (unlikely(error))
+ goto out;
+ if (status != NFS3_OK)
+ goto out_default;
+ error = decode_post_op_attr(xdr, result);
+out:
+ return error;
+out_default:
+ return nfs_stat_to_errno(status);
}
+
#endif /* CONFIG_NFS_V3_ACL */
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
- .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
- .p_arglen = NFS3_##argtype##_sz, \
- .p_replen = NFS3_##restype##_sz, \
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args, \
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res, \
+ .p_arglen = NFS3_##argtype##args_sz, \
+ .p_replen = NFS3_##restype##res_sz, \
.p_timer = timer, \
.p_statidx = NFS3PROC_##proc, \
.p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
- PROC(GETATTR, fhandle, attrstat, 1),
- PROC(SETATTR, sattrargs, wccstat, 0),
- PROC(LOOKUP, diropargs, lookupres, 2),
- PROC(ACCESS, accessargs, accessres, 1),
- PROC(READLINK, readlinkargs, readlinkres, 3),
- PROC(READ, readargs, readres, 3),
- PROC(WRITE, writeargs, writeres, 4),
- PROC(CREATE, createargs, createres, 0),
- PROC(MKDIR, mkdirargs, createres, 0),
- PROC(SYMLINK, symlinkargs, createres, 0),
- PROC(MKNOD, mknodargs, createres, 0),
- PROC(REMOVE, removeargs, removeres, 0),
- PROC(RMDIR, diropargs, wccstat, 0),
- PROC(RENAME, renameargs, renameres, 0),
- PROC(LINK, linkargs, linkres, 0),
- PROC(READDIR, readdirargs, readdirres, 3),
- PROC(READDIRPLUS, readdirargs, readdirres, 3),
- PROC(FSSTAT, fhandle, fsstatres, 0),
- PROC(FSINFO, fhandle, fsinfores, 0),
- PROC(PATHCONF, fhandle, pathconfres, 0),
- PROC(COMMIT, commitargs, commitres, 5),
+ PROC(GETATTR, getattr, getattr, 1),
+ PROC(SETATTR, setattr, setattr, 0),
+ PROC(LOOKUP, lookup, lookup, 2),
+ PROC(ACCESS, access, access, 1),
+ PROC(READLINK, readlink, readlink, 3),
+ PROC(READ, read, read, 3),
+ PROC(WRITE, write, write, 4),
+ PROC(CREATE, create, create, 0),
+ PROC(MKDIR, mkdir, create, 0),
+ PROC(SYMLINK, symlink, create, 0),
+ PROC(MKNOD, mknod, create, 0),
+ PROC(REMOVE, remove, remove, 0),
+ PROC(RMDIR, lookup, setattr, 0),
+ PROC(RENAME, rename, rename, 0),
+ PROC(LINK, link, link, 0),
+ PROC(READDIR, readdir, readdir, 3),
+ PROC(READDIRPLUS, readdirplus, readdir, 3),
+ PROC(FSSTAT, getattr, fsstat, 0),
+ PROC(FSINFO, getattr, fsinfo, 0),
+ PROC(PATHCONF, getattr, pathconf, 0),
+ PROC(COMMIT, commit, commit, 5),
};
struct rpc_version nfs_version3 = {
@@ -1185,8 +2468,8 @@ struct rpc_version nfs_version3 = {
static struct rpc_procinfo nfs3_acl_procedures[] = {
[ACLPROC3_GETACL] = {
.p_proc = ACLPROC3_GETACL,
- .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
- .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
.p_arglen = ACL3_getaclargs_sz,
.p_replen = ACL3_getaclres_sz,
.p_timer = 1,
@@ -1194,8 +2477,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
- .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
- .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
+ .p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
+ .p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
.p_arglen = ACL3_setaclargs_sz,
.p_replen = ACL3_setaclres_sz,
.p_timer = 0,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9fa4963..7a74740 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
NFS4CLNT_RECLAIM_REBOOT,
NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN,
+ NFS4CLNT_LAYOUTRECALL,
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
};
@@ -109,7 +110,7 @@ struct nfs_unique_id {
struct nfs4_state_owner {
struct nfs_unique_id so_owner_id;
struct nfs_server *so_server;
- struct rb_node so_client_node;
+ struct rb_node so_server_node;
struct rpc_cred *so_cred; /* Associated cred */
@@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops {
extern const struct dentry_operations nfs4_dentry_operations;
extern const struct inode_operations nfs4_dir_inode_operations;
-/* inode.c */
-extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
-extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
-extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
-
-
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -241,11 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page);
extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern const struct xattr_handler *nfs4_xattr_handlers[];
#if defined(CONFIG_NFS_V4_1)
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -331,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
extern const nfs4_stateid zero_stateid;
/* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
extern struct rpc_procinfo nfs4_procedures[];
struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 2e92f0d..23f930c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -82,7 +82,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
{
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
- struct nfs_server *nfss = NFS_SERVER(lo->inode);
+ struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
dprintk("--> %s\n", __func__);
@@ -101,7 +101,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
/* find and reference the deviceid */
dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
if (dsaddr == NULL) {
- dsaddr = get_device_info(lo->inode, id);
+ dsaddr = get_device_info(lo->plh_inode, id);
if (dsaddr == NULL)
goto out;
}
@@ -243,7 +243,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
static void
filelayout_free_lseg(struct pnfs_layout_segment *lseg)
{
- struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
+ struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4435e5e..9d992b0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -49,6 +49,7 @@
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/sunrpc/bc_xprt.h>
+#include <linux/xattr.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -355,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
}
/*
- * Signal state manager thread if session is drained
+ * Signal state manager thread if session fore channel is drained
*/
-static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
+static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
{
struct rpc_task *task;
@@ -371,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
if (ses->fc_slot_table.highest_used_slotid != -1)
return;
- dprintk("%s COMPLETE: Session Drained\n", __func__);
- complete(&ses->complete);
+ dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
+ complete(&ses->fc_slot_table.complete);
+}
+
+/*
+ * Signal state manager thread if session back channel is drained
+ */
+void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
+{
+ if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
+ ses->bc_slot_table.highest_used_slotid != -1)
+ return;
+ dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
+ complete(&ses->bc_slot_table.complete);
}
static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -389,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
spin_lock(&tbl->slot_tbl_lock);
nfs4_free_slot(tbl, res->sr_slot);
- nfs41_check_drain_session_complete(res->sr_session);
+ nfs4_check_drain_fc_complete(res->sr_session);
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slot = NULL;
}
@@ -1826,6 +1839,8 @@ struct nfs4_closedata {
struct nfs_closeres res;
struct nfs_fattr fattr;
unsigned long timestamp;
+ bool roc;
+ u32 roc_barrier;
};
static void nfs4_free_closedata(void *data)
@@ -1833,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state_owner *sp = calldata->state->owner;
+ if (calldata->roc)
+ pnfs_roc_release(calldata->state->inode);
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
@@ -1865,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
*/
switch (task->tk_status) {
case 0:
+ if (calldata->roc)
+ pnfs_roc_set_barrier(state->inode,
+ calldata->roc_barrier);
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
nfs4_close_clear_stateid_flags(state,
@@ -1917,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
return;
}
- if (calldata->arg.fmode == 0)
+ if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+ if (calldata->roc &&
+ pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
+ rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
+ task, NULL);
+ return;
+ }
+ }
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
@@ -1946,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -1981,11 +2008,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
+ calldata->roc = roc;
path_get(path);
calldata->path = *path;
- msg.rpc_argp = &calldata->arg,
- msg.rpc_resp = &calldata->res,
+ msg.rpc_argp = &calldata->arg;
+ msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -1998,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
out_free_calldata:
kfree(calldata);
out:
+ if (roc)
+ pnfs_roc_release(state->inode);
nfs4_put_open_state(state);
nfs4_put_state_owner(sp);
return status;
@@ -2486,6 +2516,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
path = &ctx->path;
fmode = ctx->mode;
}
+ sattr->ia_mode &= ~current_umask();
state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
d_drop(dentry);
if (IS_ERR(state)) {
@@ -2816,6 +2847,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
{
struct nfs4_exception exception = { };
int err;
+
+ sattr->ia_mode &= ~current_umask();
do {
err = nfs4_handle_exception(NFS_SERVER(dir),
_nfs4_proc_mkdir(dir, dentry, sattr),
@@ -2916,6 +2949,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
{
struct nfs4_exception exception = { };
int err;
+
+ sattr->ia_mode &= ~current_umask();
do {
err = nfs4_handle_exception(NFS_SERVER(dir),
_nfs4_proc_mknod(dir, dentry, sattr, rdev),
@@ -3478,6 +3513,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
struct nfs4_setclientid setclientid = {
.sc_verifier = &sc_verifier,
.sc_prog = program,
+ .sc_cb_ident = clp->cl_cb_ident,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -3517,7 +3553,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
if (signalled())
break;
if (loop++ & 1)
- ssleep(clp->cl_lease_time + 1);
+ ssleep(clp->cl_lease_time / HZ + 1);
else
if (++clp->cl_id_uniquifier == 0)
break;
@@ -3663,8 +3699,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -3743,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
goto out;
lsp = request->fl_u.nfs4_fl.owner;
arg.lock_owner.id = lsp->ls_id.id;
+ arg.lock_owner.s_dev = server->s_dev;
status = nfs4_call_sync(server, &msg, &arg, &res, 1);
switch (status) {
case 0:
@@ -3908,8 +3945,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return ERR_PTR(-ENOMEM);
}
- msg.rpc_argp = &data->arg,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
return rpc_run_task(&task_setup_data);
}
@@ -3988,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id.id;
+ p->arg.lock_owner.s_dev = server->s_dev;
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
p->server = server;
@@ -4145,8 +4183,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = NFS_LOCK_RECLAIM;
task_setup_data.callback_ops = &nfs4_recover_lock_ops;
}
- msg.rpc_argp = &data->arg,
- msg.rpc_resp = &data->res,
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -4392,48 +4430,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
return;
args->lock_owner.clientid = server->nfs_client->cl_clientid;
args->lock_owner.id = lsp->ls_id.id;
+ args->lock_owner.s_dev = server->s_dev;
msg.rpc_argp = args;
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
}
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
-int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
- size_t buflen, int flags)
+static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
+ const void *buf, size_t buflen,
+ int flags, int type)
{
- struct inode *inode = dentry->d_inode;
-
- if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
- return -EOPNOTSUPP;
+ if (strcmp(key, "") != 0)
+ return -EINVAL;
- return nfs4_proc_set_acl(inode, buf, buflen);
+ return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
}
-/* The getxattr man page suggests returning -ENODATA for unknown attributes,
- * and that's what we'll do for e.g. user attributes that haven't been set.
- * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
- * attributes in kernel-managed attribute namespaces. */
-ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
- size_t buflen)
+static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
+ void *buf, size_t buflen, int type)
{
- struct inode *inode = dentry->d_inode;
-
- if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
- return -EOPNOTSUPP;
+ if (strcmp(key, "") != 0)
+ return -EINVAL;
- return nfs4_proc_get_acl(inode, buf, buflen);
+ return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
}
-ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
+ size_t list_len, const char *name,
+ size_t name_len, int type)
{
- size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
return 0;
- if (buf && buflen < len)
- return -ERANGE;
- if (buf)
- memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+
+ if (list && len <= list_len)
+ memcpy(list, XATTR_NAME_NFSV4_ACL, len);
return len;
}
@@ -4486,6 +4519,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
#ifdef CONFIG_NFS_V4_1
/*
+ * Check the exchange flags returned by the server for invalid flags, having
+ * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
+ * DS flags set.
+ */
+static int nfs4_check_cl_exchange_flags(u32 flags)
+{
+ if (flags & ~EXCHGID4_FLAG_MASK_R)
+ goto out_inval;
+ if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
+ (flags & EXCHGID4_FLAG_USE_NON_PNFS))
+ goto out_inval;
+ if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
+ goto out_inval;
+ return NFS_OK;
+out_inval:
+ return -NFS4ERR_INVAL;
+}
+
+/*
* nfs4_proc_exchange_id()
*
* Since the clientid has expired, all compounds using sessions
@@ -4498,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
nfs4_verifier verifier;
struct nfs41_exchange_id_args args = {
.client = clp,
- .flags = clp->cl_exchange_flags,
+ .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
};
struct nfs41_exchange_id_res res = {
.client = clp,
@@ -4515,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
dprintk("--> %s\n", __func__);
BUG_ON(clp == NULL);
- /* Remove server-only flags */
- args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
-
p = (u32 *)verifier.data;
*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
*p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4543,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
break;
}
+ status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@@ -4776,17 +4826,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
if (!session)
return NULL;
- init_completion(&session->complete);
-
tbl = &session->fc_slot_table;
tbl->highest_used_slotid = -1;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+ init_completion(&tbl->complete);
tbl = &session->bc_slot_table;
tbl->highest_used_slotid = -1;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ init_completion(&tbl->complete);
session->session_state = 1<<NFS4_SESSION_INITING;
@@ -5280,13 +5330,23 @@ static void
nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
- struct inode *ino = lgp->args.inode;
- struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
dprintk("--> %s\n", __func__);
+ /* Note the is a race here, where a CB_LAYOUTRECALL can come in
+ * right now covering the LAYOUTGET we are about to send.
+ * However, that is not so catastrophic, and there seems
+ * to be no way to prevent it completely.
+ */
if (nfs4_setup_sequence(server, &lgp->args.seq_args,
&lgp->res.seq_res, 0, task))
return;
+ if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+ NFS_I(lgp->args.inode)->layout,
+ lgp->args.ctx->state)) {
+ rpc_exit(task, NFS4_OK);
+ return;
+ }
rpc_call_start(task);
}
@@ -5313,7 +5373,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
return;
}
}
- lgp->status = task->tk_status;
dprintk("<-- %s\n", __func__);
}
@@ -5322,7 +5381,6 @@ static void nfs4_layoutget_release(void *calldata)
struct nfs4_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
- put_layout_hdr(lgp->args.inode);
if (lgp->res.layout.buf != NULL)
free_page((unsigned long) lgp->res.layout.buf);
put_nfs_open_context(lgp->args.ctx);
@@ -5367,13 +5425,10 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0)
- goto out;
- status = lgp->status;
- if (status != 0)
- goto out;
- status = pnfs_layout_process(lgp);
-out:
+ if (status == 0)
+ status = task->tk_status;
+ if (status == 0)
+ status = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
return status;
@@ -5504,9 +5559,10 @@ static const struct inode_operations nfs4_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
- .getxattr = nfs4_getxattr,
- .setxattr = nfs4_setxattr,
- .listxattr = nfs4_listxattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .listxattr = generic_listxattr,
+ .removexattr = generic_removexattr,
};
const struct nfs_rpc_ops nfs_v4_clientops = {
@@ -5551,6 +5607,18 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.open_context = nfs4_atomic_open,
};
+static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+ .prefix = XATTR_NAME_NFSV4_ACL,
+ .list = nfs4_xattr_list_nfs4_acl,
+ .get = nfs4_xattr_get_nfs4_acl,
+ .set = nfs4_xattr_set_nfs4_acl,
+};
+
+const struct xattr_handler *nfs4_xattr_handlers[] = {
+ &nfs4_xattr_nfs4_acl_handler,
+ NULL
+};
+
/*
* Local variables:
* c-basic-offset: 8
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 72b6c58..402143d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work)
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
- /* Are there any active superblocks? */
- if (list_empty(&clp->cl_superblocks))
+
+ rcu_read_lock();
+ if (list_empty(&clp->cl_superblocks)) {
+ rcu_read_unlock();
goto out;
+ }
+ rcu_read_unlock();
+
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
last = clp->cl_last_renewal;
@@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
cred = ops->get_state_renewal_cred_locked(clp);
spin_unlock(&clp->cl_lock);
if (cred == NULL) {
- if (list_empty(&clp->cl_delegations)) {
+ if (!nfs_delegations_present(clp)) {
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a31..2336d53 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
put_rpccred(cred);
}
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_renew_cred_server_locked(struct nfs_server *server)
{
+ struct rpc_cred *cred = NULL;
struct nfs4_state_owner *sp;
struct rb_node *pos;
- struct rpc_cred *cred = NULL;
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
if (list_empty(&sp->so_states))
continue;
cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
return cred;
}
+/**
+ * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ * Caller must hold clp->cl_lock.
+ */
+struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+{
+ struct rpc_cred *cred = NULL;
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ cred = nfs4_get_renew_cred_server_locked(server);
+ if (cred != NULL)
+ break;
+ }
+ rcu_read_unlock();
+ return cred;
+}
+
#if defined(CONFIG_NFS_V4_1)
static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -142,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
return status;
}
+/*
+ * Back channel returns NFS4ERR_DELAY for new requests when
+ * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
+ * is ended.
+ */
static void nfs4_end_drain_session(struct nfs_client *clp)
{
struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
}
}
-static int nfs4_begin_drain_session(struct nfs_client *clp)
+static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
{
- struct nfs4_session *ses = clp->cl_session;
- struct nfs4_slot_table *tbl = &ses->fc_slot_table;
-
spin_lock(&tbl->slot_tbl_lock);
- set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
if (tbl->highest_used_slotid != -1) {
- INIT_COMPLETION(ses->complete);
+ INIT_COMPLETION(tbl->complete);
spin_unlock(&tbl->slot_tbl_lock);
- return wait_for_completion_interruptible(&ses->complete);
+ return wait_for_completion_interruptible(&tbl->complete);
}
spin_unlock(&tbl->slot_tbl_lock);
return 0;
}
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+ struct nfs4_session *ses = clp->cl_session;
+ int ret = 0;
+
+ set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ /* back channel */
+ ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+ if (ret)
+ return ret;
+ /* fore channel */
+ return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+}
+
int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
@@ -192,6 +232,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
status = nfs4_proc_create_session(clp);
if (status != 0)
goto out;
+ status = nfs4_set_callback_sessionid(clp);
+ if (status != 0) {
+ printk(KERN_WARNING "Sessionid not set. No callback service\n");
+ nfs_callback_down(1);
+ status = 0;
+ }
nfs41_setup_state_renewal(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -210,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
#endif /* CONFIG_NFS_V4_1 */
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_setclientid_cred_server(struct nfs_server *server)
{
+ struct nfs_client *clp = server->nfs_client;
+ struct rpc_cred *cred = NULL;
struct nfs4_state_owner *sp;
struct rb_node *pos;
+
+ spin_lock(&clp->cl_lock);
+ pos = rb_first(&server->state_owners);
+ if (pos != NULL) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
+ cred = get_rpccred(sp->so_cred);
+ }
+ spin_unlock(&clp->cl_lock);
+ return cred;
+}
+
+/**
+ * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+{
+ struct nfs_server *server;
struct rpc_cred *cred;
spin_lock(&clp->cl_lock);
cred = nfs4_get_machine_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
if (cred != NULL)
goto out;
- pos = rb_first(&clp->cl_state_owners);
- if (pos != NULL) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
- cred = get_rpccred(sp->so_cred);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ cred = nfs4_get_setclientid_cred_server(server);
+ if (cred != NULL)
+ break;
}
+ rcu_read_unlock();
+
out:
- spin_unlock(&clp->cl_lock);
return cred;
}
-static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
- __u64 minval, int maxbits)
+static void nfs_alloc_unique_id_locked(struct rb_root *root,
+ struct nfs_unique_id *new,
+ __u64 minval, int maxbits)
{
struct rb_node **p, *parent;
struct nfs_unique_id *pos;
@@ -286,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
}
static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
{
- struct nfs_client *clp = server->nfs_client;
- struct rb_node **p = &clp->cl_state_owners.rb_node,
+ struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp, *res = NULL;
while (*p != NULL) {
parent = *p;
- sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+ sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
if (server < sp->so_server) {
p = &parent->rb_left;
@@ -319,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
}
static struct nfs4_state_owner *
-nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
{
- struct rb_node **p = &clp->cl_state_owners.rb_node,
+ struct nfs_server *server = new->so_server;
+ struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp;
while (*p != NULL) {
parent = *p;
- sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+ sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
- if (new->so_server < sp->so_server) {
- p = &parent->rb_left;
- continue;
- }
- if (new->so_server > sp->so_server) {
- p = &parent->rb_right;
- continue;
- }
if (new->so_cred < sp->so_cred)
p = &parent->rb_left;
else if (new->so_cred > sp->so_cred)
@@ -346,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
return sp;
}
}
- nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
- rb_link_node(&new->so_client_node, parent, p);
- rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+ nfs_alloc_unique_id_locked(&server->openowner_id,
+ &new->so_owner_id, 1, 64);
+ rb_link_node(&new->so_server_node, parent, p);
+ rb_insert_color(&new->so_server_node, &server->state_owners);
return new;
}
static void
-nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_client_node))
- rb_erase(&sp->so_client_node, &clp->cl_state_owners);
- nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+ struct nfs_server *server = sp->so_server;
+
+ if (!RB_EMPTY_NODE(&sp->so_server_node))
+ rb_erase(&sp->so_server_node, &server->state_owners);
+ nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
}
/*
@@ -386,23 +455,32 @@ nfs4_alloc_state_owner(void)
static void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_client_node)) {
- struct nfs_client *clp = sp->so_server->nfs_client;
+ if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+ struct nfs_server *server = sp->so_server;
+ struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- rb_erase(&sp->so_client_node, &clp->cl_state_owners);
- RB_CLEAR_NODE(&sp->so_client_node);
+ rb_erase(&sp->so_server_node, &server->state_owners);
+ RB_CLEAR_NODE(&sp->so_server_node);
spin_unlock(&clp->cl_lock);
}
}
-struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+/**
+ * nfs4_get_state_owner - Look up a state owner given a credential
+ * @server: nfs_server to search
+ * @cred: RPC credential to match
+ *
+ * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
+ struct rpc_cred *cred)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *new;
spin_lock(&clp->cl_lock);
- sp = nfs4_find_state_owner(server, cred);
+ sp = nfs4_find_state_owner_locked(server, cred);
spin_unlock(&clp->cl_lock);
if (sp != NULL)
return sp;
@@ -412,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
new->so_server = server;
new->so_cred = cred;
spin_lock(&clp->cl_lock);
- sp = nfs4_insert_state_owner(clp, new);
+ sp = nfs4_insert_state_owner_locked(new);
spin_unlock(&clp->cl_lock);
if (sp == new)
get_rpccred(cred);
@@ -423,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
return sp;
}
+/**
+ * nfs4_put_state_owner - Release a nfs4_state_owner
+ * @sp: state owner data to release
+ *
+ */
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
struct nfs_client *clp = sp->so_server->nfs_client;
@@ -430,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
- nfs4_remove_state_owner(clp, sp);
+ nfs4_remove_state_owner_locked(sp);
spin_unlock(&clp->cl_lock);
rpc_destroy_wait_queue(&sp->so_sequence.wait);
put_rpccred(cred);
@@ -585,8 +668,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
if (!call_close) {
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
- } else
- nfs4_do_close(path, state, gfp_mask, wait);
+ } else {
+ bool roc = pnfs_roc(state->inode);
+
+ nfs4_do_close(path, state, gfp_mask, wait, roc);
+ }
}
void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -633,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
{
struct nfs4_lock_state *lsp;
- struct nfs_client *clp = state->owner->so_server->nfs_client;
+ struct nfs_server *server = state->owner->so_server;
+ struct nfs_client *clp = server->nfs_client;
lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
if (lsp == NULL)
@@ -657,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
return NULL;
}
spin_lock(&clp->cl_lock);
- nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
spin_unlock(&clp->cl_lock);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
@@ -665,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
{
- struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ struct nfs_server *server = lsp->ls_state->owner->so_server;
+ struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+ nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
spin_unlock(&clp->cl_lock);
rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
kfree(lsp);
@@ -1114,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
}
}
-static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+static void nfs4_reset_seqids(struct nfs_server *server,
+ int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
{
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp;
struct rb_node *pos;
struct nfs4_state *state;
- /* Reset all sequence ids to zero */
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
sp->so_seqid.flags = 0;
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1131,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
}
spin_unlock(&sp->so_lock);
}
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+ int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+{
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs4_reset_seqids(server, mark_reclaim);
+ rcu_read_unlock();
}
static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1148,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
(void)ops->reclaim_complete(clp);
}
-static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+static void nfs4_clear_reclaim_server(struct nfs_server *server)
{
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp;
struct rb_node *pos;
struct nfs4_state *state;
- if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
- return 0;
-
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
- if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+ if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
+ &state->flags))
continue;
nfs4_state_mark_reclaim_nograce(clp, state);
}
spin_unlock(&sp->so_lock);
}
+ spin_unlock(&clp->cl_lock);
+}
+
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+
+ if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ return 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs4_clear_reclaim_server(server);
+ rcu_read_unlock();
nfs_delegation_reap_unclaimed(clp);
return 1;
@@ -1238,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
{
+ struct nfs4_state_owner *sp;
+ struct nfs_server *server;
struct rb_node *pos;
int status = 0;
restart:
- spin_lock(&clp->cl_lock);
- for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
- struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
- if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
- continue;
- atomic_inc(&sp->so_count);
- spin_unlock(&clp->cl_lock);
- status = nfs4_reclaim_open_state(sp, ops);
- if (status < 0) {
- set_bit(ops->owner_flag_bit, &sp->so_flags);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+ pos = rb_next(pos)) {
+ sp = rb_entry(pos,
+ struct nfs4_state_owner, so_server_node);
+ if (!test_and_clear_bit(ops->owner_flag_bit,
+ &sp->so_flags))
+ continue;
+ atomic_inc(&sp->so_count);
+ spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
+
+ status = nfs4_reclaim_open_state(sp, ops);
+ if (status < 0) {
+ set_bit(ops->owner_flag_bit, &sp->so_flags);
+ nfs4_put_state_owner(sp);
+ return nfs4_recovery_handle_error(clp, status);
+ }
+
nfs4_put_state_owner(sp);
- return nfs4_recovery_handle_error(clp, status);
+ goto restart;
}
- nfs4_put_state_owner(sp);
- goto restart;
+ spin_unlock(&clp->cl_lock);
}
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
return status;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f1826b..2ab8e5c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -71,8 +71,8 @@ static int nfs4_stat_to_errno(int);
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
-#define open_owner_id_maxsz (1 + 4)
-#define lock_owner_id_maxsz (1 + 4)
+#define open_owner_id_maxsz (1 + 1 + 4)
+#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -1088,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo
{
__be32 *p;
- p = reserve_space(xdr, 28);
+ p = reserve_space(xdr, 32);
p = xdr_encode_hyper(p, lowner->clientid);
- *p++ = cpu_to_be32(16);
+ *p++ = cpu_to_be32(20);
p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ *p++ = cpu_to_be32(lowner->s_dev);
xdr_encode_hyper(p, lowner->id);
}
@@ -1210,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
*p++ = cpu_to_be32(OP_OPEN);
*p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
- p = reserve_space(xdr, 28);
+ p = reserve_space(xdr, 32);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(16);
+ *p++ = cpu_to_be32(20);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
+ *p++ = cpu_to_be32(arg->server->s_dev);
xdr_encode_hyper(p, arg->id);
}
@@ -1510,7 +1512,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
hdr->replen += decode_restorefh_maxsz;
}
-static int
+static void
encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
{
__be32 *p;
@@ -1521,14 +1523,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
p = reserve_space(xdr, 2*4);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(FATTR4_WORD0_ACL);
- if (arg->acl_len % 4)
- return -EINVAL;
+ BUG_ON(arg->acl_len % 4);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
hdr->nops++;
hdr->replen += decode_setacl_maxsz;
- return 0;
}
static void
@@ -1789,7 +1789,6 @@ encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr)
{
- nfs4_stateid stateid;
__be32 *p;
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
@@ -1800,9 +1799,7 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
p = xdr_encode_hyper(p, args->minlength);
- pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
- args->ctx->state);
- p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);
dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1833,393 +1830,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
/*
* Encode an ACCESS request
*/
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
+static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_accessargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_access(&xdr, args->access, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_access(xdr, args->access, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LOOKUP request
*/
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
+static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_lookup_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_lookup(&xdr, args->name, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LOOKUP_ROOT request
*/
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
+static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs4_lookup_root_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode REMOVE request
*/
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs_removeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_remove(&xdr, &args->name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_remove(xdr, &args->name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode RENAME request
*/
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
+static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs_renameargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->old_dir, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_putfh(&xdr, args->new_dir, &hdr);
- encode_rename(&xdr, args->old_name, args->new_name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->old_dir, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->new_dir, &hdr);
+ encode_rename(xdr, args->old_name, args->new_name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LINK request
*/
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
+static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_link_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_link(&xdr, args->name, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_link(xdr, args->name, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode CREATE request
*/
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_create_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_create(&xdr, args, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_create(xdr, args, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode SYMLINK request
*/
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_create_arg *args)
{
- return nfs4_xdr_enc_create(req, p, args);
+ nfs4_xdr_enc_create(req, xdr, args);
}
/*
* Encode GETATTR request
*/
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
+static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_getattr_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a CLOSE request
*/
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_closeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_close(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_close(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN request
*/
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_openargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_savefh(&xdr, &hdr);
- encode_open(&xdr, args, &hdr);
- encode_getfh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
- encode_restorefh(&xdr, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_open(xdr, args, &hdr);
+ encode_getfh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_restorefh(xdr, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN_CONFIRM request
*/
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
+static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_open_confirmargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open_confirm(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open_confirm(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN request with no attributes.
*/
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_openargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode an OPEN_DOWNGRADE request
*/
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_closeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_open_downgrade(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_open_downgrade(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCK request
*/
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
+static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_lock_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_lock(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_lock(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCKT request
*/
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
+static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_lockt_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_lockt(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_lockt(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a LOCKU request
*/
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
+static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_locku_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_locku(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_locku(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
-static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_release_lockowner_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_release_lockowner(xdr, &args->lock_owner, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READLINK request
*/
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
+static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_readlink *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_readlink(&xdr, args, req, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_readlink(xdr, args, req, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
args->pgbase, args->pglen);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READDIR request
*/
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
+static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_readdir_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_readdir(&xdr, args, req, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_readdir(xdr, args, req, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
args->pgbase, args->count);
@@ -2227,428 +2193,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
__func__, hdr.replen << 2, args->pages,
args->pgbase, args->count);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a READ request
*/
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_readargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_read(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_read(xdr, args, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
args->pages, args->pgbase, args->count);
req->rq_rcv_buf.flags |= XDRBUF_READ;
encode_nops(&hdr);
- return 0;
}
/*
* Encode an SETATTR request
*/
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
+static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_setattrargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_setattr(&xdr, args, args->server, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setattr(xdr, args, args->server, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a GETACL request
*/
-static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
- struct nfs_getaclargs *args)
+static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_getaclargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
- encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
+ encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
args->acl_pages, args->acl_pgbase, args->acl_len);
encode_nops(&hdr);
- return 0;
}
/*
* Encode a WRITE request
*/
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_write(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_write(xdr, args, &hdr);
req->rq_snd_buf.flags |= XDRBUF_WRITE;
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a COMMIT request
*/
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_writeargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_commit(&xdr, args, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_commit(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* FSINFO request
*/
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
+static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs4_fsinfo_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_fsinfo(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_fsinfo(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a PATHCONF request
*/
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
+static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_pathconf_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
&hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a STATFS request
*/
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
+static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfs4_statfs_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* GETATTR_BITMAP request
*/
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
- struct nfs4_server_caps_arg *args)
+static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_server_caps_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fhandle, &hdr);
- encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fhandle, &hdr);
+ encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
FATTR4_WORD0_LINK_SUPPORT|
FATTR4_WORD0_SYMLINK_SUPPORT|
FATTR4_WORD0_ACLSUPPORT, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a RENEW request
*/
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_client *clp)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_renew(&xdr, clp, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_renew(xdr, clp, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SETCLIENTID request
*/
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
+static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid *sc)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_setclientid(&xdr, sc, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_setclientid(xdr, sc, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SETCLIENTID_CONFIRM request
*/
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid_res *arg)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 0,
};
const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_setclientid_confirm(&xdr, arg, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_setclientid_confirm(xdr, arg, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_fsinfo(xdr, lease_bitmap, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* DELEGRETURN request
*/
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
+static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfs4_delegreturnargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fhandle, &hdr);
- encode_delegreturn(&xdr, args->stateid, &hdr);
- encode_getfattr(&xdr, args->bitmask, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fhandle, &hdr);
+ encode_delegreturn(xdr, args->stateid, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode FS_LOCATIONS request
*/
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
+static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_fs_locations_arg *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
uint32_t replen;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->dir_fh, &hdr);
- encode_lookup(&xdr, args->name, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
replen = hdr.replen; /* get the attribute into args->page */
- encode_fs_locations(&xdr, args->bitmask, &hdr);
+ encode_fs_locations(xdr, args->bitmask, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
0, PAGE_SIZE);
encode_nops(&hdr);
- return 0;
}
#if defined(CONFIG_NFS_V4_1)
/*
* EXCHANGE_ID request
*/
-static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_exchange_id_args *args)
+static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_exchange_id_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = args->client->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_exchange_id(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_exchange_id(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a CREATE_SESSION request
*/
-static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_create_session_args *args)
+static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_create_session_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = args->client->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_create_session(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_create_session(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a DESTROY_SESSION request
*/
-static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_session *session)
+static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_session *session)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = session->clp->cl_mvops->minor_version,
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_destroy_session(&xdr, session, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_destroy_session(xdr, session, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a SEQUENCE request
*/
-static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_sequence_args *args)
+static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs4_sequence_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a GET_LEASE_TIME request
*/
-static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_get_lease_time_args *args)
+static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_get_lease_time_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
};
const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->la_seq_args, &hdr);
- encode_putrootfh(&xdr, &hdr);
- encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->la_seq_args, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_fsinfo(xdr, lease_bitmap, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* a RECLAIM_COMPLETE request
*/
-static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
- struct nfs41_reclaim_complete_args *args)
+static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_reclaim_complete_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args)
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_reclaim_complete(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_reclaim_complete(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
/*
* Encode GETDEVICEINFO request
*/
-static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_getdeviceinfo_args *args)
+static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_getdeviceinfo_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_getdeviceinfo(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_getdeviceinfo(xdr, args, &hdr);
/* set up reply kvec. Subtract notification bitmap max size (2)
* so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2582,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
args->pdev->pglen);
encode_nops(&hdr);
- return 0;
}
/*
* Encode LAYOUTGET request
*/
-static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_layoutget_args *args)
+static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_layoutget_args *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
- encode_layoutget(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+ encode_layoutget(xdr, args, &hdr);
encode_nops(&hdr);
- return 0;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -4475,7 +4397,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
goto out_overflow;
eof = be32_to_cpup(p++);
count = be32_to_cpup(p);
- hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+ hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
dprintk("NFS: server cheating in read reply: "
@@ -5000,7 +4922,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
goto out_overflow;
len = be32_to_cpup(p);
if (len) {
- int i;
+ uint32_t i;
p = xdr_inline_decode(xdr, 4 * len);
if (unlikely(!p))
@@ -5090,26 +5012,26 @@ out_overflow:
/*
* Decode OPEN_DOWNGRADE response
*/
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_closeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open_downgrade(&xdr, res);
+ status = decode_open_downgrade(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5118,26 +5040,25 @@ out:
/*
* Decode ACCESS response
*/
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_accessres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_access(&xdr, res);
+ status = decode_access(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5146,26 +5067,28 @@ out:
/*
* Decode LOOKUP response
*/
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_lookup_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_lookup(&xdr)) != 0)
+ status = decode_lookup(xdr);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) != 0)
+ status = decode_getfh(xdr, res->fh);
+ if (status)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server
+ status = decode_getfattr(xdr, res->fattr, res->server
,!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5174,23 +5097,25 @@ out:
/*
* Decode LOOKUP_ROOT response
*/
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_lookup_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putrootfh(&xdr)) != 0)
+ status = decode_putrootfh(xdr);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) == 0)
- status = decode_getfattr(&xdr, res->fattr, res->server,
+ status = decode_getfh(xdr, res->fh);
+ if (status == 0)
+ status = decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5199,24 +5124,25 @@ out:
/*
* Decode REMOVE response
*/
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_removeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+ status = decode_remove(xdr, &res->cinfo);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->dir_attr, res->server,
+ decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5225,34 +5151,38 @@ out:
/*
* Decode RENAME response
*/
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_renameres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+ status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
+ if (status)
goto out;
/* Current FH is target directory */
- if (decode_getfattr(&xdr, res->new_fattr, res->server,
+ if (decode_getfattr(xdr, res->new_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->old_fattr, res->server,
+ decode_getfattr(xdr, res->old_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5261,37 +5191,41 @@ out:
/*
* Decode LINK response
*/
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_link_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+ status = decode_link(xdr, &res->cinfo);
+ if (status)
goto out;
/*
* Note order: OP_LINK leaves the directory as the current
* filehandle.
*/
- if (decode_getfattr(&xdr, res->dir_attr, res->server,
+ if (decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5300,33 +5234,37 @@ out:
/*
* Decode CREATE response
*/
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_create_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_savefh(&xdr)) != 0)
+ status = decode_savefh(xdr);
+ if (status)
goto out;
- if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+ status = decode_create(xdr, &res->dir_cinfo);
+ if (status)
goto out;
- if ((status = decode_getfh(&xdr, res->fh)) != 0)
+ status = decode_getfh(xdr, res->fh);
+ if (status)
goto out;
- if (decode_getfattr(&xdr, res->fattr, res->server,
+ if (decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if ((status = decode_restorefh(&xdr)) != 0)
+ status = decode_restorefh(xdr);
+ if (status)
goto out;
- decode_getfattr(&xdr, res->dir_fattr, res->server,
+ decode_getfattr(xdr, res->dir_fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5335,31 +5273,31 @@ out:
/*
* Decode SYMLINK response
*/
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_create_res *res)
{
- return nfs4_xdr_dec_create(rqstp, p, res);
+ return nfs4_xdr_dec_create(rqstp, xdr, res);
}
/*
* Decode GETATTR response
*/
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_getattr_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server,
+ status = decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5368,46 +5306,40 @@ out:
/*
* Encode an SETACL request
*/
-static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
+static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+ struct nfs_setaclargs *args)
{
- struct xdr_stream xdr;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- int status;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_compound_hdr(&xdr, req, &hdr);
- encode_sequence(&xdr, &args->seq_args, &hdr);
- encode_putfh(&xdr, args->fh, &hdr);
- status = encode_setacl(&xdr, args, &hdr);
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setacl(xdr, args, &hdr);
encode_nops(&hdr);
- return status;
}
/*
* Decode SETACL response
*/
static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct nfs_setaclres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_setattr(&xdr);
+ status = decode_setattr(xdr);
out:
return status;
}
@@ -5416,24 +5348,22 @@ out:
* Decode GETACL response
*/
static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct nfs_getaclres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getacl(&xdr, rqstp, &res->acl_len);
+ status = decode_getacl(xdr, rqstp, &res->acl_len);
out:
return status;
@@ -5442,23 +5372,22 @@ out:
/*
* Decode CLOSE response
*/
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_closeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_close(&xdr, res);
+ status = decode_close(xdr, res);
if (status != 0)
goto out;
/*
@@ -5467,7 +5396,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
* an ESTALE error. Shouldn't be a problem,
* though, since fattr->valid will remain unset.
*/
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5476,36 +5405,35 @@ out:
/*
* Decode OPEN response
*/
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_openres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(&xdr);
+ status = decode_savefh(xdr);
if (status)
goto out;
- status = decode_open(&xdr, res);
+ status = decode_open(xdr, res);
if (status)
goto out;
- if (decode_getfh(&xdr, &res->fh) != 0)
+ if (decode_getfh(xdr, &res->fh) != 0)
goto out;
- if (decode_getfattr(&xdr, res->f_attr, res->server,
+ if (decode_getfattr(xdr, res->f_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
- if (decode_restorefh(&xdr) != 0)
+ if (decode_restorefh(xdr) != 0)
goto out;
- decode_getfattr(&xdr, res->dir_attr, res->server,
+ decode_getfattr(xdr, res->dir_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5514,20 +5442,20 @@ out:
/*
* Decode OPEN_CONFIRM response
*/
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_open_confirmres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open_confirm(&xdr, res);
+ status = decode_open_confirm(xdr, res);
out:
return status;
}
@@ -5535,26 +5463,26 @@ out:
/*
* Decode OPEN response
*/
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_openres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_open(&xdr, res);
+ status = decode_open(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->f_attr, res->server,
+ decode_getfattr(xdr, res->f_attr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5563,26 +5491,26 @@ out:
/*
* Decode SETATTR response
*/
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs_setattrres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_setattr(&xdr);
+ status = decode_setattr(xdr);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5591,23 +5519,22 @@ out:
/*
* Decode LOCK response
*/
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_lock_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lock(&xdr, res);
+ status = decode_lock(xdr, res);
out:
return status;
}
@@ -5615,23 +5542,22 @@ out:
/*
* Decode LOCKT response
*/
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_lockt_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lockt(&xdr, res);
+ status = decode_lockt(xdr, res);
out:
return status;
}
@@ -5639,61 +5565,58 @@ out:
/*
* Decode LOCKU response
*/
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_locku_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_locku(&xdr, res);
+ status = decode_locku(xdr, res);
out:
return status;
}
-static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *dummy)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_release_lockowner(&xdr);
+ status = decode_release_lockowner(xdr);
return status;
}
/*
* Decode READLINK response
*/
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_readlink_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_readlink(&xdr, rqstp);
+ status = decode_readlink(xdr, rqstp);
out:
return status;
}
@@ -5701,23 +5624,22 @@ out:
/*
* Decode READDIR response
*/
-static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res)
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs4_readdir_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_readdir(&xdr, rqstp, res);
+ status = decode_readdir(xdr, rqstp, res);
out:
return status;
}
@@ -5725,23 +5647,22 @@ out:
/*
* Decode Read response
*/
-static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res)
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_readres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_read(&xdr, rqstp, res);
+ status = decode_read(xdr, rqstp, res);
if (!status)
status = res->count;
out:
@@ -5751,26 +5672,25 @@ out:
/*
* Decode WRITE response
*/
-static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_writeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_write(&xdr, res);
+ status = decode_write(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
@@ -5781,26 +5701,25 @@ out:
/*
* Decode COMMIT response
*/
-static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfs_writeres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_commit(&xdr, res);
+ status = decode_commit(xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5809,85 +5728,80 @@ out:
/*
* Decode FSINFO response
*/
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_fsinfo_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, res->fsinfo);
+ status = decode_fsinfo(xdr, res->fsinfo);
return status;
}
/*
* Decode PATHCONF response
*/
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_pathconf_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_pathconf(&xdr, res->pathconf);
+ status = decode_pathconf(xdr, res->pathconf);
return status;
}
/*
* Decode STATFS response
*/
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
struct nfs4_statfs_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (!status)
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (!status)
- status = decode_statfs(&xdr, res->fsstat);
+ status = decode_statfs(xdr, res->fsstat);
return status;
}
/*
* Decode GETATTR_BITMAP response
*/
-static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_server_caps_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- status = decode_server_caps(&xdr, res);
+ status = decode_server_caps(xdr, res);
out:
return status;
}
@@ -5895,79 +5809,77 @@ out:
/*
* Decode RENEW response
*/
-static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *__unused)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_renew(&xdr);
+ status = decode_renew(xdr);
return status;
}
/*
* Decode SETCLIENTID response
*/
-static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
- struct nfs4_setclientid_res *res)
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs4_setclientid_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_setclientid(&xdr, res);
+ status = decode_setclientid(xdr, res);
return status;
}
/*
* Decode SETCLIENTID_CONFIRM response
*/
-static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_fsinfo *fsinfo)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_setclientid_confirm(&xdr);
+ status = decode_setclientid_confirm(xdr);
if (!status)
- status = decode_putrootfh(&xdr);
+ status = decode_putrootfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, fsinfo);
+ status = decode_fsinfo(xdr, fsinfo);
return status;
}
/*
* Decode DELEGRETURN response
*/
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_delegreturnres *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_delegreturn(&xdr);
+ status = decode_delegreturn(xdr);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server,
+ decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
@@ -5976,26 +5888,27 @@ out:
/*
* Decode FS_LOCATIONS response
*/
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
struct nfs4_fs_locations_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, req);
+ status = decode_sequence(xdr, &res->seq_res, req);
if (status)
goto out;
- if ((status = decode_putfh(&xdr)) != 0)
+ status = decode_putfh(xdr);
+ if (status)
goto out;
- if ((status = decode_lookup(&xdr)) != 0)
+ status = decode_lookup(xdr);
+ if (status)
goto out;
- xdr_enter_page(&xdr, PAGE_SIZE);
- status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr(xdr, &res->fs_locations->fattr,
res->fs_locations->server,
!RPC_IS_ASYNC(req->rq_task));
out:
@@ -6006,129 +5919,122 @@ out:
/*
* Decode EXCHANGE_ID response
*/
-static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
void *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_exchange_id(&xdr, res);
+ status = decode_exchange_id(xdr, res);
return status;
}
/*
* Decode CREATE_SESSION response
*/
-static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs41_create_session_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_create_session(&xdr, res);
+ status = decode_create_session(xdr, res);
return status;
}
/*
* Decode DESTROY_SESSION response
*/
-static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
- void *dummy)
+static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_destroy_session(&xdr, dummy);
+ status = decode_destroy_session(xdr, res);
return status;
}
/*
* Decode SEQUENCE response
*/
-static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_sequence_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, res, rqstp);
+ status = decode_sequence(xdr, res, rqstp);
return status;
}
/*
* Decode GET_LEASE_TIME response
*/
-static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_get_lease_time_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+ status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
if (!status)
- status = decode_putrootfh(&xdr);
+ status = decode_putrootfh(xdr);
if (!status)
- status = decode_fsinfo(&xdr, res->lr_fsinfo);
+ status = decode_fsinfo(xdr, res->lr_fsinfo);
return status;
}
/*
* Decode RECLAIM_COMPLETE response
*/
-static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs41_reclaim_complete_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (!status)
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (!status)
- status = decode_reclaim_complete(&xdr, (void *)NULL);
+ status = decode_reclaim_complete(xdr, (void *)NULL);
return status;
}
/*
* Decode GETDEVINFO response
*/
-static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_getdeviceinfo_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status != 0)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status != 0)
goto out;
- status = decode_getdeviceinfo(&xdr, res->pdev);
+ status = decode_getdeviceinfo(xdr, res->pdev);
out:
return status;
}
@@ -6136,31 +6042,44 @@ out:
/*
* Decode LAYOUTGET response
*/
-static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
struct nfs4_layoutget_res *res)
{
- struct xdr_stream xdr;
struct compound_hdr hdr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_compound_hdr(&xdr, &hdr);
+ status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
- status = decode_sequence(&xdr, &res->seq_res, rqstp);
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
- status = decode_putfh(&xdr);
+ status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_layoutget(&xdr, rqstp, res);
+ status = decode_layoutget(xdr, rqstp, res);
out:
return status;
}
#endif /* CONFIG_NFS_V4_1 */
-__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
- struct nfs_server *server, int plus)
+/**
+ * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in
+ * the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ */
+int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ int plus)
{
uint32_t bitmap[2] = {0};
uint32_t len;
@@ -6172,9 +6091,9 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (unlikely(!p))
goto out_overflow;
if (!ntohl(*p++))
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
entry->eof = 1;
- return ERR_PTR(-EBADCOOKIE);
+ return -EBADCOOKIE;
}
p = xdr_inline_decode(xdr, 12);
@@ -6203,7 +6122,8 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (decode_attr_length(xdr, &len, &p) < 0)
goto out_overflow;
- if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
+ if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
+ entry->server, 1) < 0)
goto out_overflow;
if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
entry->ino = entry->fattr->fileid;
@@ -6215,17 +6135,11 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (verify_attr_len(xdr, p, len) < 0)
goto out_overflow;
- p = xdr_inline_peek(xdr, 8);
- if (p != NULL)
- entry->eof = !p[0] && p[1];
- else
- entry->eof = 0;
-
- return p;
+ return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
}
/*
@@ -6301,8 +6215,8 @@ nfs4_stat_to_errno(int stat)
#define PROC(proc, argtype, restype) \
[NFSPROC4_CLNT_##proc] = { \
.p_proc = NFSPROC4_COMPOUND, \
- .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
+ .p_encode = (kxdreproc_t)nfs4_xdr_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs4_xdr_##restype, \
.p_arglen = NFS4_##argtype##_sz, \
.p_replen = NFS4_##restype##_sz, \
.p_statidx = NFSPROC4_CLNT_##proc, \
@@ -6310,50 +6224,50 @@ nfs4_stat_to_errno(int stat)
}
struct rpc_procinfo nfs4_procedures[] = {
- PROC(READ, enc_read, dec_read),
- PROC(WRITE, enc_write, dec_write),
- PROC(COMMIT, enc_commit, dec_commit),
- PROC(OPEN, enc_open, dec_open),
- PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
- PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
- PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
- PROC(CLOSE, enc_close, dec_close),
- PROC(SETATTR, enc_setattr, dec_setattr),
- PROC(FSINFO, enc_fsinfo, dec_fsinfo),
- PROC(RENEW, enc_renew, dec_renew),
- PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
- PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
- PROC(LOCK, enc_lock, dec_lock),
- PROC(LOCKT, enc_lockt, dec_lockt),
- PROC(LOCKU, enc_locku, dec_locku),
- PROC(ACCESS, enc_access, dec_access),
- PROC(GETATTR, enc_getattr, dec_getattr),
- PROC(LOOKUP, enc_lookup, dec_lookup),
- PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
- PROC(REMOVE, enc_remove, dec_remove),
- PROC(RENAME, enc_rename, dec_rename),
- PROC(LINK, enc_link, dec_link),
- PROC(SYMLINK, enc_symlink, dec_symlink),
- PROC(CREATE, enc_create, dec_create),
- PROC(PATHCONF, enc_pathconf, dec_pathconf),
- PROC(STATFS, enc_statfs, dec_statfs),
- PROC(READLINK, enc_readlink, dec_readlink),
- PROC(READDIR, enc_readdir, dec_readdir),
- PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
- PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
- PROC(GETACL, enc_getacl, dec_getacl),
- PROC(SETACL, enc_setacl, dec_setacl),
- PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
- PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ PROC(READ, enc_read, dec_read),
+ PROC(WRITE, enc_write, dec_write),
+ PROC(COMMIT, enc_commit, dec_commit),
+ PROC(OPEN, enc_open, dec_open),
+ PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
+ PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
+ PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
+ PROC(CLOSE, enc_close, dec_close),
+ PROC(SETATTR, enc_setattr, dec_setattr),
+ PROC(FSINFO, enc_fsinfo, dec_fsinfo),
+ PROC(RENEW, enc_renew, dec_renew),
+ PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
+ PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
+ PROC(LOCK, enc_lock, dec_lock),
+ PROC(LOCKT, enc_lockt, dec_lockt),
+ PROC(LOCKU, enc_locku, dec_locku),
+ PROC(ACCESS, enc_access, dec_access),
+ PROC(GETATTR, enc_getattr, dec_getattr),
+ PROC(LOOKUP, enc_lookup, dec_lookup),
+ PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
+ PROC(REMOVE, enc_remove, dec_remove),
+ PROC(RENAME, enc_rename, dec_rename),
+ PROC(LINK, enc_link, dec_link),
+ PROC(SYMLINK, enc_symlink, dec_symlink),
+ PROC(CREATE, enc_create, dec_create),
+ PROC(PATHCONF, enc_pathconf, dec_pathconf),
+ PROC(STATFS, enc_statfs, dec_statfs),
+ PROC(READLINK, enc_readlink, dec_readlink),
+ PROC(READDIR, enc_readdir, dec_readdir),
+ PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
+ PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
+ PROC(GETACL, enc_getacl, dec_getacl),
+ PROC(SETACL, enc_setacl, dec_setacl),
+ PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
+ PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
#if defined(CONFIG_NFS_V4_1)
- PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
- PROC(CREATE_SESSION, enc_create_session, dec_create_session),
- PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
- PROC(SEQUENCE, enc_sequence, dec_sequence),
- PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
- PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
- PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
- PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
+ PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
+ PROC(CREATE_SESSION, enc_create_session, dec_create_session),
+ PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
+ PROC(SEQUENCE, enc_sequence, dec_sequence),
+ PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
+ PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
+ PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
+ PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b68536c..e1164e3 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep;
static inline struct nfs_page *
nfs_page_alloc(void)
{
- struct nfs_page *p;
- p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
- if (p) {
- memset(p, 0, sizeof(*p));
+ struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+ if (p)
INIT_LIST_HEAD(&p->wb_list);
- }
return p;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index db77342..bc40897 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
* pNFS client layout cache
*/
+/* Need to hold i_lock if caller does not already hold reference */
+void
+get_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ atomic_inc(&lo->plh_refcount);
+}
+
static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
- assert_spin_locked(&lo->inode->i_lock);
- lo->refcount++;
+ dprintk("%s: freeing layout cache %p\n", __func__, lo);
+ BUG_ON(!list_empty(&lo->plh_layouts));
+ NFS_I(lo->plh_inode)->layout = NULL;
+ kfree(lo);
}
static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
- assert_spin_locked(&lo->inode->i_lock);
- BUG_ON(lo->refcount == 0);
-
- lo->refcount--;
- if (!lo->refcount) {
- dprintk("%s: freeing layout cache %p\n", __func__, lo);
- BUG_ON(!list_empty(&lo->layouts));
- NFS_I(lo->inode)->layout = NULL;
- kfree(lo);
- }
+ if (atomic_dec_and_test(&lo->plh_refcount))
+ destroy_layout_hdr(lo);
}
void
-put_layout_hdr(struct inode *inode)
+put_layout_hdr(struct pnfs_layout_hdr *lo)
{
- spin_lock(&inode->i_lock);
- put_layout_hdr_locked(NFS_I(inode)->layout);
- spin_unlock(&inode->i_lock);
+ struct inode *inode = lo->plh_inode;
+
+ if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+ destroy_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+ }
}
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
- INIT_LIST_HEAD(&lseg->fi_list);
- kref_init(&lseg->kref);
- lseg->layout = lo;
+ INIT_LIST_HEAD(&lseg->pls_list);
+ atomic_set(&lseg->pls_refcount, 1);
+ smp_mb();
+ set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+ lseg->pls_layout = lo;
}
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
{
- struct pnfs_layout_segment *lseg =
- container_of(kref, struct pnfs_layout_segment, kref);
- struct inode *ino = lseg->layout->inode;
+ struct inode *ino = lseg->pls_layout->plh_inode;
- dprintk("--> %s\n", __func__);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
- put_layout_hdr(ino);
+ /* Matched by get_layout_hdr in pnfs_insert_layout */
+ put_layout_hdr(NFS_I(ino)->layout);
}
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+ atomic_read(&lseg->pls_refcount),
+ test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ struct inode *ino = lseg->pls_layout->plh_inode;
+
+ BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ list_del(&lseg->pls_list);
+ if (list_empty(&lseg->pls_layout->plh_segs)) {
+ struct nfs_client *clp;
+
+ clp = NFS_SERVER(ino)->nfs_client;
+ spin_lock(&clp->cl_lock);
+ /* List does not take a reference, so no need for put here */
+ list_del_init(&lseg->pls_layout->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+ }
+ rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+ list_add(&lseg->pls_list, tmp_list);
+ return 1;
+ }
+ return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
{
- if (!lseg)
- return;
+ return (recall_iomode == IOMODE_ANY ||
+ lseg_iomode == recall_iomode);
+}
- dprintk("%s: lseg %p ref %d\n", __func__, lseg,
- atomic_read(&lseg->kref.refcount));
- kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ int rv = 0;
+
+ if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+ /* Remove the reference keeping the lseg in the
+ * list. It will now be removed when all
+ * outstanding io is finished.
+ */
+ rv = put_lseg_locked(lseg, tmp_list);
+ }
+ return rv;
}
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns count of number of matching invalid lsegs remaining in list
+ * after call.
+ */
+int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ u32 iomode)
{
struct pnfs_layout_segment *lseg, *next;
- struct nfs_client *clp;
+ int invalid = 0, removed = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
- assert_spin_locked(&lo->inode->i_lock);
- list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
- dprintk("%s: freeing lseg %p\n", __func__, lseg);
- list_move(&lseg->fi_list, tmp_list);
- }
- clp = NFS_SERVER(lo->inode)->nfs_client;
- spin_lock(&clp->cl_lock);
- /* List does not take a reference, so no need for put here */
- list_del_init(&lo->layouts);
- spin_unlock(&clp->cl_lock);
- write_seqlock(&lo->seqlock);
- clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
-
- dprintk("%s:Return\n", __func__);
+ list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+ if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+ dprintk("%s: freeing lseg %p iomode %d "
+ "offset %llu length %llu\n", __func__,
+ lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+ lseg->pls_range.length);
+ invalid++;
+ removed += mark_lseg_invalid(lseg, tmp_list);
+ }
+ dprintk("%s:Return %i\n", __func__, invalid - removed);
+ return invalid - removed;
}
-static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+void
+pnfs_free_lseg_list(struct list_head *free_me)
{
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg, *tmp;
- while (!list_empty(tmp_list)) {
- lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
- fi_list);
- dprintk("%s calling put_lseg on %p\n", __func__, lseg);
- list_del(&lseg->fi_list);
- put_lseg(lseg);
+ list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+ list_del(&lseg->pls_list);
+ free_lseg(lseg);
}
}
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
- pnfs_clear_lseg_list(lo, &tmp_list);
+ set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+ mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
/* Matched by refcount set to 1 in alloc_init_layout_hdr */
put_layout_hdr_locked(lo);
}
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
while (!list_empty(&tmp_list)) {
lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
- layouts);
+ plh_layouts);
dprintk("%s freeing layout for inode %lu\n", __func__,
- lo->inode->i_ino);
- pnfs_destroy_layout(NFS_I(lo->inode));
+ lo->plh_inode->i_ino);
+ pnfs_destroy_layout(NFS_I(lo->plh_inode));
}
}
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new)
-{
- nfs4_stateid *old = &lo->stateid;
- bool overwrite = false;
-
- write_seqlock(&lo->seqlock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
- memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
- overwrite = true;
- else {
- u32 oldseq, newseq;
-
- oldseq = be32_to_cpu(old->stateid.seqid);
- newseq = be32_to_cpu(new->stateid.seqid);
- if ((int)(newseq - oldseq) > 0)
- overwrite = true;
+/* update lo->plh_stateid with new if is more recent */
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+ bool update_barrier)
+{
+ u32 oldseq, newseq;
+
+ oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+ newseq = be32_to_cpu(new->stateid.seqid);
+ if ((int)(newseq - oldseq) > 0) {
+ memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+ if (update_barrier) {
+ u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+ if ((int)(new_barrier - lo->plh_barrier))
+ lo->plh_barrier = new_barrier;
+ } else {
+ /* Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. It needs to be
+ * within 2**31 to count as "behind", so if it
+ * gets too near that limit, give us a litle leeway
+ * and bring it to within 2**30.
+ * NOTE - and yes, this is all unsigned arithmetic.
+ */
+ if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+ lo->plh_barrier = newseq - (1 << 30);
+ }
}
- if (overwrite)
- memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
- write_sequnlock(&lo->seqlock);
}
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
- struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+ int lget)
{
- int seq;
-
- dprintk("--> %s\n", __func__);
- write_seqlock(&lo->seqlock);
- do {
- seq = read_seqbegin(&state->seqlock);
- memcpy(lo->stateid.data, state->stateid.data,
- sizeof(state->stateid.data));
- } while (read_seqretry(&state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
- dprintk("<-- %s\n", __func__);
+ if ((stateid) &&
+ (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+ return true;
+ return lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ (list_empty(&lo->plh_segs) &&
+ (atomic_read(&lo->plh_outstanding) > lget));
}
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
{
- int seq;
+ int status = 0;
dprintk("--> %s\n", __func__);
- do {
- seq = read_seqbegin(&lo->seqlock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
- /* This will trigger retry of the read */
- pnfs_layout_from_open_stateid(lo, open_state);
- } else
- memcpy(dst->data, lo->stateid.data,
- sizeof(lo->stateid.data));
- } while (read_seqretry(&lo->seqlock, seq));
+ spin_lock(&lo->plh_inode->i_lock);
+ if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
+ status = -EAGAIN;
+ } else if (list_empty(&lo->plh_segs)) {
+ int seq;
+
+ do {
+ seq = read_seqbegin(&open_state->seqlock);
+ memcpy(dst->data, open_state->stateid.data,
+ sizeof(open_state->stateid.data));
+ } while (read_seqretry(&open_state->seqlock, seq));
+ } else
+ memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+ spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
+ return status;
}
/*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
u32 iomode)
{
- struct inode *ino = lo->inode;
+ struct inode *ino = lo->plh_inode;
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
BUG_ON(ctx == NULL);
lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
- if (lgp == NULL) {
- put_layout_hdr(lo->inode);
+ if (lgp == NULL)
return NULL;
- }
lgp->args.minlength = NFS4_MAX_UINT64;
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
nfs4_proc_layoutget(lgp);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
- set_bit(lo_fail_bit(iomode), &lo->state);
+ set_bit(lo_fail_bit(iomode), &lo->plh_flags);
}
return lseg;
}
+bool pnfs_roc(struct inode *ino)
+{
+ struct pnfs_layout_hdr *lo;
+ struct pnfs_layout_segment *lseg, *tmp;
+ LIST_HEAD(tmp_list);
+ bool found = false;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ goto out_nolayout;
+ list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+ if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+ mark_lseg_invalid(lseg, &tmp_list);
+ found = true;
+ }
+ if (!found)
+ goto out_nolayout;
+ lo->plh_block_lgets++;
+ get_layout_hdr(lo); /* matched in pnfs_roc_release */
+ spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&tmp_list);
+ return true;
+
+out_nolayout:
+ spin_unlock(&ino->i_lock);
+ return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+ struct pnfs_layout_hdr *lo;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ lo->plh_block_lgets--;
+ put_layout_hdr_locked(lo);
+ spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+ struct pnfs_layout_hdr *lo;
+
+ spin_lock(&ino->i_lock);
+ lo = NFS_I(ino)->layout;
+ if ((int)(barrier - lo->plh_barrier) > 0)
+ lo->plh_barrier = barrier;
+ spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+ struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_segment *lseg;
+ bool found = false;
+
+ spin_lock(&ino->i_lock);
+ list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+ if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+ found = true;
+ break;
+ }
+ if (!found) {
+ struct pnfs_layout_hdr *lo = nfsi->layout;
+ u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+ /* Since close does not return a layout stateid for use as
+ * a barrier, we choose the worst-case barrier.
+ */
+ *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+ }
+ spin_unlock(&ino->i_lock);
+ return found;
+}
+
/*
* Compare two layout segments for sorting into layout cache.
* We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->inode->i_lock);
- if (list_empty(&lo->segs)) {
- struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
- spin_lock(&clp->cl_lock);
- BUG_ON(!list_empty(&lo->layouts));
- list_add_tail(&lo->layouts, &clp->cl_layouts);
- spin_unlock(&clp->cl_lock);
- }
- list_for_each_entry(lp, &lo->segs, fi_list) {
- if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+ assert_spin_locked(&lo->plh_inode->i_lock);
+ list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+ if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
continue;
- list_add_tail(&lseg->fi_list, &lp->fi_list);
+ list_add_tail(&lseg->pls_list, &lp->pls_list);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu before "
"lp %p iomode %d offset %llu length %llu\n",
- __func__, lseg, lseg->range.iomode,
- lseg->range.offset, lseg->range.length,
- lp, lp->range.iomode, lp->range.offset,
- lp->range.length);
+ __func__, lseg, lseg->pls_range.iomode,
+ lseg->pls_range.offset, lseg->pls_range.length,
+ lp, lp->pls_range.iomode, lp->pls_range.offset,
+ lp->pls_range.length);
found = 1;
break;
}
if (!found) {
- list_add_tail(&lseg->fi_list, &lo->segs);
+ list_add_tail(&lseg->pls_list, &lo->plh_segs);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
- __func__, lseg, lseg->range.iomode,
- lseg->range.offset, lseg->range.length);
+ __func__, lseg, lseg->pls_range.iomode,
+ lseg->pls_range.offset, lseg->pls_range.length);
}
- get_layout_hdr_locked(lo);
+ get_layout_hdr(lo);
dprintk("%s:Return\n", __func__);
}
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
if (!lo)
return NULL;
- lo->refcount = 1;
- INIT_LIST_HEAD(&lo->layouts);
- INIT_LIST_HEAD(&lo->segs);
- seqlock_init(&lo->seqlock);
- lo->inode = ino;
+ atomic_set(&lo->plh_refcount, 1);
+ INIT_LIST_HEAD(&lo->plh_layouts);
+ INIT_LIST_HEAD(&lo->plh_segs);
+ INIT_LIST_HEAD(&lo->plh_bulk_recall);
+ lo->plh_inode = ino;
return lo;
}
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
assert_spin_locked(&ino->i_lock);
- if (nfsi->layout)
- return nfsi->layout;
-
+ if (nfsi->layout) {
+ if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+ return NULL;
+ else
+ return nfsi->layout;
+ }
spin_unlock(&ino->i_lock);
new = alloc_init_layout_hdr(ino);
spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
{
- return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+ return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
}
/*
* lookup range in layout
*/
static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{
struct pnfs_layout_segment *lseg, *ret = NULL;
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->inode->i_lock);
- list_for_each_entry(lseg, &lo->segs, fi_list) {
- if (is_matching_lseg(lseg, iomode)) {
+ assert_spin_locked(&lo->plh_inode->i_lock);
+ list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+ if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+ is_matching_lseg(lseg, iomode)) {
ret = lseg;
break;
}
- if (cmp_layout(iomode, lseg->range.iomode) > 0)
+ if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
break;
}
dprintk("%s:Return lseg %p ref %d\n",
- __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+ __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
return ret;
}
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode)
{
struct nfs_inode *nfsi = NFS_I(ino);
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
}
- /* Check to see if the layout for the given range already exists */
- lseg = pnfs_has_layout(lo, iomode);
- if (lseg) {
- dprintk("%s: Using cached lseg %p for iomode %d)\n",
- __func__, lseg, iomode);
+ /* Do we even need to bother with this? */
+ if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
+ /* Check to see if the layout for the given range already exists */
+ lseg = pnfs_find_lseg(lo, iomode);
+ if (lseg)
+ goto out_unlock;
/* if LAYOUTGET already failed once we don't try again */
- if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+ if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+ goto out_unlock;
+
+ if (pnfs_layoutgets_blocked(lo, NULL, 0))
goto out_unlock;
+ atomic_inc(&lo->plh_outstanding);
- get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+ get_layout_hdr(lo);
+ if (list_empty(&lo->plh_segs)) {
+ /* The lo must be on the clp list if there is any
+ * chance of a CB_LAYOUTRECALL(FILE) coming in.
+ */
+ spin_lock(&clp->cl_lock);
+ BUG_ON(!list_empty(&lo->plh_layouts));
+ list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, iomode);
+ if (!lseg) {
+ spin_lock(&ino->i_lock);
+ if (list_empty(&lo->plh_segs)) {
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
+ spin_unlock(&ino->i_lock);
+ }
+ atomic_dec(&lo->plh_outstanding);
+ put_layout_hdr(lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout->state, lseg);
+ nfsi->layout->plh_flags, lseg);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
- struct inode *ino = lo->inode;
+ struct inode *ino = lo->plh_inode;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
+ /* Verify we got what we asked for.
+ * Note that because the xdr parsing only accepts a single
+ * element array, this can fail even if the server is behaving
+ * correctly.
+ */
+ if (lgp->args.range.iomode > res->range.iomode ||
+ res->range.offset != 0 ||
+ res->range.length != NFS4_MAX_UINT64) {
+ status = -EINVAL;
+ goto out;
+ }
/* Inject layout blob into I/O device driver */
lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
spin_lock(&ino->i_lock);
+ if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ dprintk("%s forget reply due to recall\n", __func__);
+ goto out_forget_reply;
+ }
+
+ if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+ dprintk("%s forget reply due to state\n", __func__);
+ goto out_forget_reply;
+ }
init_lseg(lo, lseg);
- lseg->range = res->range;
+ lseg->pls_range = res->range;
*lgp->lsegpp = lseg;
pnfs_insert_layout(lo, lseg);
+ if (res->return_on_close) {
+ set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+ set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+ }
+
/* Done processing layoutget. Set the layout stateid */
- pnfs_set_layout_stateid(lo, &res->stateid);
+ pnfs_set_layout_stateid(lo, &res->stateid, false);
spin_unlock(&ino->i_lock);
out:
return status;
+
+out_forget_reply:
+ spin_unlock(&ino->i_lock);
+ lseg->pls_layout = lo;
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ goto out;
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e12367d..e2612ea 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,11 +30,17 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
+enum {
+ NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
+ NFS_LSEG_ROC, /* roc bit received from server */
+};
+
struct pnfs_layout_segment {
- struct list_head fi_list;
- struct pnfs_layout_range range;
- struct kref kref;
- struct pnfs_layout_hdr *layout;
+ struct list_head pls_list;
+ struct pnfs_layout_range pls_range;
+ atomic_t pls_refcount;
+ unsigned long pls_flags;
+ struct pnfs_layout_hdr *pls_layout;
};
#ifdef CONFIG_NFS_V4_1
@@ -44,7 +50,9 @@ struct pnfs_layout_segment {
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
- NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
+ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
+ NFS_LAYOUT_ROC, /* some lseg had roc bit set */
+ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
};
/* Per-layout driver specific registration structure */
@@ -60,13 +68,16 @@ struct pnfs_layoutdriver_type {
};
struct pnfs_layout_hdr {
- unsigned long refcount;
- struct list_head layouts; /* other client layouts */
- struct list_head segs; /* layout segments list */
- seqlock_t seqlock; /* Protects the stateid */
- nfs4_stateid stateid;
- unsigned long state;
- struct inode *inode;
+ atomic_t plh_refcount;
+ struct list_head plh_layouts; /* other client layouts */
+ struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
+ struct list_head plh_segs; /* layout segments list */
+ nfs4_stateid plh_stateid;
+ atomic_t plh_outstanding; /* number of RPCs out */
+ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
+ u32 plh_barrier; /* ignore lower seqids */
+ unsigned long plh_flags;
+ struct inode *plh_inode;
};
struct pnfs_device {
@@ -134,17 +145,30 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
/* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-void put_layout_hdr(struct inode *inode);
-void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state);
+void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new,
+ bool update_barrier);
+int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state);
+int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ u32 iomode);
+bool pnfs_roc(struct inode *ino);
+void pnfs_roc_release(struct inode *ino);
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
static inline int lo_fail_bit(u32 iomode)
@@ -176,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
return NULL;
}
+static inline bool
+pnfs_roc(struct inode *ino)
+{
+ return false;
+}
+
+static inline void
+pnfs_roc_release(struct inode *ino)
+{
+}
+
+static inline void
+pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+}
+
+static inline bool
+pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+ return false;
+}
+
static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
{
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58e7f84..77d5e21 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -458,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
fattr = nfs_alloc_fattr();
status = -ENOMEM;
if (fh == NULL || fattr == NULL)
- goto out;
+ goto out_free;
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
@@ -471,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
if (status == 0)
status = nfs_instantiate(dentry, fh, fattr);
+out_free:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
out:
@@ -731,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.statfs = nfs_proc_statfs,
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
- .decode_dirent = nfs_decode_dirent,
+ .decode_dirent = nfs2_decode_dirent,
.read_setup = nfs_proc_read_setup,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4100630..0f9ea73 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -598,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
if (nfss->mountd_version || showdefaults)
seq_printf(m, ",mountvers=%u", nfss->mountd_version);
- if (nfss->mountd_port || showdefaults)
+ if ((nfss->mountd_port &&
+ nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) ||
+ showdefaults)
seq_printf(m, ",mountport=%u", nfss->mountd_port);
nfs_show_mountd_netid(m, nfss, showdefaults);
@@ -2494,7 +2496,13 @@ static void nfs4_clone_super(struct super_block *sb,
sb->s_maxbytes = old_sb->s_maxbytes;
sb->s_time_gran = 1;
sb->s_op = old_sb->s_op;
- nfs_initialise_sb(sb);
+ /*
+ * The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
+ */
+ sb->s_flags |= MS_POSIXACL;
+ sb->s_xattr = old_sb->s_xattr;
+ nfs_initialise_sb(sb);
}
/*
@@ -2504,6 +2512,12 @@ static void nfs4_fill_super(struct super_block *sb)
{
sb->s_time_gran = 1;
sb->s_op = &nfs4_sops;
+ /*
+ * The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
+ */
+ sb->s_flags |= MS_POSIXACL;
+ sb->s_xattr = nfs4_xattr_handlers;
nfs_initialise_sb(sb);
}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8fe9eb4..e313a51 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -429,7 +429,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
- task_setup_data.callback_data = data,
+ task_setup_data.callback_data = data;
data->cred = rpc_lookup_cred();
if (IS_ERR(data->cred)) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 143da2e..21a63da 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -50,11 +50,6 @@ enum {
NFSPROC4_CLNT_CB_SEQUENCE,
};
-enum nfs_cb_opnum4 {
- OP_CB_RECALL = 4,
- OP_CB_SEQUENCE = 11,
-};
-
#define NFS4_MAXTAGLEN 20
#define NFS4_enc_cb_null_sz 0
@@ -79,61 +74,6 @@ enum nfs_cb_opnum4 {
cb_sequence_dec_sz + \
op_dec_sz)
-/*
-* Generic encode routines from fs/nfs/nfs4xdr.c
-*/
-static inline __be32 *
-xdr_writemem(__be32 *p, const void *ptr, int nbytes)
-{
- int tmp = XDR_QUADLEN(nbytes);
- if (!tmp)
- return p;
- p[tmp-1] = 0;
- memcpy(p, ptr, nbytes);
- return p + tmp;
-}
-
-#define WRITE32(n) *p++ = htonl(n)
-#define WRITEMEM(ptr,nbytes) do { \
- p = xdr_writemem(p, ptr, nbytes); \
-} while (0)
-#define RESERVE_SPACE(nbytes) do { \
- p = xdr_reserve_space(xdr, nbytes); \
- if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
- BUG_ON(!p); \
-} while (0)
-
-/*
- * Generic decode routines from fs/nfs/nfs4xdr.c
- */
-#define DECODE_TAIL \
- status = 0; \
-out: \
- return status; \
-xdr_error: \
- dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
- status = -EIO; \
- goto out
-
-#define READ32(x) (x) = ntohl(*p++)
-#define READ64(x) do { \
- (x) = (u64)ntohl(*p++) << 32; \
- (x) |= ntohl(*p++); \
-} while (0)
-#define READTIME(x) do { \
- p++; \
- (x.tv_sec) = ntohl(*p++); \
- (x.tv_nsec) = ntohl(*p++); \
-} while (0)
-#define READ_BUF(nbytes) do { \
- p = xdr_inline_decode(xdr, nbytes); \
- if (!p) { \
- dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
- __func__, __LINE__); \
- return -EIO; \
- } \
-} while (0)
-
struct nfs4_cb_compound_hdr {
/* args */
u32 ident; /* minorversion 0 only */
@@ -144,295 +84,513 @@ struct nfs4_cb_compound_hdr {
int status;
};
-static struct {
-int stat;
-int errno;
-} nfs_cb_errtbl[] = {
- { NFS4_OK, 0 },
- { NFS4ERR_PERM, EPERM },
- { NFS4ERR_NOENT, ENOENT },
- { NFS4ERR_IO, EIO },
- { NFS4ERR_NXIO, ENXIO },
- { NFS4ERR_ACCESS, EACCES },
- { NFS4ERR_EXIST, EEXIST },
- { NFS4ERR_XDEV, EXDEV },
- { NFS4ERR_NOTDIR, ENOTDIR },
- { NFS4ERR_ISDIR, EISDIR },
- { NFS4ERR_INVAL, EINVAL },
- { NFS4ERR_FBIG, EFBIG },
- { NFS4ERR_NOSPC, ENOSPC },
- { NFS4ERR_ROFS, EROFS },
- { NFS4ERR_MLINK, EMLINK },
- { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, ENOTEMPTY },
- { NFS4ERR_DQUOT, EDQUOT },
- { NFS4ERR_STALE, ESTALE },
- { NFS4ERR_BADHANDLE, EBADHANDLE },
- { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
- { NFS4ERR_NOTSUPP, ENOTSUPP },
- { NFS4ERR_TOOSMALL, ETOOSMALL },
- { NFS4ERR_SERVERFAULT, ESERVERFAULT },
- { NFS4ERR_BADTYPE, EBADTYPE },
- { NFS4ERR_LOCKED, EAGAIN },
- { NFS4ERR_RESOURCE, EREMOTEIO },
- { NFS4ERR_SYMLINK, ELOOP },
- { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, EDEADLK },
- { -1, EIO }
-};
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
-static int
-nfs_cb_stat_to_errno(int stat)
+static __be32 *xdr_encode_empty_array(__be32 *p)
{
- int i;
- for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
- if (nfs_cb_errtbl[i].stat == stat)
- return nfs_cb_errtbl[i].errno;
- }
- /* If we cannot translate the error, the recovery routines should
- * handle it.
- * Note: remaining NFSv4 error codes have values > 10000, so should
- * not conflict with native Linux error codes.
- */
- return stat;
+ *p++ = xdr_zero;
+ return p;
}
/*
- * XDR encode
+ * Encode/decode NFSv4 CB basic data types
+ *
+ * Basic NFSv4 callback data types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section
+ * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
+ * 1 Protocol"
+ */
+
+/*
+ * nfs_cb_opnum4
+ *
+ * enum nfs_cb_opnum4 {
+ * OP_CB_GETATTR = 3,
+ * ...
+ * };
*/
+enum nfs_cb_opnum4 {
+ OP_CB_GETATTR = 3,
+ OP_CB_RECALL = 4,
+ OP_CB_LAYOUTRECALL = 5,
+ OP_CB_NOTIFY = 6,
+ OP_CB_PUSH_DELEG = 7,
+ OP_CB_RECALL_ANY = 8,
+ OP_CB_RECALLABLE_OBJ_AVAIL = 9,
+ OP_CB_RECALL_SLOT = 10,
+ OP_CB_SEQUENCE = 11,
+ OP_CB_WANTS_CANCELLED = 12,
+ OP_CB_NOTIFY_LOCK = 13,
+ OP_CB_NOTIFY_DEVICEID = 14,
+ OP_CB_ILLEGAL = 10044
+};
-static void
-encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
{
__be32 *p;
- RESERVE_SPACE(sizeof(stateid_t));
- WRITE32(sid->si_generation);
- WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(op);
}
-static void
-encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+/*
+ * nfs_fh4
+ *
+ * typedef opaque nfs_fh4<NFS4_FHSIZE>;
+ */
+static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
{
- __be32 * p;
+ u32 length = fh->fh_size;
+ __be32 *p;
- RESERVE_SPACE(16);
- WRITE32(0); /* tag length is always 0 */
- WRITE32(hdr->minorversion);
- WRITE32(hdr->ident);
- hdr->nops_p = p;
- WRITE32(hdr->nops);
+ BUG_ON(length > NFS4_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + length);
+ xdr_encode_opaque(p, &fh->fh_base, length);
}
-static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+/*
+ * stateid4
+ *
+ * struct stateid4 {
+ * uint32_t seqid;
+ * opaque other[12];
+ * };
+ */
+static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
{
- *hdr->nops_p = htonl(hdr->nops);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(sid->si_generation);
+ xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
}
-static void
-encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
- struct nfs4_cb_compound_hdr *hdr)
+/*
+ * sessionid4
+ *
+ * typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
+ */
+static void encode_sessionid4(struct xdr_stream *xdr,
+ const struct nfsd4_session *session)
{
__be32 *p;
- int len = dp->dl_fh.fh_size;
-
- RESERVE_SPACE(4);
- WRITE32(OP_CB_RECALL);
- encode_stateid(xdr, &dp->dl_stateid);
- RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
- WRITE32(0); /* truncate optimization not implemented */
- WRITE32(len);
- WRITEMEM(&dp->dl_fh.fh_base, len);
- hdr->nops++;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
}
-static void
-encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
- struct nfs4_cb_compound_hdr *hdr)
-{
- __be32 *p;
- struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+/*
+ * nfsstat4
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_cb_errtbl[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, -EPERM },
+ { NFS4ERR_NOENT, -ENOENT },
+ { NFS4ERR_IO, -EIO },
+ { NFS4ERR_NXIO, -ENXIO },
+ { NFS4ERR_ACCESS, -EACCES },
+ { NFS4ERR_EXIST, -EEXIST },
+ { NFS4ERR_XDEV, -EXDEV },
+ { NFS4ERR_NOTDIR, -ENOTDIR },
+ { NFS4ERR_ISDIR, -EISDIR },
+ { NFS4ERR_INVAL, -EINVAL },
+ { NFS4ERR_FBIG, -EFBIG },
+ { NFS4ERR_NOSPC, -ENOSPC },
+ { NFS4ERR_ROFS, -EROFS },
+ { NFS4ERR_MLINK, -EMLINK },
+ { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
+ { NFS4ERR_DQUOT, -EDQUOT },
+ { NFS4ERR_STALE, -ESTALE },
+ { NFS4ERR_BADHANDLE, -EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, -ENOTSUPP },
+ { NFS4ERR_TOOSMALL, -ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_BADTYPE, -EBADTYPE },
+ { NFS4ERR_LOCKED, -EAGAIN },
+ { NFS4ERR_RESOURCE, -EREMOTEIO },
+ { NFS4ERR_SYMLINK, -ELOOP },
+ { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, -EDEADLK },
+ { -1, -EIO }
+};
- if (hdr->minorversion == 0)
- return;
+/*
+ * If we cannot translate the error, the recovery routines should
+ * handle it.
+ *
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+static int nfs_cb_stat_to_errno(int status)
+{
+ int i;
- RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
+ for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+ if (nfs_cb_errtbl[i].stat == status)
+ return nfs_cb_errtbl[i].errno;
+ }
- WRITE32(OP_CB_SEQUENCE);
- WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(ses->se_cb_seq_nr);
- WRITE32(0); /* slotid, always 0 */
- WRITE32(0); /* highest slotid always 0 */
- WRITE32(0); /* cachethis always 0 */
- WRITE32(0); /* FIXME: support referring_call_lists */
- hdr->nops++;
+ dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
+ return -status;
}
-static int
-nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ enum nfsstat4 *status)
{
- struct xdr_stream xdrs, *xdr = &xdrs;
+ __be32 *p;
+ u32 op;
- xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
- RESERVE_SPACE(0);
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ op = be32_to_cpup(p++);
+ if (unlikely(op != expected))
+ goto out_unexpected;
+ *status = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+out_unexpected:
+ dprintk("NFSD: Callback server returned operation %d but "
+ "we issued a request for %d\n", op, expected);
+ return -EIO;
}
-static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
- struct nfsd4_callback *cb)
+/*
+ * CB_COMPOUND4args
+ *
+ * struct CB_COMPOUND4args {
+ * utf8str_cs tag;
+ * uint32_t minorversion;
+ * uint32_t callback_ident;
+ * nfs_cb_argop4 argarray<>;
+ * };
+*/
+static void encode_cb_compound4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
{
- struct xdr_stream xdr;
- struct nfs4_delegation *args = cb->cb_op;
- struct nfs4_cb_compound_hdr hdr = {
- .ident = cb->cb_clp->cl_cb_ident,
- .minorversion = cb->cb_minorversion,
- };
+ __be32 * p;
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
- encode_cb_compound_hdr(&xdr, &hdr);
- encode_cb_sequence(&xdr, cb, &hdr);
- encode_cb_recall(&xdr, args, &hdr);
- encode_cb_nops(&hdr);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+ p = xdr_encode_empty_array(p); /* empty tag */
+ *p++ = cpu_to_be32(hdr->minorversion);
+ *p++ = cpu_to_be32(hdr->ident);
+
+ hdr->nops_p = p;
+ *p = cpu_to_be32(hdr->nops); /* argarray element count */
+}
+
+/*
+ * Update argarray element count
+ */
+static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+{
+ BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
+ *hdr->nops_p = cpu_to_be32(hdr->nops);
+}
+
+/*
+ * CB_COMPOUND4res
+ *
+ * struct CB_COMPOUND4res {
+ * nfsstat4 status;
+ * utf8str_cs tag;
+ * nfs_cb_resop4 resarray<>;
+ * };
+ */
+static int decode_cb_compound4res(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ u32 length;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->status = be32_to_cpup(p++);
+ /* Ignore the tag */
+ length = be32_to_cpup(p++);
+ p = xdr_inline_decode(xdr, length + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
+ hdr->nops = be32_to_cpup(p);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
+/*
+ * CB_RECALL4args
+ *
+ * struct CB_RECALL4args {
+ * stateid4 stateid;
+ * bool truncate;
+ * nfs_fh4 fh;
+ * };
+ */
+static void encode_cb_recall4args(struct xdr_stream *xdr,
+ const struct nfs4_delegation *dp,
+ struct nfs4_cb_compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
+ encode_stateid4(xdr, &dp->dl_stateid);
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = xdr_zero; /* truncate */
-static int
-decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
- __be32 *p;
- u32 taglen;
+ encode_nfs_fh4(xdr, &dp->dl_fh);
- READ_BUF(8);
- READ32(hdr->status);
- /* We've got no use for the tag; ignore it: */
- READ32(taglen);
- READ_BUF(taglen + 4);
- p += XDR_QUADLEN(taglen);
- READ32(hdr->nops);
- return 0;
+ hdr->nops++;
}
-static int
-decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+/*
+ * CB_SEQUENCE4args
+ *
+ * struct CB_SEQUENCE4args {
+ * sessionid4 csa_sessionid;
+ * sequenceid4 csa_sequenceid;
+ * slotid4 csa_slotid;
+ * slotid4 csa_highest_slotid;
+ * bool csa_cachethis;
+ * referring_call_list4 csa_referring_call_lists<>;
+ * };
+ */
+static void encode_cb_sequence4args(struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb,
+ struct nfs4_cb_compound_hdr *hdr)
{
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
__be32 *p;
- u32 op;
- int32_t nfserr;
-
- READ_BUF(8);
- READ32(op);
- if (op != expected) {
- dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
- " operation %d but we issued a request for %d\n",
- op, expected);
- return -EIO;
- }
- READ32(nfserr);
- if (nfserr != NFS_OK)
- return -nfs_cb_stat_to_errno(nfserr);
- return 0;
+
+ if (hdr->minorversion == 0)
+ return;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
+ encode_sessionid4(xdr, session);
+
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
+ *p++ = xdr_zero; /* csa_slotid */
+ *p++ = xdr_zero; /* csa_highest_slotid */
+ *p++ = xdr_zero; /* csa_cachethis */
+ xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ hdr->nops++;
}
/*
+ * CB_SEQUENCE4resok
+ *
+ * struct CB_SEQUENCE4resok {
+ * sessionid4 csr_sessionid;
+ * sequenceid4 csr_sequenceid;
+ * slotid4 csr_slotid;
+ * slotid4 csr_highest_slotid;
+ * slotid4 csr_target_highest_slotid;
+ * };
+ *
+ * union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
+ * case NFS4_OK:
+ * CB_SEQUENCE4resok csr_resok4;
+ * default:
+ * void;
+ * };
+ *
* Our current back channel implmentation supports a single backchannel
* with a single slot.
*/
-static int
-decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
- struct rpc_rqst *rqstp)
+static int decode_cb_sequence4resok(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
{
- struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
struct nfs4_sessionid id;
int status;
- u32 dummy;
__be32 *p;
+ u32 dummy;
- if (cb->cb_minorversion == 0)
- return 0;
-
- status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
- if (status)
- return status;
+ status = -ESERVERFAULT;
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- status = -ESERVERFAULT;
-
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ if (unlikely(p == NULL))
+ goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
- p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
- dprintk("%s Invalid session id\n", __func__);
+ if (memcmp(id.data, session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN) != 0) {
+ dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
- READ32(dummy);
- if (dummy != ses->se_cb_seq_nr) {
- dprintk("%s Invalid sequence number\n", __func__);
+ p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+
+ dummy = be32_to_cpup(p++);
+ if (dummy != session->se_cb_seq_nr) {
+ dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
- READ32(dummy); /* slotid must be 0 */
+
+ dummy = be32_to_cpup(p++);
if (dummy != 0) {
- dprintk("%s Invalid slotid\n", __func__);
+ dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
- /* FIXME: process highest slotid and target highest slotid */
+
+ /*
+ * FIXME: process highest slotid and target highest slotid
+ */
status = 0;
out:
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
+static int decode_cb_sequence4res(struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ enum nfsstat4 nfserr;
+ int status;
+
+ if (cb->cb_minorversion == 0)
+ return 0;
+
+ status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ goto out_default;
+ status = decode_cb_sequence4resok(xdr, cb);
+out:
+ return status;
+out_default:
+ return nfs_cb_stat_to_errno(status);
+}
-static int
-nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
+/*
+ * NFSv4.0 and NFSv4.1 XDR encode functions
+ *
+ * NFSv4.0 callback argument types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+/*
+ * NB: Without this zero space reservation, callbacks over krb5p fail
+ */
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
+{
+ xdr_reserve_space(xdr, 0);
+}
+
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb)
+{
+ const struct nfs4_delegation *args = cb->cb_op;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recall4args(xdr, args, &hdr);
+ encode_cb_nops(&hdr);
+}
+
+
+/*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+ *
+ * NFSv4.0 callback result types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *__unused)
{
return 0;
}
-static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
- struct nfsd4_callback *cb)
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
{
- struct xdr_stream xdr;
struct nfs4_cb_compound_hdr hdr;
+ enum nfsstat4 nfserr;
int status;
- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
- status = decode_cb_compound_hdr(&xdr, &hdr);
- if (status)
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
goto out;
- if (cb) {
- status = decode_cb_sequence(&xdr, cb, rqstp);
- if (status)
+
+ if (cb != NULL) {
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status))
goto out;
}
- status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+
+ status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
+ if (unlikely(status))
+ goto out;
+ if (unlikely(nfserr != NFS4_OK))
+ goto out_default;
out:
return status;
+out_default:
+ return nfs_cb_stat_to_errno(status);
}
/*
* RPC procedure tables
*/
-#define PROC(proc, call, argtype, restype) \
-[NFSPROC4_CLNT_##proc] = { \
- .p_proc = NFSPROC4_CB_##call, \
- .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
- .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
- .p_arglen = NFS4_##argtype##_sz, \
- .p_replen = NFS4_##restype##_sz, \
- .p_statidx = NFSPROC4_CB_##call, \
- .p_name = #proc, \
-}
-
-static struct rpc_procinfo nfs4_cb_procedures[] = {
- PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
- PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
+#define PROC(proc, call, argtype, restype) \
+[NFSPROC4_CLNT_##proc] = { \
+ .p_proc = NFSPROC4_CB_##call, \
+ .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \
+ .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \
+ .p_arglen = NFS4_enc_##argtype##_sz, \
+ .p_replen = NFS4_dec_##restype##_sz, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
+}
+
+static struct rpc_procinfo nfs4_cb_procedures[] = {
+ PROC(CB_NULL, NULL, cb_null, cb_null),
+ PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
};
-static struct rpc_version nfs_cb_version4 = {
+static struct rpc_version nfs_cb_version4 = {
/*
* Note on the callback rpc program version number: despite language in rfc
* 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -440,29 +598,29 @@ static struct rpc_version nfs_cb_version4 = {
* in practice that appears to be what implementations use. The section
* 18.36.3 language is expected to be fixed in an erratum.
*/
- .number = 1,
- .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
- .procs = nfs4_cb_procedures
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
+ .procs = nfs4_cb_procedures
};
-static struct rpc_version * nfs_cb_version[] = {
+static struct rpc_version *nfs_cb_version[] = {
&nfs_cb_version4,
};
static struct rpc_program cb_program;
static struct rpc_stat cb_stats = {
- .program = &cb_program
+ .program = &cb_program
};
#define NFS4_CALLBACK 0x40000000
static struct rpc_program cb_program = {
- .name = "nfs4_cb",
- .number = NFS4_CALLBACK,
- .nrvers = ARRAY_SIZE(nfs_cb_version),
- .version = nfs_cb_version,
- .stats = &cb_stats,
- .pipe_dir_name = "/nfsd4_cb",
+ .name = "nfs4_cb",
+ .number = NFS4_CALLBACK,
+ .nrvers = ARRAY_SIZE(nfs_cb_version),
+ .version = nfs_cb_version,
+ .stats = &cb_stats,
+ .pipe_dir_name = "/nfsd4_cb",
};
static int max_cb_time(void)
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 0d84066..ab152c0 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
config OCFS2_FS_STATS
bool "OCFS2 statistics"
- depends on OCFS2_FS
+ depends on OCFS2_FS && DEBUG_FS
default y
help
This option allows some fs statistics to be captured. Enabling
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5..e4984e2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
return ret;
}
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
struct ocfs2_extent_block *eb);
static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -5858,6 +5857,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
ocfs2_journal_dirty(handle, tl_bh);
+ osb->truncated_clusters += num_clusters;
bail:
mlog_exit(status);
return status;
@@ -5929,6 +5929,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
i--;
}
+ osb->truncated_clusters = 0;
+
bail:
mlog_exit(status);
return status;
@@ -7139,64 +7141,6 @@ bail:
}
/*
- * Expects the inode to already be locked.
- */
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context **tc)
-{
- int status;
- unsigned int new_i_clusters;
- struct ocfs2_dinode *fe;
- struct ocfs2_extent_block *eb;
- struct buffer_head *last_eb_bh = NULL;
-
- mlog_entry_void();
-
- *tc = NULL;
-
- new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
- i_size_read(inode));
- fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
- mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
- "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
- (unsigned long long)le64_to_cpu(fe->i_size));
-
- *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
- if (!(*tc)) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
- ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
-
- if (fe->id2.i_list.l_tree_depth) {
- status = ocfs2_read_extent_block(INODE_CACHE(inode),
- le64_to_cpu(fe->i_last_eb_blk),
- &last_eb_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- }
-
- (*tc)->tc_last_eb_bh = last_eb_bh;
-
- status = 0;
-bail:
- if (status < 0) {
- if (*tc)
- ocfs2_free_truncate_context(*tc);
- *tc = NULL;
- }
- mlog_exit_void();
- return status;
-}
-
-/*
* 'start' is inclusive, 'end' is not.
*/
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
@@ -7270,18 +7214,3 @@ out_commit:
out:
return ret;
}
-
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
-{
- /*
- * The caller is responsible for completing deallocation
- * before freeing the context.
- */
- if (tc->tc_dealloc.c_first_suballocator != NULL)
- mlog(ML_NOTICE,
- "Truncate completion has non-empty dealloc context\n");
-
- brelse(tc->tc_last_eb_bh);
-
- kfree(tc);
-}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 55762b5..3bd08a0 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end);
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
- struct inode *inode,
- struct buffer_head *fe_bh,
- struct ocfs2_truncate_context **tc);
int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d7c554..1fbb0e2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1630,6 +1630,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
return ret;
}
+/*
+ * Try to flush truncate logs if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+ unsigned int needed)
+{
+ tid_t target;
+ int ret = 0;
+ unsigned int truncated_clusters;
+
+ mutex_lock(&osb->osb_tl_inode->i_mutex);
+ truncated_clusters = osb->truncated_clusters;
+ mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+ /*
+ * Check whether we can succeed in allocating if we free
+ * the truncate log.
+ */
+ if (truncated_clusters < needed)
+ goto out;
+
+ ret = ocfs2_flush_truncate_log(osb);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+ jbd2_log_wait_commit(osb->journal->j_journal, target);
+ ret = 1;
+ }
+out:
+ return ret;
+}
+
int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -1637,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
struct buffer_head *di_bh, struct page *mmap_page)
{
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
- unsigned int clusters_to_alloc, extents_to_split;
+ unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
struct ocfs2_write_ctxt *wc;
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1646,7 +1683,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle;
struct ocfs2_extent_tree et;
+ int try_free = 1, ret1;
+try_again:
ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
if (ret) {
mlog_errno(ret);
@@ -1681,6 +1720,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
mlog_errno(ret);
goto out;
} else if (ret == 1) {
+ clusters_need = wc->w_clen;
ret = ocfs2_refcount_cow(inode, filp, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) {
@@ -1695,6 +1735,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
mlog_errno(ret);
goto out;
}
+ clusters_need += clusters_to_alloc;
di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
@@ -1817,6 +1858,22 @@ out:
ocfs2_free_alloc_context(data_ac);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
+
+ if (ret == -ENOSPC && try_free) {
+ /*
+ * Try to free some truncate log so that we can have enough
+ * clusters to allocate.
+ */
+ try_free = 0;
+
+ ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+ if (ret1 == 1)
+ goto try_again;
+
+ if (ret1 < 0)
+ mlog_errno(ret1);
+ }
+
return ret;
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9e3d45b..a6cc053 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
#define O2HB_DB_TYPE_REGION_LIVENODES 4
#define O2HB_DB_TYPE_REGION_NUMBER 5
#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
+#define O2HB_DB_TYPE_REGION_PINNED 7
struct o2hb_debug_buf {
int db_type;
int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
#define O2HB_DEBUG_REGION_NUMBER "num"
#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm are two entities that register this callback.
+ * However only o2dlm depends on the heartbeat. It does not want the heartbeat
+ * to stop while a dlm domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF 3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
struct config_item hr_item;
struct list_head hr_all_item;
- unsigned hr_unclean_stop:1;
+ unsigned hr_unclean_stop:1,
+ hr_item_pinned:1,
+ hr_item_dropped:1;
/* protected by the hr_callback_sem */
struct task_struct *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
struct dentry *hr_debug_livenodes;
struct dentry *hr_debug_regnum;
struct dentry *hr_debug_elapsed_time;
+ struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
+ struct o2hb_debug_buf *hr_db_pinned;
/* let the person setting up hb wait for it to return until it
* has reached a 'steady' state. This will be fixed when we have
@@ -701,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
config_item_name(&reg->hr_item));
set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+ /*
+ * If global heartbeat active, unpin all regions if the
+ * region count > CUT_OFF
+ */
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+ o2hb_region_unpin(NULL);
}
static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1041,6 +1082,9 @@ static int o2hb_thread(void *data)
set_user_nice(current, -20);
+ /* Pin node */
+ o2nm_depend_this_node();
+
while (!kthread_should_stop() && !reg->hr_unclean_stop) {
/* We track the time spent inside
* o2hb_do_disk_heartbeat so that we avoid more than
@@ -1090,6 +1134,9 @@ static int o2hb_thread(void *data)
mlog_errno(ret);
}
+ /* Unpin node */
+ o2nm_undepend_this_node();
+
mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
return 0;
@@ -1142,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
reg->hr_last_timeout_start));
goto done;
+ case O2HB_DB_TYPE_REGION_PINNED:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+ !!reg->hr_item_pinned);
+ goto done;
+
default:
goto done;
}
@@ -1315,6 +1368,8 @@ int o2hb_init(void)
memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ o2hb_dependent_users = 0;
+
return o2hb_debug_init();
}
@@ -1384,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
debugfs_remove(reg->hr_debug_livenodes);
debugfs_remove(reg->hr_debug_regnum);
debugfs_remove(reg->hr_debug_elapsed_time);
+ debugfs_remove(reg->hr_debug_pinned);
debugfs_remove(reg->hr_debug_dir);
spin_lock(&o2hb_live_lock);
@@ -1948,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
goto bail;
}
+ reg->hr_debug_pinned =
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+ reg->hr_debug_dir,
+ &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED,
+ 0, 0, reg);
+ if (!reg->hr_debug_pinned) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
ret = 0;
bail:
return ret;
@@ -2002,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
{
struct task_struct *hb_task;
struct o2hb_region *reg = to_o2hb_region(item);
+ int quorum_region = 0;
/* stop the thread when the user removes the region dir */
spin_lock(&o2hb_live_lock);
if (o2hb_global_heartbeat_active()) {
clear_bit(reg->hr_region_num, o2hb_region_bitmap);
clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ quorum_region = 1;
+ clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
}
hb_task = reg->hr_task;
reg->hr_task = NULL;
+ reg->hr_item_dropped = 1;
spin_unlock(&o2hb_live_lock);
if (hb_task)
@@ -2028,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (o2hb_global_heartbeat_active())
printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
config_item_name(&reg->hr_item));
+
config_item_put(item);
+
+ if (!o2hb_global_heartbeat_active() || !quorum_region)
+ return;
+
+ /*
+ * If global heartbeat active and there are dependent users,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ spin_lock(&o2hb_live_lock);
+
+ if (!o2hb_dependent_users)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
struct o2hb_heartbeat_group_attribute {
@@ -2214,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
}
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ */
+static int o2hb_region_pin(const char *region_uuid)
{
- struct o2hb_region *p, *reg = NULL;
+ int ret = 0, found = 0;
+ struct o2hb_region *reg;
+ char *uuid;
assert_spin_locked(&o2hb_live_lock);
- list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
- if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
- reg = p;
- break;
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(&reg->hr_item);
+
+ /* local heartbeat */
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
+ }
+
+ if (reg->hr_item_pinned || reg->hr_item_dropped)
+ goto skip_pin;
+
+ /* Ignore ENOENT only for local hb (userdlm domain) */
+ ret = o2nm_depend_item(&reg->hr_item);
+ if (!ret) {
+ mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+ reg->hr_item_pinned = 1;
+ } else {
+ if (ret == -ENOENT && found)
+ ret = 0;
+ else {
+ mlog(ML_ERROR, "Pin region %s fails with %d\n",
+ uuid, ret);
+ break;
+ }
}
+skip_pin:
+ if (found)
+ break;
}
- return reg;
+ return ret;
}
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
{
- int ret = 0;
struct o2hb_region *reg;
+ char *uuid;
+ int found = 0;
- spin_lock(&o2hb_live_lock);
+ assert_spin_locked(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
- if (!reg)
- ret = -ENOENT;
- spin_unlock(&o2hb_live_lock);
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(&reg->hr_item);
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
+ }
- if (ret)
- goto out;
+ if (reg->hr_item_pinned) {
+ mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+ o2nm_undepend_item(&reg->hr_item);
+ reg->hr_item_pinned = 0;
+ }
+ if (found)
+ break;
+ }
+}
- ret = o2nm_depend_this_node();
- if (ret)
- goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+ int ret = 0;
- ret = o2nm_depend_item(&reg->hr_item);
- if (ret)
- o2nm_undepend_this_node();
+ spin_lock(&o2hb_live_lock);
-out:
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ ret = o2hb_region_pin(region_uuid);
+ goto unlock;
+ }
+
+ /*
+ * if global heartbeat active and this is the first dependent user,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ o2hb_dependent_users++;
+ if (o2hb_dependent_users > 1)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ ret = o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
return ret;
}
-static void o2hb_region_put(const char *region_uuid)
+void o2hb_region_dec_user(const char *region_uuid)
{
- struct o2hb_region *reg;
-
spin_lock(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ o2hb_region_unpin(region_uuid);
+ goto unlock;
+ }
- spin_unlock(&o2hb_live_lock);
+ /*
+ * if global heartbeat active and there are no dependent users,
+ * unpin all quorum regions
+ */
+ o2hb_dependent_users--;
+ if (!o2hb_dependent_users)
+ o2hb_region_unpin(NULL);
- if (reg) {
- o2nm_undepend_item(&reg->hr_item);
- o2nm_undepend_this_node();
- }
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
int o2hb_register_callback(const char *region_uuid,
@@ -2291,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
}
if (region_uuid) {
- ret = o2hb_region_get(region_uuid);
- if (ret)
+ ret = o2hb_region_inc_user(region_uuid);
+ if (ret) {
+ mlog_errno(ret);
goto out;
+ }
}
down_write(&o2hb_callback_sem);
@@ -2311,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
up_write(&o2hb_callback_sem);
ret = 0;
out:
- mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
ret, __builtin_return_address(0), hc);
return ret;
}
@@ -2322,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
{
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
- mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
@@ -2330,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
return;
if (region_uuid)
- o2hb_region_put(region_uuid);
+ o2hb_region_dec_user(region_uuid);
down_write(&o2hb_callback_sem);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e..3a583590 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
#define O2NET_DEBUG_DIR "o2net"
#define SC_DEBUG_NAME "sock_containers"
#define NST_DEBUG_NAME "send_tracking"
+#define STATS_DEBUG_NAME "stats"
+
+#define SHOW_SOCK_CONTAINERS 0
+#define SHOW_SOCK_STATS 1
static struct dentry *o2net_dentry;
static struct dentry *sc_dentry;
static struct dentry *nst_dentry;
+static struct dentry *stats_dentry;
static DEFINE_SPINLOCK(o2net_debug_lock);
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int nst_seq_show(struct seq_file *seq, void *v)
{
struct o2net_send_tracking *nst, *dummy_nst = seq->private;
+ ktime_t now;
+ s64 sock, send, status;
spin_lock(&o2net_debug_lock);
nst = next_nst(dummy_nst);
+ if (!nst)
+ goto out;
- if (nst != NULL) {
- /* get_task_comm isn't exported. oh well. */
- seq_printf(seq, "%p:\n"
- " pid: %lu\n"
- " tgid: %lu\n"
- " process name: %s\n"
- " node: %u\n"
- " sc: %p\n"
- " message id: %d\n"
- " message type: %u\n"
- " message key: 0x%08x\n"
- " sock acquiry: %lu.%ld\n"
- " send start: %lu.%ld\n"
- " wait start: %lu.%ld\n",
- nst, (unsigned long)nst->st_task->pid,
- (unsigned long)nst->st_task->tgid,
- nst->st_task->comm, nst->st_node,
- nst->st_sc, nst->st_id, nst->st_msg_type,
- nst->st_msg_key,
- nst->st_sock_time.tv_sec,
- (long)nst->st_sock_time.tv_usec,
- nst->st_send_time.tv_sec,
- (long)nst->st_send_time.tv_usec,
- nst->st_status_time.tv_sec,
- (long)nst->st_status_time.tv_usec);
- }
+ now = ktime_get();
+ sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
+ send = ktime_to_us(ktime_sub(now, nst->st_send_time));
+ status = ktime_to_us(ktime_sub(now, nst->st_status_time));
+
+ /* get_task_comm isn't exported. oh well. */
+ seq_printf(seq, "%p:\n"
+ " pid: %lu\n"
+ " tgid: %lu\n"
+ " process name: %s\n"
+ " node: %u\n"
+ " sc: %p\n"
+ " message id: %d\n"
+ " message type: %u\n"
+ " message key: 0x%08x\n"
+ " sock acquiry: %lld usecs ago\n"
+ " send start: %lld usecs ago\n"
+ " wait start: %lld usecs ago\n",
+ nst, (unsigned long)task_pid_nr(nst->st_task),
+ (unsigned long)nst->st_task->tgid,
+ nst->st_task->comm, nst->st_node,
+ nst->st_sc, nst->st_id, nst->st_msg_type,
+ nst->st_msg_key,
+ (long long)sock,
+ (long long)send,
+ (long long)status);
+out:
spin_unlock(&o2net_debug_lock);
return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
spin_unlock(&o2net_debug_lock);
}
+struct o2net_sock_debug {
+ int dbg_ctxt;
+ struct o2net_sock_container *dbg_sock;
+};
+
static struct o2net_sock_container
*next_sc(struct o2net_sock_container *sc_start)
{
@@ -253,7 +268,8 @@ static struct o2net_sock_container
static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return sc; /* unused, just needs to be null when done */
}
-#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
+#ifdef CONFIG_OCFS2_FS_STATS
+# define sc_send_count(_s) ((_s)->sc_send_count)
+# define sc_recv_count(_s) ((_s)->sc_recv_count)
+# define sc_tv_acquiry_total_ns(_s) (ktime_to_ns((_s)->sc_tv_acquiry_total))
+# define sc_tv_send_total_ns(_s) (ktime_to_ns((_s)->sc_tv_send_total))
+# define sc_tv_status_total_ns(_s) (ktime_to_ns((_s)->sc_tv_status_total))
+# define sc_tv_process_total_ns(_s) (ktime_to_ns((_s)->sc_tv_process_total))
+#else
+# define sc_send_count(_s) (0U)
+# define sc_recv_count(_s) (0U)
+# define sc_tv_acquiry_total_ns(_s) (0LL)
+# define sc_tv_send_total_ns(_s) (0LL)
+# define sc_tv_status_total_ns(_s) (0LL)
+# define sc_tv_process_total_ns(_s) (0LL)
+#endif
+
+/* So that debugfs.ocfs2 can determine which format is being used */
+#define O2NET_STATS_STR_VERSION 1
+static void sc_show_sock_stats(struct seq_file *seq,
+ struct o2net_sock_container *sc)
+{
+ if (!sc)
+ return;
+
+ seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
+ sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
+ (long long)sc_tv_acquiry_total_ns(sc),
+ (long long)sc_tv_send_total_ns(sc),
+ (long long)sc_tv_status_total_ns(sc),
+ (unsigned long)sc_recv_count(sc),
+ (long long)sc_tv_process_total_ns(sc));
+}
+
+static void sc_show_sock_container(struct seq_file *seq,
+ struct o2net_sock_container *sc)
+{
+ struct inet_sock *inet = NULL;
+ __be32 saddr = 0, daddr = 0;
+ __be16 sport = 0, dport = 0;
+
+ if (!sc)
+ return;
+
+ if (sc->sc_sock) {
+ inet = inet_sk(sc->sc_sock->sk);
+ /* the stack's structs aren't sparse endian clean */
+ saddr = (__force __be32)inet->inet_saddr;
+ daddr = (__force __be32)inet->inet_daddr;
+ sport = (__force __be16)inet->inet_sport;
+ dport = (__force __be16)inet->inet_dport;
+ }
+
+ /* XXX sigh, inet-> doesn't have sparse annotation so any
+ * use of it here generates a warning with -Wbitwise */
+ seq_printf(seq, "%p:\n"
+ " krefs: %d\n"
+ " sock: %pI4:%u -> "
+ "%pI4:%u\n"
+ " remote node: %s\n"
+ " page off: %zu\n"
+ " handshake ok: %u\n"
+ " timer: %lld usecs\n"
+ " data ready: %lld usecs\n"
+ " advance start: %lld usecs\n"
+ " advance stop: %lld usecs\n"
+ " func start: %lld usecs\n"
+ " func stop: %lld usecs\n"
+ " func key: 0x%08x\n"
+ " func type: %u\n",
+ sc,
+ atomic_read(&sc->sc_kref.refcount),
+ &saddr, inet ? ntohs(sport) : 0,
+ &daddr, inet ? ntohs(dport) : 0,
+ sc->sc_node->nd_name,
+ sc->sc_page_off,
+ sc->sc_handshake_ok,
+ (long long)ktime_to_us(sc->sc_tv_timer),
+ (long long)ktime_to_us(sc->sc_tv_data_ready),
+ (long long)ktime_to_us(sc->sc_tv_advance_start),
+ (long long)ktime_to_us(sc->sc_tv_advance_stop),
+ (long long)ktime_to_us(sc->sc_tv_func_start),
+ (long long)ktime_to_us(sc->sc_tv_func_stop),
+ sc->sc_msg_key,
+ sc->sc_msg_type);
+}
static int sc_seq_show(struct seq_file *seq, void *v)
{
- struct o2net_sock_container *sc, *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
spin_lock(&o2net_debug_lock);
sc = next_sc(dummy_sc);
- if (sc != NULL) {
- struct inet_sock *inet = NULL;
-
- __be32 saddr = 0, daddr = 0;
- __be16 sport = 0, dport = 0;
-
- if (sc->sc_sock) {
- inet = inet_sk(sc->sc_sock->sk);
- /* the stack's structs aren't sparse endian clean */
- saddr = (__force __be32)inet->inet_saddr;
- daddr = (__force __be32)inet->inet_daddr;
- sport = (__force __be16)inet->inet_sport;
- dport = (__force __be16)inet->inet_dport;
- }
-
- /* XXX sigh, inet-> doesn't have sparse annotation so any
- * use of it here generates a warning with -Wbitwise */
- seq_printf(seq, "%p:\n"
- " krefs: %d\n"
- " sock: %pI4:%u -> "
- "%pI4:%u\n"
- " remote node: %s\n"
- " page off: %zu\n"
- " handshake ok: %u\n"
- " timer: %lu.%ld\n"
- " data ready: %lu.%ld\n"
- " advance start: %lu.%ld\n"
- " advance stop: %lu.%ld\n"
- " func start: %lu.%ld\n"
- " func stop: %lu.%ld\n"
- " func key: %u\n"
- " func type: %u\n",
- sc,
- atomic_read(&sc->sc_kref.refcount),
- &saddr, inet ? ntohs(sport) : 0,
- &daddr, inet ? ntohs(dport) : 0,
- sc->sc_node->nd_name,
- sc->sc_page_off,
- sc->sc_handshake_ok,
- TV_SEC_USEC(sc->sc_tv_timer),
- TV_SEC_USEC(sc->sc_tv_data_ready),
- TV_SEC_USEC(sc->sc_tv_advance_start),
- TV_SEC_USEC(sc->sc_tv_advance_stop),
- TV_SEC_USEC(sc->sc_tv_func_start),
- TV_SEC_USEC(sc->sc_tv_func_stop),
- sc->sc_msg_key,
- sc->sc_msg_type);
+ if (sc) {
+ if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
+ sc_show_sock_container(seq, sc);
+ else
+ sc_show_sock_stats(seq, sc);
}
-
spin_unlock(&o2net_debug_lock);
return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
.show = sc_seq_show,
};
-static int sc_fop_open(struct inode *inode, struct file *file)
+static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
{
struct o2net_sock_container *dummy_sc;
struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
goto out;
seq = file->private_data;
- seq->private = dummy_sc;
+ seq->private = sd;
+ sd->dbg_sock = dummy_sc;
o2net_debug_add_sc(dummy_sc);
dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
static int sc_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
- struct o2net_sock_container *dummy_sc = seq->private;
+ struct o2net_sock_debug *sd = seq->private;
+ struct o2net_sock_container *dummy_sc = sd->dbg_sock;
o2net_debug_del_sc(dummy_sc);
return seq_release_private(inode, file);
}
+static int stats_fop_open(struct inode *inode, struct file *file)
+{
+ struct o2net_sock_debug *sd;
+
+ sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+ if (sd == NULL)
+ return -ENOMEM;
+
+ sd->dbg_ctxt = SHOW_SOCK_STATS;
+ sd->dbg_sock = NULL;
+
+ return sc_common_open(file, sd);
+}
+
+static const struct file_operations stats_seq_fops = {
+ .open = stats_fop_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = sc_fop_release,
+};
+
+static int sc_fop_open(struct inode *inode, struct file *file)
+{
+ struct o2net_sock_debug *sd;
+
+ sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+ if (sd == NULL)
+ return -ENOMEM;
+
+ sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
+ sd->dbg_sock = NULL;
+
+ return sc_common_open(file, sd);
+}
+
static const struct file_operations sc_seq_fops = {
.open = sc_fop_open,
.read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
goto bail;
}
+ stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
+ o2net_dentry, NULL,
+ &stats_seq_fops);
+ if (!stats_dentry) {
+ mlog_errno(-ENOMEM);
+ goto bail;
+ }
+
return 0;
bail:
- if (sc_dentry)
- debugfs_remove(sc_dentry);
- if (nst_dentry)
- debugfs_remove(nst_dentry);
- if (o2net_dentry)
- debugfs_remove(o2net_dentry);
+ debugfs_remove(stats_dentry);
+ debugfs_remove(sc_dentry);
+ debugfs_remove(nst_dentry);
+ debugfs_remove(o2net_dentry);
return -ENOMEM;
}
void o2net_debugfs_exit(void)
{
- if (sc_dentry)
- debugfs_remove(sc_dentry);
- if (nst_dentry)
- debugfs_remove(nst_dentry);
- if (o2net_dentry)
- debugfs_remove(o2net_dentry);
+ debugfs_remove(stats_dentry);
+ debugfs_remove(sc_dentry);
+ debugfs_remove(nst_dentry);
+ debugfs_remove(o2net_dentry);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e..3b11cb1 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
nst->st_node = node;
}
-static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_sock_time);
+ nst->st_sock_time = ktime_get();
}
-static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_send_time);
+ nst->st_send_time = ktime_get();
}
-static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
{
- do_gettimeofday(&nst->st_status_time);
+ nst->st_status_time = ktime_get();
}
-static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc)
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+ struct o2net_sock_container *sc)
{
nst->st_sc = sc;
}
-static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+ u32 msg_id)
{
nst->st_id = msg_id;
}
-#else /* CONFIG_DEBUG_FS */
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
- u32 msgkey, struct task_struct *task, u8 node)
+static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
{
+ sc->sc_tv_timer = ktime_get();
}
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_data_ready = ktime_get();
}
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_advance_start = ktime_get();
}
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_advance_stop = ktime_get();
}
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc)
+static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_func_start = ktime_get();
}
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
- u32 msg_id)
+static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
{
+ sc->sc_tv_func_stop = ktime_get();
}
+static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
+{
+ return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
+}
+#else /* CONFIG_DEBUG_FS */
+# define o2net_init_nst(a, b, c, d, e)
+# define o2net_set_nst_sock_time(a)
+# define o2net_set_nst_send_time(a)
+# define o2net_set_nst_status_time(a)
+# define o2net_set_nst_sock_container(a, b)
+# define o2net_set_nst_msg_id(a, b)
+# define o2net_set_sock_timer(a)
+# define o2net_set_data_ready_time(a)
+# define o2net_set_advance_start_time(a)
+# define o2net_set_advance_stop_time(a)
+# define o2net_set_func_start_time(a)
+# define o2net_set_func_stop_time(a)
+# define o2net_get_func_run_time(a) (ktime_t)0
#endif /* CONFIG_DEBUG_FS */
+#ifdef CONFIG_OCFS2_FS_STATS
+static void o2net_update_send_stats(struct o2net_send_tracking *nst,
+ struct o2net_sock_container *sc)
+{
+ sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
+ ktime_sub(ktime_get(),
+ nst->st_status_time));
+ sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
+ ktime_sub(nst->st_status_time,
+ nst->st_send_time));
+ sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
+ ktime_sub(nst->st_send_time,
+ nst->st_sock_time));
+ sc->sc_send_count++;
+}
+
+static void o2net_update_recv_stats(struct o2net_sock_container *sc)
+{
+ sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
+ o2net_get_func_run_time(sc));
+ sc->sc_recv_count++;
+}
+
+#else
+
+# define o2net_update_send_stats(a, b)
+
+# define o2net_update_recv_stats(sc)
+
+#endif /* CONFIG_OCFS2_FS_STATS */
+
static inline int o2net_reconnect_delay(void)
{
return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
sc->sc_sock = NULL;
}
+ o2nm_undepend_item(&sc->sc_node->nd_item);
o2nm_node_put(sc->sc_node);
sc->sc_node = NULL;
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
{
struct o2net_sock_container *sc, *ret = NULL;
struct page *page = NULL;
+ int status = 0;
page = alloc_page(GFP_NOFS);
sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
o2nm_node_get(node);
sc->sc_node = node;
+ /* pin the node item of the remote node */
+ status = o2nm_depend_item(&node->nd_item);
+ if (status) {
+ mlog_errno(status);
+ o2nm_node_put(node);
+ goto out;
+ }
INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
if (sk->sk_user_data) {
struct o2net_sock_container *sc = sk->sk_user_data;
sclog(sc, "data_ready hit\n");
- do_gettimeofday(&sc->sc_tv_data_ready);
+ o2net_set_data_ready_time(sc);
o2net_sc_queue_work(sc, &sc->sc_rx_work);
ready = sc->sc_data_ready;
} else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
o2net_set_nst_status_time(&nst);
wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
+ o2net_update_send_stats(&nst, sc);
+
/* Note that we avoid overwriting the callers status return
* variable if a system error was reported on the other
* side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
if (syserr != O2NET_ERR_NONE)
goto out_respond;
- do_gettimeofday(&sc->sc_tv_func_start);
+ o2net_set_func_start_time(sc);
sc->sc_msg_key = be32_to_cpu(hdr->key);
sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
be16_to_cpu(hdr->data_len),
nmh->nh_func_data, &ret_data);
- do_gettimeofday(&sc->sc_tv_func_stop);
+ o2net_set_func_stop_time(sc);
+
+ o2net_update_recv_stats(sc);
out_respond:
/* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
size_t datalen;
sclog(sc, "receiving\n");
- do_gettimeofday(&sc->sc_tv_advance_start);
+ o2net_set_advance_start_time(sc);
if (unlikely(sc->sc_handshake_ok == 0)) {
if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
out:
sclog(sc, "ret = %d\n", ret);
- do_gettimeofday(&sc->sc_tv_advance_stop);
+ o2net_set_advance_stop_time(sc);
return ret;
}
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
{
struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
- struct timeval now;
- do_gettimeofday(&now);
+#ifdef CONFIG_DEBUG_FS
+ ktime_t now = ktime_get();
+#endif
printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
"seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
o2net_idle_timeout() / 1000,
o2net_idle_timeout() % 1000);
- mlog(ML_NOTICE, "here are some times that might help debug the "
- "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
- "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
- sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
- now.tv_sec, (long) now.tv_usec,
- sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
- sc->sc_tv_advance_start.tv_sec,
- (long) sc->sc_tv_advance_start.tv_usec,
- sc->sc_tv_advance_stop.tv_sec,
- (long) sc->sc_tv_advance_stop.tv_usec,
+
+#ifdef CONFIG_DEBUG_FS
+ mlog(ML_NOTICE, "Here are some times that might help debug the "
+ "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
+ "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
+ (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
+ (long long)ktime_to_us(sc->sc_tv_data_ready),
+ (long long)ktime_to_us(sc->sc_tv_advance_start),
+ (long long)ktime_to_us(sc->sc_tv_advance_stop),
sc->sc_msg_key, sc->sc_msg_type,
- sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
- sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
+ (long long)ktime_to_us(sc->sc_tv_func_start),
+ (long long)ktime_to_us(sc->sc_tv_func_stop));
+#endif
/*
* Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
msecs_to_jiffies(o2net_keepalive_delay()));
- do_gettimeofday(&sc->sc_tv_timer);
+ o2net_set_sock_timer(sc);
mod_timer(&sc->sc_idle_timeout,
jiffies + msecs_to_jiffies(o2net_idle_timeout()));
}
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf..4cbcb65 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,27 @@ struct o2net_sock_container {
/* original handlers for the sockets */
void (*sc_state_change)(struct sock *sk);
void (*sc_data_ready)(struct sock *sk, int bytes);
-#ifdef CONFIG_DEBUG_FS
- struct list_head sc_net_debug_item;
-#endif
- struct timeval sc_tv_timer;
- struct timeval sc_tv_data_ready;
- struct timeval sc_tv_advance_start;
- struct timeval sc_tv_advance_stop;
- struct timeval sc_tv_func_start;
- struct timeval sc_tv_func_stop;
+
u32 sc_msg_key;
u16 sc_msg_type;
+#ifdef CONFIG_DEBUG_FS
+ struct list_head sc_net_debug_item;
+ ktime_t sc_tv_timer;
+ ktime_t sc_tv_data_ready;
+ ktime_t sc_tv_advance_start;
+ ktime_t sc_tv_advance_stop;
+ ktime_t sc_tv_func_start;
+ ktime_t sc_tv_func_stop;
+#endif
+#ifdef CONFIG_OCFS2_FS_STATS
+ ktime_t sc_tv_acquiry_total;
+ ktime_t sc_tv_send_total;
+ ktime_t sc_tv_status_total;
+ u32 sc_send_count;
+ u32 sc_recv_count;
+ ktime_t sc_tv_process_total;
+#endif
struct mutex sc_send_lock;
};
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
u32 st_msg_type;
u32 st_msg_key;
u8 st_node;
- struct timeval st_sock_time;
- struct timeval st_send_time;
- struct timeval st_status_time;
+ ktime_t st_sock_time;
+ ktime_t st_send_time;
+ ktime_t st_status_time;
};
#else
struct o2net_send_tracking {
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f449991..3a3ed4b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
+ struct dlm_lock_resource *res;
BUG_ON(!dlm);
BUG_ON(!lock);
+ res = lock->lockres;
+
assert_spin_locked(&dlm->ast_lock);
+
if (!list_empty(&lock->ast_list)) {
- mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n",
+ mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
+ "AST list not empty, pending %d, newlevel %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
lock->ast_pending, lock->ml.type);
BUG();
}
if (lock->ast_pending)
- mlog(0, "lock has an ast getting flushed right now\n");
+ mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
/* putting lock on list, add a ref */
dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
/* check to see if this ast obsoletes the bast */
if (dlm_should_cancel_bast(dlm, lock)) {
- struct dlm_lock_resource *res = lock->lockres;
- mlog(0, "%s: cancelling bast for %.*s\n",
- dlm->name, res->lockname.len, res->lockname.name);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lock->bast_pending = 0;
list_del_init(&lock->bast_list);
lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
-
BUG_ON(!dlm);
BUG_ON(!lock);
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
+ struct dlm_lock_resource *res;
BUG_ON(!dlm);
BUG_ON(!lock);
+
assert_spin_locked(&dlm->ast_lock);
+ res = lock->lockres;
+
BUG_ON(!list_empty(&lock->bast_list));
if (lock->bast_pending)
- mlog(0, "lock has a bast getting flushed right now\n");
+ mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
/* putting lock on list, add a ref */
dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
- mlog_entry_void();
-
BUG_ON(!dlm);
BUG_ON(!lock);
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
dlm_astlockfunc_t *fn;
struct dlm_lockstatus *lksb;
- mlog_entry_void();
+ mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
+ res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lksb = lock->lksb;
fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_lockstatus *lksb;
int lksbflags;
- mlog_entry_void();
+ mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
+ res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
lksb = lock->lksb;
BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
{
dlm_bastlockfunc_t *fn = lock->bast;
- mlog_entry_void();
BUG_ON(lock->ml.node != dlm->node_num);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ blocked_type);
+
(*fn)(lock->astdata, blocked_type);
}
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
/* cannot get a proxy ast message if this node owns it */
BUG_ON(res->owner == dlm->node_num);
- mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
+ mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+ res->lockname.name);
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
if (past->type == DLM_AST) {
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, &res->granted);
- mlog(0, "ast: Adding to granted list... type=%d, "
- "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+ mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+ lock->ml.type, lock->ml.convert_type);
+
if (lock->ml.convert_type != LKM_IVMODE) {
lock->ml.type = lock->ml.convert_type;
lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
size_t veclen = 1;
int status;
- mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
- res->lockname.len, res->lockname.name, lock->ml.node,
- msg_type, blocked_type);
+ mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
+ blocked_type);
memset(&past, 0, sizeof(struct dlm_proxy_ast));
past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
vec[0].iov_len = sizeof(struct dlm_proxy_ast);
vec[0].iov_base = &past;
if (flags & DLM_LKSB_GET_LVB) {
- mlog(0, "returning requested LVB data\n");
be32_add_cpu(&past.flags, LKM_GET_LVB);
vec[1].iov_len = DLM_LVB_LEN;
vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
lock->ml.node, &status);
if (ret < 0)
- mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
- "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
+ mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
+ dlm->name, res->lockname.len, res->lockname.name, ret,
lock->ml.node);
else {
if (status == DLM_RECOVERING) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index b36d0bf..4bdf7ba 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -50,10 +50,10 @@
#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
enum dlm_mle_type {
- DLM_MLE_BLOCK,
- DLM_MLE_MASTER,
- DLM_MLE_MIGRATION,
- DLM_MLE_NUM_TYPES
+ DLM_MLE_BLOCK = 0,
+ DLM_MLE_MASTER = 1,
+ DLM_MLE_MIGRATION = 2,
+ DLM_MLE_NUM_TYPES = 3,
};
struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
enum dlm_ast_type {
DLM_AST = 0,
- DLM_BAST,
- DLM_ASTUNLOCK
+ DLM_BAST = 1,
+ DLM_ASTUNLOCK = 2,
};
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
enum dlm_ctxt_state {
DLM_CTXT_NEW = 0,
- DLM_CTXT_JOINED,
- DLM_CTXT_IN_SHUTDOWN,
- DLM_CTXT_LEAVING,
+ DLM_CTXT_JOINED = 1,
+ DLM_CTXT_IN_SHUTDOWN = 2,
+ DLM_CTXT_LEAVING = 3,
};
struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
enum dlm_lockres_list {
DLM_GRANTED_LIST = 0,
- DLM_CONVERTING_LIST,
- DLM_BLOCKED_LIST
+ DLM_CONVERTING_LIST = 1,
+ DLM_BLOCKED_LIST = 2,
};
static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
enum {
- DLM_MASTER_REQUEST_MSG = 500,
- DLM_UNUSED_MSG1, /* 501 */
- DLM_ASSERT_MASTER_MSG, /* 502 */
- DLM_CREATE_LOCK_MSG, /* 503 */
- DLM_CONVERT_LOCK_MSG, /* 504 */
- DLM_PROXY_AST_MSG, /* 505 */
- DLM_UNLOCK_LOCK_MSG, /* 506 */
- DLM_DEREF_LOCKRES_MSG, /* 507 */
- DLM_MIGRATE_REQUEST_MSG, /* 508 */
- DLM_MIG_LOCKRES_MSG, /* 509 */
- DLM_QUERY_JOIN_MSG, /* 510 */
- DLM_ASSERT_JOINED_MSG, /* 511 */
- DLM_CANCEL_JOIN_MSG, /* 512 */
- DLM_EXIT_DOMAIN_MSG, /* 513 */
- DLM_MASTER_REQUERY_MSG, /* 514 */
- DLM_LOCK_REQUEST_MSG, /* 515 */
- DLM_RECO_DATA_DONE_MSG, /* 516 */
- DLM_BEGIN_RECO_MSG, /* 517 */
- DLM_FINALIZE_RECO_MSG, /* 518 */
- DLM_QUERY_REGION, /* 519 */
- DLM_QUERY_NODEINFO, /* 520 */
+ DLM_MASTER_REQUEST_MSG = 500,
+ DLM_UNUSED_MSG1 = 501,
+ DLM_ASSERT_MASTER_MSG = 502,
+ DLM_CREATE_LOCK_MSG = 503,
+ DLM_CONVERT_LOCK_MSG = 504,
+ DLM_PROXY_AST_MSG = 505,
+ DLM_UNLOCK_LOCK_MSG = 506,
+ DLM_DEREF_LOCKRES_MSG = 507,
+ DLM_MIGRATE_REQUEST_MSG = 508,
+ DLM_MIG_LOCKRES_MSG = 509,
+ DLM_QUERY_JOIN_MSG = 510,
+ DLM_ASSERT_JOINED_MSG = 511,
+ DLM_CANCEL_JOIN_MSG = 512,
+ DLM_EXIT_DOMAIN_MSG = 513,
+ DLM_MASTER_REQUERY_MSG = 514,
+ DLM_LOCK_REQUEST_MSG = 515,
+ DLM_RECO_DATA_DONE_MSG = 516,
+ DLM_BEGIN_RECO_MSG = 517,
+ DLM_FINALIZE_RECO_MSG = 518,
+ DLM_QUERY_REGION = 519,
+ DLM_QUERY_NODEINFO = 520,
};
struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
enum {
DLM_RECO_NODE_DATA_DEAD = -1,
DLM_RECO_NODE_DATA_INIT = 0,
- DLM_RECO_NODE_DATA_REQUESTING,
- DLM_RECO_NODE_DATA_REQUESTED,
- DLM_RECO_NODE_DATA_RECEIVING,
- DLM_RECO_NODE_DATA_DONE,
- DLM_RECO_NODE_DATA_FINALIZE_SENT,
+ DLM_RECO_NODE_DATA_REQUESTING = 1,
+ DLM_RECO_NODE_DATA_REQUESTED = 2,
+ DLM_RECO_NODE_DATA_RECEIVING = 3,
+ DLM_RECO_NODE_DATA_DONE = 4,
+ DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
};
enum {
DLM_MASTER_RESP_NO = 0,
- DLM_MASTER_RESP_YES,
- DLM_MASTER_RESP_MAYBE,
- DLM_MASTER_RESP_ERROR
+ DLM_MASTER_RESP_YES = 1,
+ DLM_MASTER_RESP_MAYBE = 2,
+ DLM_MASTER_RESP_ERROR = 3,
};
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
#define DLM_MOD_KEY (0x666c6172)
enum dlm_query_join_response_code {
JOIN_DISALLOW = 0,
- JOIN_OK,
- JOIN_OK_NO_MAP,
- JOIN_PROTOCOL_MISMATCH,
+ JOIN_OK = 1,
+ JOIN_OK_NO_MAP = 2,
+ JOIN_PROTOCOL_MISMATCH = 3,
};
struct dlm_query_join_packet {
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 272ec86..04a32be 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
kref_get(&dc->debug_refcnt);
}
-static struct debug_buffer *debug_buffer_allocate(void)
+static int debug_release(struct inode *inode, struct file *file)
{
- struct debug_buffer *db = NULL;
-
- db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
- if (!db)
- goto bail;
-
- db->len = PAGE_SIZE;
- db->buf = kmalloc(db->len, GFP_KERNEL);
- if (!db->buf)
- goto bail;
-
- return db;
-bail:
- kfree(db);
- return NULL;
-}
-
-static ssize_t debug_buffer_read(struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- struct debug_buffer *db = file->private_data;
-
- return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
-}
-
-static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
-{
- struct debug_buffer *db = file->private_data;
- loff_t new = -1;
-
- switch (whence) {
- case 0:
- new = off;
- break;
- case 1:
- new = file->f_pos + off;
- break;
- }
-
- if (new < 0 || new > db->len)
- return -EINVAL;
-
- return (file->f_pos = new);
+ free_page((unsigned long)file->private_data);
+ return 0;
}
-static int debug_buffer_release(struct inode *inode, struct file *file)
+static ssize_t debug_read(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *ppos)
{
- struct debug_buffer *db = file->private_data;
-
- if (db)
- kfree(db->buf);
- kfree(db);
-
- return 0;
+ return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+ i_size_read(file->f_mapping->host));
}
/* end - util funcs */
/* begin - purge list funcs */
-static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
{
struct dlm_lock_resource *res;
int out = 0;
unsigned long total = 0;
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dumping Purgelist for Domain: %s\n", dlm->name);
spin_lock(&dlm->spinlock);
list_for_each_entry(res, &dlm->purge_list, purge) {
++total;
- if (db->len - out < 100)
+ if (len - out < 100)
continue;
spin_lock(&res->spinlock);
out += stringify_lockname(res->lockname.name,
res->lockname.len,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\t%ld\n",
(jiffies - res->last_used)/HZ);
spin_unlock(&res->spinlock);
}
spin_unlock(&dlm->spinlock);
- out += snprintf(db->buf + out, db->len - out,
- "Total on list: %ld\n", total);
+ out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
return out;
}
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_purgelist_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_purgelist_print(dlm, db);
+ i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -480,14 +434,14 @@ bail:
static const struct file_operations debug_purgelist_fops = {
.open = debug_purgelist_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - purge list funcs */
/* begin - debug mle funcs */
-static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
{
struct dlm_master_list_entry *mle;
struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
int i, out = 0;
unsigned long total = 0, longest = 0, bucket_count = 0;
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dumping MLEs for Domain: %s\n", dlm->name);
spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
master_hash_node);
++total;
++bucket_count;
- if (db->len - out < 200)
+ if (len - out < 200)
continue;
- out += dump_mle(mle, db->buf + out, db->len - out);
+ out += dump_mle(mle, buf + out, len - out);
}
longest = max(longest, bucket_count);
bucket_count = 0;
}
spin_unlock(&dlm->master_lock);
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Total: %ld, Longest: %ld\n", total, longest);
return out;
}
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_mle_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_mle_print(dlm, db);
+ i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -540,9 +494,9 @@ bail:
static const struct file_operations debug_mle_fops = {
.open = debug_mle_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
/* end - debug lockres funcs */
/* begin - debug state funcs */
-static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
{
int out = 0;
struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
}
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Domain: %s Key: 0x%08x Protocol: %d.%d\n",
dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
/* Thread Pid: xxx Node: xxx State: xxxxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Thread Pid: %d Node: %d State: %s\n",
- dlm->dlm_thread_task->pid, dlm->node_num, state);
+ task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
/* Number of Joins: xxx Joining Node: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Number of Joins: %d Joining Node: %d\n",
dlm->num_joins, dlm->joining_node);
/* Domain Map: xx xx xx */
- out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
+ out += snprintf(buf + out, len - out, "Domain Map: ");
out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Live Map: xx xx xx */
- out += snprintf(db->buf + out, db->len - out, "Live Map: ");
+ out += snprintf(buf + out, len - out, "Live Map: ");
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Lock Resources: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Lock Resources: %d (%d)\n",
atomic_read(&dlm->res_cur_count),
atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
/* MLEs: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"MLEs: %d (%d)\n", cur_mles, tot_mles);
/* Blocking: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Blocking: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
/* Mastery: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Mastery: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
/* Migration: xxx (xxx) */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
" Migration: %d (%d)\n",
atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
"PendingBASTs=%s\n",
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
/* Purge Count: xxx Refs: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Purge Count: %d Refs: %d\n", dlm->purge_count,
atomic_read(&dlm->dlm_refs.refcount));
/* Dead Node: xxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Dead Node: %d\n", dlm->reco.dead_node);
/* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
state = "INACTIVE";
/* Recovery Pid: xxxx Master: xxx State: xxxx */
- out += snprintf(db->buf + out, db->len - out,
+ out += snprintf(buf + out, len - out,
"Recovery Pid: %d Master: %d State: %s\n",
- dlm->dlm_reco_thread_task->pid,
+ task_pid_nr(dlm->dlm_reco_thread_task),
dlm->reco.new_master, state);
/* Recovery Map: xx xx */
- out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
+ out += snprintf(buf + out, len - out, "Recovery Map: ");
out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
- db->buf + out, db->len - out);
- out += snprintf(db->buf + out, db->len - out, "\n");
+ buf + out, len - out);
+ out += snprintf(buf + out, len - out, "\n");
/* Recovery Node State: */
- out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
+ out += snprintf(buf + out, len - out, "Recovery Node State:\n");
list_for_each_entry(node, &dlm->reco.node_data, list) {
switch (node->state) {
case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
state = "BAD";
break;
}
- out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
+ out += snprintf(buf + out, len - out, "\t%u - %s\n",
node->node_num, state);
}
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
static int debug_state_open(struct inode *inode, struct file *file)
{
struct dlm_ctxt *dlm = inode->i_private;
- struct debug_buffer *db = NULL;
+ char *buf = NULL;
- db = debug_buffer_allocate();
- if (!db)
+ buf = (char *) get_zeroed_page(GFP_NOFS);
+ if (!buf)
goto bail;
- db->len = debug_state_print(dlm, db);
+ i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
- file->private_data = db;
+ file->private_data = buf;
return 0;
bail:
@@ -936,9 +890,9 @@ bail:
static const struct file_operations debug_state_fops = {
.open = debug_state_open,
- .release = debug_buffer_release,
- .read = debug_buffer_read,
- .llseek = debug_buffer_llseek,
+ .release = debug_release,
+ .read = debug_read,
+ .llseek = generic_file_llseek,
};
/* end - debug state funcs */
@@ -1002,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
if (dc) {
- if (dc->debug_purgelist_dentry)
- debugfs_remove(dc->debug_purgelist_dentry);
- if (dc->debug_mle_dentry)
- debugfs_remove(dc->debug_mle_dentry);
- if (dc->debug_lockres_dentry)
- debugfs_remove(dc->debug_lockres_dentry);
- if (dc->debug_state_dentry)
- debugfs_remove(dc->debug_state_dentry);
+ debugfs_remove(dc->debug_purgelist_dentry);
+ debugfs_remove(dc->debug_mle_dentry);
+ debugfs_remove(dc->debug_lockres_dentry);
+ debugfs_remove(dc->debug_state_dentry);
dlm_debug_put(dc);
}
}
@@ -1040,8 +990,7 @@ bail:
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
- if (dlm->dlm_debugfs_subroot)
- debugfs_remove(dlm->dlm_debugfs_subroot);
+ debugfs_remove(dlm->dlm_debugfs_subroot);
}
/* debugfs root */
@@ -1057,7 +1006,6 @@ int dlm_create_debugfs_root(void)
void dlm_destroy_debugfs_root(void)
{
- if (dlm_debugfs_root)
- debugfs_remove(dlm_debugfs_root);
+ debugfs_remove(dlm_debugfs_root);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 8c686d2..1f27c48 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
struct dentry *debug_purgelist_dentry;
};
-struct debug_buffer {
- int len;
- char *buf;
-};
-
struct debug_lockres {
int dl_len;
char *dl_buf;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index cc2aaa9..7e38a07 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,8 +460,6 @@ redo_bucket:
}
cond_resched_lock(&dlm->spinlock);
num += n;
- mlog(0, "%s: touched %d lockreses in bucket %d "
- "(tot=%d)\n", dlm->name, n, i, num);
}
spin_unlock(&dlm->spinlock);
wake_up(&dlm->dlm_thread_wq);
@@ -1661,8 +1659,8 @@ bail:
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
{
- o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
- o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+ o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
+ o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
}
@@ -1674,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
- status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+ status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
if (status)
goto bail;
o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
- status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+ status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
if (status)
goto bail;
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 69cf369..7009292 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
return 0;
+ if (!dlm_lock_compatible(tmplock->ml.convert_type,
+ lock->ml.type))
+ return 0;
}
return 1;
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2211acf..1d6d1d2 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
-
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&res->spinlock);
if (__dlm_lockres_unused(res)){
if (list_empty(&res->purge)) {
- mlog(0, "putting lockres %.*s:%p onto purge list\n",
- res->lockname.len, res->lockname.name, res);
+ mlog(0, "%s: Adding res %.*s to purge list\n",
+ dlm->name, res->lockname.len, res->lockname.name);
res->last_used = jiffies;
dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
dlm->purge_count++;
}
} else if (!list_empty(&res->purge)) {
- mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
- res->lockname.len, res->lockname.name, res, res->owner);
+ mlog(0, "%s: Removing res %.*s from purge list\n",
+ dlm->name, res->lockname.len, res->lockname.name);
list_del_init(&res->purge);
dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
master = (res->owner == dlm->node_num);
-
- mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
- res->lockname.name, master);
+ mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, master);
if (!master) {
res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
/* clear our bit from the master's refmap, ignore errors */
ret = dlm_drop_lockres_ref(dlm, res);
if (ret < 0) {
- mlog_errno(ret);
+ mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
+ res->lockname.len, res->lockname.name, ret);
if (!dlm_is_host_down(ret))
BUG();
}
- mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
- dlm->name, res->lockname.len, res->lockname.name, ret);
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
}
if (!list_empty(&res->purge)) {
- mlog(0, "removing lockres %.*s:%p from purgelist, "
- "master = %d\n", res->lockname.len, res->lockname.name,
- res, master);
+ mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
+ dlm->name, res->lockname.len, res->lockname.name, master);
list_del_init(&res->purge);
dlm_lockres_put(res);
dlm->purge_count--;
}
if (!__dlm_lockres_unused(res)) {
- mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
dlm->name, res->lockname.len, res->lockname.name);
__dlm_print_one_lock_resource(res);
BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
unused = __dlm_lockres_unused(lockres);
if (!unused ||
(lockres->state & DLM_LOCK_RES_MIGRATING)) {
- mlog(0, "lockres %s:%.*s: is in use or "
- "being remastered, used %d, state %d\n",
- dlm->name, lockres->lockname.len,
- lockres->lockname.name, !unused, lockres->state);
+ mlog(0, "%s: res %.*s is in use or being remastered, "
+ "used %d, state %d\n", dlm->name,
+ lockres->lockname.len, lockres->lockname.name,
+ !unused, lockres->state);
list_move_tail(&dlm->purge_list, &lockres->purge);
spin_unlock(&lockres->spinlock);
continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
struct list_head *head;
int can_grant = 1;
- //mlog(0, "res->lockname.len=%d\n", res->lockname.len);
- //mlog(0, "res->lockname.name=%p\n", res->lockname.name);
- //mlog(0, "shuffle res %.*s\n", res->lockname.len,
- // res->lockname.name);
-
- /* because this function is called with the lockres
+ /*
+ * Because this function is called with the lockres
* spinlock, and because we know that it is not migrating/
* recovering/in-progress, it is fine to reserve asts and
- * basts right before queueing them all throughout */
+ * basts right before queueing them all throughout
+ */
assert_spin_locked(&dlm->ast_lock);
assert_spin_locked(&res->spinlock);
BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
converting:
if (list_empty(&res->converting))
goto blocked;
- mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len,
- res->lockname.name);
+ mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
+ res->lockname.len, res->lockname.name);
target = list_entry(res->converting.next, struct dlm_lock, list);
if (target->ml.convert_type == LKM_IVMODE) {
- mlog(ML_ERROR, "%.*s: converting a lock with no "
- "convert_type!\n", res->lockname.len, res->lockname.name);
+ mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
+ dlm->name, res->lockname.len, res->lockname.name);
BUG();
}
head = &res->granted;
@@ -365,9 +356,12 @@ converting:
spin_lock(&target->spinlock);
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
- mlog(0, "calling ast for converting lock: %.*s, have: %d, "
- "granting: %d, node: %u\n", res->lockname.len,
- res->lockname.name, target->ml.type,
+ mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
+ "%d => %d, node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
+ target->ml.type,
target->ml.convert_type, target->ml.node);
target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
spin_lock(&target->spinlock);
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
- mlog(0, "calling ast for blocked lock: %.*s, granting: %d, "
- "node: %u\n", res->lockname.len, res->lockname.name,
+ mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
+ "node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
target->ml.type, target->ml.node);
- // target->ml.type is already correct
+ /* target->ml.type is already correct */
list_move_tail(&target->list, &res->granted);
BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
/* must have NO locks when calling this with res !=NULL * */
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
- mlog_entry("dlm=%p, res=%p\n", dlm, res);
if (res) {
spin_lock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
{
- mlog_entry("dlm=%p, res=%p\n", dlm, res);
-
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&res->spinlock);
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
res->state |= DLM_LOCK_RES_DIRTY;
}
}
+
+ mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+ res->lockname.name);
}
/* Launch the NM thread for the mounted volume */
int dlm_launch_thread(struct dlm_ctxt *dlm)
{
- mlog(0, "starting dlm thread...\n");
+ mlog(0, "Starting dlm_thread...\n");
dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
void dlm_complete_thread(struct dlm_ctxt *dlm)
{
if (dlm->dlm_thread_task) {
- mlog(ML_KTHREAD, "waiting for dlm thread to exit\n");
+ mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
kthread_stop(dlm->dlm_thread_task);
dlm->dlm_thread_task = NULL;
}
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* get an extra ref on lock */
dlm_lock_get(lock);
res = lock->lockres;
- mlog(0, "delivering an ast for this lockres\n");
+ mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
+ "node %u\n", dlm->name, res->lockname.len,
+ res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ lock->ml.type, lock->ml.node);
BUG_ON(!lock->ast_pending);
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* possible that another ast was queued while
* we were delivering the last one */
if (!list_empty(&lock->ast_list)) {
- mlog(0, "aha another ast got queued while "
- "we were finishing the last one. will "
- "keep the ast_pending flag set.\n");
+ mlog(0, "%s: res %.*s, AST queued while flushing last "
+ "one\n", dlm->name, res->lockname.len,
+ res->lockname.name);
} else
lock->ast_pending = 0;
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
dlm_lock_put(lock);
spin_unlock(&dlm->ast_lock);
- mlog(0, "delivering a bast for this lockres "
- "(blocked = %d\n", hi);
+ mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
+ "blocked %d, node %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ hi, lock->ml.node);
if (lock->ml.node != dlm->node_num) {
ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
/* possible that another bast was queued while
* we were delivering the last one */
if (!list_empty(&lock->bast_list)) {
- mlog(0, "aha another bast got queued while "
- "we were finishing the last one. will "
- "keep the bast_pending flag set.\n");
+ mlog(0, "%s: res %.*s, BAST queued while flushing last "
+ "one\n", dlm->name, res->lockname.len,
+ res->lockname.name);
} else
lock->bast_pending = 0;
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
spin_lock(&res->spinlock);
if (res->owner != dlm->node_num) {
__dlm_print_one_lock_resource(res);
- mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n",
- res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no",
- res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no",
- res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+ mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
+ " dirty %d\n", dlm->name,
+ !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
+ !!(res->state & DLM_LOCK_RES_MIGRATING),
+ !!(res->state & DLM_LOCK_RES_RECOVERING),
+ !!(res->state & DLM_LOCK_RES_DIRTY));
}
BUG_ON(res->owner != dlm->node_num);
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
spin_unlock(&dlm->ast_lock);
- mlog(0, "delaying list shuffling for in-"
- "progress lockres %.*s, state=%d\n",
+ mlog(0, "%s: res %.*s, inprogress, delay list "
+ "shuffle, state %d\n", dlm->name,
res->lockname.len, res->lockname.name,
res->state);
delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
* spinlock and do NOT have the dlm lock.
* safe to reserve/queue asts and run the lists. */
- mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
- "res=%.*s\n", dlm->name,
- res->lockname.len, res->lockname.name);
-
/* called while holding lockres lock */
dlm_shuffle_lists(dlm, res);
res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
/* unlikely, but we may need to give time to
* other tasks */
if (!--n) {
- mlog(0, "throttling dlm_thread\n");
+ mlog(0, "%s: Throttling dlm thread\n",
+ dlm->name);
break;
}
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d14cad6..30c5231 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
* An error return must mean that no cluster locks
* were held on function exit.
*/
- if (oi1->ip_blkno != oi2->ip_blkno)
+ if (oi1->ip_blkno != oi2->ip_blkno) {
ocfs2_inode_unlock(inode2, 1);
+ brelse(*bh2);
+ *bh2 = NULL;
+ }
if (status != -ENOENT)
mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 70dd3b1..51cd689 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -420,6 +420,11 @@ struct ocfs2_super
struct inode *osb_tl_inode;
struct buffer_head *osb_tl_bh;
struct delayed_work osb_truncate_log_wq;
+ /*
+ * How many clusters in our truncate log.
+ * It must be protected by osb_tl_inode->i_mutex.
+ */
+ unsigned int truncated_clusters;
struct ocfs2_node_map osb_recovering_orphan_dirs;
unsigned int *osb_orphan_wipes;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0fed41e..84becd3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -133,16 +133,20 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
void __quota_error(struct super_block *sb, const char *func,
- const char *fmt, ...)
+ const char *fmt, ...)
{
- va_list args;
-
if (printk_ratelimit()) {
+ va_list args;
+ struct va_format vaf;
+
va_start(args, fmt);
- printk(KERN_ERR "Quota error (device %s): %s: ",
- sb->s_id, func);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
+ sb->s_id, func, &vaf);
+
va_end(args);
}
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874..e41c1becf 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -468,8 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
return -ENOMEM;
ret = read_blk(info, *blk, buf);
if (ret < 0) {
- quota_error(dquot->dq_sb, "Can't read quota data "
- "block %u", blk);
+ quota_error(dquot->dq_sb, "Can't read quota data block %u",
+ *blk);
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -493,8 +493,9 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
} else {
ret = write_blk(info, *blk, buf);
if (ret < 0)
- quota_error(dquot->dq_sb, "Can't write quota "
- "tree block %u", blk);
+ quota_error(dquot->dq_sb,
+ "Can't write quota tree block %u",
+ *blk);
}
}
out_buf:
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index f8def3c..0e0e99b 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -1,6 +1,5 @@
config UDF_FS
tristate "UDF file system support"
- depends on BKL # needs serious work to remove
select CRC_ITU_T
help
This is the new file system used on some CD-ROMs and DVDs. Say Y if
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index b608efa..306ee39 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -157,10 +157,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
udf_debug("bit %ld already set\n", bit + i);
udf_debug("byte=%2x\n",
((char *)bh->b_data)[(bit + i) >> 3]);
- } else {
- udf_add_free_space(sb, sbi->s_partition, 1);
}
}
+ udf_add_free_space(sb, sbi->s_partition, count);
mark_buffer_dirty(bh);
if (overflow) {
block += count;
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 51552bf..eb8bfe2 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include "udf_i.h"
@@ -190,18 +189,14 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct inode *dir = filp->f_path.dentry->d_inode;
int result;
- lock_kernel();
-
if (filp->f_pos == 0) {
if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) {
- unlock_kernel();
return 0;
}
filp->f_pos++;
}
result = do_udf_readdir(dir, filp, filldir, dirent);
- unlock_kernel();
return result;
}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 66b9e7e..89c7848 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -32,7 +32,6 @@
#include <linux/string.h> /* memset */
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/aio.h>
@@ -114,6 +113,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
size_t count = iocb->ki_left;
struct udf_inode_info *iinfo = UDF_I(inode);
+ down_write(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
if (file->f_flags & O_APPEND)
pos = inode->i_size;
@@ -126,6 +126,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
udf_expand_file_adinicb(inode, pos + count, &err);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
udf_debug("udf_expand_adinicb: err=%d\n", err);
+ up_write(&iinfo->i_data_sem);
return err;
}
} else {
@@ -135,6 +136,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
iinfo->i_lenAlloc = inode->i_size;
}
}
+ up_write(&iinfo->i_data_sem);
retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
if (retval > 0)
@@ -149,8 +151,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
long old_block, new_block;
int result = -EINVAL;
- lock_kernel();
-
if (file_permission(filp, MAY_READ) != 0) {
udf_debug("no permission to access inode %lu\n", inode->i_ino);
result = -EPERM;
@@ -196,7 +196,6 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
out:
- unlock_kernel();
return result;
}
@@ -204,10 +203,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
{
if (filp->f_mode & FMODE_WRITE) {
mutex_lock(&inode->i_mutex);
- lock_kernel();
+ down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
- unlock_kernel();
+ up_write(&UDF_I(inode)->i_data_sem);
mutex_unlock(&inode->i_mutex);
}
return 0;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 75d9304..6fb7e0a 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -92,28 +92,19 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
return NULL;
}
- mutex_lock(&sbi->s_alloc_mutex);
if (sbi->s_lvid_bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)
- sbi->s_lvid_bh->b_data;
- struct logicalVolIntegrityDescImpUse *lvidiu =
- udf_sb_lvidiu(sbi);
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- (lvid->logicalVolContentsUse);
+ struct logicalVolIntegrityDescImpUse *lvidiu;
+
+ iinfo->i_unique = lvid_get_unique_id(sb);
+ mutex_lock(&sbi->s_alloc_mutex);
+ lvidiu = udf_sb_lvidiu(sbi);
if (S_ISDIR(mode))
le32_add_cpu(&lvidiu->numDirs, 1);
else
le32_add_cpu(&lvidiu->numFiles, 1);
- iinfo->i_unique = uniqueID = le64_to_cpu(lvhd->uniqueID);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
udf_updated_lvid(sb);
+ mutex_unlock(&sbi->s_alloc_mutex);
}
- mutex_unlock(&sbi->s_alloc_mutex);
inode_init_owner(inode, dir, mode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fc48f37..c6a2e78 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -31,7 +31,6 @@
#include "udfdecl.h"
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
@@ -51,6 +50,7 @@ MODULE_LICENSE("GPL");
static mode_t udf_convert_permissions(struct fileEntry *);
static int udf_update_inode(struct inode *, int);
static void udf_fill_inode(struct inode *, struct buffer_head *);
+static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
static struct buffer_head *inode_getblk(struct inode *, sector_t, int *,
sector_t *, int *);
@@ -79,9 +79,7 @@ void udf_evict_inode(struct inode *inode)
want_delete = 1;
inode->i_size = 0;
udf_truncate(inode);
- lock_kernel();
udf_update_inode(inode, IS_SYNC(inode));
- unlock_kernel();
}
invalidate_inode_buffers(inode);
end_writeback(inode);
@@ -97,9 +95,7 @@ void udf_evict_inode(struct inode *inode)
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
if (want_delete) {
- lock_kernel();
udf_free_inode(inode);
- unlock_kernel();
}
}
@@ -302,10 +298,9 @@ static int udf_get_block(struct inode *inode, sector_t block,
err = -EIO;
new = 0;
bh = NULL;
-
- lock_kernel();
-
iinfo = UDF_I(inode);
+
+ down_write(&iinfo->i_data_sem);
if (block == iinfo->i_next_alloc_block + 1) {
iinfo->i_next_alloc_block++;
iinfo->i_next_alloc_goal++;
@@ -324,7 +319,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
map_bh(bh_result, inode->i_sb, phys);
abort:
- unlock_kernel();
+ up_write(&iinfo->i_data_sem);
return err;
}
@@ -1022,16 +1017,16 @@ void udf_truncate(struct inode *inode)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
- lock_kernel();
iinfo = UDF_I(inode);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+ down_write(&iinfo->i_data_sem);
if (inode->i_sb->s_blocksize <
(udf_file_entry_alloc_offset(inode) +
inode->i_size)) {
udf_expand_file_adinicb(inode, inode->i_size, &err);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
inode->i_size = iinfo->i_lenAlloc;
- unlock_kernel();
+ up_write(&iinfo->i_data_sem);
return;
} else
udf_truncate_extents(inode);
@@ -1042,10 +1037,13 @@ void udf_truncate(struct inode *inode)
offset - udf_file_entry_alloc_offset(inode));
iinfo->i_lenAlloc = inode->i_size;
}
+ up_write(&iinfo->i_data_sem);
} else {
block_truncate_page(inode->i_mapping, inode->i_size,
udf_get_block);
+ down_write(&iinfo->i_data_sem);
udf_truncate_extents(inode);
+ up_write(&iinfo->i_data_sem);
}
inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
@@ -1053,7 +1051,6 @@ void udf_truncate(struct inode *inode)
udf_sync_inode(inode);
else
mark_inode_dirty(inode);
- unlock_kernel();
}
static void __udf_read_inode(struct inode *inode)
@@ -1202,6 +1199,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
return;
}
+ read_lock(&sbi->s_cred_lock);
inode->i_uid = le32_to_cpu(fe->uid);
if (inode->i_uid == -1 ||
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
@@ -1214,13 +1212,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
- inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
- if (!inode->i_nlink)
- inode->i_nlink = 1;
-
- inode->i_size = le64_to_cpu(fe->informationLength);
- iinfo->i_lenExtents = inode->i_size;
-
if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY &&
sbi->s_fmode != UDF_INVALID_MODE)
inode->i_mode = sbi->s_fmode;
@@ -1230,6 +1221,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
else
inode->i_mode = udf_convert_permissions(fe);
inode->i_mode &= ~sbi->s_umask;
+ read_unlock(&sbi->s_cred_lock);
+
+ inode->i_nlink = le16_to_cpu(fe->fileLinkCount);
+ if (!inode->i_nlink)
+ inode->i_nlink = 1;
+
+ inode->i_size = le64_to_cpu(fe->informationLength);
+ iinfo->i_lenExtents = inode->i_size;
if (iinfo->i_efe == 0) {
inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
@@ -1373,16 +1372,10 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- int ret;
-
- lock_kernel();
- ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
- unlock_kernel();
-
- return ret;
+ return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
-int udf_sync_inode(struct inode *inode)
+static int udf_sync_inode(struct inode *inode)
{
return udf_update_inode(inode, 1);
}
@@ -2048,7 +2041,7 @@ long udf_block_map(struct inode *inode, sector_t block)
struct extent_position epos = {};
int ret;
- lock_kernel();
+ down_read(&UDF_I(inode)->i_data_sem);
if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) ==
(EXT_RECORDED_ALLOCATED >> 30))
@@ -2056,7 +2049,7 @@ long udf_block_map(struct inode *inode, sector_t block)
else
ret = 0;
- unlock_kernel();
+ up_read(&UDF_I(inode)->i_data_sem);
brelse(epos.bh);
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV))
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6d8dc02..2be0f9e 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/crc-itu-t.h>
@@ -228,10 +227,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
}
if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
- isdotdot) {
- brelse(epos.bh);
- return fi;
- }
+ isdotdot)
+ goto out_ok;
if (!lfi)
continue;
@@ -263,7 +260,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
if (dentry->d_name.len > UDF_NAME_LEN - 2)
return ERR_PTR(-ENAMETOOLONG);
- lock_kernel();
#ifdef UDF_RECOVERY
/* temporary shorthand for specifying files by inode number */
if (!strncmp(dentry->d_name.name, ".B=", 3)) {
@@ -275,7 +271,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
};
inode = udf_iget(dir->i_sb, lb);
if (!inode) {
- unlock_kernel();
return ERR_PTR(-EACCES);
}
} else
@@ -291,11 +286,9 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
loc = lelb_to_cpu(cfi.icb.extLocation);
inode = udf_iget(dir->i_sb, &loc);
if (!inode) {
- unlock_kernel();
return ERR_PTR(-EACCES);
}
}
- unlock_kernel();
return d_splice_alias(inode, dentry);
}
@@ -476,15 +469,19 @@ add:
f_pos >> dir->i_sb->s_blocksize_bits, 1, err);
if (!fibh->ebh)
goto out_err;
+ /* Extents could have been merged, invalidate our position */
+ brelse(epos.bh);
+ epos.bh = NULL;
+ epos.block = dinfo->i_location;
+ epos.offset = udf_file_entry_alloc_offset(dir);
if (!fibh->soffset) {
- if (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
- (EXT_RECORDED_ALLOCATED >> 30)) {
- block = eloc.logicalBlockNum + ((elen - 1) >>
+ /* Find the freshly allocated block */
+ while (udf_next_aext(dir, &epos, &eloc, &elen, 1) ==
+ (EXT_RECORDED_ALLOCATED >> 30))
+ ;
+ block = eloc.logicalBlockNum + ((elen - 1) >>
dir->i_sb->s_blocksize_bits);
- } else
- block++;
-
brelse(fibh->sbh);
fibh->sbh = fibh->ebh;
fi = (struct fileIdentDesc *)(fibh->sbh->b_data);
@@ -562,10 +559,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
struct udf_inode_info *iinfo;
- lock_kernel();
inode = udf_new_inode(dir, mode, &err);
if (!inode) {
- unlock_kernel();
return err;
}
@@ -583,7 +578,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
inode->i_nlink--;
mark_inode_dirty(inode);
iput(inode);
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -596,7 +590,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
- unlock_kernel();
d_instantiate(dentry, inode);
return 0;
@@ -614,7 +607,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
if (!old_valid_dev(rdev))
return -EINVAL;
- lock_kernel();
err = -EIO;
inode = udf_new_inode(dir, mode, &err);
if (!inode)
@@ -627,7 +619,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
inode->i_nlink--;
mark_inode_dirty(inode);
iput(inode);
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -646,7 +637,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
err = 0;
out:
- unlock_kernel();
return err;
}
@@ -659,7 +649,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
- lock_kernel();
err = -EMLINK;
if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
goto out;
@@ -712,7 +701,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
err = 0;
out:
- unlock_kernel();
return err;
}
@@ -794,7 +782,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
struct kernel_lb_addr tloc;
retval = -ENOENT;
- lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
if (!fi)
goto out;
@@ -826,7 +813,6 @@ end_rmdir:
brelse(fibh.sbh);
out:
- unlock_kernel();
return retval;
}
@@ -840,7 +826,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
struct kernel_lb_addr tloc;
retval = -ENOENT;
- lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
if (!fi)
goto out;
@@ -870,7 +855,6 @@ end_unlink:
brelse(fibh.sbh);
out:
- unlock_kernel();
return retval;
}
@@ -890,21 +874,21 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
int block;
unsigned char *name = NULL;
int namelen;
- struct buffer_head *bh;
struct udf_inode_info *iinfo;
+ struct super_block *sb = dir->i_sb;
- lock_kernel();
inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
if (!inode)
goto out;
+ iinfo = UDF_I(inode);
+ down_write(&iinfo->i_data_sem);
name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
if (!name) {
err = -ENOMEM;
goto out_no_entry;
}
- iinfo = UDF_I(inode);
inode->i_data.a_ops = &udf_symlink_aops;
inode->i_op = &udf_symlink_inode_operations;
@@ -912,7 +896,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
struct kernel_lb_addr eloc;
uint32_t bsize;
- block = udf_new_block(inode->i_sb, inode,
+ block = udf_new_block(sb, inode,
iinfo->i_location.partitionReferenceNum,
iinfo->i_location.logicalBlockNum, &err);
if (!block)
@@ -923,17 +907,17 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
eloc.logicalBlockNum = block;
eloc.partitionReferenceNum =
iinfo->i_location.partitionReferenceNum;
- bsize = inode->i_sb->s_blocksize;
+ bsize = sb->s_blocksize;
iinfo->i_lenExtents = bsize;
udf_add_aext(inode, &epos, &eloc, bsize, 0);
brelse(epos.bh);
- block = udf_get_pblock(inode->i_sb, block,
+ block = udf_get_pblock(sb, block,
iinfo->i_location.partitionReferenceNum,
0);
- epos.bh = udf_tgetblk(inode->i_sb, block);
+ epos.bh = udf_tgetblk(sb, block);
lock_buffer(epos.bh);
- memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize);
+ memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
unlock_buffer(epos.bh);
mark_buffer_dirty_inode(epos.bh, inode);
@@ -941,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
} else
ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
- eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode);
+ eoffset = sb->s_blocksize - udf_ext0_offset(inode);
pc = (struct pathComponent *)ea;
if (*symname == '/') {
@@ -981,7 +965,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
if (pc->componentType == 5) {
- namelen = udf_put_filename(inode->i_sb, compstart, name,
+ namelen = udf_put_filename(sb, compstart, name,
symname - compstart);
if (!namelen)
goto out_no_entry;
@@ -1015,27 +999,16 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi)
goto out_no_entry;
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
+ cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- bh = UDF_SB(inode->i_sb)->s_lvid_bh;
- if (bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)bh->b_data;
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- lvid->logicalVolContentsUse;
- uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (UDF_SB(inode->i_sb)->s_lvid_bh) {
*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
- mark_buffer_dirty(bh);
+ cpu_to_le32(lvid_get_unique_id(sb));
}
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
mark_inode_dirty(dir);
+ up_write(&iinfo->i_data_sem);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
@@ -1044,10 +1017,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
out:
kfree(name);
- unlock_kernel();
return err;
out_no_entry:
+ up_write(&iinfo->i_data_sem);
inode_dec_link_count(inode);
iput(inode);
goto out;
@@ -1060,36 +1033,20 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct udf_fileident_bh fibh;
struct fileIdentDesc cfi, *fi;
int err;
- struct buffer_head *bh;
- lock_kernel();
if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
- unlock_kernel();
return -EMLINK;
}
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
- unlock_kernel();
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
cfi.icb.extLocation = cpu_to_lelb(UDF_I(inode)->i_location);
- bh = UDF_SB(inode->i_sb)->s_lvid_bh;
- if (bh) {
- struct logicalVolIntegrityDesc *lvid =
- (struct logicalVolIntegrityDesc *)bh->b_data;
- struct logicalVolHeaderDesc *lvhd;
- uint64_t uniqueID;
- lvhd = (struct logicalVolHeaderDesc *)
- (lvid->logicalVolContentsUse);
- uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (UDF_SB(inode->i_sb)->s_lvid_bh) {
*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL);
- if (!(++uniqueID & 0x00000000FFFFFFFFUL))
- uniqueID += 16;
- lvhd->uniqueID = cpu_to_le64(uniqueID);
- mark_buffer_dirty(bh);
+ cpu_to_le32(lvid_get_unique_id(inode->i_sb));
}
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
@@ -1103,7 +1060,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
mark_inode_dirty(inode);
ihold(inode);
d_instantiate(dentry, inode);
- unlock_kernel();
return 0;
}
@@ -1124,7 +1080,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct kernel_lb_addr tloc;
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
- lock_kernel();
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
if (ofi) {
if (ofibh.sbh != ofibh.ebh)
@@ -1248,7 +1203,6 @@ end_rename:
brelse(nfibh.ebh);
brelse(nfibh.sbh);
}
- unlock_kernel();
return retval;
}
@@ -1261,7 +1215,6 @@ static struct dentry *udf_get_parent(struct dentry *child)
struct fileIdentDesc cfi;
struct udf_fileident_bh fibh;
- lock_kernel();
if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
goto out_unlock;
@@ -1273,11 +1226,9 @@ static struct dentry *udf_get_parent(struct dentry *child)
inode = udf_iget(child->d_inode->i_sb, &tloc);
if (!inode)
goto out_unlock;
- unlock_kernel();
return d_obtain_alias(inode);
out_unlock:
- unlock_kernel();
return ERR_PTR(-EACCES);
}
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 745eb20..a71090e 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/mutex.h>
uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
uint16_t partition, uint32_t offset)
@@ -159,7 +160,9 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
struct udf_sb_info *sbi = UDF_SB(sb);
u16 reallocationTableLen;
struct buffer_head *bh;
+ int ret = 0;
+ mutex_lock(&sbi->s_alloc_mutex);
for (i = 0; i < sbi->s_partitions; i++) {
struct udf_part_map *map = &sbi->s_partmaps[i];
if (old_block > map->s_partition_root &&
@@ -175,8 +178,10 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
break;
}
- if (!st)
- return 1;
+ if (!st) {
+ ret = 1;
+ goto out;
+ }
reallocationTableLen =
le16_to_cpu(st->reallocationTableLen);
@@ -207,14 +212,16 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
((old_block -
map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
} else if (origLoc == packet) {
*new_block = le32_to_cpu(
entry->mappedLocation) +
((old_block -
map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
} else if (origLoc > packet)
break;
}
@@ -251,20 +258,24 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block)
st->mapEntry[k].mappedLocation) +
((old_block - map->s_partition_root) &
(sdata->s_packet_len - 1));
- return 0;
+ ret = 0;
+ goto out;
}
- return 1;
+ ret = 1;
+ goto out;
} /* if old_block */
}
if (i == sbi->s_partitions) {
/* outside of partitions */
/* for now, fail =) */
- return 1;
+ ret = 1;
}
- return 0;
+out:
+ mutex_unlock(&sbi->s_alloc_mutex);
+ return ret;
}
static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b539d53..7b27b06 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
#include <linux/stat.h>
#include <linux/cdrom.h>
#include <linux/nls.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/vmalloc.h>
@@ -135,6 +134,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei->i_next_alloc_block = 0;
ei->i_next_alloc_goal = 0;
ei->i_strat4096 = 0;
+ init_rwsem(&ei->i_data_sem);
return &ei->vfs_inode;
}
@@ -574,13 +574,14 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
if (!udf_parse_options(options, &uopt, true))
return -EINVAL;
- lock_kernel();
+ write_lock(&sbi->s_cred_lock);
sbi->s_flags = uopt.flags;
sbi->s_uid = uopt.uid;
sbi->s_gid = uopt.gid;
sbi->s_umask = uopt.umask;
sbi->s_fmode = uopt.fmode;
sbi->s_dmode = uopt.dmode;
+ write_unlock(&sbi->s_cred_lock);
if (sbi->s_lvid_bh) {
int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
@@ -597,7 +598,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
udf_open_lvid(sb);
out_unlock:
- unlock_kernel();
return error;
}
@@ -966,9 +966,9 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
(sizeof(struct buffer_head *) * nr_groups);
if (size <= PAGE_SIZE)
- bitmap = kmalloc(size, GFP_KERNEL);
+ bitmap = kzalloc(size, GFP_KERNEL);
else
- bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
+ bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
if (bitmap == NULL) {
udf_error(sb, __func__,
@@ -977,7 +977,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
return NULL;
}
- memset(bitmap, 0x00, size);
bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
bitmap->s_nr_groups = nr_groups;
return bitmap;
@@ -1781,6 +1780,8 @@ static void udf_open_lvid(struct super_block *sb)
if (!bh)
return;
+
+ mutex_lock(&sbi->s_alloc_mutex);
lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
lvidiu = udf_sb_lvidiu(sbi);
@@ -1797,6 +1798,7 @@ static void udf_open_lvid(struct super_block *sb)
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
mark_buffer_dirty(bh);
sbi->s_lvid_dirty = 0;
+ mutex_unlock(&sbi->s_alloc_mutex);
}
static void udf_close_lvid(struct super_block *sb)
@@ -1809,6 +1811,7 @@ static void udf_close_lvid(struct super_block *sb)
if (!bh)
return;
+ mutex_lock(&sbi->s_alloc_mutex);
lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
lvidiu = udf_sb_lvidiu(sbi);
lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1829,6 +1832,34 @@ static void udf_close_lvid(struct super_block *sb)
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
mark_buffer_dirty(bh);
sbi->s_lvid_dirty = 0;
+ mutex_unlock(&sbi->s_alloc_mutex);
+}
+
+u64 lvid_get_unique_id(struct super_block *sb)
+{
+ struct buffer_head *bh;
+ struct udf_sb_info *sbi = UDF_SB(sb);
+ struct logicalVolIntegrityDesc *lvid;
+ struct logicalVolHeaderDesc *lvhd;
+ u64 uniqueID;
+ u64 ret;
+
+ bh = sbi->s_lvid_bh;
+ if (!bh)
+ return 0;
+
+ lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+ lvhd = (struct logicalVolHeaderDesc *)lvid->logicalVolContentsUse;
+
+ mutex_lock(&sbi->s_alloc_mutex);
+ ret = uniqueID = le64_to_cpu(lvhd->uniqueID);
+ if (!(++uniqueID & 0xFFFFFFFF))
+ uniqueID += 16;
+ lvhd->uniqueID = cpu_to_le64(uniqueID);
+ mutex_unlock(&sbi->s_alloc_mutex);
+ mark_buffer_dirty(bh);
+
+ return ret;
}
static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
@@ -1886,8 +1917,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
struct kernel_lb_addr rootdir, fileset;
struct udf_sb_info *sbi;
- lock_kernel();
-
uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
uopt.uid = -1;
uopt.gid = -1;
@@ -1896,10 +1925,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
uopt.dmode = UDF_INVALID_MODE;
sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
- if (!sbi) {
- unlock_kernel();
+ if (!sbi)
return -ENOMEM;
- }
sb->s_fs_info = sbi;
@@ -1936,6 +1963,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sbi->s_fmode = uopt.fmode;
sbi->s_dmode = uopt.dmode;
sbi->s_nls_map = uopt.nls_map;
+ rwlock_init(&sbi->s_cred_lock);
if (uopt.session == 0xFFFFFFFF)
sbi->s_session = udf_get_last_session(sb);
@@ -2045,7 +2073,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
goto error_out;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
- unlock_kernel();
return 0;
error_out:
@@ -2066,7 +2093,6 @@ error_out:
kfree(sbi);
sb->s_fs_info = NULL;
- unlock_kernel();
return -EINVAL;
}
@@ -2105,8 +2131,6 @@ static void udf_put_super(struct super_block *sb)
sbi = UDF_SB(sb);
- lock_kernel();
-
if (sbi->s_vat_inode)
iput(sbi->s_vat_inode);
if (sbi->s_partitions)
@@ -2122,8 +2146,6 @@ static void udf_put_super(struct super_block *sb)
kfree(sbi->s_partmaps);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
-
- unlock_kernel();
}
static int udf_sync_fs(struct super_block *sb, int wait)
@@ -2186,8 +2208,6 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
uint16_t ident;
struct spaceBitmapDesc *bm;
- lock_kernel();
-
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
bh = udf_read_ptagged(sb, &loc, 0, &ident);
@@ -2224,10 +2244,7 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
}
}
brelse(bh);
-
out:
- unlock_kernel();
-
return accum;
}
@@ -2240,8 +2257,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
int8_t etype;
struct extent_position epos;
- lock_kernel();
-
+ mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
epos.block = UDF_I(table)->i_location;
epos.offset = sizeof(struct unallocSpaceEntry);
epos.bh = NULL;
@@ -2250,8 +2266,7 @@ static unsigned int udf_count_free_table(struct super_block *sb,
accum += (elen >> table->i_sb->s_blocksize_bits);
brelse(epos.bh);
-
- unlock_kernel();
+ mutex_unlock(&UDF_SB(sb)->s_alloc_mutex);
return accum;
}
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 1606478..b1d4488 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include "udf_i.h"
@@ -78,13 +77,16 @@ static int udf_symlink_filler(struct file *file, struct page *page)
int err = -EIO;
unsigned char *p = kmap(page);
struct udf_inode_info *iinfo;
+ uint32_t pos;
- lock_kernel();
iinfo = UDF_I(inode);
+ pos = udf_block_map(inode, 0);
+
+ down_read(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
} else {
- bh = sb_bread(inode->i_sb, udf_block_map(inode, 0));
+ bh = sb_bread(inode->i_sb, pos);
if (!bh)
goto out;
@@ -95,14 +97,14 @@ static int udf_symlink_filler(struct file *file, struct page *page)
udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p);
brelse(bh);
- unlock_kernel();
+ up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
kunmap(page);
unlock_page(page);
return 0;
out:
- unlock_kernel();
+ up_read(&iinfo->i_data_sem);
SetPageError(page);
kunmap(page);
unlock_page(page);
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index e58d1de..d1bd31e 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,18 @@
#ifndef _UDF_I_H
#define _UDF_I_H
+/*
+ * The i_data_sem and i_mutex serve for protection of allocation information
+ * of a regular files and symlinks. This includes all extents belonging to
+ * the file/symlink, a fact whether data are in-inode or in external data
+ * blocks, preallocation, goal block information... When extents are read,
+ * i_mutex or i_data_sem must be held (for reading is enough in case of
+ * i_data_sem). When extents are changed, i_data_sem must be held for writing
+ * and also i_mutex must be held.
+ *
+ * For directories i_mutex is used for all the necessary protection.
+ */
+
struct udf_inode_info {
struct timespec i_crtime;
/* Physical address of inode */
@@ -21,6 +33,7 @@ struct udf_inode_info {
struct long_ad *i_lad;
__u8 *i_data;
} i_ext;
+ struct rw_semaphore i_data_sem;
struct inode vfs_inode;
};
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index d113b72..4858c19 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -2,6 +2,7 @@
#define __LINUX_UDF_SB_H
#include <linux/mutex.h>
+#include <linux/bitops.h>
/* Since UDF 2.01 is ISO 13346 based... */
#define UDF_SUPER_MAGIC 0x15013346
@@ -128,6 +129,8 @@ struct udf_sb_info {
uid_t s_uid;
mode_t s_fmode;
mode_t s_dmode;
+ /* Lock protecting consistency of above permission settings */
+ rwlock_t s_cred_lock;
/* Root Info */
struct timespec s_record_time;
@@ -139,7 +142,7 @@ struct udf_sb_info {
__u16 s_udfrev;
/* Miscellaneous flags */
- __u32 s_flags;
+ unsigned long s_flags;
/* Encoding info */
struct nls_table *s_nls_map;
@@ -161,8 +164,19 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
int udf_compute_nr_groups(struct super_block *sb, u32 partition);
-#define UDF_QUERY_FLAG(X,Y) ( UDF_SB(X)->s_flags & ( 1 << (Y) ) )
-#define UDF_SET_FLAG(X,Y) ( UDF_SB(X)->s_flags |= ( 1 << (Y) ) )
-#define UDF_CLEAR_FLAG(X,Y) ( UDF_SB(X)->s_flags &= ~( 1 << (Y) ) )
+static inline int UDF_QUERY_FLAG(struct super_block *sb, int flag)
+{
+ return test_bit(flag, &UDF_SB(sb)->s_flags);
+}
+
+static inline void UDF_SET_FLAG(struct super_block *sb, int flag)
+{
+ set_bit(flag, &UDF_SB(sb)->s_flags);
+}
+
+static inline void UDF_CLEAR_FLAG(struct super_block *sb, int flag)
+{
+ clear_bit(flag, &UDF_SB(sb)->s_flags);
+}
#endif /* __LINUX_UDF_SB_H */
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 6995ab1..eba4820 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -111,6 +111,8 @@ struct extent_position {
};
/* super.c */
+
+__attribute__((format(printf, 3, 4)))
extern void udf_warning(struct super_block *, const char *, const char *, ...);
static inline void udf_updated_lvid(struct super_block *sb)
{
@@ -123,6 +125,7 @@ static inline void udf_updated_lvid(struct super_block *sb)
sb->s_dirt = 1;
UDF_SB(sb)->s_lvid_dirty = 1;
}
+extern u64 lvid_get_unique_id(struct super_block *sb);
/* namei.c */
extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
@@ -133,7 +136,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
/* inode.c */
extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
-extern int udf_sync_inode(struct inode *);
extern void udf_expand_file_adinicb(struct inode *, int, int *);
extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c3..0000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
- wait_queue_head_t waiters;
-} sv_t;
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue_exclusive(&sv->waiters, &wait);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(lock);
-
- schedule();
-
- remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define sv_init(sv,flag,name) \
- init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
- /*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
- _sv_wait(sv, lock)
-#define sv_signal(sv) \
- wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
- wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 691f612..ec7bbb5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,15 +38,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
- IO_READ, /* mapping for a read */
- IO_DELAY, /* mapping covers delalloc region */
- IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
- IO_NEW /* just allocated */
-};
/*
* Prime number of hash buckets since address is used as the key.
@@ -182,9 +173,6 @@ xfs_setfilesize(
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
- ASSERT(ioend->io_type != IO_READ);
-
if (unlikely(ioend->io_error))
return 0;
@@ -244,10 +232,8 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
- if (ioend->io_type != IO_READ) {
- error = xfs_setfilesize(ioend);
- ASSERT(!error || error == EAGAIN);
- }
+ error = xfs_setfilesize(ioend);
+ ASSERT(!error || error == EAGAIN);
/*
* If we didn't complete processing of the ioend, requeue it to the
@@ -318,14 +304,63 @@ STATIC int
xfs_map_blocks(
struct inode *inode,
loff_t offset,
- ssize_t count,
struct xfs_bmbt_irec *imap,
- int flags)
+ int type,
+ int nonblocking)
{
- int nmaps = 1;
- int new = 0;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t count = 1 << inode->i_blkbits;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int bmapi_flags = XFS_BMAPI_ENTIRE;
+ int nimaps = 1;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
+
+ if (type == IO_UNWRITTEN)
+ bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+ if (nonblocking)
+ return -XFS_ERROR(EAGAIN);
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ }
- return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
+ ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+ (ip->i_df.if_flags & XFS_IFEXTENTS));
+ ASSERT(offset <= mp->m_maxioffset);
+
+ if (offset + count > mp->m_maxioffset)
+ count = mp->m_maxioffset - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+ bmapi_flags, NULL, 0, imap, &nimaps, NULL);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ if (error)
+ return -XFS_ERROR(error);
+
+ if (type == IO_DELALLOC &&
+ (!nimaps || isnullstartblock(imap->br_startblock))) {
+ error = xfs_iomap_write_allocate(ip, offset, count, imap);
+ if (!error)
+ trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+ return -XFS_ERROR(error);
+ }
+
+#ifdef DEBUG
+ if (type == IO_UNWRITTEN) {
+ ASSERT(nimaps);
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+ }
+#endif
+ if (nimaps)
+ trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+ return 0;
}
STATIC int
@@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE, bio);
- ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
- bio_put(bio);
}
STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
- struct bio *bio;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
-
- do {
- bio = bio_alloc(GFP_NOIO, nvecs);
- nvecs >>= 1;
- } while (!bio);
+ struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
ASSERT(bio->bi_private == NULL);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
- bio_get(bio);
return bio;
}
@@ -470,9 +497,8 @@ xfs_submit_ioend(
/* Pass 1 - start writeback */
do {
next = ioend->io_list;
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
xfs_start_buffer_writeback(bh);
- }
} while ((ioend = next) != NULL);
/* Pass 2 - submit I/O */
@@ -600,117 +626,13 @@ xfs_map_at_offset(
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
- lock_buffer(bh);
xfs_map_buffer(inode, bh, imap, offset);
- bh->b_bdev = xfs_find_bdev_for_inode(inode);
set_buffer_mapped(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
}
/*
- * Look for a page at index that is suitable for clustering.
- */
-STATIC unsigned int
-xfs_probe_page(
- struct page *page,
- unsigned int pg_offset)
-{
- struct buffer_head *bh, *head;
- int ret = 0;
-
- if (PageWriteback(page))
- return 0;
- if (!PageDirty(page))
- return 0;
- if (!page->mapping)
- return 0;
- if (!page_has_buffers(page))
- return 0;
-
- bh = head = page_buffers(page);
- do {
- if (!buffer_uptodate(bh))
- break;
- if (!buffer_mapped(bh))
- break;
- ret += bh->b_size;
- if (ret >= pg_offset)
- break;
- } while ((bh = bh->b_this_page) != head);
-
- return ret;
-}
-
-STATIC size_t
-xfs_probe_cluster(
- struct inode *inode,
- struct page *startpage,
- struct buffer_head *bh,
- struct buffer_head *head)
-{
- struct pagevec pvec;
- pgoff_t tindex, tlast, tloff;
- size_t total = 0;
- int done = 0, i;
-
- /* First sum forwards in this page */
- do {
- if (!buffer_uptodate(bh) || !buffer_mapped(bh))
- return total;
- total += bh->b_size;
- } while ((bh = bh->b_this_page) != head);
-
- /* if we reached the end of the page, sum forwards in following pages */
- tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
- tindex = startpage->index + 1;
-
- /* Prune this back to avoid pathological behavior */
- tloff = min(tlast, startpage->index + 64);
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tloff) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- size_t pg_offset, pg_len = 0;
-
- if (tindex == tlast) {
- pg_offset =
- i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
- if (!pg_offset) {
- done = 1;
- break;
- }
- } else
- pg_offset = PAGE_CACHE_SIZE;
-
- if (page->index == tindex && trylock_page(page)) {
- pg_len = xfs_probe_page(page, pg_offset);
- unlock_page(page);
- }
-
- if (!pg_len) {
- done = 1;
- break;
- }
-
- total += pg_len;
- tindex++;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-
- return total;
-}
-
-/*
* Test if a given page is suitable for writing as part of an unwritten
* or delayed allocate extent.
*/
@@ -731,9 +653,9 @@ xfs_is_delayed_page(
if (buffer_unwritten(bh))
acceptable = (type == IO_UNWRITTEN);
else if (buffer_delay(bh))
- acceptable = (type == IO_DELAY);
+ acceptable = (type == IO_DELALLOC);
else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable = (type == IO_NEW);
+ acceptable = (type == IO_OVERWRITE);
else
break;
} while ((bh = bh->b_this_page) != head);
@@ -758,8 +680,7 @@ xfs_convert_page(
loff_t tindex,
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- int all_bh)
+ struct writeback_control *wbc)
{
struct buffer_head *bh, *head;
xfs_off_t end_offset;
@@ -814,37 +735,30 @@ xfs_convert_page(
continue;
}
- if (buffer_unwritten(bh) || buffer_delay(bh)) {
+ if (buffer_unwritten(bh) || buffer_delay(bh) ||
+ buffer_mapped(bh)) {
if (buffer_unwritten(bh))
type = IO_UNWRITTEN;
+ else if (buffer_delay(bh))
+ type = IO_DELALLOC;
else
- type = IO_DELAY;
+ type = IO_OVERWRITE;
if (!xfs_imap_valid(inode, imap, offset)) {
done = 1;
continue;
}
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
- xfs_map_at_offset(inode, bh, imap, offset);
+ lock_buffer(bh);
+ if (type != IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, imap, offset);
xfs_add_to_ioend(inode, bh, offset, type,
ioendp, done);
page_dirty--;
count++;
} else {
- type = IO_NEW;
- if (buffer_mapped(bh) && all_bh) {
- lock_buffer(bh);
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- count++;
- page_dirty--;
- } else {
- done = 1;
- }
+ done = 1;
}
} while (offset += len, (bh = bh->b_this_page) != head);
@@ -876,7 +790,6 @@ xfs_cluster_write(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int all_bh,
pgoff_t tlast)
{
struct pagevec pvec;
@@ -891,7 +804,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc, all_bh);
+ imap, ioendp, wbc);
if (done)
break;
}
@@ -935,7 +848,7 @@ xfs_aops_discard_page(
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- if (!xfs_is_delayed_page(page, IO_DELAY))
+ if (!xfs_is_delayed_page(page, IO_DELALLOC))
goto out_invalidate;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1002,10 +915,10 @@ xfs_vm_writepage(
unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
- ssize_t size, len;
- int flags, err, imap_valid = 0, uptodate = 1;
+ ssize_t len;
+ int err, imap_valid = 0, uptodate = 1;
int count = 0;
- int all_bh = 0;
+ int nonblocking = 0;
trace_xfs_writepage(inode, page, 0);
@@ -1056,10 +969,14 @@ xfs_vm_writepage(
bh = head = page_buffers(page);
offset = page_offset(page);
- flags = BMAPI_READ;
- type = IO_NEW;
+ type = IO_OVERWRITE;
+
+ if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
+ nonblocking = 1;
do {
+ int new_ioend = 0;
+
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
@@ -1076,90 +993,54 @@ xfs_vm_writepage(
continue;
}
- if (imap_valid)
- imap_valid = xfs_imap_valid(inode, &imap, offset);
-
- if (buffer_unwritten(bh) || buffer_delay(bh)) {
- int new_ioend = 0;
-
- /*
- * Make sure we don't use a read-only iomap
- */
- if (flags == BMAPI_READ)
- imap_valid = 0;
-
- if (buffer_unwritten(bh)) {
+ if (buffer_unwritten(bh)) {
+ if (type != IO_UNWRITTEN) {
type = IO_UNWRITTEN;
- flags = BMAPI_WRITE | BMAPI_IGNSTATE;
- } else if (buffer_delay(bh)) {
- type = IO_DELAY;
- flags = BMAPI_ALLOCATE;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- flags |= BMAPI_TRYLOCK;
- }
-
- if (!imap_valid) {
- /*
- * If we didn't have a valid mapping then we
- * need to ensure that we put the new mapping
- * in a new ioend structure. This needs to be
- * done to ensure that the ioends correctly
- * reflect the block mappings at io completion
- * for unwritten extent conversion.
- */
- new_ioend = 1;
- err = xfs_map_blocks(inode, offset, len,
- &imap, flags);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap,
- offset);
+ imap_valid = 0;
}
- if (imap_valid) {
- xfs_map_at_offset(inode, bh, &imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, new_ioend);
- count++;
+ } else if (buffer_delay(bh)) {
+ if (type != IO_DELALLOC) {
+ type = IO_DELALLOC;
+ imap_valid = 0;
}
} else if (buffer_uptodate(bh)) {
- /*
- * we got here because the buffer is already mapped.
- * That means it must already have extents allocated
- * underneath it. Map the extent by reading it.
- */
- if (!imap_valid || flags != BMAPI_READ) {
- flags = BMAPI_READ;
- size = xfs_probe_cluster(inode, page, bh, head);
- err = xfs_map_blocks(inode, offset, size,
- &imap, flags);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap,
- offset);
+ if (type != IO_OVERWRITE) {
+ type = IO_OVERWRITE;
+ imap_valid = 0;
}
+ } else {
+ if (PageUptodate(page)) {
+ ASSERT(buffer_mapped(bh));
+ imap_valid = 0;
+ }
+ continue;
+ }
+ if (imap_valid)
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ if (!imap_valid) {
/*
- * We set the type to IO_NEW in case we are doing a
- * small write at EOF that is extending the file but
- * without needing an allocation. We need to update the
- * file size on I/O completion in this case so it is
- * the same case as having just allocated a new extent
- * that we are writing into for the first time.
+ * If we didn't have a valid mapping then we need to
+ * put the new mapping into a separate ioend structure.
+ * This ensures non-contiguous extents always have
+ * separate ioends, which is particularly important
+ * for unwritten extent conversion at I/O completion
+ * time.
*/
- type = IO_NEW;
- if (trylock_buffer(bh)) {
- if (imap_valid)
- all_bh = 1;
- xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, !imap_valid);
- count++;
- } else {
- imap_valid = 0;
- }
- } else if (PageUptodate(page)) {
- ASSERT(buffer_mapped(bh));
- imap_valid = 0;
+ new_ioend = 1;
+ err = xfs_map_blocks(inode, offset, &imap, type,
+ nonblocking);
+ if (err)
+ goto error;
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
+ }
+ if (imap_valid) {
+ lock_buffer(bh);
+ if (type != IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+ new_ioend);
+ count++;
}
if (!iohead)
@@ -1188,7 +1069,7 @@ xfs_vm_writepage(
end_index = last_index;
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, all_bh, end_index);
+ wbc, end_index);
}
if (iohead)
@@ -1257,13 +1138,19 @@ __xfs_get_blocks(
int create,
int direct)
{
- int flags = create ? BMAPI_WRITE : BMAPI_READ;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb, end_fsb;
+ int error = 0;
+ int lockmode = 0;
struct xfs_bmbt_irec imap;
+ int nimaps = 1;
xfs_off_t offset;
ssize_t size;
- int nimap = 1;
int new = 0;
- int error;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1272,15 +1159,45 @@ __xfs_get_blocks(
if (!create && direct && offset >= i_size_read(inode))
return 0;
- if (direct && create)
- flags |= BMAPI_DIRECT;
+ if (create) {
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockmode);
+ } else {
+ lockmode = xfs_ilock_map_shared(ip);
+ }
+
+ ASSERT(offset <= mp->m_maxioffset);
+ if (offset + size > mp->m_maxioffset)
+ size = mp->m_maxioffset - offset;
+ end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+ offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
- &new);
+ error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
if (error)
- return -error;
- if (nimap == 0)
- return 0;
+ goto out_unlock;
+
+ if (create &&
+ (!nimaps ||
+ (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_startblock == DELAYSTARTBLOCK))) {
+ if (direct) {
+ error = xfs_iomap_write_direct(ip, offset, size,
+ &imap, nimaps);
+ } else {
+ error = xfs_iomap_write_delay(ip, offset, size, &imap);
+ }
+ if (error)
+ goto out_unlock;
+
+ trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+ } else if (nimaps) {
+ trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+ } else {
+ trace_xfs_get_blocks_notfound(ip, offset, size);
+ goto out_unlock;
+ }
+ xfs_iunlock(ip, lockmode);
if (imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK) {
@@ -1347,6 +1264,10 @@ __xfs_get_blocks(
}
return 0;
+
+out_unlock:
+ xfs_iunlock(ip, lockmode);
+ return -error;
}
int
@@ -1434,7 +1355,7 @@ xfs_vm_direct_IO(
ssize_t ret;
if (rw & WRITE) {
- iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+ iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index c5057fb..71f721e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,6 +23,22 @@ extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+ IO_DIRECT = 0, /* special case for direct I/O ioends */
+ IO_DELALLOC, /* mapping covers delalloc region */
+ IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
+ IO_OVERWRITE, /* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+ { 0, "" }, \
+ { IO_DELALLOC, "delalloc" }, \
+ { IO_UNWRITTEN, "unwritten" }, \
+ { IO_OVERWRITE, "overwrite" }
+
+/*
* xfs_ioend struct manages large extent writes for XFS.
* It can manage several multi-page bio's at once.
*/
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c5deb6..92f1f2a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
- .shrink = xfsbufd_wakeup,
- .seeks = DEFAULT_SEEKS,
-};
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
}
/*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
*/
+STATIC void
+xfs_buf_lru_add(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (list_empty(&bp->b_lru)) {
+ atomic_inc(&bp->b_hold);
+ list_add_tail(&bp->b_lru, &btp->bt_lru);
+ btp->bt_lru_nr++;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unneccessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ if (list_empty(&bp->b_lru))
+ return;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+ struct xfs_buf *bp)
+{
+ bp->b_flags |= XBF_STALE;
+ atomic_set(&(bp)->b_lru_ref, 0);
+ if (!list_empty(&bp->b_lru)) {
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ atomic_dec(&bp->b_hold);
+ }
+ spin_unlock(&btp->bt_lru_lock);
+ }
+ ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
STATIC void
_xfs_buf_initialize(
@@ -186,7 +252,9 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
+ atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
+ INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -262,6 +330,8 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
+ ASSERT(list_empty(&bp->b_lru));
+
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -828,6 +897,7 @@ xfs_buf_rele(
if (!pag) {
ASSERT(!bp->b_relse);
+ ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
@@ -835,13 +905,19 @@ xfs_buf_rele(
}
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&pag->pag_buf_lock);
bp->b_relse(bp);
+ } else if (!(bp->b_flags & XBF_STALE) &&
+ atomic_read(&bp->b_lru_ref)) {
+ xfs_buf_lru_add(bp);
+ spin_unlock(&pag->pag_buf_lock);
} else {
+ xfs_buf_lru_del(bp);
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
spin_unlock(&pag->pag_buf_lock);
@@ -1438,51 +1514,84 @@ xfs_buf_iomove(
*/
/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
*/
void
xfs_wait_buftarg(
struct xfs_buftarg *btp)
{
- struct xfs_perag *pag;
- uint i;
+ struct xfs_buf *bp;
- for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
- pag = xfs_perag_get(btp->bt_mount, i);
- spin_lock(&pag->pag_buf_lock);
- while (rb_first(&pag->pag_buf_tree)) {
- spin_unlock(&pag->pag_buf_lock);
+restart:
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+ if (atomic_read(&bp->b_hold) > 1) {
+ spin_unlock(&btp->bt_lru_lock);
delay(100);
- spin_lock(&pag->pag_buf_lock);
+ goto restart;
}
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
+ /*
+ * clear the LRU reference count so the bufer doesn't get
+ * ignored in xfs_buf_rele().
+ */
+ atomic_set(&bp->b_lru_ref, 0);
+ spin_unlock(&btp->bt_lru_lock);
+ xfs_buf_rele(bp);
+ spin_lock(&btp->bt_lru_lock);
}
+ spin_unlock(&btp->bt_lru_lock);
}
-/*
- * buftarg list for delwrite queue processing
- */
-static LIST_HEAD(xfs_buftarg_list);
-static DEFINE_SPINLOCK(xfs_buftarg_lock);
-
-STATIC void
-xfs_register_buftarg(
- xfs_buftarg_t *btp)
+int
+xfs_buftarg_shrink(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t mask)
{
- spin_lock(&xfs_buftarg_lock);
- list_add(&btp->bt_list, &xfs_buftarg_list);
- spin_unlock(&xfs_buftarg_lock);
-}
+ struct xfs_buftarg *btp = container_of(shrink,
+ struct xfs_buftarg, bt_shrinker);
+ struct xfs_buf *bp;
+ LIST_HEAD(dispose);
-STATIC void
-xfs_unregister_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_del(&btp->bt_list);
- spin_unlock(&xfs_buftarg_lock);
+ if (!nr_to_scan)
+ return btp->bt_lru_nr;
+
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ if (nr_to_scan-- <= 0)
+ break;
+
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+ /*
+ * Decrement the b_lru_ref count unless the value is already
+ * zero. If the value is already zero, we need to reclaim the
+ * buffer, otherwise it gets another trip through the LRU.
+ */
+ if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
+ continue;
+ }
+
+ /*
+ * remove the buffer from the LRU now to avoid needing another
+ * lock round trip inside xfs_buf_rele().
+ */
+ list_move(&bp->b_lru, &dispose);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+
+ while (!list_empty(&dispose)) {
+ bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+ list_del_init(&bp->b_lru);
+ xfs_buf_rele(bp);
+ }
+
+ return btp->bt_lru_nr;
}
void
@@ -1490,17 +1599,14 @@ xfs_free_buftarg(
struct xfs_mount *mp,
struct xfs_buftarg *btp)
{
+ unregister_shrinker(&btp->bt_shrinker);
+
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
iput(btp->bt_mapping->host);
- /* Unregister the buftarg first so that we don't get a
- * wakeup finding a non-existent task
- */
- xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
-
kmem_free(btp);
}
@@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
xfs_buftarg_t *btp,
const char *fsname)
{
- int error = 0;
-
- INIT_LIST_HEAD(&btp->bt_list);
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
- if (IS_ERR(btp->bt_task)) {
- error = PTR_ERR(btp->bt_task);
- goto out_error;
- }
- xfs_register_buftarg(btp);
-out_error:
- return error;
+ if (IS_ERR(btp->bt_task))
+ return PTR_ERR(btp->bt_task);
+ return 0;
}
xfs_buftarg_t *
@@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
+ INIT_LIST_HEAD(&btp->bt_lru);
+ spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
+ btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+ btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+ register_shrinker(&btp->bt_shrinker);
return btp;
error:
@@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
flush_workqueue(queue);
}
-STATIC int
-xfsbufd_wakeup(
- struct shrinker *shrink,
- int priority,
- gfp_t mask)
-{
- xfs_buftarg_t *btp;
-
- spin_lock(&xfs_buftarg_lock);
- list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
- if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
- continue;
- if (list_empty(&btp->bt_delwrite_queue))
- continue;
- set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
- wake_up_process(btp->bt_task);
- }
- spin_unlock(&xfs_buftarg_lock);
- return 0;
-}
-
/*
* Move as many buffers as specified to the supplied list
* idicating if we skipped any buffers to prevent deadlocks.
@@ -1952,7 +2035,6 @@ xfs_buf_init(void)
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
- register_shrinker(&xfs_buf_shake);
return 0;
out_destroy_xfsdatad_workqueue:
@@ -1968,7 +2050,6 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- unregister_shrinker(&xfs_buf_shake);
destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 383a3f3..a76c242 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
/* per device delwri queue */
struct task_struct *bt_task;
- struct list_head bt_list;
struct list_head bt_delwrite_queue;
spinlock_t bt_delwrite_lock;
unsigned long bt_flags;
+
+ /* LRU control structures */
+ struct shrinker bt_shrinker;
+ struct list_head bt_lru;
+ spinlock_t bt_lru_lock;
+ unsigned int bt_lru_nr;
} xfs_buftarg_t;
/*
@@ -164,9 +169,11 @@ typedef struct xfs_buf {
xfs_off_t b_file_offset; /* offset in file */
size_t b_buffer_length;/* size of buffer in bytes */
atomic_t b_hold; /* reference count */
+ atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
+ struct list_head b_lru; /* lru list */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
@@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
-#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
#define XFS_BUF_SUPER_STALE(bp) do { \
@@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
+static inline void
+xfs_buf_set_ref(
+ struct xfs_buf *bp,
+ int lru_ref)
+{
+ atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
-#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 3764d74..fc0114d 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -70,8 +70,16 @@ xfs_fs_encode_fh(
else
fileid_type = FILEID_INO32_GEN_PARENT;
- /* filesystem may contain 64bit inode numbers */
- if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS))
+ /*
+ * If the the filesystem may contain 64bit inode numbers, we need
+ * to use larger file handles that can represent them.
+ *
+ * While we only allocate inodes that do not fit into 32 bits any
+ * large enough filesystem may contain them, thus the slightly
+ * confusing looking conditional below.
+ */
+ if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+ (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
fileid_type |= XFS_FILEID_TYPE_64FLAG;
/*
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214ddd7..0964949 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
#include <kmem.h>
#include <mrlock.h>
-#include <sv.h>
#include <time.h>
#include <support/debug.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 064f964..c51faaa 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -834,8 +834,11 @@ xfsaild_wakeup(
struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- ailp->xa_target = threshold_lsn;
- wake_up_process(ailp->xa_task);
+ /* only ever move the target forwards */
+ if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
+ }
}
STATIC int
@@ -847,8 +850,17 @@ xfsaild(
long tout = 0; /* milliseconds */
while (!kthread_should_stop()) {
- schedule_timeout_interruptible(tout ?
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ /*
+ * for short sleeps indicating congestion, don't allow us to
+ * get woken early. Otherwise all we do is bang on the AIL lock
+ * without making progress.
+ */
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ else
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
/* swsusp */
try_to_freeze();
@@ -1118,6 +1130,8 @@ xfs_fs_evict_inode(
*/
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
xfs_inactive(ip);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index afb0d7c..a02480d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
{
struct inode *inode = VFS_I(ip);
+ ASSERT(rcu_read_lock_held());
+
+ /*
+ * check for stale RCU freed inode
+ *
+ * If the inode has been reallocated, it doesn't matter if it's not in
+ * the AG we are walking - we are walking for writeback, so if it
+ * passes all the "valid inode" checks and is dirty, then we'll write
+ * it back anyway. If it has been reallocated and still being
+ * initialised, the XFS_INEW check below will catch it.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (!ip->i_ino)
+ goto out_unlock_noent;
+
+ /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+ if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ goto out_unlock_noent;
+ spin_unlock(&ip->i_flags_lock);
+
/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return EFSCORRUPTED;
- /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
- return ENOENT;
-
/* If we can't grab the inode, it must on it's way to reclaim. */
if (!igrab(inode))
return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
/* inode is valid */
return 0;
+
+out_unlock_noent:
+ spin_unlock(&ip->i_flags_lock);
+ return ENOENT;
}
STATIC int
@@ -98,12 +118,12 @@ restart:
int error = 0;
int i;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
break;
}
@@ -118,18 +138,26 @@ restart:
batch[i] = NULL;
/*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
+ * Update the index for the next lookup. Catch
+ * overflows into the next AG range which can occur if
+ * we have inodes in the last block of the AG and we
+ * are currently pointing to the last inode.
+ *
+ * Because we may see inodes that are from the wrong AG
+ * due to RCU freeing and reallocation, only update the
+ * index if it lies in this AG. It was a race that lead
+ * us to see this inode, so another lookup from the
+ * same index will not find it again.
*/
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+ continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}
/* unlock now we've grabbed the inodes. */
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
for (i = 0; i < nr_found; i++) {
if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
xfs_perag_put(pag);
}
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
struct xfs_inode *ip,
int flags)
{
+ ASSERT(rcu_read_lock_held());
+
+ /* quick check for stale RCU freed inode */
+ if (!ip->i_ino)
+ return 1;
/*
- * do some unlocked checks first to avoid unnecceary lock traffic.
+ * do some unlocked checks first to avoid unnecessary lock traffic.
* The first is a flush lock check, the second is a already in reclaim
* check. Only do these checks if we are not going to block on locks.
*/
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
* The radix tree lock here protects a thread in xfs_iget from racing
* with us starting reclaim on the inode. Once we have the
* XFS_IRECLAIM flag set it will not touch us.
+ *
+ * Due to RCU lookup, we may find inodes that have been freed and only
+ * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
+ * aren't candidates for reclaim at all, so we must check the
+ * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
*/
spin_lock(&ip->i_flags_lock);
- ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
- /* ignore as it is already under reclaim */
+ if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+ __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ /* not a reclaim candidate. */
spin_unlock(&ip->i_flags_lock);
return 1;
}
@@ -795,12 +833,12 @@ reclaim:
* added to the tree assert that it's been there before to catch
* problems with the inode life time early on.
*/
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
ASSERT(0);
__xfs_inode_clear_reclaim(pag, ip);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
/*
* Here we do an (almost) spurious inode lock in order to coordinate
@@ -864,14 +902,14 @@ restart:
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
int i;
- write_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
nr_found = radix_tree_gang_lookup_tag(
&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH,
XFS_ICI_RECLAIM_TAG);
if (!nr_found) {
- write_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
break;
}
@@ -891,14 +929,24 @@ restart:
* occur if we have inodes in the last block of
* the AG and we are currently pointing to the
* last inode.
+ *
+ * Because we may see inodes that are from the
+ * wrong AG due to RCU freeing and
+ * reallocation, only update the index if it
+ * lies in this AG. It was a race that lead us
+ * to see this inode, so another lookup from
+ * the same index will not find it again.
*/
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+ pag->pag_agno)
+ continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}
/* unlock now we've grabbed the inodes. */
- write_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
for (i = 0; i < nr_found; i++) {
if (!batch[i])
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e9..647af2a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(int, curr_res)
__field(int, unit_res)
__field(unsigned int, flags)
- __field(void *, reserve_headq)
- __field(void *, write_headq)
+ __field(int, reserveq)
+ __field(int, writeq)
__field(int, grant_reserve_cycle)
__field(int, grant_reserve_bytes)
__field(int, grant_write_cycle)
@@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res = tic->t_curr_res;
__entry->unit_res = tic->t_unit_res;
__entry->flags = tic->t_flags;
- __entry->reserve_headq = log->l_reserve_headq;
- __entry->write_headq = log->l_write_headq;
- __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
- __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
- __entry->grant_write_cycle = log->l_grant_write_cycle;
- __entry->grant_write_bytes = log->l_grant_write_bytes;
+ __entry->reserveq = list_empty(&log->l_reserveq);
+ __entry->writeq = list_empty(&log->l_writeq);
+ xlog_crack_grant_head(&log->l_grant_reserve_head,
+ &__entry->grant_reserve_cycle,
+ &__entry->grant_reserve_bytes);
+ xlog_crack_grant_head(&log->l_grant_write_head,
+ &__entry->grant_write_cycle,
+ &__entry->grant_write_bytes);
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
- __entry->tail_lsn = log->l_tail_lsn;
+ __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
),
TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
- "t_unit_res %u t_flags %s reserve_headq 0x%p "
- "write_headq 0x%p grant_reserve_cycle %d "
+ "t_unit_res %u t_flags %s reserveq %s "
+ "writeq %s grant_reserve_cycle %d "
"grant_reserve_bytes %d grant_write_cycle %d "
"grant_write_bytes %d curr_cycle %d curr_block %d "
"tail_cycle %d tail_block %d",
@@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res,
__entry->unit_res,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
- __entry->reserve_headq,
- __entry->write_headq,
+ __entry->reserveq ? "empty" : "active",
+ __entry->writeq ? "empty" : "active",
__entry->grant_reserve_cycle,
__entry->grant_reserve_bytes,
__entry->grant_write_cycle,
@@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
@@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
@@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
-DECLARE_EVENT_CLASS(xfs_iomap_class,
+DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
- int flags, struct xfs_bmbt_irec *irec),
- TP_ARGS(ip, offset, count, flags, irec),
+ int type, struct xfs_bmbt_irec *irec),
+ TP_ARGS(ip, offset, count, type, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
- __field(int, flags)
+ __field(int, type)
__field(xfs_fileoff_t, startoff)
__field(xfs_fsblock_t, startblock)
__field(xfs_filblks_t, blockcount)
@@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
- __entry->flags = flags;
+ __entry->type = type;
__entry->startoff = irec ? irec->br_startoff : 0;
__entry->startblock = irec ? irec->br_startblock : 0;
__entry->blockcount = irec ? irec->br_blockcount : 0;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count %zd flags %s "
+ "offset 0x%llx count %zd type %s "
"startoff 0x%llx startblock %lld blockcount 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
@@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
__entry->new_size,
__entry->offset,
__entry->count,
- __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+ __print_symbolic(__entry->type, XFS_IO_TYPES),
__entry->startoff,
(__int64_t)__entry->startblock,
__entry->blockcount)
)
#define DEFINE_IOMAP_EVENT(name) \
-DEFINE_EVENT(xfs_iomap_class, name, \
+DEFINE_EVENT(xfs_imap_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int flags, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, flags, irec))
-DEFINE_IOMAP_EVENT(xfs_iomap_enter);
-DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+ int type, struct xfs_bmbt_irec *irec), \
+ TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
TP_ARGS(ip, offset, count))
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
TRACE_EVENT(xfs_itruncate_start,
@@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
TP_PROTO(struct xfs_alloc_arg *args), \
TP_ARGS(args))
DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index faf8e1a..d22aa31 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
ASSERT(list_empty(&dqp->q_freelist));
mutex_destroy(&dqp->q_qlock);
- sv_destroy(&dqp->q_pinwait);
kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
atomic_dec(&xfs_Gqm->qm_totaldquots);
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 63c7a1a..58632cc 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,7 @@ typedef struct xfs_perag {
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
- rwlock_t pag_ici_lock; /* incore inode lock */
+ spinlock_t pag_ici_lock; /* incore inode cache lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
struct mutex pag_ici_reclaim_lock; /* serialisation point */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 112abc4..fa8723f 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact(
xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
+
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO);
+ args->agno, XFS_BTNUM_BNO);
+
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Look for the closest free block <= bno, it must contain bno
* if any free block does.
*/
- if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
+ error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
+ if (error)
goto error0;
- if (!i) {
- /*
- * Didn't find it, return null.
- */
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ if (!i)
+ goto not_found;
+
/*
* Grab the freespace record.
*/
- if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
+ error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
+ if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
ASSERT(fbno <= args->agbno);
minend = args->agbno + args->minlen;
maxend = args->agbno + args->maxlen;
fend = fbno + flen;
+
/*
* Give up if the freespace isn't long enough for the minimum request.
*/
- if (fend < minend) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- args->agbno = NULLAGBLOCK;
- return 0;
- }
+ if (fend < minend)
+ goto not_found;
+
/*
* End of extent will be smaller of the freespace end and the
* maximal requested end.
- */
- end = XFS_AGBLOCK_MIN(fend, maxend);
- /*
+ *
* Fix the length according to mod and prod if given.
*/
+ end = XFS_AGBLOCK_MIN(fend, maxend);
args->len = end - args->agbno;
xfs_alloc_fix_len(args);
- if (!xfs_alloc_fix_minleft(args)) {
- xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
- return 0;
- }
+ if (!xfs_alloc_fix_minleft(args))
+ goto not_found;
+
rlen = args->len;
ASSERT(args->agbno + rlen <= fend);
end = args->agbno + rlen;
+
/*
* We are allocating agbno for rlen [agbno .. end]
* Allocate/initialize a cursor for the by-size btree.
@@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact(
args->agno, XFS_BTNUM_CNT);
ASSERT(args->agbno + args->len <=
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
- if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
- args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
+ error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
+ args->len, XFSA_FIXUP_BNO_OK);
+ if (error) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
goto error0;
}
+
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- trace_xfs_alloc_exact_done(args);
args->wasfromfl = 0;
+ trace_xfs_alloc_exact_done(args);
+ return 0;
+
+not_found:
+ /* Didn't find it, return null. */
+ xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_exact_notfound(args);
return 0;
error0:
@@ -659,6 +665,95 @@ error0:
}
/*
+ * Search the btree in a given direction via the search cursor and compare
+ * the records found against the good extent we've already found.
+ */
+STATIC int
+xfs_alloc_find_best_extent(
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ struct xfs_btree_cur **gcur, /* good cursor */
+ struct xfs_btree_cur **scur, /* searching cursor */
+ xfs_agblock_t gdiff, /* difference for search comparison */
+ xfs_agblock_t *sbno, /* extent found by search */
+ xfs_extlen_t *slen,
+ xfs_extlen_t *slena, /* aligned length */
+ int dir) /* 0 = search right, 1 = search left */
+{
+ xfs_agblock_t bno;
+ xfs_agblock_t new;
+ xfs_agblock_t sdiff;
+ int error;
+ int i;
+
+ /* The good extent is perfect, no need to search. */
+ if (!gdiff)
+ goto out_use_good;
+
+ /*
+ * Look until we find a better one, run out of space or run off the end.
+ */
+ do {
+ error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
+ if (error)
+ goto error0;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+ xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
+ args->minlen, &bno, slena);
+
+ /*
+ * The good extent is closer than this one.
+ */
+ if (!dir) {
+ if (bno >= args->agbno + gdiff)
+ goto out_use_good;
+ } else {
+ if (bno <= args->agbno - gdiff)
+ goto out_use_good;
+ }
+
+ /*
+ * Same distance, compare length and pick the best.
+ */
+ if (*slena >= args->minlen) {
+ args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
+ xfs_alloc_fix_len(args);
+
+ sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment, *sbno,
+ *slen, &new);
+
+ /*
+ * Choose closer size and invalidate other cursor.
+ */
+ if (sdiff < gdiff)
+ goto out_use_search;
+ goto out_use_good;
+ }
+
+ if (!dir)
+ error = xfs_btree_increment(*scur, 0, &i);
+ else
+ error = xfs_btree_decrement(*scur, 0, &i);
+ if (error)
+ goto error0;
+ } while (i);
+
+out_use_good:
+ xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
+ *scur = NULL;
+ return 0;
+
+out_use_search:
+ xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
+ *gcur = NULL;
+ return 0;
+
+error0:
+ /* caller invalidates cursors */
+ return error;
+}
+
+/*
* Allocate a variable extent near bno in the allocation group agno.
* Extent's length (returned in len) will be between minlen and maxlen,
* and of the form k * prod + mod unless there's nothing that large.
@@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near(
}
}
} while (bno_cur_lt || bno_cur_gt);
+
/*
* Got both cursors still active, need to find better entry.
*/
if (bno_cur_lt && bno_cur_gt) {
- /*
- * Left side is long enough, look for a right side entry.
- */
if (ltlena >= args->minlen) {
/*
- * Fix up the length.
+ * Left side is good, look for a right side entry.
*/
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+ ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbno, ltlen, &ltnew);
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_lt, &bno_cur_gt,
+ ltdiff, &gtbno, &gtlen, &gtlena,
+ 0 /* search right */);
+ } else {
+ ASSERT(gtlena >= args->minlen);
+
/*
- * Not perfect.
- */
- if (ltdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_gt, &gtbno,
- &gtlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(gtbno, gtlen,
- args->alignment, args->minlen,
- &gtbnoa, &gtlena);
- /*
- * The left one is clearly better.
- */
- if (gtbnoa >= args->agbno + ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (gtlena >= args->minlen) {
- args->len =
- XFS_EXTLEN_MIN(gtlena,
- args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- gtbno, gtlen, &gtnew);
- /*
- * Right side is better.
- */
- if (gtdiff < ltdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- /*
- * Left side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- break;
- }
- /*
- * Fell off the right end.
- */
- if ((error = xfs_btree_increment(
- bno_cur_gt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- break;
- }
- }
- }
- /*
- * The left side is perfect, trash the right side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- }
- /*
- * It's the right side that was found first, look left.
- */
- else {
- /*
- * Fix up the length.
+ * Right side is good, look for a left side entry.
*/
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args);
- rlen = args->len;
- gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+ gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, gtbno, gtlen, &gtnew);
- /*
- * Right side entry isn't perfect.
- */
- if (gtdiff) {
- /*
- * Look until we find a better one, run out of
- * space, or run off the end.
- */
- while (bno_cur_lt && bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(
- bno_cur_lt, &ltbno,
- &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- xfs_alloc_compute_aligned(ltbno, ltlen,
- args->alignment, args->minlen,
- &ltbnoa, &ltlena);
- /*
- * The right one is clearly better.
- */
- if (ltbnoa <= args->agbno - gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- /*
- * If we reach a big enough entry,
- * compare the two and pick the best.
- */
- if (ltlena >= args->minlen) {
- args->len = XFS_EXTLEN_MIN(
- ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- ltdiff = xfs_alloc_compute_diff(
- args->agbno, rlen,
- args->alignment,
- ltbno, ltlen, &ltnew);
- /*
- * Left side is better.
- */
- if (ltdiff < gtdiff) {
- xfs_btree_del_cursor(
- bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- /*
- * Right side is better.
- */
- else {
- xfs_btree_del_cursor(
- bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- break;
- }
- /*
- * Fell off the left end.
- */
- if ((error = xfs_btree_decrement(
- bno_cur_lt, 0, &i)))
- goto error0;
- if (!i) {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- break;
- }
- }
- }
- /*
- * The right side is perfect, trash the left side.
- */
- else {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
+
+ error = xfs_alloc_find_best_extent(args,
+ &bno_cur_gt, &bno_cur_lt,
+ gtdiff, &ltbno, &ltlen, &ltlena,
+ 1 /* search left */);
}
+
+ if (error)
+ goto error0;
}
+
/*
* If we couldn't get anything, give up.
*/
@@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near(
args->agbno = NULLAGBLOCK;
return 0;
}
+
/*
* At this point we have selected a freespace entry, either to the
* left or to the right. If it's on the right, copy all the
@@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near(
j = 1;
} else
j = 0;
+
/*
* Fix up the length and compute the useful address.
*/
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a6cff8e..71e90dc2 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* It didn't all fit, so we have to sort everything on hashval.
*/
sbsize = sf->hdr.count * sizeof(*sbuf);
- sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
+ sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
/*
* Scan the attribute list for the rest of the entries, storing
@@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
args.dp = context->dp;
args.whichfork = XFS_ATTR_FORK;
args.valuelen = valuelen;
- args.value = kmem_alloc(valuelen, KM_SLEEP);
+ args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
retval = xfs_attr_rmtval_get(&args);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 04f9cca..2f9e97c 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -634,9 +634,8 @@ xfs_btree_read_bufl(
return error;
}
ASSERT(!bp || !XFS_BUF_GETERROR(bp));
- if (bp != NULL) {
+ if (bp)
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
- }
*bpp = bp;
return 0;
}
@@ -944,13 +943,13 @@ xfs_btree_set_refs(
switch (cur->bc_btnum) {
case XFS_BTNUM_BNO:
case XFS_BTNUM_CNT:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
break;
default:
ASSERT(0);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2686d0d..ed2b65f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -142,7 +142,7 @@ xfs_buf_item_log_check(
#endif
STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
-STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
+STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
/*
* This returns the number of log iovecs needed to log the
@@ -450,7 +450,7 @@ xfs_buf_item_unpin(
* xfs_trans_ail_delete() drops the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
} else {
@@ -918,15 +918,26 @@ xfs_buf_attach_iodone(
XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
}
+/*
+ * We can have many callbacks on a buffer. Running the callbacks individually
+ * can cause a lot of contention on the AIL lock, so we allow for a single
+ * callback to be able to scan the remaining lip->li_bio_list for other items
+ * of the same type and callback to be processed in the first call.
+ *
+ * As a result, the loop walking the callback list below will also modify the
+ * list. it removes the first item from the list and then runs the callback.
+ * The loop then restarts from the new head of the list. This allows the
+ * callback to scan and modify the list attached to the buffer and we don't
+ * have to care about maintaining a next item pointer.
+ */
STATIC void
xfs_buf_do_callbacks(
- xfs_buf_t *bp,
- xfs_log_item_t *lip)
+ struct xfs_buf *bp)
{
- xfs_log_item_t *nlip;
+ struct xfs_log_item *lip;
- while (lip != NULL) {
- nlip = lip->li_bio_list;
+ while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
+ XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
ASSERT(lip->li_cb != NULL);
/*
* Clear the next pointer so we don't have any
@@ -936,7 +947,6 @@ xfs_buf_do_callbacks(
*/
lip->li_bio_list = NULL;
lip->li_cb(bp, lip);
- lip = nlip;
}
}
@@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks(
ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
XFS_BUF_SUPER_STALE(bp);
trace_xfs_buf_item_iodone(bp, _RET_IP_);
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_buf_ioend(bp, 0);
@@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks(
return;
}
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_buf_ioend(bp, 0);
@@ -1063,7 +1073,7 @@ xfs_buf_error_relse(
* We have to unpin the pinned buffers so do the
* callbacks.
*/
- xfs_buf_do_callbacks(bp, lip);
+ xfs_buf_do_callbacks(bp);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 0e2ed43..b6ecd20 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item {
xfs_buf_log_format_t bli_format; /* in-log header */
} xfs_buf_log_item_t;
-/*
- * This structure is used during recovery to record the buf log
- * items which have been canceled and should not be replayed.
- */
-typedef struct xfs_buf_cancel {
- xfs_daddr_t bc_blkno;
- uint bc_len;
- int bc_refcount;
- struct xfs_buf_cancel *bc_next;
-} xfs_buf_cancel_t;
-
void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_relse(struct xfs_buf *);
void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index a55e687..75f2ef6 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -48,6 +48,28 @@ xfs_efi_item_free(
}
/*
+ * Freeing the efi requires that we remove it from the AIL if it has already
+ * been placed there. However, the EFI may not yet have been placed in the AIL
+ * when called by xfs_efi_release() from EFD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the
+ * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
+ * the EFI.
+ */
+STATIC void
+__xfs_efi_release(
+ struct xfs_efi_log_item *efip)
+{
+ struct xfs_ail *ailp = efip->efi_item.li_ailp;
+
+ if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
+ spin_lock(&ailp->xa_lock);
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ xfs_trans_ail_delete(ailp, &efip->efi_item);
+ xfs_efi_item_free(efip);
+ }
+}
+
+/*
* This returns the number of iovecs needed to log the given efi item.
* We only need 1 iovec for an efi item. It just logs the efi_log_format
* structure.
@@ -74,7 +96,8 @@ xfs_efi_item_format(
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
uint size;
- ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
+ ASSERT(atomic_read(&efip->efi_next_extent) ==
+ efip->efi_format.efi_nextents);
efip->efi_format.efi_type = XFS_LI_EFI;
@@ -99,10 +122,12 @@ xfs_efi_item_pin(
}
/*
- * While EFIs cannot really be pinned, the unpin operation is the
- * last place at which the EFI is manipulated during a transaction.
- * Here we coordinate with xfs_efi_cancel() to determine who gets to
- * free the EFI.
+ * While EFIs cannot really be pinned, the unpin operation is the last place at
+ * which the EFI is manipulated during a transaction. If we are being asked to
+ * remove the EFI it's because the transaction has been cancelled and by
+ * definition that means the EFI cannot be in the AIL so remove it from the
+ * transaction and free it. Otherwise coordinate with xfs_efi_release() (via
+ * XFS_EFI_COMMITTED) to determine who gets to free the EFI.
*/
STATIC void
xfs_efi_item_unpin(
@@ -110,20 +135,14 @@ xfs_efi_item_unpin(
int remove)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
- struct xfs_ail *ailp = lip->li_ailp;
-
- spin_lock(&ailp->xa_lock);
- if (efip->efi_flags & XFS_EFI_CANCELED) {
- if (remove)
- xfs_trans_del_item(lip);
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, lip);
+ if (remove) {
+ ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
+ xfs_trans_del_item(lip);
xfs_efi_item_free(efip);
- } else {
- efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_unlock(&ailp->xa_lock);
+ return;
}
+ __xfs_efi_release(efip);
}
/*
@@ -152,16 +171,20 @@ xfs_efi_item_unlock(
}
/*
- * The EFI is logged only once and cannot be moved in the log, so
- * simply return the lsn at which it's been logged. The canceled
- * flag is not paid any attention here. Checking for that is delayed
- * until the EFI is unpinned.
+ * The EFI is logged only once and cannot be moved in the log, so simply return
+ * the lsn at which it's been logged. For bulk transaction committed
+ * processing, the EFI may be processed but not yet unpinned prior to the EFD
+ * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
+ * when processing the EFD.
*/
STATIC xfs_lsn_t
xfs_efi_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+
+ set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
return lsn;
}
@@ -230,6 +253,7 @@ xfs_efi_init(
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (__psint_t)(void*)efip;
+ atomic_set(&efip->efi_next_extent, 0);
return efip;
}
@@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
/*
- * This is called by the efd item code below to release references to
- * the given efi item. Each efd calls this with the number of
- * extents that it has logged, and when the sum of these reaches
- * the total number of extents logged by this efi item we can free
- * the efi item.
- *
- * Freeing the efi item requires that we remove it from the AIL.
- * We'll use the AIL lock to protect our counters as well as
- * the removal from the AIL.
+ * This is called by the efd item code below to release references to the given
+ * efi item. Each efd calls this with the number of extents that it has
+ * logged, and when the sum of these reaches the total number of extents logged
+ * by this efi item we can free the efi item.
*/
void
xfs_efi_release(xfs_efi_log_item_t *efip,
uint nextents)
{
- struct xfs_ail *ailp = efip->efi_item.li_ailp;
- int extents_left;
-
- ASSERT(efip->efi_next_extent > 0);
- ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
-
- spin_lock(&ailp->xa_lock);
- ASSERT(efip->efi_next_extent >= nextents);
- efip->efi_next_extent -= nextents;
- extents_left = efip->efi_next_extent;
- if (extents_left == 0) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
- xfs_efi_item_free(efip);
- } else {
- spin_unlock(&ailp->xa_lock);
- }
+ ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
+ if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
+ __xfs_efi_release(efip);
}
static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0d22c56..375f68e 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
#define XFS_EFI_MAX_FAST_EXTENTS 16
/*
- * Define EFI flags.
+ * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
*/
-#define XFS_EFI_RECOVERED 0x1
-#define XFS_EFI_COMMITTED 0x2
-#define XFS_EFI_CANCELED 0x4
+#define XFS_EFI_RECOVERED 1
+#define XFS_EFI_COMMITTED 2
/*
* This is the "extent free intention" log item. It is used
@@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
*/
typedef struct xfs_efi_log_item {
xfs_log_item_t efi_item;
- uint efi_flags; /* misc flags */
- uint efi_next_extent;
+ atomic_t efi_next_extent;
+ unsigned long efi_flags; /* misc flags */
xfs_efi_log_format_t efi_format;
} xfs_efi_log_item_t;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7c116e..f56d30e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -374,6 +374,7 @@ xfs_growfs_data_private(
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
mp->m_maxicount = 0;
+ xfs_set_low_space_thresholds(mp);
/* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index d7de5a3..cb9b6d1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,6 +43,17 @@
/*
+ * Define xfs inode iolock lockdep classes. We need to ensure that all active
+ * inodes are considered the same for lockdep purposes, including inodes that
+ * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to
+ * guarantee the locks are considered the same when there are multiple lock
+ * initialisation siteѕ. Also, define a reclaimable inode class so it is
+ * obvious in lockdep reports which class the report is against.
+ */
+static struct lock_class_key xfs_iolock_active;
+struct lock_class_key xfs_iolock_reclaimable;
+
+/*
* Allocate and initialise an xfs_inode.
*/
STATIC struct xfs_inode *
@@ -69,8 +80,11 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_active, "xfs_iolock_active");
/* initialise the xfs inode */
ip->i_ino = ino;
@@ -85,9 +99,6 @@ xfs_inode_alloc(
ip->i_size = 0;
ip->i_new_size = 0;
- /* prevent anyone from using this yet */
- VFS_I(ip)->i_state = I_NEW;
-
return ip;
}
@@ -145,7 +156,18 @@ xfs_inode_free(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
+ /*
+ * Because we use RCU freeing we need to ensure the inode always
+ * appears to be reclaimed with an invalid inode number when in the
+ * free state. The ip->i_flags_lock provides the barrier against lookup
+ * races.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ip->i_flags = XFS_IRECLAIM;
+ ip->i_ino = 0;
+ spin_unlock(&ip->i_flags_lock);
+
+ call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}
/*
@@ -155,14 +177,29 @@ static int
xfs_iget_cache_hit(
struct xfs_perag *pag,
struct xfs_inode *ip,
+ xfs_ino_t ino,
int flags,
- int lock_flags) __releases(pag->pag_ici_lock)
+ int lock_flags) __releases(RCU)
{
struct inode *inode = VFS_I(ip);
struct xfs_mount *mp = ip->i_mount;
int error;
+ /*
+ * check for re-use of an inode within an RCU grace period due to the
+ * radix tree nodes not being updated yet. We monitor for this by
+ * setting the inode number to zero before freeing the inode structure.
+ * If the inode has been reallocated and set up, then the inode number
+ * will not match, so check for that, too.
+ */
spin_lock(&ip->i_flags_lock);
+ if (ip->i_ino != ino) {
+ trace_xfs_iget_skip(ip);
+ XFS_STATS_INC(xs_ig_frecycle);
+ error = EAGAIN;
+ goto out_error;
+ }
+
/*
* If we are racing with another cache hit that is currently
@@ -205,7 +242,7 @@ xfs_iget_cache_hit(
ip->i_flags |= XFS_IRECLAIM;
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
error = -inode_init_always(mp->m_super, inode);
if (error) {
@@ -213,7 +250,7 @@ xfs_iget_cache_hit(
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
*/
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_INEW;
@@ -223,14 +260,20 @@ xfs_iget_cache_hit(
goto out_error;
}
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
inode->i_state = I_NEW;
+
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+ mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_active, "xfs_iolock_active");
+
spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
@@ -241,7 +284,7 @@ xfs_iget_cache_hit(
/* We've got a live one. */
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
trace_xfs_iget_hit(ip);
}
@@ -255,7 +298,7 @@ xfs_iget_cache_hit(
out_error:
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
return error;
}
@@ -308,7 +351,7 @@ xfs_iget_cache_miss(
BUG();
}
- write_lock(&pag->pag_ici_lock);
+ spin_lock(&pag->pag_ici_lock);
/* insert the new inode */
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
@@ -323,14 +366,14 @@ xfs_iget_cache_miss(
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
*ipp = ip;
return 0;
out_preload_end:
- write_unlock(&pag->pag_ici_lock);
+ spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
if (lock_flags)
xfs_iunlock(ip, lock_flags);
@@ -377,7 +420,7 @@ xfs_iget(
xfs_agino_t agino;
/* reject inode numbers outside existing AGs */
- if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
+ if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
return EINVAL;
/* get the perag structure and ensure that it's inode capable */
@@ -386,15 +429,15 @@ xfs_iget(
again:
error = 0;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
if (ip) {
- error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+ error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
if (error)
goto out_error_or_again;
} else {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
XFS_STATS_INC(xs_ig_missed);
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 108c7a0..be7cf62 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -887,7 +887,7 @@ xfs_iread(
* around for a while. This helps to keep recently accessed
* meta-data in-core longer.
*/
- XFS_BUF_SET_REF(bp, XFS_INO_REF);
+ xfs_buf_set_ref(bp, XFS_INO_REF);
/*
* Use xfs_trans_brelse() to release the buffer containing the
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster(
*/
for (i = 0; i < ninodes; i++) {
retry:
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or stale, nothing to do */
- if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
- read_unlock(&pag->pag_ici_lock);
+ /* Inode not in memory, nothing to do */
+ if (!ip) {
+ rcu_read_unlock();
continue;
}
/*
+ * because this is an RCU protected lookup, we could
+ * find a recently freed or even reallocated inode
+ * during the lookup. We need to check under the
+ * i_flags_lock for a valid inode here. Skip it if it
+ * is not valid, the wrong inode or stale.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (ip->i_ino != inum + i ||
+ __xfs_iflags_test(ip, XFS_ISTALE)) {
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
+ continue;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ /*
* Don't try to lock/unlock the current inode, but we
* _cannot_ skip the other inodes that we did not find
* in the list attached to the buffer and are not
@@ -2019,11 +2035,11 @@ retry:
*/
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
delay(1);
goto retry;
}
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
- read_lock(&pag->pag_ici_lock);
+ rcu_read_lock();
/* really need a gang lookup range call here */
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
first_index, inodes_per_cluster);
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
iq = ilist[i];
if (iq == ip)
continue;
- /* if the inode lies outside this cluster, we're done. */
- if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
- break;
+
+ /*
+ * because this is an RCU protected lookup, we could find a
+ * recently freed or even reallocated inode during the lookup.
+ * We need to check under the i_flags_lock for a valid inode
+ * here. Skip it if it is not valid or the wrong inode.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (!ip->i_ino ||
+ (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
+ spin_unlock(&ip->i_flags_lock);
+ continue;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
/*
* Do an un-protected check to see if the inode is dirty and
* is a candidate for flushing. These checks will be repeated
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
}
out_free:
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
kmem_free(ilist);
out_put:
xfs_perag_put(pag);
@@ -2704,7 +2732,7 @@ cluster_corrupt_out:
* Corruption detected in the clustering loop. Invalidate the
* inode buffer and shut down the filesystem.
*/
- read_unlock(&pag->pag_ici_lock);
+ rcu_read_unlock();
/*
* Clean up the buffer. If it was B_DELWRI, just release it --
* brelse can handle it with no problems. If not, shut down the
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fb2ca2e..5c95fa8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
/*
* In-core inode flags.
*/
-#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */
-#define XFS_ISTALE 0x0002 /* inode has been staled */
-#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
-#define XFS_INEW 0x0008 /* inode has just been allocated */
-#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
-#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
+#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
+#define XFS_ISTALE 0x0002 /* inode has been staled */
+#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
+#define XFS_INEW 0x0008 /* inode has just been allocated */
+#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
+#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
/*
* Flags for inode locking.
@@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+extern struct lock_class_key xfs_iolock_reclaimable;
+
/*
* Flags for xfs_itruncate_start().
*/
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7c8d30c..fd4f398 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -842,15 +842,64 @@ xfs_inode_item_destroy(
* flushed to disk. It is responsible for removing the inode item
* from the AIL if it has not been re-logged, and unlocking the inode's
* flush lock.
+ *
+ * To reduce AIL lock traffic as much as possible, we scan the buffer log item
+ * list for other inodes that will run this function. We remove them from the
+ * buffer list so we can process all the inode IO completions in one AIL lock
+ * traversal.
*/
void
xfs_iflush_done(
struct xfs_buf *bp,
struct xfs_log_item *lip)
{
- struct xfs_inode_log_item *iip = INODE_ITEM(lip);
- xfs_inode_t *ip = iip->ili_inode;
+ struct xfs_inode_log_item *iip;
+ struct xfs_log_item *blip;
+ struct xfs_log_item *next;
+ struct xfs_log_item *prev;
struct xfs_ail *ailp = lip->li_ailp;
+ int need_ail = 0;
+
+ /*
+ * Scan the buffer IO completions for other inodes being completed and
+ * attach them to the current inode log item.
+ */
+ blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ prev = NULL;
+ while (blip != NULL) {
+ if (lip->li_cb != xfs_iflush_done) {
+ prev = blip;
+ blip = blip->li_bio_list;
+ continue;
+ }
+
+ /* remove from list */
+ next = blip->li_bio_list;
+ if (!prev) {
+ XFS_BUF_SET_FSPRIVATE(bp, next);
+ } else {
+ prev->li_bio_list = next;
+ }
+
+ /* add to current list */
+ blip->li_bio_list = lip->li_bio_list;
+ lip->li_bio_list = blip;
+
+ /*
+ * while we have the item, do the unlocked check for needing
+ * the AIL lock.
+ */
+ iip = INODE_ITEM(blip);
+ if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
+ need_ail++;
+
+ blip = next;
+ }
+
+ /* make sure we capture the state of the initial inode. */
+ iip = INODE_ITEM(lip);
+ if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
+ need_ail++;
/*
* We only want to pull the item from the AIL if it is
@@ -861,28 +910,37 @@ xfs_iflush_done(
* the lock since it's cheaper, and then we recheck while
* holding the lock before removing the inode from the AIL.
*/
- if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
+ if (need_ail) {
+ struct xfs_log_item *log_items[need_ail];
+ int i = 0;
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == iip->ili_flush_lsn) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, lip);
- } else {
- spin_unlock(&ailp->xa_lock);
+ for (blip = lip; blip; blip = blip->li_bio_list) {
+ iip = INODE_ITEM(blip);
+ if (iip->ili_logged &&
+ blip->li_lsn == iip->ili_flush_lsn) {
+ log_items[i++] = blip;
+ }
+ ASSERT(i <= need_ail);
}
+ /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
+ xfs_trans_ail_delete_bulk(ailp, log_items, i);
}
- iip->ili_logged = 0;
/*
- * Clear the ili_last_fields bits now that we know that the
- * data corresponding to them is safely on disk.
+ * clean up and unlock the flush lock now we are done. We can clear the
+ * ili_last_fields bits now that we know that the data corresponding to
+ * them is safely on disk.
*/
- iip->ili_last_fields = 0;
+ for (blip = lip; blip; blip = next) {
+ next = blip->li_bio_list;
+ blip->li_bio_list = NULL;
- /*
- * Release the inode's flush lock since we're done with it.
- */
- xfs_ifunlock(ip);
+ iip = INODE_ITEM(blip);
+ iip->ili_logged = 0;
+ iip->ili_last_fields = 0;
+ xfs_ifunlock(iip->ili_inode);
+ }
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..55582bd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,127 +47,8 @@
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
<< mp->m_writeio_log)
-#define XFS_STRAT_WRITE_IMAPS 2
#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
-STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
- int, struct xfs_bmbt_irec *, int *);
-STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
- struct xfs_bmbt_irec *, int *);
-STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
- struct xfs_bmbt_irec *, int *);
-
-int
-xfs_iomap(
- struct xfs_inode *ip,
- xfs_off_t offset,
- ssize_t count,
- int flags,
- struct xfs_bmbt_irec *imap,
- int *nimaps,
- int *new)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb, end_fsb;
- int error = 0;
- int lockmode = 0;
- int bmapi_flags = 0;
-
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-
- *new = 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
-
- switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
- case BMAPI_READ:
- lockmode = xfs_ilock_map_shared(ip);
- bmapi_flags = XFS_BMAPI_ENTIRE;
- break;
- case BMAPI_WRITE:
- lockmode = XFS_ILOCK_EXCL;
- if (flags & BMAPI_IGNSTATE)
- bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
- xfs_ilock(ip, lockmode);
- break;
- case BMAPI_ALLOCATE:
- lockmode = XFS_ILOCK_SHARED;
- bmapi_flags = XFS_BMAPI_ENTIRE;
-
- /* Attempt non-blocking lock */
- if (flags & BMAPI_TRYLOCK) {
- if (!xfs_ilock_nowait(ip, lockmode))
- return XFS_ERROR(EAGAIN);
- } else {
- xfs_ilock(ip, lockmode);
- }
- break;
- default:
- BUG();
- }
-
- ASSERT(offset <= mp->m_maxioffset);
- if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
- count = mp->m_maxioffset - offset;
- end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
- error = xfs_bmapi(NULL, ip, offset_fsb,
- (xfs_filblks_t)(end_fsb - offset_fsb),
- bmapi_flags, NULL, 0, imap,
- nimaps, NULL);
-
- if (error)
- goto out;
-
- switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
- case BMAPI_WRITE:
- /* If we found an extent, return it */
- if (*nimaps &&
- (imap->br_startblock != HOLESTARTBLOCK) &&
- (imap->br_startblock != DELAYSTARTBLOCK)) {
- trace_xfs_iomap_found(ip, offset, count, flags, imap);
- break;
- }
-
- if (flags & BMAPI_DIRECT) {
- error = xfs_iomap_write_direct(ip, offset, count, flags,
- imap, nimaps);
- } else {
- error = xfs_iomap_write_delay(ip, offset, count, flags,
- imap, nimaps);
- }
- if (!error) {
- trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
- }
- *new = 1;
- break;
- case BMAPI_ALLOCATE:
- /* If we found an extent, return it */
- xfs_iunlock(ip, lockmode);
- lockmode = 0;
-
- if (*nimaps && !isnullstartblock(imap->br_startblock)) {
- trace_xfs_iomap_found(ip, offset, count, flags, imap);
- break;
- }
-
- error = xfs_iomap_write_allocate(ip, offset, count,
- imap, nimaps);
- break;
- }
-
- ASSERT(*nimaps <= 1);
-
-out:
- if (lockmode)
- xfs_iunlock(ip, lockmode);
- return XFS_ERROR(error);
-}
-
STATIC int
xfs_iomap_eof_align_last_fsb(
xfs_mount_t *mp,
@@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero(
return EFSCORRUPTED;
}
-STATIC int
+int
xfs_iomap_write_direct(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int flags,
xfs_bmbt_irec_t *imap,
- int *nmaps)
+ int nmaps)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
@@ -279,7 +159,7 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
} else {
- if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
+ if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
imap->br_blockcount +
imap->br_startoff);
@@ -331,7 +211,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip);
bmapi_flag = XFS_BMAPI_WRITE;
- if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
+ if (offset < ip->i_size || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
@@ -370,7 +250,6 @@ xfs_iomap_write_direct(
goto error_out;
}
- *nmaps = 1;
return 0;
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -379,7 +258,6 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- *nmaps = 0; /* nothing set-up here */
error_out:
return XFS_ERROR(error);
@@ -389,6 +267,9 @@ error_out:
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation beyond EOF, don't do more
+ * preallocation as it it not needed.
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int ioflag,
xfs_bmbt_irec_t *imap,
int nimaps,
int *prealloc)
@@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate(
xfs_filblks_t count_fsb;
xfs_fsblock_t firstblock;
int n, error, imaps;
+ int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
@@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate(
return 0;
start_fsb += imap[n].br_blockcount;
count_fsb -= imap[n].br_blockcount;
+
+ if (imap[n].br_startblock == DELAYSTARTBLOCK)
+ found_delalloc = 1;
}
}
- *prealloc = 1;
+ if (!found_delalloc)
+ *prealloc = 1;
return 0;
}
-STATIC int
+/*
+ * If we don't have a user specified preallocation size, dynamically increase
+ * the preallocation size as the size of the file grows. Cap the maximum size
+ * at a single extent or less if the filesystem is near full. The closer the
+ * filesystem is to full, the smaller the maximum prealocation.
+ */
+STATIC xfs_fsblock_t
+xfs_iomap_prealloc_size(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip)
+{
+ xfs_fsblock_t alloc_blocks = 0;
+
+ if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ int shift = 0;
+ int64_t freesp;
+
+ alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
+ alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+ rounddown_pow_of_two(alloc_blocks));
+
+ xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+ freesp = mp->m_sb.sb_fdblocks;
+ if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
+ shift = 2;
+ if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
+ shift++;
+ if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
+ shift++;
+ }
+ if (shift)
+ alloc_blocks >>= shift;
+ }
+
+ if (alloc_blocks < mp->m_writeio_blocks)
+ alloc_blocks = mp->m_writeio_blocks;
+
+ return alloc_blocks;
+}
+
+int
xfs_iomap_write_delay(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- int ioflag,
- xfs_bmbt_irec_t *ret_imap,
- int *nmaps)
+ xfs_bmbt_irec_t *ret_imap)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
@@ -469,16 +396,19 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
- ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+ imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
return error;
retry:
if (prealloc) {
+ xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
- last_fsb = ioalign + mp->m_writeio_blocks;
+ last_fsb = ioalign + alloc_blocks;
} else {
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
}
@@ -496,22 +426,31 @@ retry:
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
&nimaps, NULL);
- if (error && (error != ENOSPC))
+ switch (error) {
+ case 0:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
return XFS_ERROR(error);
+ }
/*
- * If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space - flush all other inodes with
- * delalloc blocks and retry without EOF preallocation.
+ * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
+ * ENOSPC, * flush all other inodes with delalloc blocks to free up
+ * some of the excess reserved metadata space. For both cases, retry
+ * without EOF preallocation.
*/
if (nimaps == 0) {
trace_xfs_delalloc_enospc(ip, offset, count);
if (flushed)
- return XFS_ERROR(ENOSPC);
+ return XFS_ERROR(error ? error : ENOSPC);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_inodes(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (error == ENOSPC) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_flush_inodes(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
flushed = 1;
error = 0;
@@ -523,8 +462,6 @@ retry:
return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
*ret_imap = imap[0];
- *nmaps = 1;
-
return 0;
}
@@ -538,13 +475,12 @@ retry:
* We no longer bother to look at the incoming map - all we have to
* guarantee is that whatever we allocate fills the required range.
*/
-STATIC int
+int
xfs_iomap_write_allocate(
xfs_inode_t *ip,
xfs_off_t offset,
size_t count,
- xfs_bmbt_irec_t *imap,
- int *retmap)
+ xfs_bmbt_irec_t *imap)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, last_block;
@@ -557,8 +493,6 @@ xfs_iomap_write_allocate(
int error = 0;
int nres;
- *retmap = 0;
-
/*
* Make sure that the dquots are there.
*/
@@ -680,7 +614,6 @@ xfs_iomap_write_allocate(
if ((offset_fsb >= imap->br_startoff) &&
(offset_fsb < (imap->br_startoff +
imap->br_blockcount))) {
- *retmap = 1;
XFS_STATS_INC(xs_xstrat_quick);
return 0;
}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7748a43..8061576 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,30 +18,15 @@
#ifndef __XFS_IOMAP_H__
#define __XFS_IOMAP_H__
-/* base extent manipulation calls */
-#define BMAPI_READ (1 << 0) /* read extents */
-#define BMAPI_WRITE (1 << 1) /* create extents */
-#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
-
-/* modifiers */
-#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
-#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
-#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
-#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
-
-#define BMAPI_FLAGS \
- { BMAPI_READ, "READ" }, \
- { BMAPI_WRITE, "WRITE" }, \
- { BMAPI_ALLOCATE, "ALLOCATE" }, \
- { BMAPI_IGNSTATE, "IGNSTATE" }, \
- { BMAPI_DIRECT, "DIRECT" }, \
- { BMAPI_TRYLOCK, "TRYLOCK" }
-
struct xfs_inode;
struct xfs_bmbt_irec;
-extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
- struct xfs_bmbt_irec *, int *, int *);
+extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *, int);
+extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *);
+extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
+ struct xfs_bmbt_irec *);
extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index cee4ab9..0bf24b1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -47,7 +47,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
xfs_buftarg_t *log_target,
xfs_daddr_t blk_offset,
int num_bblks);
-STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
+STATIC int xlog_space_left(struct log *log, atomic64_t *head);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
@@ -70,7 +70,7 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
/* local functions to manipulate grant head */
STATIC int xlog_grant_log_space(xlog_t *log,
xlog_ticket_t *xtic);
-STATIC void xlog_grant_push_ail(xfs_mount_t *mp,
+STATIC void xlog_grant_push_ail(struct log *log,
int need_bytes);
STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
xlog_ticket_t *ticket);
@@ -81,98 +81,73 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
+STATIC void xlog_verify_grant_tail(struct log *log);
STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
int count, boolean_t syncing);
STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
-#define xlog_verify_grant_head(a,b)
+#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c,d)
#define xlog_verify_tail_lsn(a,b,c)
#endif
STATIC int xlog_iclogs_empty(xlog_t *log);
-
static void
-xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+xlog_grant_sub_space(
+ struct log *log,
+ atomic64_t *head,
+ int bytes)
{
- if (*qp) {
- tic->t_next = (*qp);
- tic->t_prev = (*qp)->t_prev;
- (*qp)->t_prev->t_next = tic;
- (*qp)->t_prev = tic;
- } else {
- tic->t_prev = tic->t_next = tic;
- *qp = tic;
- }
+ int64_t head_val = atomic64_read(head);
+ int64_t new, old;
- tic->t_flags |= XLOG_TIC_IN_Q;
-}
+ do {
+ int cycle, space;
-static void
-xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
- if (tic == tic->t_next) {
- *qp = NULL;
- } else {
- *qp = tic->t_next;
- tic->t_next->t_prev = tic->t_prev;
- tic->t_prev->t_next = tic->t_next;
- }
+ xlog_crack_grant_head_val(head_val, &cycle, &space);
- tic->t_next = tic->t_prev = NULL;
- tic->t_flags &= ~XLOG_TIC_IN_Q;
+ space -= bytes;
+ if (space < 0) {
+ space += log->l_logsize;
+ cycle--;
+ }
+
+ old = head_val;
+ new = xlog_assign_grant_head_val(cycle, space);
+ head_val = atomic64_cmpxchg(head, old, new);
+ } while (head_val != old);
}
static void
-xlog_grant_sub_space(struct log *log, int bytes)
+xlog_grant_add_space(
+ struct log *log,
+ atomic64_t *head,
+ int bytes)
{
- log->l_grant_write_bytes -= bytes;
- if (log->l_grant_write_bytes < 0) {
- log->l_grant_write_bytes += log->l_logsize;
- log->l_grant_write_cycle--;
- }
-
- log->l_grant_reserve_bytes -= bytes;
- if ((log)->l_grant_reserve_bytes < 0) {
- log->l_grant_reserve_bytes += log->l_logsize;
- log->l_grant_reserve_cycle--;
- }
+ int64_t head_val = atomic64_read(head);
+ int64_t new, old;
-}
+ do {
+ int tmp;
+ int cycle, space;
-static void
-xlog_grant_add_space_write(struct log *log, int bytes)
-{
- int tmp = log->l_logsize - log->l_grant_write_bytes;
- if (tmp > bytes)
- log->l_grant_write_bytes += bytes;
- else {
- log->l_grant_write_cycle++;
- log->l_grant_write_bytes = bytes - tmp;
- }
-}
+ xlog_crack_grant_head_val(head_val, &cycle, &space);
-static void
-xlog_grant_add_space_reserve(struct log *log, int bytes)
-{
- int tmp = log->l_logsize - log->l_grant_reserve_bytes;
- if (tmp > bytes)
- log->l_grant_reserve_bytes += bytes;
- else {
- log->l_grant_reserve_cycle++;
- log->l_grant_reserve_bytes = bytes - tmp;
- }
-}
+ tmp = log->l_logsize - space;
+ if (tmp > bytes)
+ space += bytes;
+ else {
+ space = bytes - tmp;
+ cycle++;
+ }
-static inline void
-xlog_grant_add_space(struct log *log, int bytes)
-{
- xlog_grant_add_space_write(log, bytes);
- xlog_grant_add_space_reserve(log, bytes);
+ old = head_val;
+ new = xlog_assign_grant_head_val(cycle, space);
+ head_val = atomic64_cmpxchg(head, old, new);
+ } while (head_val != old);
}
static void
@@ -355,7 +330,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
+ xlog_grant_push_ail(log, internal_ticket->t_unit_res);
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
@@ -369,7 +344,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp,
+ xlog_grant_push_ail(log,
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
retval = xlog_grant_log_space(log, internal_ticket);
@@ -584,8 +559,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -625,8 +600,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -703,55 +678,46 @@ xfs_log_move_tail(xfs_mount_t *mp,
{
xlog_ticket_t *tic;
xlog_t *log = mp->m_log;
- int need_bytes, free_bytes, cycle, bytes;
+ int need_bytes, free_bytes;
if (XLOG_FORCED_SHUTDOWN(log))
return;
- if (tail_lsn == 0) {
- /* needed since sync_lsn is 64 bits */
- spin_lock(&log->l_icloglock);
- tail_lsn = log->l_last_sync_lsn;
- spin_unlock(&log->l_icloglock);
- }
-
- spin_lock(&log->l_grant_lock);
+ if (tail_lsn == 0)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- /* Also an invalid lsn. 1 implies that we aren't passing in a valid
- * tail_lsn.
- */
- if (tail_lsn != 1) {
- log->l_tail_lsn = tail_lsn;
- }
+ /* tail_lsn == 1 implies that we weren't passed a valid value. */
+ if (tail_lsn != 1)
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
- if ((tic = log->l_write_headq)) {
+ if (!list_empty_careful(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_write_cycle;
- bytes = log->l_grant_write_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ spin_lock(&log->l_grant_write_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+ list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < tic->t_unit_res && tail_lsn != 1)
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
+ trace_xfs_log_regrant_write_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+ spin_unlock(&log->l_grant_write_lock);
}
- if ((tic = log->l_reserve_headq)) {
+
+ if (!list_empty_careful(&log->l_reserveq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_reserve_cycle;
- bytes = log->l_grant_reserve_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ spin_lock(&log->l_grant_reserve_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+ list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
else
@@ -760,12 +726,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
+ trace_xfs_log_grant_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+ spin_unlock(&log->l_grant_reserve_lock);
}
- spin_unlock(&log->l_grant_lock);
-} /* xfs_log_move_tail */
+}
/*
* Determine if we have a transaction that has gone to disk
@@ -831,23 +797,19 @@ xfs_log_need_covered(xfs_mount_t *mp)
* We may be holding the log iclog lock upon entering this routine.
*/
xfs_lsn_t
-xlog_assign_tail_lsn(xfs_mount_t *mp)
+xlog_assign_tail_lsn(
+ struct xfs_mount *mp)
{
- xfs_lsn_t tail_lsn;
- xlog_t *log = mp->m_log;
+ xfs_lsn_t tail_lsn;
+ struct log *log = mp->m_log;
tail_lsn = xfs_trans_ail_tail(mp->m_ail);
- spin_lock(&log->l_grant_lock);
- if (tail_lsn != 0) {
- log->l_tail_lsn = tail_lsn;
- } else {
- tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
- }
- spin_unlock(&log->l_grant_lock);
+ if (!tail_lsn)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
return tail_lsn;
-} /* xlog_assign_tail_lsn */
-
+}
/*
* Return the space in the log between the tail and the head. The head
@@ -864,21 +826,26 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
* result is that we return the size of the log as the amount of space left.
*/
STATIC int
-xlog_space_left(xlog_t *log, int cycle, int bytes)
-{
- int free_bytes;
- int tail_bytes;
- int tail_cycle;
-
- tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
- tail_cycle = CYCLE_LSN(log->l_tail_lsn);
- if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
- free_bytes = log->l_logsize - (bytes - tail_bytes);
- } else if ((tail_cycle + 1) < cycle) {
+xlog_space_left(
+ struct log *log,
+ atomic64_t *head)
+{
+ int free_bytes;
+ int tail_bytes;
+ int tail_cycle;
+ int head_cycle;
+ int head_bytes;
+
+ xlog_crack_grant_head(head, &head_cycle, &head_bytes);
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
+ tail_bytes = BBTOB(tail_bytes);
+ if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
+ free_bytes = log->l_logsize - (head_bytes - tail_bytes);
+ else if (tail_cycle + 1 < head_cycle)
return 0;
- } else if (tail_cycle < cycle) {
- ASSERT(tail_cycle == (cycle - 1));
- free_bytes = tail_bytes - bytes;
+ else if (tail_cycle < head_cycle) {
+ ASSERT(tail_cycle == (head_cycle - 1));
+ free_bytes = tail_bytes - head_bytes;
} else {
/*
* The reservation head is behind the tail.
@@ -889,12 +856,12 @@ xlog_space_left(xlog_t *log, int cycle, int bytes)
"xlog_space_left: head behind tail\n"
" tail_cycle = %d, tail_bytes = %d\n"
" GH cycle = %d, GH bytes = %d",
- tail_cycle, tail_bytes, cycle, bytes);
+ tail_cycle, tail_bytes, head_cycle, head_bytes);
ASSERT(0);
free_bytes = log->l_logsize;
}
return free_bytes;
-} /* xlog_space_left */
+}
/*
@@ -1047,12 +1014,16 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_flags |= XLOG_ACTIVE_RECOVERY;
log->l_prev_block = -1;
- log->l_tail_lsn = xlog_assign_lsn(1, 0);
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
- log->l_last_sync_lsn = log->l_tail_lsn;
+ xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
+ xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
- log->l_grant_reserve_cycle = 1;
- log->l_grant_write_cycle = 1;
+ xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
+ xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
+ INIT_LIST_HEAD(&log->l_reserveq);
+ INIT_LIST_HEAD(&log->l_writeq);
+ spin_lock_init(&log->l_grant_reserve_lock);
+ spin_lock_init(&log->l_grant_write_lock);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1094,8 +1065,7 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
- spin_lock_init(&log->l_grant_lock);
- sv_init(&log->l_flush_wait, 0, "flush_wait");
+ init_waitqueue_head(&log->l_flush_wait);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1151,8 +1121,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
+ init_waitqueue_head(&iclog->ic_force_wait);
+ init_waitqueue_head(&iclog->ic_write_wait);
iclogp = &iclog->ic_next;
}
@@ -1167,15 +1137,11 @@ xlog_alloc_log(xfs_mount_t *mp,
out_free_iclog:
for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
prev_iclog = iclog->ic_next;
- if (iclog->ic_bp) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
+ if (iclog->ic_bp)
xfs_buf_free(iclog->ic_bp);
- }
kmem_free(iclog);
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
@@ -1223,61 +1189,60 @@ xlog_commit_record(
* water mark. In this manner, we would be creating a low water mark.
*/
STATIC void
-xlog_grant_push_ail(xfs_mount_t *mp,
- int need_bytes)
+xlog_grant_push_ail(
+ struct log *log,
+ int need_bytes)
{
- xlog_t *log = mp->m_log; /* pointer to the log */
- xfs_lsn_t tail_lsn; /* lsn of the log tail */
- xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */
- int free_blocks; /* free blocks left to write to */
- int free_bytes; /* free bytes left to write to */
- int threshold_block; /* block in lsn we'd like to be at */
- int threshold_cycle; /* lsn cycle we'd like to be at */
- int free_threshold;
-
- ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
-
- spin_lock(&log->l_grant_lock);
- free_bytes = xlog_space_left(log,
- log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
- tail_lsn = log->l_tail_lsn;
- free_blocks = BTOBBT(free_bytes);
-
- /*
- * Set the threshold for the minimum number of free blocks in the
- * log to the maximum of what the caller needs, one quarter of the
- * log, and 256 blocks.
- */
- free_threshold = BTOBB(need_bytes);
- free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
- free_threshold = MAX(free_threshold, 256);
- if (free_blocks < free_threshold) {
- threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
- threshold_cycle = CYCLE_LSN(tail_lsn);
+ xfs_lsn_t threshold_lsn = 0;
+ xfs_lsn_t last_sync_lsn;
+ int free_blocks;
+ int free_bytes;
+ int threshold_block;
+ int threshold_cycle;
+ int free_threshold;
+
+ ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
+
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+ free_blocks = BTOBBT(free_bytes);
+
+ /*
+ * Set the threshold for the minimum number of free blocks in the
+ * log to the maximum of what the caller needs, one quarter of the
+ * log, and 256 blocks.
+ */
+ free_threshold = BTOBB(need_bytes);
+ free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
+ free_threshold = MAX(free_threshold, 256);
+ if (free_blocks >= free_threshold)
+ return;
+
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
+ &threshold_block);
+ threshold_block += free_threshold;
if (threshold_block >= log->l_logBBsize) {
- threshold_block -= log->l_logBBsize;
- threshold_cycle += 1;
+ threshold_block -= log->l_logBBsize;
+ threshold_cycle += 1;
}
- threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
+ threshold_lsn = xlog_assign_lsn(threshold_cycle,
+ threshold_block);
+ /*
+ * Don't pass in an lsn greater than the lsn of the last
+ * log record known to be on disk. Use a snapshot of the last sync lsn
+ * so that it doesn't change between the compare and the set.
+ */
+ last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+ if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
+ threshold_lsn = last_sync_lsn;
- /* Don't pass in an lsn greater than the lsn of the last
- * log record known to be on disk.
+ /*
+ * Get the transaction layer to kick the dirty buffers out to
+ * disk asynchronously. No point in trying to do this if
+ * the filesystem is shutting down.
*/
- if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
- threshold_lsn = log->l_last_sync_lsn;
- }
- spin_unlock(&log->l_grant_lock);
-
- /*
- * Get the transaction layer to kick the dirty buffers out to
- * disk asynchronously. No point in trying to do this if
- * the filesystem is shutting down.
- */
- if (threshold_lsn &&
- !XLOG_FORCED_SHUTDOWN(log))
- xfs_trans_ail_push(log->l_ailp, threshold_lsn);
-} /* xlog_grant_push_ail */
+ if (!XLOG_FORCED_SHUTDOWN(log))
+ xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+}
/*
* The bdstrat callback function for log bufs. This gives us a central
@@ -1372,9 +1337,8 @@ xlog_sync(xlog_t *log,
roundoff < BBTOB(1)));
/* move grant heads by roundoff in sync */
- spin_lock(&log->l_grant_lock);
- xlog_grant_add_space(log, roundoff);
- spin_unlock(&log->l_grant_lock);
+ xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
+ xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
/* put cycle number in every block */
xlog_pack_data(log, iclog, roundoff);
@@ -1489,15 +1453,12 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
kmem_free(iclog);
iclog = next_iclog;
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
@@ -2232,7 +2193,7 @@ xlog_state_do_callback(
lowest_lsn = xlog_get_lowest_lsn(log);
if (lowest_lsn &&
XFS_LSN_CMP(lowest_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
+ be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
iclog = iclog->ic_next;
continue; /* Leave this iclog for
* another thread */
@@ -2240,23 +2201,21 @@ xlog_state_do_callback(
iclog->ic_state = XLOG_STATE_CALLBACK;
- spin_unlock(&log->l_icloglock);
- /* l_last_sync_lsn field protected by
- * l_grant_lock. Don't worry about iclog's lsn.
- * No one else can be here except us.
+ /*
+ * update the last_sync_lsn before we drop the
+ * icloglock to ensure we are the only one that
+ * can update it.
*/
- spin_lock(&log->l_grant_lock);
- ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- log->l_last_sync_lsn =
- be64_to_cpu(iclog->ic_header.h_lsn);
- spin_unlock(&log->l_grant_lock);
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
+ be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
+ atomic64_set(&log->l_last_sync_lsn,
+ be64_to_cpu(iclog->ic_header.h_lsn));
- } else {
- spin_unlock(&log->l_icloglock);
+ } else
ioerrors++;
- }
+
+ spin_unlock(&log->l_icloglock);
/*
* Keep processing entries in the callback list until
@@ -2297,7 +2256,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_force_wait);
+ wake_up_all(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2344,7 +2303,7 @@ xlog_state_do_callback(
spin_unlock(&log->l_icloglock);
if (wake)
- sv_broadcast(&log->l_flush_wait);
+ wake_up_all(&log->l_flush_wait);
}
@@ -2395,7 +2354,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_write_wait);
+ wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2444,7 +2403,7 @@ restart:
XFS_STATS_INC(xs_log_noiclogs);
/* Wait for log writes to have flushed */
- sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
+ xlog_wait(&log->l_flush_wait, &log->l_icloglock);
goto restart;
}
@@ -2527,6 +2486,18 @@ restart:
*
* Once a ticket gets put onto the reserveq, it will only return after
* the needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off the reserveq under the
+ * l_grant_reserve_lock, we only need to take that lock if we are going
+ * to add the ticket to the queue and sleep. We can avoid taking the lock if the
+ * ticket was never added to the reserveq because the t_queue list head will be
+ * empty and we hold the only reference to it so it can safely be checked
+ * unlocked.
*/
STATIC int
xlog_grant_log_space(xlog_t *log,
@@ -2534,24 +2505,27 @@ xlog_grant_log_space(xlog_t *log,
{
int free_bytes;
int need_bytes;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
-
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("grant Recovery problem");
#endif
- /* Is there space or do we need to sleep? */
- spin_lock(&log->l_grant_lock);
-
trace_xfs_log_grant_enter(log, tic);
+ need_bytes = tic->t_unit_res;
+ if (tic->t_flags & XFS_LOG_PERM_RESERV)
+ need_bytes *= tic->t_ocnt;
+
/* something is already sleeping; insert new transaction at end */
- if (log->l_reserve_headq) {
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ if (!list_empty_careful(&log->l_reserveq)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_reserveq)) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto redo;
+ }
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep1(log, tic);
@@ -2563,72 +2537,57 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
*/
trace_xfs_log_grant_wake1(log, tic);
- spin_lock(&log->l_grant_lock);
}
- if (tic->t_flags & XFS_LOG_PERM_RESERV)
- need_bytes = tic->t_unit_res*tic->t_ocnt;
- else
- need_bytes = tic->t_unit_res;
redo:
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
- free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
+ free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ spin_lock(&log->l_grant_reserve_lock);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep2(log, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
-
- XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
-
- spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- trace_xfs_log_grant_wake2(log, tic);
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+ trace_xfs_log_grant_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
+ }
- /* we've got enough space */
- xlog_grant_add_space(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- /*
- * Check to make sure the grant write head didn't just over lap the
- * tail. If the cycles are the same, we can't be overlapping.
- * Otherwise, make sure that the cycles differ by exactly one and
- * check the byte count.
- */
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
}
-#endif
+
+ /* we've got enough space */
+ xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
+ xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_grant_exit(log, tic);
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
+ xlog_verify_grant_tail(log);
return 0;
- error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+error_return_unlocked:
+ spin_lock(&log->l_grant_reserve_lock);
+error_return:
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
trace_xfs_log_grant_error(log, tic);
/*
@@ -2638,7 +2597,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
} /* xlog_grant_log_space */
@@ -2646,17 +2604,14 @@ redo:
/*
* Replenish the byte reservation required by moving the grant write head.
*
- *
+ * Similar to xlog_grant_log_space, the function is structured to have a lock
+ * free fast path.
*/
STATIC int
xlog_regrant_write_log_space(xlog_t *log,
xlog_ticket_t *tic)
{
int free_bytes, need_bytes;
- xlog_ticket_t *ntic;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2669,12 +2624,9 @@ xlog_regrant_write_log_space(xlog_t *log,
panic("regrant Recovery problem");
#endif
- spin_lock(&log->l_grant_lock);
-
trace_xfs_log_regrant_write_enter(log, tic);
-
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
/* If there are other waiters on the queue then give them a
* chance at logspace before us. Wake up the first waiters,
@@ -2683,92 +2635,76 @@ xlog_regrant_write_log_space(xlog_t *log,
* this transaction.
*/
need_bytes = tic->t_unit_res;
- if ((ntic = log->l_write_headq)) {
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
- do {
+ if (!list_empty_careful(&log->l_writeq)) {
+ struct xlog_ticket *ntic;
+
+ spin_lock(&log->l_grant_write_lock);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
+ list_for_each_entry(ntic, &log->l_writeq, t_queue) {
ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_wait);
- ntic = ntic->t_next;
- } while (ntic != log->l_write_headq);
-
- if (ntic != log->l_write_headq) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
+ wake_up(&ntic->t_wait);
+ }
+ if (ntic != list_first_entry(&log->l_writeq,
+ struct xlog_ticket, t_queue)) {
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
trace_xfs_log_regrant_write_sleep1(log, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT,
- &log->l_grant_lock, s);
-
- /* If we're shutting down, this tic is already
- * off the queue */
- spin_lock(&log->l_grant_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
trace_xfs_log_regrant_write_wake1(log, tic);
- }
+ } else
+ spin_unlock(&log->l_grant_write_lock);
}
redo:
if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
+ goto error_return_unlocked;
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
+ free_bytes = xlog_space_left(log, &log->l_grant_write_head);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
-
- XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep2(log, tic);
-
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ spin_lock(&log->l_grant_write_lock);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
- /* If we're shutting down, this tic is already off the queue */
- spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_regrant_write_sleep2(log, tic);
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+
trace_xfs_log_regrant_write_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_write_headq, tic);
+ }
- /* we've got enough space */
- xlog_grant_add_space_write(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_write_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
}
-#endif
+ /* we've got enough space */
+ xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
-
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
+ xlog_verify_grant_tail(log);
return 0;
+ error_return_unlocked:
+ spin_lock(&log->l_grant_write_lock);
error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
trace_xfs_log_regrant_write_error(log, tic);
/*
@@ -2778,7 +2714,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
} /* xlog_regrant_write_log_space */
@@ -2799,27 +2734,24 @@ xlog_regrant_reserve_log_space(xlog_t *log,
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- spin_lock(&log->l_grant_lock);
- xlog_grant_sub_space(log, ticket->t_curr_res);
+ xlog_grant_sub_space(log, &log->l_grant_reserve_head,
+ ticket->t_curr_res);
+ xlog_grant_sub_space(log, &log->l_grant_write_head,
+ ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
trace_xfs_log_regrant_reserve_sub(log, ticket);
- xlog_verify_grant_head(log, 1);
-
/* just return if we still have some of the pre-reserved space */
- if (ticket->t_cnt > 0) {
- spin_unlock(&log->l_grant_lock);
+ if (ticket->t_cnt > 0)
return;
- }
- xlog_grant_add_space_reserve(log, ticket->t_unit_res);
+ xlog_grant_add_space(log, &log->l_grant_reserve_head,
+ ticket->t_unit_res);
trace_xfs_log_regrant_reserve_exit(log, ticket);
- xlog_verify_grant_head(log, 0);
- spin_unlock(&log->l_grant_lock);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
} /* xlog_regrant_reserve_log_space */
@@ -2843,28 +2775,29 @@ STATIC void
xlog_ungrant_log_space(xlog_t *log,
xlog_ticket_t *ticket)
{
+ int bytes;
+
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- spin_lock(&log->l_grant_lock);
trace_xfs_log_ungrant_enter(log, ticket);
-
- xlog_grant_sub_space(log, ticket->t_curr_res);
-
trace_xfs_log_ungrant_sub(log, ticket);
- /* If this is a permanent reservation ticket, we may be able to free
+ /*
+ * If this is a permanent reservation ticket, we may be able to free
* up more space based on the remaining count.
*/
+ bytes = ticket->t_curr_res;
if (ticket->t_cnt > 0) {
ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
- xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
+ bytes += ticket->t_unit_res*ticket->t_cnt;
}
+ xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
+ xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
+
trace_xfs_log_ungrant_exit(log, ticket);
- xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
xfs_log_move_tail(log->l_mp, 1);
} /* xlog_ungrant_log_space */
@@ -2901,11 +2834,11 @@ xlog_state_release_iclog(
if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
/* update tail before writing to iclog */
- xlog_assign_tail_lsn(log->l_mp);
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
sync++;
iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
- xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
/* cycle incremented when incrementing curr_block */
}
spin_unlock(&log->l_icloglock);
@@ -3088,7 +3021,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3206,8 +3139,8 @@ try_again:
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_write_wait,
- PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
if (log_flushed)
*log_flushed = 1;
already_slept = 1;
@@ -3235,7 +3168,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3310,10 +3243,8 @@ xfs_log_ticket_put(
xlog_ticket_t *ticket)
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
- if (atomic_dec_and_test(&ticket->t_ref)) {
- sv_destroy(&ticket->t_wait);
+ if (atomic_dec_and_test(&ticket->t_ref))
kmem_zone_free(xfs_log_ticket_zone, ticket);
- }
}
xlog_ticket_t *
@@ -3435,6 +3366,7 @@ xlog_ticket_alloc(
}
atomic_set(&tic->t_ref, 1);
+ INIT_LIST_HEAD(&tic->t_queue);
tic->t_unit_res = unit_bytes;
tic->t_curr_res = unit_bytes;
tic->t_cnt = cnt;
@@ -3445,7 +3377,7 @@ xlog_ticket_alloc(
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
+ init_waitqueue_head(&tic->t_wait);
xlog_tic_reset_res(tic);
@@ -3484,18 +3416,25 @@ xlog_verify_dest_ptr(
}
STATIC void
-xlog_verify_grant_head(xlog_t *log, int equals)
+xlog_verify_grant_tail(
+ struct log *log)
{
- if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) {
- if (equals)
- ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes);
- else
- ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes);
- } else {
- ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle);
- ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes);
- }
-} /* xlog_verify_grant_head */
+ int tail_cycle, tail_blocks;
+ int cycle, space;
+
+ /*
+ * Check to make sure the grant write head didn't just over lap the
+ * tail. If the cycles are the same, we can't be overlapping.
+ * Otherwise, make sure that the cycles differ by exactly one and
+ * check the byte count.
+ */
+ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
+ if (tail_cycle != cycle) {
+ ASSERT(cycle - 1 == tail_cycle);
+ ASSERT(space <= BBTOB(tail_blocks));
+ }
+}
/* check if it will fit */
STATIC void
@@ -3716,12 +3655,10 @@ xfs_log_force_umount(
xlog_cil_force(log);
/*
- * We must hold both the GRANT lock and the LOG lock,
- * before we mark the filesystem SHUTDOWN and wake
- * everybody up to tell the bad news.
+ * mark the filesystem and the as in a shutdown state and wake
+ * everybody up to tell them the bad news.
*/
spin_lock(&log->l_icloglock);
- spin_lock(&log->l_grant_lock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
XFS_BUF_DONE(mp->m_sb_bp);
@@ -3742,27 +3679,21 @@ xfs_log_force_umount(
spin_unlock(&log->l_icloglock);
/*
- * We don't want anybody waiting for log reservations
- * after this. That means we have to wake up everybody
- * queued up on reserve_headq as well as write_headq.
- * In addition, we make sure in xlog_{re}grant_log_space
- * that we don't enqueue anything once the SHUTDOWN flag
- * is set, and this action is protected by the GRANTLOCK.
+ * We don't want anybody waiting for log reservations after this. That
+ * means we have to wake up everybody queued up on reserveq as well as
+ * writeq. In addition, we make sure in xlog_{re}grant_log_space that
+ * we don't enqueue anything once the SHUTDOWN flag is set, and this
+ * action is protected by the grant locks.
*/
- if ((tic = log->l_reserve_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
- }
-
- if ((tic = log->l_write_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
- }
- spin_unlock(&log->l_grant_lock);
+ spin_lock(&log->l_grant_reserve_lock);
+ list_for_each_entry(tic, &log->l_reserveq, t_queue)
+ wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_reserve_lock);
+
+ spin_lock(&log->l_grant_write_lock);
+ list_for_each_entry(tic, &log->l_writeq, t_queue)
+ wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_write_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23d6ceb..9dc8125 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -61,7 +61,7 @@ xlog_cil_init(
INIT_LIST_HEAD(&cil->xc_committing);
spin_lock_init(&cil->xc_cil_lock);
init_rwsem(&cil->xc_ctx_lock);
- sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
+ init_waitqueue_head(&cil->xc_commit_wait);
INIT_LIST_HEAD(&ctx->committing);
INIT_LIST_HEAD(&ctx->busy_extents);
@@ -361,15 +361,10 @@ xlog_cil_committed(
int abort)
{
struct xfs_cil_ctx *ctx = args;
- struct xfs_log_vec *lv;
- int abortflag = abort ? XFS_LI_ABORTED : 0;
struct xfs_busy_extent *busyp, *n;
- /* unpin all the log items */
- for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
- xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
- abortflag);
- }
+ xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
+ ctx->start_lsn, abort);
list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
@@ -568,7 +563,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
}
@@ -592,7 +587,7 @@ restart:
*/
spin_lock(&cil->xc_cil_lock);
ctx->commit_lsn = commit_lsn;
- sv_broadcast(&cil->xc_commit_wait);
+ wake_up_all(&cil->xc_commit_wait);
spin_unlock(&cil->xc_cil_lock);
/* release the hounds! */
@@ -757,7 +752,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
if (ctx->sequence != sequence)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index edcdfe0..d5f8be8 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -21,7 +21,6 @@
struct xfs_buf;
struct log;
struct xlog_ticket;
-struct xfs_buf_cancel;
struct xfs_mount;
/*
@@ -54,7 +53,6 @@ struct xfs_mount;
BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
-
static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
{
return ((xfs_lsn_t)cycle << 32) | block;
@@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i)
*/
#define XLOG_TIC_INITED 0x1 /* has been initialized */
#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
-#define XLOG_TIC_IN_Q 0x4
#define XLOG_TIC_FLAGS \
{ XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
- { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
- { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
+ { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
#endif /* __KERNEL__ */
@@ -244,9 +240,8 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_wait; /* ticket wait queue : 20 */
- struct xlog_ticket *t_next; /* :4|8 */
- struct xlog_ticket *t_prev; /* :4|8 */
+ wait_queue_head_t t_wait; /* ticket wait queue */
+ struct list_head t_queue; /* reserve/write queue */
xlog_tid_t t_tid; /* transaction identifier : 4 */
atomic_t t_ref; /* ticket reference count : 4 */
int t_curr_res; /* current reservation in bytes : 4 */
@@ -353,8 +348,8 @@ typedef union xlog_in_core2 {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_in_core {
- sv_t ic_force_wait;
- sv_t ic_write_wait;
+ wait_queue_head_t ic_force_wait;
+ wait_queue_head_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -421,7 +416,7 @@ struct xfs_cil {
struct xfs_cil_ctx *xc_ctx;
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
- sv_t xc_commit_wait;
+ wait_queue_head_t xc_commit_wait;
xfs_lsn_t xc_current_sequence;
};
@@ -491,7 +486,7 @@ typedef struct log {
struct xfs_buftarg *l_targ; /* buftarg of log */
uint l_flags;
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
- struct xfs_buf_cancel **l_buf_cancel_table;
+ struct list_head *l_buf_cancel_table;
int l_iclog_hsize; /* size of iclog header */
int l_iclog_heads; /* # of iclog header sectors */
uint l_sectBBsize; /* sector size in BBs (2^n) */
@@ -503,29 +498,40 @@ typedef struct log {
int l_logBBsize; /* size of log in BB chunks */
/* The following block of fields are changed while holding icloglock */
- sv_t l_flush_wait ____cacheline_aligned_in_smp;
+ wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
/* waiting for iclog flush */
int l_covered_state;/* state of "covering disk
* log entries" */
xlog_in_core_t *l_iclog; /* head log queue */
spinlock_t l_icloglock; /* grab to change iclog state */
- xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
- * buffers */
- xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
int l_curr_cycle; /* Cycle number of log writes */
int l_prev_cycle; /* Cycle number before last
* block increment */
int l_curr_block; /* current logical log block */
int l_prev_block; /* previous logical log block */
- /* The following block of fields are changed while holding grant_lock */
- spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
- xlog_ticket_t *l_reserve_headq;
- xlog_ticket_t *l_write_headq;
- int l_grant_reserve_cycle;
- int l_grant_reserve_bytes;
- int l_grant_write_cycle;
- int l_grant_write_bytes;
+ /*
+ * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
+ * read without needing to hold specific locks. To avoid operations
+ * contending with other hot objects, place each of them on a separate
+ * cacheline.
+ */
+ /* lsn of last LR on disk */
+ atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
+ /* lsn of 1st LR with unflushed * buffers */
+ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
+
+ /*
+ * ticket grant locks, queues and accounting have their own cachlines
+ * as these are quite hot and can be operated on concurrently.
+ */
+ spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
+ struct list_head l_reserveq;
+ atomic64_t l_grant_reserve_head;
+
+ spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
+ struct list_head l_writeq;
+ atomic64_t l_grant_write_head;
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
@@ -534,6 +540,9 @@ typedef struct log {
} xlog_t;
+#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
+ ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
/* common routines */
@@ -562,6 +571,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
xlog_in_core_t **commit_iclog, uint flags);
/*
+ * When we crack an atomic LSN, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from. This should always
+ * be used to smaple and crack LSNs taht are stored and updated in atomic
+ * variables.
+ */
+static inline void
+xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
+{
+ xfs_lsn_t val = atomic64_read(lsn);
+
+ *cycle = CYCLE_LSN(val);
+ *block = BLOCK_LSN(val);
+}
+
+/*
+ * Calculate and assign a value to an atomic LSN variable from component pieces.
+ */
+static inline void
+xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
+{
+ atomic64_set(lsn, xlog_assign_lsn(cycle, block));
+}
+
+/*
+ * When we crack the grant head, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from.
+ */
+static inline void
+xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
+{
+ *cycle = val >> 32;
+ *space = val & 0xffffffff;
+}
+
+static inline void
+xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
+{
+ xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
+}
+
+static inline int64_t
+xlog_assign_grant_head_val(int cycle, int space)
+{
+ return ((int64_t)cycle << 32) | space;
+}
+
+static inline void
+xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
+{
+ atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
+}
+
+/*
* Committed Item List interfaces
*/
int xlog_cil_init(struct log *log);
@@ -585,6 +649,21 @@ xlog_cil_force(struct log *log)
*/
#define XLOG_UNMOUNT_REC_TYPE (-1U)
+/*
+ * Wrapper function for waiting on a wait queue serialised against wakeups
+ * by a spinlock. This matches the semantics of all the wait queues used in the
+ * log code.
+ */
+static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(wq, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(lock);
+ schedule();
+ remove_wait_queue(wq, &wait);
+}
#endif /* __KERNEL__ */
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 966d3f9..204d8e5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -53,6 +53,17 @@ STATIC void xlog_recover_check_summary(xlog_t *);
#endif
/*
+ * This structure is used during recovery to record the buf log items which
+ * have been canceled and should not be replayed.
+ */
+struct xfs_buf_cancel {
+ xfs_daddr_t bc_blkno;
+ uint bc_len;
+ int bc_refcount;
+ struct list_head bc_list;
+};
+
+/*
* Sector aligned buffer routines for buffer create/read/write/access
*/
@@ -925,12 +936,12 @@ xlog_find_tail(
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
if (found == 2)
log->l_curr_cycle++;
- log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
- log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
- log->l_grant_reserve_cycle = log->l_curr_cycle;
- log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
- log->l_grant_write_cycle = log->l_curr_cycle;
- log->l_grant_write_bytes = BBTOB(log->l_curr_block);
+ atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+ atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+ xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
+ xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
/*
* Look for unmount record. If we find it, then we know there
@@ -960,7 +971,7 @@ xlog_find_tail(
}
after_umount_blk = (i + hblks + (int)
BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
- tail_lsn = log->l_tail_lsn;
+ tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = (i + hblks) % log->l_logBBsize;
@@ -975,12 +986,10 @@ xlog_find_tail(
* log records will point recovery to after the
* current unmount record.
*/
- log->l_tail_lsn =
- xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
- log->l_last_sync_lsn =
- xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
+ xlog_assign_atomic_lsn(&log->l_tail_lsn,
+ log->l_curr_cycle, after_umount_blk);
+ xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+ log->l_curr_cycle, after_umount_blk);
*tail_blk = after_umount_blk;
/*
@@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans(
* record in the table to tell us how many times we expect to see this
* record during the second pass.
*/
-STATIC void
-xlog_recover_do_buffer_pass1(
- xlog_t *log,
- xfs_buf_log_format_t *buf_f)
+STATIC int
+xlog_recover_buffer_pass1(
+ struct log *log,
+ xlog_recover_item_t *item)
{
- xfs_buf_cancel_t *bcp;
- xfs_buf_cancel_t *nextp;
- xfs_buf_cancel_t *prevp;
- xfs_buf_cancel_t **bucket;
- xfs_daddr_t blkno = 0;
- uint len = 0;
- ushort flags = 0;
-
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- len = buf_f->blf_len;
- flags = buf_f->blf_flags;
- break;
- }
+ xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
+ struct list_head *bucket;
+ struct xfs_buf_cancel *bcp;
/*
* If this isn't a cancel buffer item, then just return.
*/
- if (!(flags & XFS_BLF_CANCEL)) {
+ if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
trace_xfs_log_recover_buf_not_cancel(log, buf_f);
- return;
- }
-
- /*
- * Insert an xfs_buf_cancel record into the hash table of
- * them. If there is already an identical record, bump
- * its reference count.
- */
- bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
- XLOG_BC_TABLE_SIZE];
- /*
- * If the hash bucket is empty then just insert a new record into
- * the bucket.
- */
- if (*bucket == NULL) {
- bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
- KM_SLEEP);
- bcp->bc_blkno = blkno;
- bcp->bc_len = len;
- bcp->bc_refcount = 1;
- bcp->bc_next = NULL;
- *bucket = bcp;
- return;
+ return 0;
}
/*
- * The hash bucket is not empty, so search for duplicates of our
- * record. If we find one them just bump its refcount. If not
- * then add us at the end of the list.
+ * Insert an xfs_buf_cancel record into the hash table of them.
+ * If there is already an identical record, bump its reference count.
*/
- prevp = NULL;
- nextp = *bucket;
- while (nextp != NULL) {
- if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
- nextp->bc_refcount++;
+ bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
+ list_for_each_entry(bcp, bucket, bc_list) {
+ if (bcp->bc_blkno == buf_f->blf_blkno &&
+ bcp->bc_len == buf_f->blf_len) {
+ bcp->bc_refcount++;
trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
- return;
+ return 0;
}
- prevp = nextp;
- nextp = nextp->bc_next;
- }
- ASSERT(prevp != NULL);
- bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
- KM_SLEEP);
- bcp->bc_blkno = blkno;
- bcp->bc_len = len;
+ }
+
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+ bcp->bc_blkno = buf_f->blf_blkno;
+ bcp->bc_len = buf_f->blf_len;
bcp->bc_refcount = 1;
- bcp->bc_next = NULL;
- prevp->bc_next = bcp;
+ list_add_tail(&bcp->bc_list, bucket);
+
trace_xfs_log_recover_buf_cancel_add(log, buf_f);
+ return 0;
}
/*
@@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1(
*/
STATIC int
xlog_check_buffer_cancelled(
- xlog_t *log,
+ struct log *log,
xfs_daddr_t blkno,
uint len,
ushort flags)
{
- xfs_buf_cancel_t *bcp;
- xfs_buf_cancel_t *prevp;
- xfs_buf_cancel_t **bucket;
+ struct list_head *bucket;
+ struct xfs_buf_cancel *bcp;
if (log->l_buf_cancel_table == NULL) {
/*
@@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled(
return 0;
}
- bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
- XLOG_BC_TABLE_SIZE];
- bcp = *bucket;
- if (bcp == NULL) {
- /*
- * There is no corresponding entry in the table built
- * in pass one, so this buffer has not been cancelled.
- */
- ASSERT(!(flags & XFS_BLF_CANCEL));
- return 0;
- }
-
/*
- * Search for an entry in the buffer cancel table that
- * matches our buffer.
+ * Search for an entry in the cancel table that matches our buffer.
*/
- prevp = NULL;
- while (bcp != NULL) {
- if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
- /*
- * We've go a match, so return 1 so that the
- * recovery of this buffer is cancelled.
- * If this buffer is actually a buffer cancel
- * log item, then decrement the refcount on the
- * one in the table and remove it if this is the
- * last reference.
- */
- if (flags & XFS_BLF_CANCEL) {
- bcp->bc_refcount--;
- if (bcp->bc_refcount == 0) {
- if (prevp == NULL) {
- *bucket = bcp->bc_next;
- } else {
- prevp->bc_next = bcp->bc_next;
- }
- kmem_free(bcp);
- }
- }
- return 1;
- }
- prevp = bcp;
- bcp = bcp->bc_next;
+ bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
+ list_for_each_entry(bcp, bucket, bc_list) {
+ if (bcp->bc_blkno == blkno && bcp->bc_len == len)
+ goto found;
}
+
/*
- * We didn't find a corresponding entry in the table, so
- * return 0 so that the buffer is NOT cancelled.
+ * We didn't find a corresponding entry in the table, so return 0 so
+ * that the buffer is NOT cancelled.
*/
ASSERT(!(flags & XFS_BLF_CANCEL));
return 0;
-}
-STATIC int
-xlog_recover_do_buffer_pass2(
- xlog_t *log,
- xfs_buf_log_format_t *buf_f)
-{
- xfs_daddr_t blkno = 0;
- ushort flags = 0;
- uint len = 0;
-
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- flags = buf_f->blf_flags;
- len = buf_f->blf_len;
- break;
+found:
+ /*
+ * We've go a match, so return 1 so that the recovery of this buffer
+ * is cancelled. If this buffer is actually a buffer cancel log
+ * item, then decrement the refcount on the one in the table and
+ * remove it if this is the last reference.
+ */
+ if (flags & XFS_BLF_CANCEL) {
+ if (--bcp->bc_refcount == 0) {
+ list_del(&bcp->bc_list);
+ kmem_free(bcp);
+ }
}
-
- return xlog_check_buffer_cancelled(log, blkno, len, flags);
+ return 1;
}
/*
- * Perform recovery for a buffer full of inodes. In these buffers,
- * the only data which should be recovered is that which corresponds
- * to the di_next_unlinked pointers in the on disk inode structures.
- * The rest of the data for the inodes is always logged through the
- * inodes themselves rather than the inode buffer and is recovered
- * in xlog_recover_do_inode_trans().
+ * Perform recovery for a buffer full of inodes. In these buffers, the only
+ * data which should be recovered is that which corresponds to the
+ * di_next_unlinked pointers in the on disk inode structures. The rest of the
+ * data for the inodes is always logged through the inodes themselves rather
+ * than the inode buffer and is recovered in xlog_recover_inode_pass2().
*
- * The only time when buffers full of inodes are fully recovered is
- * when the buffer is full of newly allocated inodes. In this case
- * the buffer will not be marked as an inode buffer and so will be
- * sent to xlog_recover_do_reg_buffer() below during recovery.
+ * The only time when buffers full of inodes are fully recovered is when the
+ * buffer is full of newly allocated inodes. In this case the buffer will
+ * not be marked as an inode buffer and so will be sent to
+ * xlog_recover_do_reg_buffer() below during recovery.
*/
STATIC int
xlog_recover_do_inode_buffer(
- xfs_mount_t *mp,
+ struct xfs_mount *mp,
xlog_recover_item_t *item,
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_buf_log_format_t *buf_f)
{
int i;
- int item_index;
- int bit;
- int nbits;
- int reg_buf_offset;
- int reg_buf_bytes;
+ int item_index = 0;
+ int bit = 0;
+ int nbits = 0;
+ int reg_buf_offset = 0;
+ int reg_buf_bytes = 0;
int next_unlinked_offset;
int inodes_per_buf;
xfs_agino_t *logged_nextp;
xfs_agino_t *buffer_nextp;
- unsigned int *data_map = NULL;
- unsigned int map_size = 0;
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- data_map = buf_f->blf_data_map;
- map_size = buf_f->blf_map_size;
- break;
- }
- /*
- * Set the variables corresponding to the current region to
- * 0 so that we'll initialize them on the first pass through
- * the loop.
- */
- reg_buf_offset = 0;
- reg_buf_bytes = 0;
- bit = 0;
- nbits = 0;
- item_index = 0;
inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
@@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer(
* the current di_next_unlinked field.
*/
bit += nbits;
- bit = xfs_next_bit(data_map, map_size, bit);
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
/*
* If there are no more logged regions in the
* buffer, then we're done.
*/
- if (bit == -1) {
+ if (bit == -1)
return 0;
- }
- nbits = xfs_contig_bits(data_map, map_size,
- bit);
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
reg_buf_offset = bit << XFS_BLF_SHIFT;
reg_buf_bytes = nbits << XFS_BLF_SHIFT;
@@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer(
* di_next_unlinked field, then move on to the next
* di_next_unlinked field.
*/
- if (next_unlinked_offset < reg_buf_offset) {
+ if (next_unlinked_offset < reg_buf_offset)
continue;
- }
ASSERT(item->ri_buf[item_index].i_addr != NULL);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
@@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer(
* given buffer. The bitmap in the buf log format structure indicates
* where to place the logged data.
*/
-/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
struct xfs_mount *mp,
xlog_recover_item_t *item,
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_buf_log_format_t *buf_f)
{
int i;
int bit;
int nbits;
- unsigned int *data_map = NULL;
- unsigned int map_size = 0;
int error;
trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- data_map = buf_f->blf_data_map;
- map_size = buf_f->blf_map_size;
- break;
- }
bit = 0;
i = 1; /* 0 is the buf format structure */
while (1) {
- bit = xfs_next_bit(data_map, map_size, bit);
+ bit = xfs_next_bit(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
if (bit == -1)
break;
- nbits = xfs_contig_bits(data_map, map_size, bit);
+ nbits = xfs_contig_bits(buf_f->blf_data_map,
+ buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
@@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer(
* for more details on the implementation of the table of cancel records.
*/
STATIC int
-xlog_recover_do_buffer_trans(
+xlog_recover_buffer_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
int error;
- int cancel;
- xfs_daddr_t blkno;
- int len;
- ushort flags;
uint buf_flags;
- if (pass == XLOG_RECOVER_PASS1) {
- /*
- * In this pass we're only looking for buf items
- * with the XFS_BLF_CANCEL bit set.
- */
- xlog_recover_do_buffer_pass1(log, buf_f);
+ /*
+ * In this pass we only want to recover all the buffers which have
+ * not been cancelled and are not cancellation buffers themselves.
+ */
+ if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
+ buf_f->blf_len, buf_f->blf_flags)) {
+ trace_xfs_log_recover_buf_cancel(log, buf_f);
return 0;
- } else {
- /*
- * In this pass we want to recover all the buffers
- * which have not been cancelled and are not
- * cancellation buffers themselves. The routine
- * we call here will tell us whether or not to
- * continue with the replay of this buffer.
- */
- cancel = xlog_recover_do_buffer_pass2(log, buf_f);
- if (cancel) {
- trace_xfs_log_recover_buf_cancel(log, buf_f);
- return 0;
- }
}
+
trace_xfs_log_recover_buf_recover(log, buf_f);
- switch (buf_f->blf_type) {
- case XFS_LI_BUF:
- blkno = buf_f->blf_blkno;
- len = buf_f->blf_len;
- flags = buf_f->blf_flags;
- break;
- default:
- xfs_fs_cmn_err(CE_ALERT, log->l_mp,
- "xfs_log_recover: unknown buffer type 0x%x, logdev %s",
- buf_f->blf_type, log->l_mp->m_logname ?
- log->l_mp->m_logname : "internal");
- XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
- XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
- }
- mp = log->l_mp;
buf_flags = XBF_LOCK;
- if (!(flags & XFS_BLF_INODE_BUF))
+ if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
buf_flags |= XBF_MAPPED;
- bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
+ bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+ buf_flags);
if (XFS_BUF_ISERROR(bp)) {
- xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
- bp, blkno);
+ xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
+ bp, buf_f->blf_blkno);
error = XFS_BUF_GETERROR(bp);
xfs_buf_relse(bp);
return error;
}
error = 0;
- if (flags & XFS_BLF_INODE_BUF) {
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
- } else if (flags &
+ } else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
} else {
@@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans(
}
STATIC int
-xlog_recover_do_inode_trans(
+xlog_recover_inode_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_inode_log_format_t *in_f;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
xfs_dinode_t *dip;
- xfs_ino_t ino;
int len;
xfs_caddr_t src;
xfs_caddr_t dest;
@@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans(
xfs_icdinode_t *dicp;
int need_free = 0;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
-
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
in_f = item->ri_buf[0].i_addr;
} else {
@@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans(
if (error)
goto error;
}
- ino = in_f->ilf_ino;
- mp = log->l_mp;
/*
* Inode buffers can be freed, look out for it,
@@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans(
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
- dip, bp, ino);
- XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
+ dip, bp, in_f->ilf_ino);
+ XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
error = EFSCORRUPTED;
goto error;
@@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans(
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
- item, ino);
- XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
+ item, in_f->ilf_ino);
+ XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
error = EFSCORRUPTED;
goto error;
@@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans(
if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
- item, dip, bp, ino);
+ item, dip, bp, in_f->ilf_ino);
error = EFSCORRUPTED;
goto error;
}
@@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans(
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
- item, dip, bp, ino);
+ item, dip, bp, in_f->ilf_ino);
error = EFSCORRUPTED;
goto error;
}
}
if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
- item, dip, bp, ino,
+ item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
error = EFSCORRUPTED;
goto error;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
- item, dip, bp, ino, dicp->di_forkoff);
+ item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
error = EFSCORRUPTED;
goto error;
}
if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
- XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
+ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
XFS_ERRLEVEL_LOW, mp, dicp);
xfs_buf_relse(bp);
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans(
break;
default:
- xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
+ xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
ASSERT(0);
xfs_buf_relse(bp);
error = EIO;
@@ -2556,18 +2422,11 @@ error:
* of that type.
*/
STATIC int
-xlog_recover_do_quotaoff_trans(
+xlog_recover_quotaoff_pass1(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
- xfs_qoff_logformat_t *qoff_f;
-
- if (pass == XLOG_RECOVER_PASS2) {
- return (0);
- }
-
- qoff_f = item->ri_buf[0].i_addr;
+ xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
ASSERT(qoff_f);
/*
@@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans(
* Recover a dquot record
*/
STATIC int
-xlog_recover_do_dquot_trans(
+xlog_recover_dquot_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
struct xfs_disk_dquot *ddq, *recddq;
int error;
xfs_dq_logformat_t *dq_f;
uint type;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
- mp = log->l_mp;
/*
* Filesystems are required to send in quota flags at mount time.
@@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans(
if ((error = xfs_qm_dqcheck(recddq,
dq_f->qlf_id,
0, XFS_QMOPT_DOWARN,
- "xlog_recover_do_dquot_trans (log copy)"))) {
+ "xlog_recover_dquot_pass2 (log copy)"))) {
return XFS_ERROR(EIO);
}
ASSERT(dq_f->qlf_len == 1);
@@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans(
* minimal initialization then.
*/
if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_do_dquot_trans")) {
+ "xlog_recover_dquot_pass2")) {
xfs_buf_relse(bp);
return XFS_ERROR(EIO);
}
@@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans(
* LSN.
*/
STATIC int
-xlog_recover_do_efi_trans(
+xlog_recover_efi_pass2(
xlog_t *log,
xlog_recover_item_t *item,
- xfs_lsn_t lsn,
- int pass)
+ xfs_lsn_t lsn)
{
int error;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = log->l_mp;
xfs_efi_log_item_t *efip;
xfs_efi_log_format_t *efi_formatp;
- if (pass == XLOG_RECOVER_PASS1) {
- return 0;
- }
-
efi_formatp = item->ri_buf[0].i_addr;
- mp = log->l_mp;
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
&(efip->efi_format)))) {
xfs_efi_item_free(efip);
return error;
}
- efip->efi_next_extent = efi_formatp->efi_nextents;
- efip->efi_flags |= XFS_EFI_COMMITTED;
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
spin_lock(&log->l_ailp->xa_lock);
/*
* xfs_trans_ail_update() drops the AIL lock.
*/
- xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
+ xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
return 0;
}
@@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans(
* efd format structure. If we find it, we remove the efi from the
* AIL and free it.
*/
-STATIC void
-xlog_recover_do_efd_trans(
+STATIC int
+xlog_recover_efd_pass2(
xlog_t *log,
- xlog_recover_item_t *item,
- int pass)
+ xlog_recover_item_t *item)
{
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
@@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans(
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
- if (pass == XLOG_RECOVER_PASS1) {
- return;
- }
-
efd_formatp = item->ri_buf[0].i_addr;
ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
@@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans(
}
xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
-}
-
-/*
- * Perform the transaction
- *
- * If the transaction modifies a buffer or inode, do it now. Otherwise,
- * EFIs and EFDs get queued up by adding entries into the AIL for them.
- */
-STATIC int
-xlog_recover_do_trans(
- xlog_t *log,
- xlog_recover_t *trans,
- int pass)
-{
- int error = 0;
- xlog_recover_item_t *item;
-
- error = xlog_recover_reorder_trans(log, trans, pass);
- if (error)
- return error;
-
- list_for_each_entry(item, &trans->r_itemq, ri_list) {
- trace_xfs_log_recover_item_recover(log, trans, item, pass);
- switch (ITEM_TYPE(item)) {
- case XFS_LI_BUF:
- error = xlog_recover_do_buffer_trans(log, item, pass);
- break;
- case XFS_LI_INODE:
- error = xlog_recover_do_inode_trans(log, item, pass);
- break;
- case XFS_LI_EFI:
- error = xlog_recover_do_efi_trans(log, item,
- trans->r_lsn, pass);
- break;
- case XFS_LI_EFD:
- xlog_recover_do_efd_trans(log, item, pass);
- error = 0;
- break;
- case XFS_LI_DQUOT:
- error = xlog_recover_do_dquot_trans(log, item, pass);
- break;
- case XFS_LI_QUOTAOFF:
- error = xlog_recover_do_quotaoff_trans(log, item,
- pass);
- break;
- default:
- xlog_warn(
- "XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
- ASSERT(0);
- error = XFS_ERROR(EIO);
- break;
- }
-
- if (error)
- return error;
- }
return 0;
}
@@ -2852,7 +2638,7 @@ xlog_recover_do_trans(
*/
STATIC void
xlog_recover_free_trans(
- xlog_recover_t *trans)
+ struct xlog_recover *trans)
{
xlog_recover_item_t *item, *n;
int i;
@@ -2871,17 +2657,95 @@ xlog_recover_free_trans(
}
STATIC int
+xlog_recover_commit_pass1(
+ struct log *log,
+ struct xlog_recover *trans,
+ xlog_recover_item_t *item)
+{
+ trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
+
+ switch (ITEM_TYPE(item)) {
+ case XFS_LI_BUF:
+ return xlog_recover_buffer_pass1(log, item);
+ case XFS_LI_QUOTAOFF:
+ return xlog_recover_quotaoff_pass1(log, item);
+ case XFS_LI_INODE:
+ case XFS_LI_EFI:
+ case XFS_LI_EFD:
+ case XFS_LI_DQUOT:
+ /* nothing to do in pass 1 */
+ return 0;
+ default:
+ xlog_warn(
+ "XFS: invalid item type (%d) xlog_recover_commit_pass1",
+ ITEM_TYPE(item));
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+}
+
+STATIC int
+xlog_recover_commit_pass2(
+ struct log *log,
+ struct xlog_recover *trans,
+ xlog_recover_item_t *item)
+{
+ trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
+
+ switch (ITEM_TYPE(item)) {
+ case XFS_LI_BUF:
+ return xlog_recover_buffer_pass2(log, item);
+ case XFS_LI_INODE:
+ return xlog_recover_inode_pass2(log, item);
+ case XFS_LI_EFI:
+ return xlog_recover_efi_pass2(log, item, trans->r_lsn);
+ case XFS_LI_EFD:
+ return xlog_recover_efd_pass2(log, item);
+ case XFS_LI_DQUOT:
+ return xlog_recover_dquot_pass2(log, item);
+ case XFS_LI_QUOTAOFF:
+ /* nothing to do in pass2 */
+ return 0;
+ default:
+ xlog_warn(
+ "XFS: invalid item type (%d) xlog_recover_commit_pass2",
+ ITEM_TYPE(item));
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+}
+
+/*
+ * Perform the transaction.
+ *
+ * If the transaction modifies a buffer or inode, do it now. Otherwise,
+ * EFIs and EFDs get queued up by adding entries into the AIL for them.
+ */
+STATIC int
xlog_recover_commit_trans(
- xlog_t *log,
- xlog_recover_t *trans,
+ struct log *log,
+ struct xlog_recover *trans,
int pass)
{
- int error;
+ int error = 0;
+ xlog_recover_item_t *item;
hlist_del(&trans->r_list);
- if ((error = xlog_recover_do_trans(log, trans, pass)))
+
+ error = xlog_recover_reorder_trans(log, trans, pass);
+ if (error)
return error;
- xlog_recover_free_trans(trans); /* no error */
+
+ list_for_each_entry(item, &trans->r_itemq, ri_list) {
+ if (pass == XLOG_RECOVER_PASS1)
+ error = xlog_recover_commit_pass1(log, trans, item);
+ else
+ error = xlog_recover_commit_pass2(log, trans, item);
+ if (error)
+ return error;
+ }
+
+ xlog_recover_free_trans(trans);
return 0;
}
@@ -3011,7 +2875,7 @@ xlog_recover_process_efi(
xfs_extent_t *extp;
xfs_fsblock_t startblock_fsb;
- ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));
+ ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
/*
* First check the validity of the extents described by the
@@ -3050,7 +2914,7 @@ xlog_recover_process_efi(
extp->ext_len);
}
- efip->efi_flags |= XFS_EFI_RECOVERED;
+ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
error = xfs_trans_commit(tp, 0);
return error;
@@ -3107,7 +2971,7 @@ xlog_recover_process_efis(
* Skip EFIs that we've already processed.
*/
efip = (xfs_efi_log_item_t *)lip;
- if (efip->efi_flags & XFS_EFI_RECOVERED) {
+ if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
lip = xfs_trans_ail_cursor_next(ailp, &cur);
continue;
}
@@ -3724,7 +3588,7 @@ xlog_do_log_recovery(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
- int error;
+ int error, i;
ASSERT(head_blk != tail_blk);
@@ -3732,10 +3596,12 @@ xlog_do_log_recovery(
* First do a pass to find all of the cancelled buf log items.
* Store them in the buf_cancel_table for use in the second pass.
*/
- log->l_buf_cancel_table =
- (xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE *
- sizeof(xfs_buf_cancel_t*),
+ log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
+ sizeof(struct list_head),
KM_SLEEP);
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1);
if (error != 0) {
@@ -3754,7 +3620,7 @@ xlog_do_log_recovery(
int i;
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- ASSERT(log->l_buf_cancel_table[i] == NULL);
+ ASSERT(list_empty(&log->l_buf_cancel_table[i]));
}
#endif /* DEBUG */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 19e9dfa..d447aef 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -472,7 +472,7 @@ xfs_initialize_perag(
goto out_unwind;
pag->pag_agno = index;
pag->pag_mount = mp;
- rwlock_init(&pag->pag_ici_lock);
+ spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
spin_lock_init(&pag->pag_buf_lock);
@@ -975,6 +975,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
}
/*
+ * precalculate the low space thresholds for dynamic speculative preallocation.
+ */
+void
+xfs_set_low_space_thresholds(
+ struct xfs_mount *mp)
+{
+ int i;
+
+ for (i = 0; i < XFS_LOWSP_MAX; i++) {
+ __uint64_t space = mp->m_sb.sb_dblocks;
+
+ do_div(space, 100);
+ mp->m_low_space[i] = space * (i + 1);
+ }
+}
+
+
+/*
* Set whether we're using inode alignment.
*/
STATIC void
@@ -1196,6 +1214,9 @@ xfs_mountfs(
*/
xfs_set_rw_sizes(mp);
+ /* set the low space thresholds for dynamic preallocation */
+ xfs_set_low_space_thresholds(mp);
+
/*
* Set the inode cluster size.
* This may still be overridden by the file system
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5861b49..a62e897 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
xfs_mod_incore_sb(mp, field, delta, rsvd)
#endif
+/* dynamic preallocation free space thresholds, 5% down to 1% */
+enum {
+ XFS_LOWSP_1_PCNT = 0,
+ XFS_LOWSP_2_PCNT,
+ XFS_LOWSP_3_PCNT,
+ XFS_LOWSP_4_PCNT,
+ XFS_LOWSP_5_PCNT,
+ XFS_LOWSP_MAX,
+};
+
typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
@@ -202,6 +212,8 @@ typedef struct xfs_mount {
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
+ int64_t m_low_space[XFS_LOWSP_MAX];
+ /* low free space thresholds */
} xfs_mount_t;
/*
@@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
+extern void xfs_set_low_space_thresholds(struct xfs_mount *);
+
#endif /* __KERNEL__ */
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f6d956b..f80a067 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
* they could be immediately flushed and we'd have to race with the flusher
* trying to pull the item from the AIL as we add it.
*/
-void
+static void
xfs_trans_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t commit_lsn,
@@ -1425,6 +1425,83 @@ xfs_trans_committed(
xfs_trans_free(tp);
}
+static inline void
+xfs_log_item_batch_insert(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items,
+ xfs_lsn_t commit_lsn)
+{
+ int i;
+
+ spin_lock(&ailp->xa_lock);
+ /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
+ xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+
+ for (i = 0; i < nr_items; i++)
+ IOP_UNPIN(log_items[i], 0);
+}
+
+/*
+ * Bulk operation version of xfs_trans_committed that takes a log vector of
+ * items to insert into the AIL. This uses bulk AIL insertion techniques to
+ * minimise lock traffic.
+ */
+void
+xfs_trans_committed_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_vec *log_vector,
+ xfs_lsn_t commit_lsn,
+ int aborted)
+{
+#define LOG_ITEM_BATCH_SIZE 32
+ struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
+ struct xfs_log_vec *lv;
+ int i = 0;
+
+ /* unpin all the log items */
+ for (lv = log_vector; lv; lv = lv->lv_next ) {
+ struct xfs_log_item *lip = lv->lv_item;
+ xfs_lsn_t item_lsn;
+
+ if (aborted)
+ lip->li_flags |= XFS_LI_ABORTED;
+ item_lsn = IOP_COMMITTED(lip, commit_lsn);
+
+ /* item_lsn of -1 means the item was freed */
+ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
+ continue;
+
+ if (item_lsn != commit_lsn) {
+
+ /*
+ * Not a bulk update option due to unusual item_lsn.
+ * Push into AIL immediately, rechecking the lsn once
+ * we have the ail lock. Then unpin the item.
+ */
+ spin_lock(&ailp->xa_lock);
+ if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
+ xfs_trans_ail_update(ailp, lip, item_lsn);
+ else
+ spin_unlock(&ailp->xa_lock);
+ IOP_UNPIN(lip, 0);
+ continue;
+ }
+
+ /* Item is a candidate for bulk AIL insert. */
+ log_items[i++] = lv->lv_item;
+ if (i >= LOG_ITEM_BATCH_SIZE) {
+ xfs_log_item_batch_insert(ailp, log_items,
+ LOG_ITEM_BATCH_SIZE, commit_lsn);
+ i = 0;
+ }
+ }
+
+ /* make sure we insert the remainder! */
+ if (i)
+ xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+}
+
/*
* Called from the trans_commit code when we notice that
* the filesystem is in the middle of a forced shutdown.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 246286b..c2042b7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -294,8 +294,8 @@ struct xfs_log_item_desc {
#define XFS_ALLOC_BTREE_REF 2
#define XFS_BMAP_BTREE_REF 2
#define XFS_DIR_BTREE_REF 2
+#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
-#define XFS_INO_REF 1
#define XFS_DQUOT_REF 1
#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dc90695..c5bbbc4 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,8 @@
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
+STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
+STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
@@ -449,129 +449,152 @@ xfs_trans_unlocked_item(
xfs_log_move_tail(ailp->xa_mount, 1);
} /* xfs_trans_unlocked_item */
-
/*
- * Update the position of the item in the AIL with the new
- * lsn. If it is not yet in the AIL, add it. Otherwise, move
- * it to its new position by removing it and re-adding it.
+ * xfs_trans_ail_update - bulk AIL insertion operation.
+ *
+ * @xfs_trans_ail_update takes an array of log items that all need to be
+ * positioned at the same LSN in the AIL. If an item is not in the AIL, it will
+ * be added. Otherwise, it will be repositioned by removing it and re-adding
+ * it to the AIL. If we move the first item in the AIL, update the log tail to
+ * match the new minimum LSN in the AIL.
*
- * Wakeup anyone with an lsn less than the item's lsn. If the item
- * we move in the AIL is the minimum one, update the tail lsn in the
- * log manager.
+ * This function takes the AIL lock once to execute the update operations on
+ * all the items in the array, and as such should not be called with the AIL
+ * lock held. As a result, once we have the AIL lock, we need to check each log
+ * item LSN to confirm it needs to be moved forward in the AIL.
*
- * This function must be called with the AIL lock held. The lock
- * is dropped before returning.
+ * To optimise the insert operation, we delete all the items from the AIL in
+ * the first pass, moving them into a temporary list, then splice the temporary
+ * list into the correct position in the AIL. This avoids needing to do an
+ * insert operation on every item.
+ *
+ * This function must be called with the AIL lock held. The lock is dropped
+ * before returning.
*/
void
-xfs_trans_ail_update(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
+xfs_trans_ail_update_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock)
{
- xfs_log_item_t *dlip = NULL;
- xfs_log_item_t *mlip; /* ptr to minimum lip */
+ xfs_log_item_t *mlip;
xfs_lsn_t tail_lsn;
+ int mlip_changed = 0;
+ int i;
+ LIST_HEAD(tmp);
mlip = xfs_ail_min(ailp);
- if (lip->li_flags & XFS_LI_IN_AIL) {
- dlip = xfs_ail_delete(ailp, lip);
- ASSERT(dlip == lip);
- xfs_trans_ail_cursor_clear(ailp, dlip);
- } else {
- lip->li_flags |= XFS_LI_IN_AIL;
+ for (i = 0; i < nr_items; i++) {
+ struct xfs_log_item *lip = log_items[i];
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ /* check if we really need to move the item */
+ if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
+ continue;
+
+ xfs_ail_delete(ailp, lip);
+ if (mlip == lip)
+ mlip_changed = 1;
+ } else {
+ lip->li_flags |= XFS_LI_IN_AIL;
+ }
+ lip->li_lsn = lsn;
+ list_add(&lip->li_ail, &tmp);
}
- lip->li_lsn = lsn;
- xfs_ail_insert(ailp, lip);
+ xfs_ail_splice(ailp, &tmp, lsn);
- if (mlip == dlip) {
- mlip = xfs_ail_min(ailp);
- /*
- * It is not safe to access mlip after the AIL lock is
- * dropped, so we must get a copy of li_lsn before we do
- * so. This is especially important on 32-bit platforms
- * where accessing and updating 64-bit values like li_lsn
- * is not atomic.
- */
- tail_lsn = mlip->li_lsn;
- spin_unlock(&ailp->xa_lock);
- xfs_log_move_tail(ailp->xa_mount, tail_lsn);
- } else {
+ if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
+ return;
}
-
-} /* xfs_trans_update_ail */
+ /*
+ * It is not safe to access mlip after the AIL lock is dropped, so we
+ * must get a copy of li_lsn before we do so. This is especially
+ * important on 32-bit platforms where accessing and updating 64-bit
+ * values like li_lsn is not atomic.
+ */
+ mlip = xfs_ail_min(ailp);
+ tail_lsn = mlip->li_lsn;
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount, tail_lsn);
+}
/*
- * Delete the given item from the AIL. It must already be in
- * the AIL.
+ * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
*
- * Wakeup anyone with an lsn less than item's lsn. If the item
- * we delete in the AIL is the minimum one, update the tail lsn in the
- * log manager.
+ * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
+ * removed from the AIL. The caller is already holding the AIL lock, and done
+ * all the checks necessary to ensure the items passed in via @log_items are
+ * ready for deletion. This includes checking that the items are in the AIL.
*
- * Clear the IN_AIL flag from the item, reset its lsn to 0, and
- * bump the AIL's generation count to indicate that the tree
- * has changed.
+ * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
+ * flag from the item and reset the item's lsn to 0. If we remove the first
+ * item in the AIL, update the log tail to match the new minimum LSN in the
+ * AIL.
*
- * This function must be called with the AIL lock held. The lock
- * is dropped before returning.
+ * This function will not drop the AIL lock until all items are removed from
+ * the AIL to minimise the amount of lock traffic on the AIL. This does not
+ * greatly increase the AIL hold time, but does significantly reduce the amount
+ * of traffic on the lock, especially during IO completion.
+ *
+ * This function must be called with the AIL lock held. The lock is dropped
+ * before returning.
*/
void
-xfs_trans_ail_delete(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip) __releases(ailp->xa_lock)
+xfs_trans_ail_delete_bulk(
+ struct xfs_ail *ailp,
+ struct xfs_log_item **log_items,
+ int nr_items) __releases(ailp->xa_lock)
{
- xfs_log_item_t *dlip;
xfs_log_item_t *mlip;
xfs_lsn_t tail_lsn;
+ int mlip_changed = 0;
+ int i;
- if (lip->li_flags & XFS_LI_IN_AIL) {
- mlip = xfs_ail_min(ailp);
- dlip = xfs_ail_delete(ailp, lip);
- ASSERT(dlip == lip);
- xfs_trans_ail_cursor_clear(ailp, dlip);
-
+ mlip = xfs_ail_min(ailp);
- lip->li_flags &= ~XFS_LI_IN_AIL;
- lip->li_lsn = 0;
+ for (i = 0; i < nr_items; i++) {
+ struct xfs_log_item *lip = log_items[i];
+ if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+ struct xfs_mount *mp = ailp->xa_mount;
- if (mlip == dlip) {
- mlip = xfs_ail_min(ailp);
- /*
- * It is not safe to access mlip after the AIL lock
- * is dropped, so we must get a copy of li_lsn
- * before we do so. This is especially important
- * on 32-bit platforms where accessing and updating
- * 64-bit values like li_lsn is not atomic.
- */
- tail_lsn = mlip ? mlip->li_lsn : 0;
- spin_unlock(&ailp->xa_lock);
- xfs_log_move_tail(ailp->xa_mount, tail_lsn);
- } else {
spin_unlock(&ailp->xa_lock);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
+ "%s: attempting to delete a log item that is not in the AIL",
+ __func__);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ }
+ return;
}
+
+ xfs_ail_delete(ailp, lip);
+ lip->li_flags &= ~XFS_LI_IN_AIL;
+ lip->li_lsn = 0;
+ if (mlip == lip)
+ mlip_changed = 1;
}
- else {
- /*
- * If the file system is not being shutdown, we are in
- * serious trouble if we get to this stage.
- */
- struct xfs_mount *mp = ailp->xa_mount;
+ if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
- "%s: attempting to delete a log item that is not in the AIL",
- __func__);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- }
+ return;
}
-}
-
+ /*
+ * It is not safe to access mlip after the AIL lock is dropped, so we
+ * must get a copy of li_lsn before we do so. This is especially
+ * important on 32-bit platforms where accessing and updating 64-bit
+ * values like li_lsn is not atomic. It is possible we've emptied the
+ * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
+ */
+ mlip = xfs_ail_min(ailp);
+ tail_lsn = mlip ? mlip->li_lsn : 0;
+ spin_unlock(&ailp->xa_lock);
+ xfs_log_move_tail(ailp->xa_mount, tail_lsn);
+}
/*
* The active item list (AIL) is a doubly linked list of log
@@ -623,16 +646,13 @@ xfs_trans_ail_destroy(
}
/*
- * Insert the given log item into the AIL.
- * We almost always insert at the end of the list, so on inserts
- * we search from the end of the list to find where the
- * new item belongs.
+ * splice the log item list into the AIL at the given LSN.
*/
STATIC void
-xfs_ail_insert(
+xfs_ail_splice(
struct xfs_ail *ailp,
- xfs_log_item_t *lip)
-/* ARGSUSED */
+ struct list_head *list,
+ xfs_lsn_t lsn)
{
xfs_log_item_t *next_lip;
@@ -640,39 +660,33 @@ xfs_ail_insert(
* If the list is empty, just insert the item.
*/
if (list_empty(&ailp->xa_ail)) {
- list_add(&lip->li_ail, &ailp->xa_ail);
+ list_splice(list, &ailp->xa_ail);
return;
}
list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
+ if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
break;
}
ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
- (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
-
- list_add(&lip->li_ail, &next_lip->li_ail);
+ (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
- xfs_ail_check(ailp, lip);
+ list_splice_init(list, &next_lip->li_ail);
return;
}
/*
* Delete the given item from the AIL. Return a pointer to the item.
*/
-/*ARGSUSED*/
-STATIC xfs_log_item_t *
+STATIC void
xfs_ail_delete(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
-/* ARGSUSED */
{
xfs_ail_check(ailp, lip);
-
list_del(&lip->li_ail);
-
- return lip;
+ xfs_trans_ail_cursor_clear(ailp, lip);
}
/*
@@ -682,7 +696,6 @@ xfs_ail_delete(
STATIC xfs_log_item_t *
xfs_ail_min(
struct xfs_ail *ailp)
-/* ARGSUSED */
{
if (list_empty(&ailp->xa_ail))
return NULL;
@@ -699,7 +712,6 @@ STATIC xfs_log_item_t *
xfs_ail_next(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
-/* ARGSUSED */
{
if (lip->li_ail.next == &ailp->xa_ail)
return NULL;
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index f783d5e..f7590f5 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
tp->t_flags |= XFS_TRANS_DIRTY;
efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
- next_extent = efip->efi_next_extent;
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
ASSERT(next_extent < efip->efi_format.efi_nextents);
extp = &(efip->efi_format.efi_extents[next_extent]);
extp->ext_start = start_block;
extp->ext_len = ext_len;
- efip->efi_next_extent++;
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 62da86c..35162c2 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -22,15 +22,17 @@ struct xfs_log_item;
struct xfs_log_item_desc;
struct xfs_mount;
struct xfs_trans;
+struct xfs_ail;
+struct xfs_log_vec;
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
int flags);
-void xfs_trans_item_committed(struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn, int aborted);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
+void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
+ xfs_lsn_t commit_lsn, int aborted);
/*
* AIL traversal cursor.
*
@@ -73,12 +75,29 @@ struct xfs_ail {
/*
* From xfs_trans_ail.c
*/
-void xfs_trans_ail_update(struct xfs_ail *ailp,
- struct xfs_log_item *lip, xfs_lsn_t lsn)
- __releases(ailp->xa_lock);
-void xfs_trans_ail_delete(struct xfs_ail *ailp,
- struct xfs_log_item *lip)
- __releases(ailp->xa_lock);
+void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_log_item **log_items, int nr_items,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock);
+static inline void
+xfs_trans_ail_update(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn) __releases(ailp->xa_lock)
+{
+ xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+}
+
+void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
+ struct xfs_log_item **log_items, int nr_items)
+ __releases(ailp->xa_lock);
+static inline void
+xfs_trans_ail_delete(
+ struct xfs_ail *ailp,
+ xfs_log_item_t *lip) __releases(ailp->xa_lock)
+{
+ xfs_trans_ail_delete_bulk(ailp, &lip, 1);
+}
+
void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
void xfs_trans_unlocked_item(struct xfs_ail *,
xfs_log_item_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c4..d8e6f8c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
}
- if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
- ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ if (ip->i_d.di_nlink == 0)
+ return 0;
- /*
- * If we can't get the iolock just skip truncating
- * the blocks past EOF because we could deadlock
- * with the mmap_sem otherwise. We'll get another
- * chance to drop them once the last reference to
- * the inode is dropped, so we'll never leak blocks
- * permanently.
- */
- error = xfs_free_eofblocks(mp, ip,
- XFS_FREE_EOF_TRYLOCK);
- if (error)
- return error;
- }
- }
+ if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
+ ip->i_delayed_blks > 0)) &&
+ (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
+ (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ /*
+ * If we can't get the iolock just skip truncating the blocks
+ * past EOF because we could deadlock with the mmap_sem
+ * otherwise. We'll get another chance to drop them once the
+ * last reference to the inode is dropped, so we'll never leak
+ * blocks permanently.
+ *
+ * Further, check if the inode is being opened, written and
+ * closed frequently and we have delayed allocation blocks
+ * oustanding (e.g. streaming writes from the NFS server),
+ * truncating the blocks past EOF will cause fragmentation to
+ * occur.
+ *
+ * In this case don't do the truncation, either, but we have to
+ * be careful how we detect this case. Blocks beyond EOF show
+ * up as i_delayed_blks even when the inode is clean, so we
+ * need to truncate them away first before checking for a dirty
+ * release. Hence on the first dirty close we will still remove
+ * the speculative allocation, but after that we will leave it
+ * in place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+
+ error = xfs_free_eofblocks(mp, ip,
+ XFS_FREE_EOF_TRYLOCK);
+ if (error)
+ return error;
+
+ /* delalloc blocks after truncation means it really is dirty */
+ if (ip->i_delayed_blks)
+ xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
+ }
return 0;
}
diff --git a/include/linux/bfin_mac.h b/include/linux/bfin_mac.h
index 904dec7..a69554e 100644
--- a/include/linux/bfin_mac.h
+++ b/include/linux/bfin_mac.h
@@ -24,6 +24,7 @@ struct bfin_mii_bus_platform_data {
const unsigned short *mac_peripherals;
int phy_mode;
unsigned int phy_mask;
+ unsigned short vlan1_mask, vlan2_mask;
};
#endif
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a90b389..1c70028 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -44,34 +44,24 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
extern int ddebug_remove_module(const char *mod_name);
#define dynamic_pr_debug(fmt, ...) do { \
- __label__ do_printk; \
- __label__ out; \
static struct _ddebug descriptor \
__used \
__attribute__((section("__verbose"), aligned(8))) = \
{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
_DPRINTK_FLAGS_DEFAULT }; \
- JUMP_LABEL(&descriptor.enabled, do_printk); \
- goto out; \
-do_printk: \
- printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
-out: ; \
+ if (unlikely(descriptor.enabled)) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
} while (0)
#define dynamic_dev_dbg(dev, fmt, ...) do { \
- __label__ do_printk; \
- __label__ out; \
static struct _ddebug descriptor \
__used \
__attribute__((section("__verbose"), aligned(8))) = \
{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
_DPRINTK_FLAGS_DEFAULT }; \
- JUMP_LABEL(&descriptor.enabled, do_printk); \
- goto out; \
-do_printk: \
- dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \
-out: ; \
+ if (unlikely(descriptor.enabled)) \
+ dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \
} while (0)
#else
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f16a010..bec8b82 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -48,8 +48,10 @@ extern int eth_validate_addr(struct net_device *dev);
-extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+ unsigned int rxqs);
#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
+#define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
/**
* is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 6ce1bca..65990ef 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -724,21 +724,30 @@ struct ext3_dir_entry_2 {
~EXT3_DIR_ROUND)
#define EXT3_MAX_REC_LEN ((1<<16)-1)
+/*
+ * Tests against MAX_REC_LEN etc were put in place for 64k block
+ * sizes; if that is not possible on this arch, we can skip
+ * those tests and speed things up.
+ */
static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == EXT3_MAX_REC_LEN)
return 1 << 16;
+#endif
return len;
}
static inline __le16 ext3_rec_len_to_disk(unsigned len)
{
+#if (PAGE_CACHE_SIZE >= 65536)
if (len == (1 << 16))
return cpu_to_le16(EXT3_MAX_REC_LEN);
else if (len > (1 << 16))
BUG();
+#endif
return cpu_to_le16(len);
}
@@ -856,6 +865,7 @@ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
extern void ext3_init_block_alloc_info(struct inode *);
extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
+extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
/* dir.c */
extern int ext3_check_dir_entry(const char *, struct inode *,
diff --git a/include/linux/fec.h b/include/linux/fec.h
index 5d3523d..bcff455 100644
--- a/include/linux/fec.h
+++ b/include/linux/fec.h
@@ -3,6 +3,8 @@
* Copyright (c) 2009 Orex Computed Radiography
* Baruch Siach <baruch@tkos.co.il>
*
+ * Copyright (C) 2010 Freescale Semiconductor, Inc.
+ *
* Header file for the FEC platform data
*
* This program is free software; you can redistribute it and/or modify
@@ -16,6 +18,7 @@
struct fec_platform_data {
phy_interface_t phy;
+ unsigned char mac[ETH_ALEN];
};
#endif
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index f7e73c3..dd3f201 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -103,7 +103,7 @@ struct __fdb_entry {
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-typedef int (*br_should_route_hook_t)(struct sk_buff *skb);
+typedef int br_should_route_hook_t(struct sk_buff *skb);
extern br_should_route_hook_t __rcu *br_should_route_hook;
#endif
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2ae86aa..27e79c2 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -94,7 +94,7 @@ extern void jbd2_free(void *ptr, size_t size);
*
* This is an opaque datatype.
**/
-typedef struct handle_s handle_t; /* Atomic operation type */
+typedef struct jbd2_journal_handle handle_t; /* Atomic operation type */
/**
@@ -416,7 +416,7 @@ struct jbd2_revoke_table_s;
* in so it can be fixed later.
*/
-struct handle_s
+struct jbd2_journal_handle
{
/* Which compound transaction is this update a part of? */
transaction_t *h_transaction;
@@ -1158,6 +1158,22 @@ static inline void jbd2_free_handle(handle_t *handle)
kmem_cache_free(jbd2_handle_cache, handle);
}
+/*
+ * jbd2_inode management (optional, for those file systems that want to use
+ * dynamically allocated jbd2_inode structures)
+ */
+extern struct kmem_cache *jbd2_inode_cache;
+
+static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags)
+{
+ return kmem_cache_alloc(jbd2_inode_cache, gfp_flags);
+}
+
+static inline void jbd2_free_inode(struct jbd2_inode *jinode)
+{
+ kmem_cache_free(jbd2_inode_cache, jinode);
+}
+
/* Primary revoke support */
#define JOURNAL_REVOKE_DEFAULT_HASH 256
extern int jbd2_journal_init_revoke(journal_t *, int);
diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h
index 34b2b7f..257d377 100644
--- a/include/linux/lockd/debug.h
+++ b/include/linux/lockd/debug.h
@@ -44,14 +44,4 @@
#define NLMDBG_XDR 0x0100
#define NLMDBG_ALL 0x7fff
-
-/*
- * Support for printing NLM cookies in dprintk()
- */
-#ifdef RPC_DEBUG
-struct nlm_cookie;
-/* Call this function with the BKL held (it uses a static buffer) */
-extern const char *nlmdbg_cookie2a(const struct nlm_cookie *);
-#endif
-
#endif /* LINUX_LOCKD_DEBUG_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 2dee05e..ff9abff 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -202,9 +202,9 @@ extern u32 nsm_local_state;
* Lockd client functions
*/
struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-void nlm_release_call(struct nlm_rqst *);
int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+void nlmclnt_release_call(struct nlm_rqst *);
struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
void nlmclnt_finish_block(struct nlm_wait *block);
int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
@@ -223,13 +223,14 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
const u32 version,
const char *hostname,
int noresvport);
+void nlmclnt_release_host(struct nlm_host *);
struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const char *hostname,
const size_t hostname_len);
+void nlmsvc_release_host(struct nlm_host *);
struct rpc_clnt * nlm_bind_host(struct nlm_host *);
void nlm_rebind_host(struct nlm_host *);
struct nlm_host * nlm_get_host(struct nlm_host *);
-void nlm_release_host(struct nlm_host *);
void nlm_shutdown_hosts(void);
void nlm_host_rebooted(const struct nlm_reboot *);
@@ -267,6 +268,7 @@ unsigned long nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
+void nlmsvc_release_call(struct nlm_rqst *);
/*
* File handling for the server personality
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 54cbbac..5525d37 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -18,6 +18,17 @@ struct mb_cache_entry {
} e_index;
};
+struct mb_cache {
+ struct list_head c_cache_list;
+ const char *c_name;
+ atomic_t c_entry_count;
+ int c_max_entries;
+ int c_bucket_bits;
+ struct kmem_cache *c_entry_cache;
+ struct list_head *c_block_hash;
+ struct list_head *c_index_hash;
+};
+
/* Functions on caches */
struct mb_cache *mb_cache_create(const char *, int);
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 085f041..8e70310 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -57,6 +57,10 @@
* is configured in 4-bit mode.
*/
#define TMIO_MMC_BLKSZ_2BYTES (1 << 1)
+/*
+ * Some controllers can support SDIO IRQ signalling.
+ */
+#define TMIO_MMC_SDIO_IRQ (1 << 2)
int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
@@ -66,6 +70,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
struct tmio_mmc_dma {
void *chan_priv_tx;
void *chan_priv_rx;
+ int alignment_shift;
};
/*
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
new file mode 100644
index 0000000..16b0261
--- /dev/null
+++ b/include/linux/mmc/dw_mmc.h
@@ -0,0 +1,217 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ * (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_MMC_DW_MMC_H_
+#define _LINUX_MMC_DW_MMC_H_
+
+#define MAX_MCI_SLOTS 2
+
+enum dw_mci_state {
+ STATE_IDLE = 0,
+ STATE_SENDING_CMD,
+ STATE_SENDING_DATA,
+ STATE_DATA_BUSY,
+ STATE_SENDING_STOP,
+ STATE_DATA_ERROR,
+};
+
+enum {
+ EVENT_CMD_COMPLETE = 0,
+ EVENT_XFER_COMPLETE,
+ EVENT_DATA_COMPLETE,
+ EVENT_DATA_ERROR,
+ EVENT_XFER_ERROR
+};
+
+struct mmc_data;
+
+/**
+ * struct dw_mci - MMC controller state shared between all slots
+ * @lock: Spinlock protecting the queue and associated data.
+ * @regs: Pointer to MMIO registers.
+ * @sg: Scatterlist entry currently being processed by PIO code, if any.
+ * @pio_offset: Offset into the current scatterlist entry.
+ * @cur_slot: The slot which is currently using the controller.
+ * @mrq: The request currently being processed on @cur_slot,
+ * or NULL if the controller is idle.
+ * @cmd: The command currently being sent to the card, or NULL.
+ * @data: The data currently being transferred, or NULL if no data
+ * transfer is in progress.
+ * @use_dma: Whether DMA channel is initialized or not.
+ * @sg_dma: Bus address of DMA buffer.
+ * @sg_cpu: Virtual address of DMA buffer.
+ * @dma_ops: Pointer to platform-specific DMA callbacks.
+ * @cmd_status: Snapshot of SR taken upon completion of the current
+ * command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @data_status: Snapshot of SR taken upon completion of the current
+ * data transfer. Only valid when EVENT_DATA_COMPLETE or
+ * EVENT_DATA_ERROR is pending.
+ * @stop_cmdr: Value to be loaded into CMDR when the stop command is
+ * to be sent.
+ * @dir_status: Direction of current transfer.
+ * @tasklet: Tasklet running the request state machine.
+ * @card_tasklet: Tasklet handling card detect.
+ * @pending_events: Bitmask of events flagged by the interrupt handler
+ * to be processed by the tasklet.
+ * @completed_events: Bitmask of events which the state machine has
+ * processed.
+ * @state: Tasklet state.
+ * @queue: List of slots waiting for access to the controller.
+ * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus
+ * rate and timeout calculations.
+ * @current_speed: Configured rate of the controller.
+ * @num_slots: Number of slots available.
+ * @pdev: Platform device associated with the MMC controller.
+ * @pdata: Platform data associated with the MMC controller.
+ * @slot: Slots sharing this MMC controller.
+ * @data_shift: log2 of FIFO item size.
+ * @push_data: Pointer to FIFO push function.
+ * @pull_data: Pointer to FIFO pull function.
+ * @quirks: Set of quirks that apply to specific versions of the IP.
+ *
+ * Locking
+ * =======
+ *
+ * @lock is a softirq-safe spinlock protecting @queue as well as
+ * @cur_slot, @mrq and @state. These must always be updated
+ * at the same time while holding @lock.
+ *
+ * The @mrq field of struct dw_mci_slot is also protected by @lock,
+ * and must always be written at the same time as the slot is added to
+ * @queue.
+ *
+ * @pending_events and @completed_events are accessed using atomic bit
+ * operations, so they don't need any locking.
+ *
+ * None of the fields touched by the interrupt handler need any
+ * locking. However, ordering is important: Before EVENT_DATA_ERROR or
+ * EVENT_DATA_COMPLETE is set in @pending_events, all data-related
+ * interrupts must be disabled and @data_status updated with a
+ * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the
+ * CMDRDY interupt must be disabled and @cmd_status updated with a
+ * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the
+ * bytes_xfered field of @data must be written. This is ensured by
+ * using barriers.
+ */
+struct dw_mci {
+ spinlock_t lock;
+ void __iomem *regs;
+
+ struct scatterlist *sg;
+ unsigned int pio_offset;
+
+ struct dw_mci_slot *cur_slot;
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ struct mmc_data *data;
+
+ /* DMA interface members*/
+ int use_dma;
+
+ dma_addr_t sg_dma;
+ void *sg_cpu;
+ struct dw_mci_dma_ops *dma_ops;
+#ifdef CONFIG_MMC_DW_IDMAC
+ unsigned int ring_size;
+#else
+ struct dw_mci_dma_data *dma_data;
+#endif
+ u32 cmd_status;
+ u32 data_status;
+ u32 stop_cmdr;
+ u32 dir_status;
+ struct tasklet_struct tasklet;
+ struct tasklet_struct card_tasklet;
+ unsigned long pending_events;
+ unsigned long completed_events;
+ enum dw_mci_state state;
+ struct list_head queue;
+
+ u32 bus_hz;
+ u32 current_speed;
+ u32 num_slots;
+ struct platform_device *pdev;
+ struct dw_mci_board *pdata;
+ struct dw_mci_slot *slot[MAX_MCI_SLOTS];
+
+ /* FIFO push and pull */
+ int data_shift;
+ void (*push_data)(struct dw_mci *host, void *buf, int cnt);
+ void (*pull_data)(struct dw_mci *host, void *buf, int cnt);
+
+ /* Workaround flags */
+ u32 quirks;
+};
+
+/* DMA ops for Internal/External DMAC interface */
+struct dw_mci_dma_ops {
+ /* DMA Ops */
+ int (*init)(struct dw_mci *host);
+ void (*start)(struct dw_mci *host, unsigned int sg_len);
+ void (*complete)(struct dw_mci *host);
+ void (*stop)(struct dw_mci *host);
+ void (*cleanup)(struct dw_mci *host);
+ void (*exit)(struct dw_mci *host);
+};
+
+/* IP Quirks/flags. */
+/* No special quirks or flags to cater for */
+#define DW_MCI_QUIRK_NONE 0
+/* DTO fix for command transmission with IDMAC configured */
+#define DW_MCI_QUIRK_IDMAC_DTO 1
+/* delay needed between retries on some 2.11a implementations */
+#define DW_MCI_QUIRK_RETRY_DELAY 2
+/* High Speed Capable - Supports HS cards (upto 50MHz) */
+#define DW_MCI_QUIRK_HIGHSPEED 4
+
+
+struct dma_pdata;
+
+struct block_settings {
+ unsigned short max_segs; /* see blk_queue_max_segments */
+ unsigned int max_blk_size; /* maximum size of one mmc block */
+ unsigned int max_blk_count; /* maximum number of blocks in one req*/
+ unsigned int max_req_size; /* maximum number of bytes in one req*/
+ unsigned int max_seg_size; /* see blk_queue_max_segment_size */
+};
+
+/* Board platform data */
+struct dw_mci_board {
+ u32 num_slots;
+
+ u32 quirks; /* Workaround / Quirk flags */
+ unsigned int bus_hz; /* Bus speed */
+
+ /* delay in mS before detecting cards after interrupt */
+ u32 detect_delay_ms;
+
+ int (*init)(u32 slot_id, irq_handler_t , void *);
+ int (*get_ro)(u32 slot_id);
+ int (*get_cd)(u32 slot_id);
+ int (*get_ocr)(u32 slot_id);
+ int (*get_bus_wd)(u32 slot_id);
+ /*
+ * Enable power to selected slot and set voltage to desired level.
+ * Voltage levels are specified using MMC_VDD_xxx defines defined
+ * in linux/mmc/host.h file.
+ */
+ void (*setpower)(u32 slot_id, u32 volt);
+ void (*exit)(u32 slot_id);
+ void (*select_slot)(u32 slot_id);
+
+ struct dw_mci_dma_ops *dma_ops;
+ struct dma_pdata *data;
+ struct block_settings *blk_settings;
+};
+
+#endif /* _LINUX_MMC_DW_MMC_H_ */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 30f6fad..bcb793e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -131,6 +131,9 @@ struct mmc_host {
unsigned int f_max;
unsigned int f_init;
u32 ocr_avail;
+ u32 ocr_avail_sdio; /* SDIO-specific OCR */
+ u32 ocr_avail_sd; /* SD-specific OCR */
+ u32 ocr_avail_mmc; /* MMC-specific OCR */
struct notifier_block pm_notify;
#define MMC_VDD_165_195 0x00000080 /* VDD voltage 1.65 - 1.95 */
@@ -169,9 +172,20 @@ struct mmc_host {
#define MMC_CAP_1_2V_DDR (1 << 12) /* can support */
/* DDR mode at 1.2V */
#define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */
+#define MMC_CAP_BUS_WIDTH_TEST (1 << 14) /* CMD14/CMD19 bus width ok */
mmc_pm_flag_t pm_caps; /* supported pm features */
+#ifdef CONFIG_MMC_CLKGATE
+ int clk_requests; /* internal reference counter */
+ unsigned int clk_delay; /* number of MCI clk hold cycles */
+ bool clk_gated; /* clock gated */
+ struct work_struct clk_gate_work; /* delayed clock gate */
+ unsigned int clk_old; /* old clock value cache */
+ spinlock_t clk_lock; /* lock for clk fields */
+ struct mutex clk_gate_mutex; /* mutex for clock gating */
+#endif
+
/* host specific block data */
unsigned int max_seg_size; /* see blk_queue_max_segment_size */
unsigned short max_segs; /* see blk_queue_max_segments */
@@ -307,5 +321,10 @@ static inline int mmc_card_is_removable(struct mmc_host *host)
return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable;
}
+static inline int mmc_card_is_powered_resumed(struct mmc_host *host)
+{
+ return host->pm_flags & MMC_PM_KEEP_POWER;
+}
+
#endif
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 956fbd87..612301f 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -40,7 +40,9 @@
#define MMC_READ_DAT_UNTIL_STOP 11 /* adtc [31:0] dadr R1 */
#define MMC_STOP_TRANSMISSION 12 /* ac R1b */
#define MMC_SEND_STATUS 13 /* ac [31:16] RCA R1 */
+#define MMC_BUS_TEST_R 14 /* adtc R1 */
#define MMC_GO_INACTIVE_STATE 15 /* ac [31:16] RCA */
+#define MMC_BUS_TEST_W 19 /* adtc R1 */
#define MMC_SPI_READ_OCR 58 /* spi spi_R3 */
#define MMC_SPI_CRC_ON_OFF 59 /* spi [0:0] flag spi_R1 */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 1fdc673..83bd9f7 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -83,6 +83,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 (1<<28)
/* Controller doesn't have HISPD bit field in HI-SPEED SD card */
#define SDHCI_QUIRK_NO_HISPD_BIT (1<<29)
+/* Controller treats ADMA descriptors with length 0000h incorrectly */
+#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC (1<<30)
int irq; /* Device IRQ */
void __iomem *ioaddr; /* Mapped address */
@@ -139,6 +141,10 @@ struct sdhci_host {
unsigned int caps; /* Alternative capabilities */
+ unsigned int ocr_avail_sdio; /* OCR bit masks */
+ unsigned int ocr_avail_sd;
+ unsigned int ocr_avail_mmc;
+
unsigned long private[0] ____cacheline_aligned;
};
#endif /* __SDHCI_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0f6b1c9..be4957c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2191,11 +2191,15 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
extern void ether_setup(struct net_device *dev);
/* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
void (*setup)(struct net_device *),
- unsigned int queue_count);
+ unsigned int txqs, unsigned int rxqs);
#define alloc_netdev(sizeof_priv, name, setup) \
- alloc_netdev_mq(sizeof_priv, name, setup, 1)
+ alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
+
+#define alloc_netdev_mq(sizeof_priv, name, setup, count) \
+ alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
+
extern int register_netdev(struct net_device *dev);
extern void unregister_netdev(struct net_device *dev);
@@ -2303,7 +2307,7 @@ unsigned long netdev_fix_features(unsigned long features, const char *name);
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
struct net_device *dev);
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev);
+int netif_skb_features(struct sk_buff *skb);
static inline int net_gso_ok(int features, int gso_type)
{
@@ -2317,16 +2321,10 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
(!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
}
-static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+static inline int netif_needs_gso(struct sk_buff *skb, int features)
{
- if (skb_is_gso(skb)) {
- int features = netif_get_vlan_features(skb, dev);
-
- return (!skb_gso_ok(skb, features) ||
- unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
- }
-
- return 0;
+ return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+ unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
}
static inline void netif_set_gso_max_size(struct net_device *dev,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec0..6712e71 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
* necessary for reading the counters.
*/
struct xt_info_lock {
- spinlock_t lock;
+ seqlock_t lock;
unsigned char readers;
};
DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
local_bh_disable();
lock = &__get_cpu_var(xt_info_locks);
if (likely(!lock->readers++))
- spin_lock(&lock->lock);
+ write_seqlock(&lock->lock);
}
static inline void xt_info_rdunlock_bh(void)
@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
if (likely(!--lock->readers))
- spin_unlock(&lock->lock);
+ write_sequnlock(&lock->lock);
local_bh_enable();
}
@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
*/
static inline void xt_info_wrlock(unsigned int cpu)
{
- spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+ write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
}
static inline void xt_info_wrunlock(unsigned int cpu)
{
- spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+ write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
}
/*
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index ac33806..6ccfe3b 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -11,6 +11,9 @@
#define NFS3_MAXGROUPS 16
#define NFS3_FHSIZE 64
#define NFS3_COOKIESIZE 4
+#define NFS3_CREATEVERFSIZE 8
+#define NFS3_COOKIEVERFSIZE 8
+#define NFS3_WRITEVERFSIZE 8
#define NFS3_FIFO_DEV (-1)
#define NFS3MODE_FMT 0170000
#define NFS3MODE_DIR 0040000
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 4925b22..9b46300 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -111,9 +111,13 @@
#define EXCHGID4_FLAG_SUPP_MOVED_REFER 0x00000001
#define EXCHGID4_FLAG_SUPP_MOVED_MIGR 0x00000002
+#define EXCHGID4_FLAG_BIND_PRINC_STATEID 0x00000100
+
#define EXCHGID4_FLAG_USE_NON_PNFS 0x00010000
#define EXCHGID4_FLAG_USE_PNFS_MDS 0x00020000
#define EXCHGID4_FLAG_USE_PNFS_DS 0x00040000
+#define EXCHGID4_FLAG_MASK_PNFS 0x00070000
+
#define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000
#define EXCHGID4_FLAG_CONFIRMED_R 0x80000000
/*
@@ -121,8 +125,8 @@
* they're set in the argument or response, have separate
* invalid flag masks for arg (_A) and resp (_R).
*/
-#define EXCHGID4_FLAG_MASK_A 0x40070003
-#define EXCHGID4_FLAG_MASK_R 0x80070003
+#define EXCHGID4_FLAG_MASK_A 0x40070103
+#define EXCHGID4_FLAG_MASK_R 0x80070103
#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001
#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 452d964..b197563 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -47,11 +47,6 @@ struct nfs_client {
u64 cl_clientid; /* constant */
unsigned long cl_state;
- struct rb_root cl_openowner_id;
- struct rb_root cl_lockowner_id;
-
- struct list_head cl_delegations;
- struct rb_root cl_state_owners;
spinlock_t cl_lock;
unsigned long cl_lease_time;
@@ -71,6 +66,7 @@ struct nfs_client {
*/
char cl_ipaddr[48];
unsigned char cl_id_uniquifier;
+ u32 cl_cb_ident; /* v4.0 callback identifier */
const struct nfs4_minor_version_ops *cl_mvops;
#endif /* CONFIG_NFS_V4 */
@@ -148,7 +144,14 @@ struct nfs_server {
that are supported on this
filesystem */
struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
+ struct rpc_wait_queue roc_rpcwaitq;
+
+ /* the following fields are protected by nfs_client->cl_lock */
+ struct rb_root state_owners;
+ struct rb_root openowner_id;
+ struct rb_root lockowner_id;
#endif
+ struct list_head delegations;
void (*destroy)(struct nfs_server *);
atomic_t active; /* Keep trace of any activity to this server */
@@ -196,6 +199,7 @@ struct nfs4_slot_table {
* op for dynamic resizing */
int target_max_slots; /* Set by CB_RECALL_SLOT as
* the new max_slots */
+ struct completion complete;
};
static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
@@ -212,7 +216,6 @@ struct nfs4_session {
unsigned long session_state;
u32 hash_alg;
u32 ssv_len;
- struct completion complete;
/* The fore and back channel */
struct nfs4_channel_attrs fc_attrs;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 80f0719..b006857 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -208,6 +208,7 @@ struct nfs4_layoutget_args {
struct inode *inode;
struct nfs_open_context *ctx;
struct nfs4_sequence_args seq_args;
+ nfs4_stateid stateid;
};
struct nfs4_layoutget_res {
@@ -223,7 +224,6 @@ struct nfs4_layoutget {
struct nfs4_layoutget_args args;
struct nfs4_layoutget_res res;
struct pnfs_layout_segment **lsegpp;
- int status;
};
struct nfs4_getdeviceinfo_args {
@@ -317,6 +317,7 @@ struct nfs_closeres {
struct nfs_lowner {
__u64 clientid;
__u64 id;
+ dev_t s_dev;
};
struct nfs_lock_args {
@@ -484,6 +485,7 @@ struct nfs_entry {
struct nfs_fh * fh;
struct nfs_fattr * fattr;
unsigned char d_type;
+ struct nfs_server * server;
};
/*
@@ -1089,7 +1091,7 @@ struct nfs_rpc_ops {
int (*pathconf) (struct nfs_server *, struct nfs_fh *,
struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
- __be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus);
+ int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
void (*read_setup) (struct nfs_read_data *, struct rpc_message *);
int (*read_done) (struct rpc_task *, struct nfs_read_data *);
void (*write_setup) (struct nfs_write_data *, struct rpc_message *);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cb845c16..ab47732 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
#define PCI_DEVICE_ID_AMD_SCSI 0x2020
@@ -1650,6 +1651,11 @@
#define PCI_DEVICE_ID_O2_6836 0x6836
#define PCI_DEVICE_ID_O2_6812 0x6872
#define PCI_DEVICE_ID_O2_6933 0x6933
+#define PCI_DEVICE_ID_O2_8120 0x8120
+#define PCI_DEVICE_ID_O2_8220 0x8220
+#define PCI_DEVICE_ID_O2_8221 0x8221
+#define PCI_DEVICE_ID_O2_8320 0x8320
+#define PCI_DEVICE_ID_O2_8321 0x8321
#define PCI_VENDOR_ID_3DFX 0x121a
#define PCI_DEVICE_ID_3DFX_VOODOO 0x0001
@@ -2363,6 +2369,8 @@
#define PCI_DEVICE_ID_JMICRON_JMB38X_SD 0x2381
#define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382
#define PCI_DEVICE_ID_JMICRON_JMB38X_MS 0x2383
+#define PCI_DEVICE_ID_JMICRON_JMB388_SD 0x2391
+#define PCI_DEVICE_ID_JMICRON_JMB388_ESD 0x2392
#define PCI_VENDOR_ID_KORENIX 0x1982
#define PCI_DEVICE_ID_KORENIX_JETCARDF0 0x1600
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index d1a9193..223b14c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -31,8 +31,9 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
#define quota_error(sb, fmt, args...) \
__quota_error((sb), __func__, fmt , ## args)
-extern void __quota_error(struct super_block *sb, const char *func,
- const char *fmt, ...);
+extern __attribute__((format (printf, 3, 4)))
+void __quota_error(struct super_block *sb, const char *func,
+ const char *fmt, ...);
/*
* declaration of quota_function calls in kernel.
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 14dbc83..3c995b4 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -107,12 +107,17 @@ extern int rtc_year_days(unsigned int day, unsigned int month, unsigned int year
extern int rtc_valid_tm(struct rtc_time *tm);
extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time);
extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
+ktime_t rtc_tm_to_ktime(struct rtc_time tm);
+struct rtc_time rtc_ktime_to_tm(ktime_t kt);
+
#include <linux/device.h>
#include <linux/seq_file.h>
#include <linux/cdev.h>
#include <linux/poll.h>
#include <linux/mutex.h>
+#include <linux/timerqueue.h>
+#include <linux/workqueue.h>
extern struct class *rtc_class;
@@ -151,7 +156,19 @@ struct rtc_class_ops {
};
#define RTC_DEVICE_NAME_SIZE 20
-struct rtc_task;
+typedef struct rtc_task {
+ void (*func)(void *private_data);
+ void *private_data;
+} rtc_task_t;
+
+
+struct rtc_timer {
+ struct rtc_task task;
+ struct timerqueue_node node;
+ ktime_t period;
+ int enabled;
+};
+
/* flags */
#define RTC_DEV_BUSY 0
@@ -179,16 +196,13 @@ struct rtc_device
spinlock_t irq_task_lock;
int irq_freq;
int max_user_freq;
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
- struct work_struct uie_task;
- struct timer_list uie_timer;
- /* Those fields are protected by rtc->irq_lock */
- unsigned int oldsecs;
- unsigned int uie_irq_active:1;
- unsigned int stop_uie_polling:1;
- unsigned int uie_task_active:1;
- unsigned int uie_timer_active:1;
-#endif
+
+ struct timerqueue_head timerqueue;
+ struct rtc_timer aie_timer;
+ struct rtc_timer uie_rtctimer;
+ struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
+ int pie_enabled;
+ struct work_struct irqwork;
};
#define to_rtc_device(d) container_of(d, struct rtc_device, dev)
@@ -224,15 +238,22 @@ extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
unsigned int enabled);
-typedef struct rtc_task {
- void (*func)(void *private_data);
- void *private_data;
-} rtc_task_t;
+void rtc_aie_update_irq(void *private);
+void rtc_uie_update_irq(void *private);
+enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
int rtc_register(rtc_task_t *task);
int rtc_unregister(rtc_task_t *task);
int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
+void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
+int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+ ktime_t expires, ktime_t period);
+int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
+void rtc_timer_do_work(struct work_struct *work);
+
static inline bool is_leap_year(unsigned int year)
{
return (!(year % 4) && (year % 100)) || !(year % 400);
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index b202475..8521067 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -110,9 +110,9 @@ struct rpc_credops {
__be32 * (*crmarshal)(struct rpc_task *, __be32 *);
int (*crrefresh)(struct rpc_task *);
__be32 * (*crvalidate)(struct rpc_task *, __be32 *);
- int (*crwrap_req)(struct rpc_task *, kxdrproc_t,
+ int (*crwrap_req)(struct rpc_task *, kxdreproc_t,
void *, __be32 *, void *);
- int (*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
+ int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
void *, __be32 *, void *);
};
@@ -139,8 +139,8 @@ struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *
void put_rpccred(struct rpc_cred *);
__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *);
-int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
-int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
+int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
+int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *);
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 7c91260..c50b458 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -43,10 +43,18 @@ int bc_send(struct rpc_rqst *req);
*/
static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
{
- if (rqstp->rq_server->bc_xprt)
+ if (rqstp->rq_server->sv_bc_xprt)
return 1;
return 0;
}
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+ if (svc_is_backchannel(rqstp))
+ return (struct nfs4_sessionid *)
+ rqstp->rq_server->sv_bc_xprt->xpt_bc_sid;
+ return NULL;
+}
+
#else /* CONFIG_NFS_V4_1 */
static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
unsigned int min_reqs)
@@ -59,6 +67,11 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
return 0;
}
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+ return NULL;
+}
+
static inline void xprt_free_bc_request(struct rpc_rqst *req)
{
}
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a5a55f2..ef9476a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -89,8 +89,8 @@ struct rpc_version {
*/
struct rpc_procinfo {
u32 p_proc; /* RPC procedure number */
- kxdrproc_t p_encode; /* XDR encode function */
- kxdrproc_t p_decode; /* XDR decode function */
+ kxdreproc_t p_encode; /* XDR encode function */
+ kxdrdproc_t p_decode; /* XDR decode function */
unsigned int p_arglen; /* argument hdr length (u32) */
unsigned int p_replen; /* reply hdr length (u32) */
unsigned int p_count; /* call count */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5a3085b..c81d4d8 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -99,7 +99,7 @@ struct svc_serv {
spinlock_t sv_cb_lock; /* protects the svc_cb_list */
wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
* entries in the svc_cb_list */
- struct svc_xprt *bc_xprt;
+ struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index aea0d438..357da5e 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -78,6 +78,7 @@ struct svc_xprt {
size_t xpt_remotelen; /* length of address */
struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
struct list_head xpt_users; /* callbacks on free */
+ void *xpt_bc_sid; /* back channel session ID */
struct net *xpt_net;
};
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 498ab93..fc84b7a 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -33,8 +33,8 @@ struct xdr_netobj {
};
/*
- * This is the generic XDR function. rqstp is either a rpc_rqst (client
- * side) or svc_rqst pointer (server side).
+ * This is the legacy generic XDR function. rqstp is either a rpc_rqst
+ * (client side) or svc_rqst pointer (server side).
* Encode functions always assume there's enough room in the buffer.
*/
typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
@@ -201,14 +201,22 @@ struct xdr_stream {
__be32 *end; /* end of available buffer space */
struct kvec *iov; /* pointer to the current kvec */
+ struct kvec scratch; /* Scratch buffer */
+ struct page **page_ptr; /* pointer to the current page */
};
+/*
+ * These are the xdr_stream style generic XDR encode and decode functions.
+ */
+typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d3e4f87..c681461 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -32,7 +32,7 @@ struct tracepoint {
int state; /* State. */
void (*regfunc)(void);
void (*unregfunc)(void);
- struct tracepoint_func *funcs;
+ struct tracepoint_func __rcu *funcs;
} __attribute__((aligned(32))); /*
* Aligned on 32 bytes because it is
* globally visible and gcc happily
@@ -326,7 +326,7 @@ do_trace: \
* memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
* __entry->next_pid = next->pid;
* __entry->next_prio = next->prio;
- * )
+ * ),
*
* *
* * Formatted output of a trace record via TP_printk().
diff --git a/include/net/ah.h b/include/net/ah.h
index f0129f7..be7798d 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -4,7 +4,7 @@
#include <linux/skbuff.h>
/* This is the maximum truncated ICV length that we know of. */
-#define MAX_AH_AUTH_LEN 12
+#define MAX_AH_AUTH_LEN 16
struct crypto_ahash;
diff --git a/include/net/arp.h b/include/net/arp.h
index f4cf6ce..91f0568 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -25,5 +25,6 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
const unsigned char *src_hw,
const unsigned char *target_hw);
extern void arp_xmit(struct sk_buff *skb);
+int arp_invalidate(struct net_device *dev, __be32 ip);
#endif /* _ARP_H */
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index d5df797..5395e09 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -107,8 +107,8 @@ struct phonet_protocol {
int sock_type;
};
-int phonet_proto_register(int protocol, struct phonet_protocol *pp);
-void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
+int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp);
+void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp);
int phonet_sysctl_init(void);
void phonet_sysctl_exit(void);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0af57eb..e9eee99 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -207,7 +207,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
return q->q.qlen;
}
-static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
return (struct qdisc_skb_cb *)skb->cb;
}
@@ -394,7 +394,7 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
return true;
}
-static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
{
return qdisc_skb_cb(skb)->pkt_len;
}
@@ -426,10 +426,18 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
}
-static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+
+static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+ const struct sk_buff *skb)
+{
+ bstats->bytes += qdisc_pkt_len(skb);
+ bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+}
+
+static inline void qdisc_bstats_update(struct Qdisc *sch,
+ const struct sk_buff *skb)
{
- sch->bstats.bytes += len;
- sch->bstats.packets++;
+ bstats_update(&sch->bstats, skb);
}
static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
@@ -437,7 +445,7 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
{
__skb_queue_tail(list, skb);
sch->qstats.backlog += qdisc_pkt_len(skb);
- __qdisc_update_bstats(sch, qdisc_pkt_len(skb));
+ qdisc_bstats_update(sch, skb);
return NET_XMIT_SUCCESS;
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 21a02f7..d884d26 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -152,14 +152,18 @@ struct sock_common {
* fields between dontcopy_begin/dontcopy_end
* are not copied in sock_copy()
*/
+ /* private: */
int skc_dontcopy_begin[0];
+ /* public: */
union {
struct hlist_node skc_node;
struct hlist_nulls_node skc_nulls_node;
};
int skc_tx_queue_mapping;
atomic_t skc_refcnt;
+ /* private: */
int skc_dontcopy_end[0];
+ /* public: */
};
/**
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index b0b4eb2..da39b22 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -21,6 +21,16 @@
#undef CREATE_TRACE_POINTS
#include <linux/stringify.h>
+/*
+ * module.h includes tracepoints, and because ftrace.h
+ * pulls in module.h:
+ * trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h ->
+ * linux/ftrace.h -> linux/module.h
+ * we must include module.h here before we play with any of
+ * the TRACE_EVENT() macros, otherwise the tracepoints included
+ * by module.h may break the build.
+ */
+#include <linux/module.h>
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 75ce9d5..f10293c 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -25,9 +25,7 @@ TRACE_EVENT(kfree_skb,
TP_fast_assign(
__entry->skbaddr = skb;
- if (skb) {
- __entry->protocol = ntohs(skb->protocol);
- }
+ __entry->protocol = ntohs(skb->protocol);
__entry->location = location;
),
diff --git a/kernel/Makefile b/kernel/Makefile
index 33e0a39..5669f71 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/
+obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 89c7486..f9a45eb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code)
exit_fs(tsk);
check_stack_usage();
exit_thread();
+
+ /*
+ * Flush inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ *
+ * because of cgroup mode, must be called before cgroup_exit()
+ */
+ perf_event_exit_task(tsk);
+
cgroup_exit(tsk, 1);
if (group_dead)
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code)
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
- /*
- * Flush inherited counters to the parent - before the parent
- * gets woken up by child-exit notifications.
- */
- perf_event_exit_task(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 11847bf..b782b7a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,12 @@
#include <asm/irq_regs.h>
+enum event_type_t {
+ EVENT_FLEXIBLE = 0x1,
+ EVENT_PINNED = 0x2,
+ EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
atomic_t perf_task_events __read_mostly;
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
static atomic64_t perf_event_id;
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
+
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void)
return "pmu";
}
+static inline u64 perf_clock(void)
+{
+ return local_clock();
+}
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
put_ctx(ctx);
}
-static inline u64 perf_clock(void)
-{
- return local_clock();
-}
-
/*
* Update the record of the current time in a context.
*/
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx)
ctx->timestamp = now;
}
+static u64 perf_event_time(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ return ctx ? ctx->time : 0;
+}
+
/*
* Update the total_time_enabled and total_time_running fields for a event.
*/
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event)
return;
if (ctx->is_active)
- run_end = ctx->time;
+ run_end = perf_event_time(event);
else
run_end = event->tstamp_stopped;
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event)
if (event->state == PERF_EVENT_STATE_INACTIVE)
run_end = event->tstamp_stopped;
else
- run_end = ctx->time;
+ run_end = perf_event_time(event);
event->total_time_running = run_end - event->tstamp_running;
}
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
u64 delta;
/*
* An event which could not be activated because of
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event,
&& !event_filter_match(event)) {
delta = ctx->time - event->tstamp_stopped;
event->tstamp_running += delta;
- event->tstamp_stopped = ctx->time;
+ event->tstamp_stopped = tstamp;
}
if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event,
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
- event->tstamp_stopped = ctx->time;
+ event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
@@ -768,6 +787,8 @@ event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
+
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
@@ -784,9 +805,9 @@ event_sched_in(struct perf_event *event,
return -EAGAIN;
}
- event->tstamp_running += ctx->time - event->tstamp_stopped;
+ event->tstamp_running += tstamp - event->tstamp_stopped;
- event->shadow_ctx_time = ctx->time - ctx->timestamp;
+ event->shadow_ctx_time = tstamp - ctx->timestamp;
if (!is_software_event(event))
cpuctx->active_oncpu++;
@@ -898,11 +919,13 @@ static int group_can_go_on(struct perf_event *event,
static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
+ u64 tstamp = perf_event_time(event);
+
list_add_event(event, ctx);
perf_group_attach(event);
- event->tstamp_enabled = ctx->time;
- event->tstamp_running = ctx->time;
- event->tstamp_stopped = ctx->time;
+ event->tstamp_enabled = tstamp;
+ event->tstamp_running = tstamp;
+ event->tstamp_stopped = tstamp;
}
/*
@@ -937,7 +960,7 @@ static void __perf_install_in_context(void *info)
add_event_to_ctx(event, ctx);
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
goto unlock;
/*
@@ -1042,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event,
struct perf_event_context *ctx)
{
struct perf_event *sub;
+ u64 tstamp = perf_event_time(event);
event->state = PERF_EVENT_STATE_INACTIVE;
- event->tstamp_enabled = ctx->time - event->total_time_enabled;
+ event->tstamp_enabled = tstamp - event->total_time_enabled;
list_for_each_entry(sub, &event->sibling_list, group_entry) {
- if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
- sub->tstamp_enabled =
- ctx->time - sub->total_time_enabled;
- }
+ if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+ sub->tstamp_enabled = tstamp - sub->total_time_enabled;
}
}
@@ -1082,7 +1104,7 @@ static void __perf_event_enable(void *info)
goto unlock;
__perf_event_mark_enabled(event, ctx);
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
goto unlock;
/*
@@ -1193,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
return 0;
}
-enum event_type_t {
- EVENT_FLEXIBLE = 0x1,
- EVENT_PINNED = 0x2,
- EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
static void ctx_sched_out(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type)
@@ -1435,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
if (event->state <= PERF_EVENT_STATE_OFF)
continue;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
if (group_can_go_on(event, cpuctx, 1))
@@ -1467,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
* Listen to the 'cpu' scheduling filter constraint
* of events:
*/
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
if (group_can_go_on(event, cpuctx, can_add_hw)) {
@@ -1694,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
continue;
hwc = &event->hw;
@@ -3893,7 +3909,7 @@ static int perf_event_task_match(struct perf_event *event)
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if (event->attr.comm || event->attr.mmap ||
@@ -4030,7 +4046,7 @@ static int perf_event_comm_match(struct perf_event *event)
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if (event->attr.comm)
@@ -4178,7 +4194,7 @@ static int perf_event_mmap_match(struct perf_event *event,
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
- if (event->cpu != -1 && event->cpu != smp_processor_id())
+ if (!event_filter_match(event))
return 0;
if ((!executable && event->attr.mmap_data) ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 53f3381..761c510 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_EVENT_TRACING) += power-traces.o
+obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f8cf959..dc53ecb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
__this_cpu_inc(user_stack_count);
-
-
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
- return;
+ goto out_drop_count;
entry = ring_buffer_event_data(event);
entry->tgid = current->tgid;
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+ out_drop_count:
__this_cpu_dec(user_stack_count);
-
out:
preempt_enable();
}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3094318..b335acb 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -141,11 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
else if (!dp->flags)
dt->num_enabled++;
dp->flags = newflags;
- if (newflags) {
- jump_label_enable(&dp->enabled);
- } else {
- jump_label_disable(&dp->enabled);
- }
+ if (newflags)
+ dp->enabled = 1;
+ else
+ dp->enabled = 0;
if (verbose)
printk(KERN_INFO
"ddebug: changed %s:%d [%s]%s %s\n",
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 798beac..1e308f2 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -178,27 +178,24 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
break;
case 's':{
char **sptr = va_arg(ap, char **);
- int16_t len;
- int size;
+ uint16_t len;
errcode = p9pdu_readf(pdu, proto_version,
"w", &len);
if (errcode)
break;
- size = max_t(int16_t, len, 0);
-
- *sptr = kmalloc(size + 1, GFP_KERNEL);
+ *sptr = kmalloc(len + 1, GFP_KERNEL);
if (*sptr == NULL) {
errcode = -EFAULT;
break;
}
- if (pdu_read(pdu, *sptr, size)) {
+ if (pdu_read(pdu, *sptr, len)) {
errcode = -EFAULT;
kfree(*sptr);
*sptr = NULL;
} else
- (*sptr)[size] = 0;
+ (*sptr)[len] = 0;
}
break;
case 'Q':{
@@ -234,14 +231,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
}
break;
case 'D':{
- int32_t *count = va_arg(ap, int32_t *);
+ uint32_t *count = va_arg(ap, uint32_t *);
void **data = va_arg(ap, void **);
errcode =
p9pdu_readf(pdu, proto_version, "d", count);
if (!errcode) {
*count =
- min_t(int32_t, *count,
+ min_t(uint32_t, *count,
pdu->size - pdu->offset);
*data = &pdu->sdata[pdu->offset];
}
@@ -404,9 +401,10 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
break;
case 's':{
const char *sptr = va_arg(ap, const char *);
- int16_t len = 0;
+ uint16_t len = 0;
if (sptr)
- len = min_t(int16_t, strlen(sptr), USHRT_MAX);
+ len = min_t(uint16_t, strlen(sptr),
+ USHRT_MAX);
errcode = p9pdu_writef(pdu, proto_version,
"w", len);
@@ -438,7 +436,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
stbuf->n_gid, stbuf->n_muid);
} break;
case 'D':{
- int32_t count = va_arg(ap, int32_t);
+ uint32_t count = va_arg(ap, uint32_t);
const void *data = va_arg(ap, const void *);
errcode = p9pdu_writef(pdu, proto_version, "d",
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 1bf0cf5..8184c03 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -740,12 +740,12 @@ static int setsockopt(struct socket *sock,
if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
return -ENOPROTOOPT;
lock_sock(&(cf_sk->sk));
- cf_sk->conn_req.param.size = ol;
if (ol > sizeof(cf_sk->conn_req.param.data) ||
copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
release_sock(&cf_sk->sk);
return -EINVAL;
}
+ cf_sk->conn_req.param.size = ol;
release_sock(&cf_sk->sk);
return 0;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 84a422c..fa9dab3 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -76,6 +76,8 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
int pktlen;
int err = 0;
+ const u8 *ip_version;
+ u8 buf;
priv = container_of(layr, struct chnl_net, chnl);
@@ -90,7 +92,21 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
* send the packet to the net stack.
*/
skb->dev = priv->netdev;
- skb->protocol = htons(ETH_P_IP);
+
+ /* check the version of IP */
+ ip_version = skb_header_pointer(skb, 0, 1, &buf);
+ if (!ip_version)
+ return -EINVAL;
+ switch (*ip_version >> 4) {
+ case 4:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case 6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ return -EINVAL;
+ }
/* If we change the header in loop mode, the checksum is corrupted. */
if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
diff --git a/net/core/dev.c b/net/core/dev.c
index a215269..a3ef808 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
- return ((features & NETIF_F_NO_CSUM) ||
- ((features & NETIF_F_V4_CSUM) &&
- protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_V6_CSUM) &&
- protocol == htons(ETH_P_IPV6)) ||
- ((features & NETIF_F_FCOE_CRC) &&
- protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
- __be16 protocol = skb->protocol;
- int features = dev->features;
-
- if (vlan_tx_tag_present(skb)) {
- features &= dev->vlan_features;
- } else if (protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- features &= dev->vlan_features;
- }
-
- return can_checksum_protocol(features, protocol);
-}
-
/**
* skb_dev_set -- assign a new device to a buffer
* @skb: buffer for the new device
@@ -1971,16 +1944,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
/**
* dev_gso_segment - Perform emulated hardware segmentation on skb.
* @skb: buffer to segment
+ * @features: device features as applicable to this skb
*
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
{
- struct net_device *dev = skb->dev;
struct sk_buff *segs;
- int features = dev->features & ~(illegal_highdma(dev, skb) ?
- NETIF_F_SG : 0);
segs = skb_gso_segment(skb, features);
@@ -2017,22 +1988,52 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+ return ((features & NETIF_F_GEN_CSUM) ||
+ ((features & NETIF_F_V4_CSUM) &&
+ protocol == htons(ETH_P_IP)) ||
+ ((features & NETIF_F_V6_CSUM) &&
+ protocol == htons(ETH_P_IPV6)) ||
+ ((features & NETIF_F_FCOE_CRC) &&
+ protocol == htons(ETH_P_FCOE)));
+}
+
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+ if (!can_checksum_protocol(protocol, features)) {
+ features &= ~NETIF_F_ALL_CSUM;
+ features &= ~NETIF_F_SG;
+ } else if (illegal_highdma(skb->dev, skb)) {
+ features &= ~NETIF_F_SG;
+ }
+
+ return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
+ int features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
- } else if (!skb->vlan_tci)
- return dev->features;
+ } else if (!vlan_tx_tag_present(skb)) {
+ return harmonize_features(skb, protocol, features);
+ }
- if (protocol != htons(ETH_P_8021Q))
- return dev->features & dev->vlan_features;
- else
- return 0;
+ features &= skb->dev->vlan_features;
+
+ if (protocol != htons(ETH_P_8021Q)) {
+ return harmonize_features(skb, protocol, features);
+ } else {
+ features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM;
+ return harmonize_features(skb, protocol, features);
+ }
}
-EXPORT_SYMBOL(netif_get_vlan_features);
+EXPORT_SYMBOL(netif_skb_features);
/*
* Returns true if either:
@@ -2042,22 +2043,13 @@ EXPORT_SYMBOL(netif_get_vlan_features);
* support DMA from it.
*/
static inline int skb_needs_linearize(struct sk_buff *skb,
- struct net_device *dev)
+ int features)
{
- if (skb_is_nonlinear(skb)) {
- int features = dev->features;
-
- if (vlan_tx_tag_present(skb))
- features &= dev->vlan_features;
-
- return (skb_has_frag_list(skb) &&
- !(features & NETIF_F_FRAGLIST)) ||
+ return skb_is_nonlinear(skb) &&
+ ((skb_has_frag_list(skb) &&
+ !(features & NETIF_F_FRAGLIST)) ||
(skb_shinfo(skb)->nr_frags &&
- (!(features & NETIF_F_SG) ||
- illegal_highdma(dev, skb)));
- }
-
- return 0;
+ !(features & NETIF_F_SG)));
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2067,6 +2059,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
+ int features;
+
/*
* If device doesnt need skb->dst, release it right now while
* its hot in this cpu cache
@@ -2079,8 +2073,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_orphan_try(skb);
+ features = netif_skb_features(skb);
+
if (vlan_tx_tag_present(skb) &&
- !(dev->features & NETIF_F_HW_VLAN_TX)) {
+ !(features & NETIF_F_HW_VLAN_TX)) {
skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
if (unlikely(!skb))
goto out;
@@ -2088,13 +2084,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
- if (netif_needs_gso(dev, skb)) {
- if (unlikely(dev_gso_segment(skb)))
+ if (netif_needs_gso(skb, features)) {
+ if (unlikely(dev_gso_segment(skb, features)))
goto out_kfree_skb;
if (skb->next)
goto gso;
} else {
- if (skb_needs_linearize(skb, dev) &&
+ if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
@@ -2105,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
- if (!dev_can_checksum(dev, skb) &&
+ if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
@@ -2301,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
*/
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
- __qdisc_update_bstats(q, skb->len);
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ qdisc_bstats_update(q, skb);
+
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -5621,18 +5620,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
}
/**
- * alloc_netdev_mq - allocate network device
+ * alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
- * @queue_count: the number of subqueues to allocate
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subquue structs
- * for each queue on the device at the end of the netdevice.
+ * for each queue on the device.
*/
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
- void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ void (*setup)(struct net_device *),
+ unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
size_t alloc_size;
@@ -5640,12 +5641,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));
- if (queue_count < 1) {
+ if (txqs < 1) {
pr_err("alloc_netdev: Unable to allocate device "
"with zero queues.\n");
return NULL;
}
+#ifdef CONFIG_RPS
+ if (rxqs < 1) {
+ pr_err("alloc_netdev: Unable to allocate device "
+ "with zero RX queues.\n");
+ return NULL;
+ }
+#endif
+
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
@@ -5676,14 +5685,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->num_tx_queues = queue_count;
- dev->real_num_tx_queues = queue_count;
+ dev->num_tx_queues = txqs;
+ dev->real_num_tx_queues = txqs;
if (netif_alloc_netdev_queues(dev))
goto free_pcpu;
#ifdef CONFIG_RPS
- dev->num_rx_queues = queue_count;
- dev->real_num_rx_queues = queue_count;
+ dev->num_rx_queues = rxqs;
+ dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_pcpu;
#endif
@@ -5711,7 +5720,7 @@ free_p:
kfree(p);
return NULL;
}
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
/**
* free_netdev - free network device
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b27d4e..afc5837 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL(sk_filter);
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
- * @filter: filter to apply
+ * @fentry: filter to apply
*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. @skb is the data we are
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57..a5f7535 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
return -EPERM;
- if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 4508705..5fdb072 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -426,7 +426,8 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
- dp->dccps_gsr = seq;
+ if (after48(seq, dp->dccps_gsr))
+ dp->dccps_gsr = seq;
/* Sequence validity window depends on remote Sequence Window (7.5.1) */
dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
/*
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 15af247..8cde009 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -260,7 +260,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
*/
if (time_before(now, (dp->dccps_rate_last +
sysctl_dccp_sync_ratelimit)))
- return 0;
+ return -1;
DCCP_WARN("Step 6 failed for %s packet, "
"(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 5639438..4234882 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -21,7 +21,8 @@
/* Boundary values */
static int zero = 0,
u8_max = 0xFF;
-static unsigned long seqw_min = 32;
+static unsigned long seqw_min = DCCPF_SEQ_WMIN,
+ seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
static struct ctl_table dccp_default_table[] = {
{
@@ -31,6 +32,7 @@ static struct ctl_table dccp_default_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &seqw_min, /* RFC 4340, 7.5.2 */
+ .extra2 = &seqw_max,
},
{
.procname = "rx_ccid",
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f00ef2f..f9d7ac9 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -347,10 +347,11 @@ void ether_setup(struct net_device *dev)
EXPORT_SYMBOL(ether_setup);
/**
- * alloc_etherdev_mq - Allocates and sets up an Ethernet device
+ * alloc_etherdev_mqs - Allocates and sets up an Ethernet device
* @sizeof_priv: Size of additional driver-private structure to be allocated
* for this Ethernet device
- * @queue_count: The number of queues this device has.
+ * @txqs: The number of TX queues this device has.
+ * @txqs: The number of RX queues this device has.
*
* Fill in the fields of the device structure with Ethernet-generic
* values. Basically does everything except registering the device.
@@ -360,11 +361,12 @@ EXPORT_SYMBOL(ether_setup);
* this private data area.
*/
-struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+ unsigned int rxqs)
{
- return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
+ return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
}
-EXPORT_SYMBOL(alloc_etherdev_mq);
+EXPORT_SYMBOL(alloc_etherdev_mqs);
static size_t _format_mac_addr(char *buf, int buflen,
const unsigned char *addr, int len)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 880a5ec..86961be 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -314,14 +314,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
- ah = (struct ip_auth_hdr *)skb->data;
- iph = ip_hdr(skb);
- ihl = ip_hdrlen(skb);
if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
goto out;
nfrags = err;
+ ah = (struct ip_auth_hdr *)skb->data;
+ iph = ip_hdr(skb);
+ ihl = ip_hdrlen(skb);
+
work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
if (!work_iph)
goto out;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a2fc7b9..04c8b69 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1143,6 +1143,23 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
+int arp_invalidate(struct net_device *dev, __be32 ip)
+{
+ struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
+ int err = -ENXIO;
+
+ if (neigh) {
+ if (neigh->nud_state & ~NUD_NOARP)
+ err = neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE|
+ NEIGH_UPDATE_F_ADMIN);
+ neigh_release(neigh);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(arp_invalidate);
+
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
@@ -1163,7 +1180,6 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
{
int err;
__be32 ip;
- struct neighbour *neigh;
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
@@ -1181,16 +1197,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- err = -ENXIO;
- neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (neigh->nud_state & ~NUD_NOARP)
- err = neigh_update(neigh, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_ADMIN);
- neigh_release(neigh);
- }
- return err;
+ return arp_invalidate(dev, ip);
}
/*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 25e3181..97e5fb7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) {
+ ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -122,7 +122,8 @@ again:
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
+ !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
spin_unlock(&head->lock);
snum = smallest_rover;
goto have_snum;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada171..2746c1f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
nlmsg_len(nlh) < hdrlen)
return -EINVAL;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
if (nlmsg_attrlen(nlh, hdrlen)) {
struct nlattr *attr;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340..e855fff 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
struct arpt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqbegin(lock);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqretry(lock, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
* about).
*/
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
struct arpt_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d63..652efea 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU.
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqbegin(lock);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqretry(lock, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ee82d4e..1aba54a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -538,14 +538,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- ip6h = ipv6_hdr(skb);
-
- skb_push(skb, hdr_len);
if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
goto out;
nfrags = err;
+ ah = (struct ip_auth_hdr *)skb->data;
+ ip6h = ipv6_hdr(skb);
+
+ skb_push(skb, hdr_len);
+
work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
if (!work_iph)
goto out;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e46305d..d144e62 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
(!sk->sk_reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
+ ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) &&
ipv6_rcv_saddr_equal(sk, sk2))
break;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4555823..7d227c6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
struct ip6t_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqbegin(lock);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqretry(lock, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0cdba50..5cb8d30 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -645,25 +645,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
- rcu_read_lock();
+ spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
- hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]],
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (!atomic_inc_not_zero(&ct->ct_general.use))
- continue;
/* Dump entries of a given L3 protocol number.
* If it is not specified, ie. l3proto == 0,
* then dump everything. */
if (l3proto && nf_ct_l3num(ct) != l3proto)
- goto releasect;
+ continue;
if (cb->args[1]) {
if (ct != last)
- goto releasect;
+ continue;
cb->args[1] = 0;
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -681,8 +679,6 @@ restart:
if (acct)
memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
}
-releasect:
- nf_ct_put(ct);
}
if (cb->args[1]) {
cb->args[1] = 0;
@@ -690,7 +686,7 @@ releasect:
}
}
out:
- rcu_read_unlock();
+ spin_unlock_bh(&nf_conntrack_lock);
if (last)
nf_ct_put(last);
@@ -928,7 +924,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
u16 zone;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP)
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
ctnetlink_done);
@@ -1790,7 +1786,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
u16 zone;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
return netlink_dump_start(ctnl, skb, nlh,
ctnetlink_exp_dump_table,
ctnetlink_exp_done);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507..c942376 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
for_each_possible_cpu(i) {
struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
- spin_lock_init(&lock->lock);
+
+ seqlock_init(&lock->lock);
lock->readers = 0;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1781d99..f83cb37 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
security_netlink_recv(skb, CAP_NET_ADMIN))
return -EPERM;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
if (ops->dumpit == NULL)
return -EOPNOTSUPP;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index fd95beb..1072b2c 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -37,7 +37,7 @@
/* Transport protocol registration */
static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
-static struct phonet_protocol *phonet_proto_get(int protocol)
+static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
{
struct phonet_protocol *pp;
@@ -458,7 +458,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
static DEFINE_MUTEX(proto_tab_lock);
-int __init_or_module phonet_proto_register(int protocol,
+int __init_or_module phonet_proto_register(unsigned int protocol,
struct phonet_protocol *pp)
{
int err = 0;
@@ -481,7 +481,7 @@ int __init_or_module phonet_proto_register(int protocol,
}
EXPORT_SYMBOL(phonet_proto_register);
-void phonet_proto_unregister(int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 67dc7ce..83ddfc0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -508,8 +508,7 @@ static int tcf_csum(struct sk_buff *skb,
spin_lock(&p->tcf_lock);
p->tcf_tm.lastuse = jiffies;
- p->tcf_bstats.bytes += qdisc_pkt_len(skb);
- p->tcf_bstats.packets++;
+ bstats_update(&p->tcf_bstats, skb);
action = p->tcf_action;
update_flags = p->update_flags;
spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8daef96..c2a7c20 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -209,8 +209,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
spin_lock(&ipt->tcf_lock);
ipt->tcf_tm.lastuse = jiffies;
- ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
- ipt->tcf_bstats.packets++;
+ bstats_update(&ipt->tcf_bstats, skb);
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be..d765067 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
spin_lock(&m->tcf_lock);
m->tcf_tm.lastuse = jiffies;
- m->tcf_bstats.bytes += qdisc_pkt_len(skb);
- m->tcf_bstats.packets++;
+ bstats_update(&m->tcf_bstats, skb);
dev = m->tcfm_dev;
if (!dev) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 186eb83..178a4bd 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
egress = p->flags & TCA_NAT_FLAG_EGRESS;
action = p->tcf_action;
- p->tcf_bstats.bytes += qdisc_pkt_len(skb);
- p->tcf_bstats.packets++;
+ bstats_update(&p->tcf_bstats, skb);
spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0593c9..445bef7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -187,8 +187,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
bad:
p->tcf_qstats.overlimits++;
done:
- p->tcf_bstats.bytes += qdisc_pkt_len(skb);
- p->tcf_bstats.packets++;
+ bstats_update(&p->tcf_bstats, skb);
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7ebf743..e2f08b1 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -298,8 +298,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
spin_lock(&police->tcf_lock);
- police->tcf_bstats.bytes += qdisc_pkt_len(skb);
- police->tcf_bstats.packets++;
+ bstats_update(&police->tcf_bstats, skb);
if (police->tcfp_ewma_rate &&
police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 97e84f3..7287cff 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -42,8 +42,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
spin_lock(&d->tcf_lock);
d->tcf_tm.lastuse = jiffies;
- d->tcf_bstats.bytes += qdisc_pkt_len(skb);
- d->tcf_bstats.packets++;
+ bstats_update(&d->tcf_bstats, skb);
/* print policy string followed by _ then packet count
* Example if this was the 3rd packet and the string was "hello"
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 66cbf4e..836f5fe 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
spin_lock(&d->tcf_lock);
d->tcf_tm.lastuse = jiffies;
- d->tcf_bstats.bytes += qdisc_pkt_len(skb);
- d->tcf_bstats.packets++;
+ bstats_update(&d->tcf_bstats, skb);
if (d->flags & SKBEDIT_F_PRIORITY)
skb->priority = d->priority;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2825407..943d733 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -422,10 +422,8 @@ drop: __maybe_unused
}
return ret;
}
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
- flow->bstats.bytes += qdisc_pkt_len(skb);
- flow->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
+ bstats_update(&flow->bstats, skb);
/*
* Okay, this may seem weird. We pretend we've dropped the packet if
* it goes via ATM. The reason for this is that the outer qdisc
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index eb76315..c80d1c2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -390,8 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, cl->q);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
- sch->bstats.packets++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
+ qdisc_bstats_update(sch, skb);
cbq_mark_toplevel(q, cl);
if (!cl->next_alive)
cbq_activate_class(cl);
@@ -650,8 +649,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
ret = qdisc_enqueue(skb, cl->q);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
- sch->bstats.packets++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
+ qdisc_bstats_update(sch, skb);
if (!cl->next_alive)
cbq_activate_class(cl);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index aa8b531..de55e64 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
- unsigned int len;
int err;
cl = drr_classify(skb, sch, &err);
@@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
- len = qdisc_pkt_len(skb);
err = qdisc_enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
@@ -377,10 +375,8 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl->deficit = cl->quantum;
}
- cl->bstats.packets++;
- cl->bstats.bytes += len;
- sch->bstats.packets++;
- sch->bstats.bytes += len;
+ bstats_update(&cl->bstats, skb);
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
return err;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1d295d6..60f4bdd 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -260,8 +260,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 069c62b..2e45791 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1599,10 +1599,8 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl->qdisc->q.qlen == 1)
set_active(cl, qdisc_pkt_len(skb));
- cl->bstats.packets++;
- cl->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
+ bstats_update(&cl->bstats, skb);
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 01b519d..984c1b0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -569,15 +569,12 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
return ret;
} else {
- cl->bstats.packets +=
- skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- cl->bstats.bytes += qdisc_pkt_len(skb);
+ bstats_update(&cl->bstats, skb);
htb_activate(q, cl);
}
sch->q.qlen++;
- sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- sch->bstats.bytes += qdisc_pkt_len(skb);
+ qdisc_bstats_update(sch, skb);
return NET_XMIT_SUCCESS;
}
@@ -648,12 +645,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
htb_add_to_wait_tree(q, cl, diff);
}
- /* update byte stats except for leaves which are already updated */
- if (cl->level) {
- cl->bstats.bytes += bytes;
- cl->bstats.packets += skb_is_gso(skb)?
- skb_shinfo(skb)->gso_segs:1;
- }
+ /* update basic stats except for leaves which are already updated */
+ if (cl->level)
+ bstats_update(&cl->bstats, skb);
+
cl = cl->parent;
}
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f10e34a..bce1665 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
result = tc_classify(skb, p->filter_list, &res);
- sch->bstats.packets++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
+ qdisc_bstats_update(sch, skb);
switch (result) {
case TC_ACT_SHOT:
result = TC_ACT_SHOT;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 32690de..21f13da 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,8 +83,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, qdisc);
if (ret == NET_XMIT_SUCCESS) {
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e5593c0..1c4bce8 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,8 +240,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (likely(ret == NET_XMIT_SUCCESS)) {
sch->q.qlen++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
} else if (net_xmit_drop_count(ret)) {
sch->qstats.drops++;
}
@@ -477,8 +476,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
__skb_queue_after(list, skb, nskb);
sch->qstats.backlog += qdisc_pkt_len(nskb);
- sch->bstats.bytes += qdisc_pkt_len(nskb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, nskb);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index b1c95bc..966158d 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -84,8 +84,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, qdisc);
if (ret == NET_XMIT_SUCCESS) {
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a67ba3c..a6009c5 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,8 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
ret = qdisc_enqueue(skb, child);
if (likely(ret == NET_XMIT_SUCCESS)) {
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
sch->q.qlen++;
} else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d54ac94..239ec53 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -403,8 +403,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
slot->allot = q->scaled_quantum;
}
if (++sch->q.qlen <= q->limit) {
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 641a30d..77565e7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -134,8 +134,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
}
sch->q.qlen++;
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 106479a..af9360d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -83,8 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
if (q->q.qlen < dev->tx_queue_len) {
__skb_queue_tail(&q->q, skb);
- sch->bstats.bytes += qdisc_pkt_len(skb);
- sch->bstats.packets++;
+ qdisc_bstats_update(sch, skb);
return NET_XMIT_SUCCESS;
}
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index afe6784..67e3127 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -563,8 +563,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
return cred->cr_ops->crvalidate(task, p);
}
+static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+ __be32 *data, void *obj)
+{
+ struct xdr_stream xdr;
+
+ xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+ encode(rqstp, &xdr, obj);
+}
+
int
-rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -574,11 +583,22 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
- return encode(rqstp, data, obj);
+ rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+ return 0;
+}
+
+static int
+rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+ __be32 *data, void *obj)
+{
+ struct xdr_stream xdr;
+
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+ return decode(rqstp, &xdr, obj);
}
int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -589,7 +609,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
- return decode(rqstp, data, obj);
+ return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
}
int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3835ce3..45dbf15 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1231,9 +1231,19 @@ out_bad:
return NULL;
}
+static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+ __be32 *p, void *obj)
+{
+ struct xdr_stream xdr;
+
+ xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+ encode(rqstp, &xdr, obj);
+}
+
static inline int
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+ kxdreproc_t encode, struct rpc_rqst *rqstp,
+ __be32 *p, void *obj)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf integ_buf;
@@ -1249,9 +1259,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
- status = encode(rqstp, p, obj);
- if (status)
- return status;
+ gss_wrap_req_encode(encode, rqstp, p, obj);
if (xdr_buf_subsegment(snd_buf, &integ_buf,
offset, snd_buf->len - offset))
@@ -1325,7 +1333,8 @@ out:
static inline int
gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+ kxdreproc_t encode, struct rpc_rqst *rqstp,
+ __be32 *p, void *obj)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
u32 offset;
@@ -1342,9 +1351,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
- status = encode(rqstp, p, obj);
- if (status)
- return status;
+ gss_wrap_req_encode(encode, rqstp, p, obj);
status = alloc_enc_pages(rqstp);
if (status)
@@ -1394,7 +1401,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
static int
gss_wrap_req(struct rpc_task *task,
- kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
+ kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1407,12 +1414,14 @@ gss_wrap_req(struct rpc_task *task,
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
- status = encode(rqstp, p, obj);
+ gss_wrap_req_encode(encode, rqstp, p, obj);
+ status = 0;
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
- status = encode(rqstp, p, obj);
+ gss_wrap_req_encode(encode, rqstp, p, obj);
+ status = 0;
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1494,10 +1503,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
return 0;
}
+static int
+gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+ __be32 *p, void *obj)
+{
+ struct xdr_stream xdr;
+
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+ return decode(rqstp, &xdr, obj);
+}
static int
gss_unwrap_resp(struct rpc_task *task,
- kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
+ kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1528,7 +1546,7 @@ gss_unwrap_resp(struct rpc_task *task,
cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
+ (savedlen - head->iov_len);
out_decode:
- status = decode(rqstp, p, obj);
+ status = gss_unwrap_req_decode(decode, rqstp, p, obj);
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 7dcfe0c..1dd1a68 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -59,8 +59,8 @@ int bc_send(struct rpc_rqst *req)
ret = task->tk_status;
rpc_put_task(task);
}
- return ret;
dprintk("RPC: bc_send ret= %d\n", ret);
+ return ret;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 92ce94f..57d344c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1095,7 +1095,7 @@ static void
rpc_xdr_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- kxdrproc_t encode;
+ kxdreproc_t encode;
__be32 *p;
dprint_status(task);
@@ -1535,7 +1535,7 @@ call_decode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode;
+ kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
__be32 *p;
dprintk("RPC: %5u call_decode (status %d)\n",
@@ -1776,12 +1776,11 @@ out_overflow:
goto out_garbage;
}
-static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
+static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
{
- return 0;
}
-static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
{
return 0;
}
@@ -1830,23 +1829,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
const struct rpc_task *task)
{
const char *rpc_waitq = "none";
- char *p, action[KSYM_SYMBOL_LEN];
if (RPC_IS_QUEUED(task))
rpc_waitq = rpc_qname(task->tk_waitqueue);
- /* map tk_action pointer to a function name; then trim off
- * the "+0x0 [sunrpc]" */
- sprint_symbol(action, (unsigned long)task->tk_action);
- p = strchr(action, '+');
- if (p)
- *p = '\0';
-
- printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
+ printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
- action, rpc_waitq);
+ task->tk_action, rpc_waitq);
}
void rpc_show_tasks(void)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 09f01f4..72bc536 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -474,7 +474,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
{
struct inode *inode;
- BUG_ON(!d_unhashed(dentry));
+ d_drop(dentry);
inode = rpc_get_inode(dir->i_sb, mode);
if (!inode)
goto out_err;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index fa6d7ca..c652e4c 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -57,10 +57,6 @@ enum {
RPCBPROC_GETSTAT,
};
-#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT
-#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
-#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
-
/*
* r_owner
*
@@ -693,46 +689,37 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
* XDR functions for rpcbind
*/
-static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p,
- const struct rpcbind_args *rpcb)
+static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct rpcbind_args *rpcb)
{
struct rpc_task *task = req->rq_task;
- struct xdr_stream xdr;
+ __be32 *p;
dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
task->tk_pid, task->tk_msg.rpc_proc->p_name,
rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
- p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz);
- if (unlikely(p == NULL))
- return -EIO;
-
- *p++ = htonl(rpcb->r_prog);
- *p++ = htonl(rpcb->r_vers);
- *p++ = htonl(rpcb->r_prot);
- *p = htonl(rpcb->r_port);
-
- return 0;
+ p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
+ *p++ = cpu_to_be32(rpcb->r_prog);
+ *p++ = cpu_to_be32(rpcb->r_vers);
+ *p++ = cpu_to_be32(rpcb->r_prot);
+ *p = cpu_to_be32(rpcb->r_port);
}
-static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
struct rpcbind_args *rpcb)
{
struct rpc_task *task = req->rq_task;
- struct xdr_stream xdr;
unsigned long port;
-
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+ __be32 *p;
rpcb->r_port = 0;
- p = xdr_inline_decode(&xdr, sizeof(__be32));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
- port = ntohl(*p);
+ port = be32_to_cpup(p);
dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
task->tk_msg.rpc_proc->p_name, port);
if (unlikely(port > USHRT_MAX))
@@ -742,20 +729,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
return 0;
}
-static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
unsigned int *boolp)
{
struct rpc_task *task = req->rq_task;
- struct xdr_stream xdr;
-
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+ __be32 *p;
- p = xdr_inline_decode(&xdr, sizeof(__be32));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -EIO;
*boolp = 0;
- if (*p)
+ if (*p != xdr_zero)
*boolp = 1;
dprintk("RPC: %5u RPCB_%s call %s\n",
@@ -764,73 +749,53 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
return 0;
}
-static int encode_rpcb_string(struct xdr_stream *xdr, const char *string,
- const u32 maxstrlen)
+static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
+ const u32 maxstrlen)
{
- u32 len;
__be32 *p;
+ u32 len;
- if (unlikely(string == NULL))
- return -EIO;
len = strlen(string);
- if (unlikely(len > maxstrlen))
- return -EIO;
-
- p = xdr_reserve_space(xdr, sizeof(__be32) + len);
- if (unlikely(p == NULL))
- return -EIO;
+ BUG_ON(len > maxstrlen);
+ p = xdr_reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, string, len);
-
- return 0;
}
-static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
- const struct rpcbind_args *rpcb)
+static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const struct rpcbind_args *rpcb)
{
struct rpc_task *task = req->rq_task;
- struct xdr_stream xdr;
+ __be32 *p;
dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
task->tk_pid, task->tk_msg.rpc_proc->p_name,
rpcb->r_prog, rpcb->r_vers,
rpcb->r_netid, rpcb->r_addr);
- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
- p = xdr_reserve_space(&xdr,
- sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz));
- if (unlikely(p == NULL))
- return -EIO;
- *p++ = htonl(rpcb->r_prog);
- *p = htonl(rpcb->r_vers);
-
- if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN))
- return -EIO;
- if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN))
- return -EIO;
- if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN))
- return -EIO;
+ p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
+ *p++ = cpu_to_be32(rpcb->r_prog);
+ *p = cpu_to_be32(rpcb->r_vers);
- return 0;
+ encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
+ encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
+ encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
}
-static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
struct rpcbind_args *rpcb)
{
struct sockaddr_storage address;
struct sockaddr *sap = (struct sockaddr *)&address;
struct rpc_task *task = req->rq_task;
- struct xdr_stream xdr;
+ __be32 *p;
u32 len;
rpcb->r_port = 0;
- xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
- p = xdr_inline_decode(&xdr, sizeof(__be32));
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_fail;
- len = ntohl(*p);
+ len = be32_to_cpup(p);
/*
* If the returned universal address is a null string,
@@ -845,7 +810,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
if (unlikely(len > RPCBIND_MAXUADDRLEN))
goto out_fail;
- p = xdr_inline_decode(&xdr, len);
+ p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
goto out_fail;
dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
@@ -871,8 +836,8 @@ out_fail:
static struct rpc_procinfo rpcb_procedures2[] = {
[RPCBPROC_SET] = {
.p_proc = RPCBPROC_SET,
- .p_encode = (kxdrproc_t)rpcb_enc_mapping,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_mapping,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_mappingargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_SET,
@@ -881,8 +846,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
},
[RPCBPROC_UNSET] = {
.p_proc = RPCBPROC_UNSET,
- .p_encode = (kxdrproc_t)rpcb_enc_mapping,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_mapping,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_mappingargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_UNSET,
@@ -891,8 +856,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
},
[RPCBPROC_GETPORT] = {
.p_proc = RPCBPROC_GETPORT,
- .p_encode = (kxdrproc_t)rpcb_enc_mapping,
- .p_decode = (kxdrproc_t)rpcb_dec_getport,
+ .p_encode = (kxdreproc_t)rpcb_enc_mapping,
+ .p_decode = (kxdrdproc_t)rpcb_dec_getport,
.p_arglen = RPCB_mappingargs_sz,
.p_replen = RPCB_getportres_sz,
.p_statidx = RPCBPROC_GETPORT,
@@ -904,8 +869,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
static struct rpc_procinfo rpcb_procedures3[] = {
[RPCBPROC_SET] = {
.p_proc = RPCBPROC_SET,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_SET,
@@ -914,8 +879,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
},
[RPCBPROC_UNSET] = {
.p_proc = RPCBPROC_UNSET,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_UNSET,
@@ -924,8 +889,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
},
[RPCBPROC_GETADDR] = {
.p_proc = RPCBPROC_GETADDR,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_getaddr,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_getaddr,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_getaddrres_sz,
.p_statidx = RPCBPROC_GETADDR,
@@ -937,8 +902,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
static struct rpc_procinfo rpcb_procedures4[] = {
[RPCBPROC_SET] = {
.p_proc = RPCBPROC_SET,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_SET,
@@ -947,8 +912,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
},
[RPCBPROC_UNSET] = {
.p_proc = RPCBPROC_UNSET,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_set,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_set,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_setres_sz,
.p_statidx = RPCBPROC_UNSET,
@@ -957,8 +922,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
},
[RPCBPROC_GETADDR] = {
.p_proc = RPCBPROC_GETADDR,
- .p_encode = (kxdrproc_t)rpcb_enc_getaddr,
- .p_decode = (kxdrproc_t)rpcb_dec_getaddr,
+ .p_encode = (kxdreproc_t)rpcb_enc_getaddr,
+ .p_decode = (kxdrdproc_t)rpcb_dec_getaddr,
.p_arglen = RPCB_getaddrargs_sz,
.p_replen = RPCB_getaddrres_sz,
.p_statidx = RPCBPROC_GETADDR,
@@ -993,19 +958,19 @@ static struct rpcb_info rpcb_next_version6[] = {
static struct rpc_version rpcb_version2 = {
.number = RPCBVERS_2,
- .nrprocs = RPCB_HIGHPROC_2,
+ .nrprocs = ARRAY_SIZE(rpcb_procedures2),
.procs = rpcb_procedures2
};
static struct rpc_version rpcb_version3 = {
.number = RPCBVERS_3,
- .nrprocs = RPCB_HIGHPROC_3,
+ .nrprocs = ARRAY_SIZE(rpcb_procedures3),
.procs = rpcb_procedures3
};
static struct rpc_version rpcb_version4 = {
.number = RPCBVERS_4,
- .nrprocs = RPCB_HIGHPROC_4,
+ .nrprocs = ARRAY_SIZE(rpcb_procedures4),
.procs = rpcb_procedures4
};
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6359c42..0e659c6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv)
if (svc_serv_is_pooled(serv))
svc_pool_map_put();
-#if defined(CONFIG_NFS_V4_1)
- svc_sock_destroy(serv->bc_xprt);
-#endif /* CONFIG_NFS_V4_1 */
-
svc_unregister(serv);
kfree(serv->sv_pools);
kfree(serv);
@@ -1147,7 +1143,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
dropit:
svc_authorise(rqstp); /* doesn't hurt to call this twice */
dprintk("svc: svc_process dropit\n");
- svc_drop(rqstp);
return 0;
err_short_len:
@@ -1218,7 +1213,6 @@ svc_process(struct svc_rqst *rqstp)
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_serv *serv = rqstp->rq_server;
u32 dir;
- int error;
/*
* Setup response xdr_buf.
@@ -1246,11 +1240,13 @@ svc_process(struct svc_rqst *rqstp)
return 0;
}
- error = svc_process_common(rqstp, argv, resv);
- if (error <= 0)
- return error;
-
- return svc_send(rqstp);
+ /* Returns 1 for send, 0 for drop */
+ if (svc_process_common(rqstp, argv, resv))
+ return svc_send(rqstp);
+ else {
+ svc_drop(rqstp);
+ return 0;
+ }
}
#if defined(CONFIG_NFS_V4_1)
@@ -1264,10 +1260,9 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
- int error;
/* Build the svc_rqst used by the common processing routine */
- rqstp->rq_xprt = serv->bc_xprt;
+ rqstp->rq_xprt = serv->sv_bc_xprt;
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
rqstp->rq_server = serv;
@@ -1292,12 +1287,15 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
svc_getu32(argv); /* XID */
svc_getnl(argv); /* CALLDIR */
- error = svc_process_common(rqstp, argv, resv);
- if (error <= 0)
- return error;
-
- memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
- return bc_send(req);
+ /* Returns 1 for send, 0 for drop */
+ if (svc_process_common(rqstp, argv, resv)) {
+ memcpy(&req->rq_snd_buf, &rqstp->rq_res,
+ sizeof(req->rq_snd_buf));
+ return bc_send(req);
+ } else {
+ /* Nothing to do to drop request */
+ return 0;
+ }
}
EXPORT_SYMBOL(bc_svc_process);
#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 07919e1..d265aa7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -66,6 +66,13 @@ static void svc_sock_free(struct svc_xprt *);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+ struct net *, struct sockaddr *,
+ int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+#endif /* CONFIG_NFS_V4_1 */
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];
@@ -1184,6 +1191,57 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+ struct net *, struct sockaddr *,
+ int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+
+static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
+ struct net *net,
+ struct sockaddr *sa, int salen,
+ int flags)
+{
+ return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
+}
+
+static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
+{
+}
+
+static struct svc_xprt_ops svc_tcp_bc_ops = {
+ .xpo_create = svc_bc_tcp_create,
+ .xpo_detach = svc_bc_tcp_sock_detach,
+ .xpo_free = svc_bc_sock_free,
+ .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+};
+
+static struct svc_xprt_class svc_tcp_bc_class = {
+ .xcl_name = "tcp-bc",
+ .xcl_owner = THIS_MODULE,
+ .xcl_ops = &svc_tcp_bc_ops,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+};
+
+static void svc_init_bc_xprt_sock(void)
+{
+ svc_reg_xprt_class(&svc_tcp_bc_class);
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+ svc_unreg_xprt_class(&svc_tcp_bc_class);
+}
+#else /* CONFIG_NFS_V4_1 */
+static void svc_init_bc_xprt_sock(void)
+{
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
static struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1207,12 +1265,14 @@ void svc_init_xprt_sock(void)
{
svc_reg_xprt_class(&svc_tcp_class);
svc_reg_xprt_class(&svc_udp_class);
+ svc_init_bc_xprt_sock();
}
void svc_cleanup_xprt_sock(void)
{
svc_unreg_xprt_class(&svc_tcp_class);
svc_unreg_xprt_class(&svc_udp_class);
+ svc_cleanup_bc_xprt_sock();
}
static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1509,41 +1569,45 @@ static void svc_sock_free(struct svc_xprt *xprt)
kfree(svsk);
}
+#if defined(CONFIG_NFS_V4_1)
/*
- * Create a svc_xprt.
- *
- * For internal use only (e.g. nfsv4.1 backchannel).
- * Callers should typically use the xpo_create() method.
+ * Create a back channel svc_xprt which shares the fore channel socket.
*/
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
+ int protocol,
+ struct net *net,
+ struct sockaddr *sin, int len,
+ int flags)
{
struct svc_sock *svsk;
- struct svc_xprt *xprt = NULL;
+ struct svc_xprt *xprt;
+
+ if (protocol != IPPROTO_TCP) {
+ printk(KERN_WARNING "svc: only TCP sockets"
+ " supported on shared back channel\n");
+ return ERR_PTR(-EINVAL);
+ }
- dprintk("svc: %s\n", __func__);
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
- goto out;
+ return ERR_PTR(-ENOMEM);
xprt = &svsk->sk_xprt;
- if (prot == IPPROTO_TCP)
- svc_xprt_init(&svc_tcp_class, xprt, serv);
- else if (prot == IPPROTO_UDP)
- svc_xprt_init(&svc_udp_class, xprt, serv);
- else
- BUG();
-out:
- dprintk("svc: %s return %p\n", __func__, xprt);
+ svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+
+ serv->sv_bc_xprt = xprt;
+
return xprt;
}
-EXPORT_SYMBOL_GPL(svc_sock_create);
/*
- * Destroy a svc_sock.
+ * Free a back channel svc_sock.
*/
-void svc_sock_destroy(struct svc_xprt *xprt)
+static void svc_bc_sock_free(struct svc_xprt *xprt)
{
- if (xprt)
+ if (xprt) {
+ kfree(xprt->xpt_bc_sid);
kfree(container_of(xprt, struct svc_sock, sk_xprt));
+ }
}
-EXPORT_SYMBOL_GPL(svc_sock_destroy);
+#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index cd9e841..679cd67 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
}
EXPORT_SYMBOL_GPL(xdr_write_pages);
+static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+ __be32 *p, unsigned int len)
+{
+ if (len > iov->iov_len)
+ len = iov->iov_len;
+ if (p == NULL)
+ p = (__be32*)iov->iov_base;
+ xdr->p = p;
+ xdr->end = (__be32*)(iov->iov_base + len);
+ xdr->iov = iov;
+ xdr->page_ptr = NULL;
+}
+
+static int xdr_set_page_base(struct xdr_stream *xdr,
+ unsigned int base, unsigned int len)
+{
+ unsigned int pgnr;
+ unsigned int maxlen;
+ unsigned int pgoff;
+ unsigned int pgend;
+ void *kaddr;
+
+ maxlen = xdr->buf->page_len;
+ if (base >= maxlen)
+ return -EINVAL;
+ maxlen -= base;
+ if (len > maxlen)
+ len = maxlen;
+
+ base += xdr->buf->page_base;
+
+ pgnr = base >> PAGE_SHIFT;
+ xdr->page_ptr = &xdr->buf->pages[pgnr];
+ kaddr = page_address(*xdr->page_ptr);
+
+ pgoff = base & ~PAGE_MASK;
+ xdr->p = (__be32*)(kaddr + pgoff);
+
+ pgend = pgoff + len;
+ if (pgend > PAGE_SIZE)
+ pgend = PAGE_SIZE;
+ xdr->end = (__be32*)(kaddr + pgend);
+ xdr->iov = NULL;
+ return 0;
+}
+
+static void xdr_set_next_page(struct xdr_stream *xdr)
+{
+ unsigned int newbase;
+
+ newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
+ newbase -= xdr->buf->page_base;
+
+ if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+}
+
+static bool xdr_set_next_buffer(struct xdr_stream *xdr)
+{
+ if (xdr->page_ptr != NULL)
+ xdr_set_next_page(xdr);
+ else if (xdr->iov == xdr->buf->head) {
+ if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
+ xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ }
+ return xdr->p != xdr->end;
+}
+
/**
* xdr_init_decode - Initialize an xdr_stream for decoding data.
* @xdr: pointer to xdr_stream struct
@@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
*/
void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
{
- struct kvec *iov = buf->head;
- unsigned int len = iov->iov_len;
-
- if (len > buf->len)
- len = buf->len;
xdr->buf = buf;
- xdr->iov = iov;
- xdr->p = p;
- xdr->end = (__be32 *)((char *)iov->iov_base + len);
+ xdr->scratch.iov_base = NULL;
+ xdr->scratch.iov_len = 0;
+ if (buf->head[0].iov_len != 0)
+ xdr_set_iov(xdr, buf->head, p, buf->len);
+ else if (buf->page_len != 0)
+ xdr_set_page_base(xdr, 0, buf->len);
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
-/**
- * xdr_inline_peek - Allow read-ahead in the XDR data stream
- * @xdr: pointer to xdr_stream struct
- * @nbytes: number of bytes of data to decode
- *
- * Check if the input buffer is long enough to enable us to decode
- * 'nbytes' more bytes of data starting at the current position.
- * If so return the current pointer without updating the current
- * pointer position.
- */
-__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p = xdr->p;
__be32 *q = p + XDR_QUADLEN(nbytes);
if (unlikely(q > xdr->end || q < p))
return NULL;
+ xdr->p = q;
return p;
}
-EXPORT_SYMBOL_GPL(xdr_inline_peek);
/**
- * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+ xdr->scratch.iov_base = buf;
+ xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
+static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p;
+ void *cpdest = xdr->scratch.iov_base;
+ size_t cplen = (char *)xdr->end - (char *)xdr->p;
+
+ if (nbytes > xdr->scratch.iov_len)
+ return NULL;
+ memcpy(cpdest, xdr->p, cplen);
+ cpdest += cplen;
+ nbytes -= cplen;
+ if (!xdr_set_next_buffer(xdr))
+ return NULL;
+ p = __xdr_inline_decode(xdr, nbytes);
+ if (p == NULL)
+ return NULL;
+ memcpy(cpdest, p, nbytes);
+ return xdr->scratch.iov_base;
+}
+
+/**
+ * xdr_inline_decode - Retrieve XDR data to decode
* @xdr: pointer to xdr_stream struct
* @nbytes: number of bytes of data to decode
*
@@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
*/
__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
- __be32 *p = xdr->p;
- __be32 *q = p + XDR_QUADLEN(nbytes);
+ __be32 *p;
- if (unlikely(q > xdr->end || q < p))
+ if (nbytes == 0)
+ return xdr->p;
+ if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
return NULL;
- xdr->p = q;
- return p;
+ p = __xdr_inline_decode(xdr, nbytes);
+ if (p != NULL)
+ return p;
+ return xdr_copy_to_scratch(xdr, nbytes);
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
@@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
*/
void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
{
- char * kaddr = page_address(xdr->buf->pages[0]);
xdr_read_pages(xdr, len);
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
- if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
- len = PAGE_CACHE_SIZE - xdr->buf->page_base;
- xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
- xdr->end = (__be32 *)((char *)xdr->p + len);
+ xdr_set_page_base(xdr, 0, len);
}
EXPORT_SYMBOL_GPL(xdr_enter_page);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8eb8895..d5e1e0b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -26,6 +26,7 @@
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/netlink.h>
+#include <net/ah.h>
#include <asm/uaccess.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <linux/in6.h>
@@ -302,7 +303,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
algo = xfrm_aalg_get_byname(ualg->alg_name, 1);
if (!algo)
return -ENOSYS;
- if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
+ if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN ||
+ ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
return -EINVAL;
*props = algo->desc.sadb_alg_id;
@@ -2187,7 +2189,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
- (nlh->nlmsg_flags & NLM_F_DUMP)) {
+ (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
if (link->dump == NULL)
return -EINVAL;
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1b9b13e..2b5387d 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
CFLAGS_OPTIMIZE = -O6
endif
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
EXTLIBS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
ALL_LDFLAGS = $(LDFLAGS)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc0490..7069bd3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -331,6 +331,9 @@ try_again:
else if (err == ENODEV && cpu_list) {
die("No such device - did you specify"
" an out-of-range profile CPU?\n");
+ } else if (err == ENOENT) {
+ die("%s event is not supported. ",
+ event_name(evsel));
} else if (err == EINVAL && sample_id_all_avail) {
/*
* Old kernel, no attr->sample_id_type_all field
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7a4ebeb..abd4b84 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -489,7 +489,8 @@ static void create_tasks(void)
err = pthread_attr_init(&attr);
BUG_ON(err);
- err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
+ err = pthread_attr_setstacksize(&attr,
+ (size_t) max(16 * 1024, PTHREAD_STACK_MIN));
BUG_ON(err);
err = pthread_mutex_lock(&start_work_mutex);
BUG_ON(err);
@@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv)
rec_argc = ARRAY_SIZE(record_args) + argc - 1;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- if (rec_argv)
+ if (rec_argv == NULL)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 02b2d80..c385a63 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv)
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid or running as root.",
system_wide ? "system-wide " : "");
+ } else if (errno == ENOENT) {
+ error("%s event is not supported. ", event_name(counter));
} else {
error("open_counter returned with %d (%s). "
"/bin/dmesg may provide additional information.\n",
@@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
nr_counters = ARRAY_SIZE(default_attrs);
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(default_attrs[c].type,
- default_attrs[c].config,
+ pos = perf_evsel__new(&default_attrs[c],
nr_counters);
if (pos == NULL)
goto out;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1c98434..ed56961 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -234,6 +234,7 @@ out:
return err;
}
+#include "util/cpumap.h"
#include "util/evsel.h"
#include <sys/types.h>
@@ -264,6 +265,7 @@ static int test__open_syscall_event(void)
int err = -1, fd;
struct thread_map *threads;
struct perf_evsel *evsel;
+ struct perf_event_attr attr;
unsigned int nr_open_calls = 111, i;
int id = trace_event__id("sys_enter_open");
@@ -278,7 +280,10 @@ static int test__open_syscall_event(void)
return -1;
}
- evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.config = id;
+ evsel = perf_evsel__new(&attr, 0);
if (evsel == NULL) {
pr_debug("perf_evsel__new\n");
goto out_thread_map_delete;
@@ -317,6 +322,111 @@ out_thread_map_delete:
return err;
}
+#include <sched.h>
+
+static int test__open_syscall_event_on_all_cpus(void)
+{
+ int err = -1, fd, cpu;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ unsigned int nr_open_calls = 111, i;
+ cpu_set_t *cpu_set;
+ size_t cpu_set_size;
+ int id = trace_event__id("sys_enter_open");
+
+ if (id < 0) {
+ pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+ return -1;
+ }
+
+ threads = thread_map__new(-1, getpid());
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpu_set = CPU_ALLOC(cpus->nr);
+
+ if (cpu_set == NULL)
+ goto out_thread_map_delete;
+
+ cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.config = id;
+ evsel = perf_evsel__new(&attr, 0);
+ if (evsel == NULL) {
+ pr_debug("perf_evsel__new\n");
+ goto out_cpu_free;
+ }
+
+ if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ strerror(errno));
+ goto out_evsel_delete;
+ }
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int ncalls = nr_open_calls + cpu;
+
+ CPU_SET(cpu, cpu_set);
+ sched_setaffinity(0, cpu_set_size, cpu_set);
+ for (i = 0; i < ncalls; ++i) {
+ fd = open("/etc/passwd", O_RDONLY);
+ close(fd);
+ }
+ CPU_CLR(cpu, cpu_set);
+ }
+
+ /*
+ * Here we need to explicitely preallocate the counts, as if
+ * we use the auto allocation it will allocate just for 1 cpu,
+ * as we start by cpu 0.
+ */
+ if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+ pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+ goto out_close_fd;
+ }
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int expected;
+
+ if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("perf_evsel__open_read_on_cpu\n");
+ goto out_close_fd;
+ }
+
+ expected = nr_open_calls + cpu;
+ if (evsel->counts->cpu[cpu].val != expected) {
+ pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+ expected, cpu, evsel->counts->cpu[cpu].val);
+ goto out_close_fd;
+ }
+ }
+
+ err = 0;
+out_close_fd:
+ perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+ perf_evsel__delete(evsel);
+out_cpu_free:
+ CPU_FREE(cpu_set);
+out_thread_map_delete:
+ thread_map__delete(threads);
+ return err;
+}
+
static struct test {
const char *desc;
int (*func)(void);
@@ -330,6 +440,10 @@ static struct test {
.func = test__open_syscall_event,
},
{
+ .desc = "detect open syscall event on all cpus",
+ .func = test__open_syscall_event_on_all_cpus,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1e67ab9..6ce4042 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1247,6 +1247,8 @@ try_again:
die("Permission error - are you root?\n"
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid.\n");
+ if (err == ENOENT)
+ die("%s event is not supported. ", event_name(evsel));
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c95267e..f5cfed6 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -6,14 +6,13 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
if (evsel != NULL) {
evsel->idx = idx;
- evsel->attr.type = type;
- evsel->attr.config = config;
+ evsel->attr = *attr;
INIT_LIST_HEAD(&evsel->node);
}
@@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads)
{
- int cpu;
+ int cpu, thread;
- if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+ if (evsel->fd == NULL &&
+ perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -1;
for (cpu = 0; cpu < cpus->nr; cpu++) {
- FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
- cpus->map[cpu], -1, 0);
- if (FD(evsel, cpu, 0) < 0)
- goto out_close;
+ for (thread = 0; thread < threads->nr; thread++) {
+ FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
+ threads->map[thread],
+ cpus->map[cpu], -1, 0);
+ if (FD(evsel, cpu, thread) < 0)
+ goto out_close;
+ }
}
return 0;
out_close:
- while (--cpu >= 0) {
- close(FD(evsel, cpu, 0));
- FD(evsel, cpu, 0) = -1;
- }
+ do {
+ while (--thread >= 0) {
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+ thread = threads->nr;
+ } while (--cpu >= 0);
return -1;
}
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+static struct {
+ struct cpu_map map;
+ int cpus[1];
+} empty_cpu_map = {
+ .map.nr = 1,
+ .cpus = { -1, },
+};
+
+static struct {
+ struct thread_map map;
+ int threads[1];
+} empty_thread_map = {
+ .map.nr = 1,
+ .threads = { -1, },
+};
+
+int perf_evsel__open(struct perf_evsel *evsel,
+ struct cpu_map *cpus, struct thread_map *threads)
{
- int thread;
-
- if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
- return -1;
- for (thread = 0; thread < threads->nr; thread++) {
- FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
- threads->map[thread], -1, -1, 0);
- if (FD(evsel, 0, thread) < 0)
- goto out_close;
+ if (cpus == NULL) {
+ /* Work around old compiler warnings about strict aliasing */
+ cpus = &empty_cpu_map.map;
}
- return 0;
+ if (threads == NULL)
+ threads = &empty_thread_map.map;
-out_close:
- while (--thread >= 0) {
- close(FD(evsel, 0, thread));
- FD(evsel, 0, thread) = -1;
- }
- return -1;
+ return __perf_evsel__open(evsel, cpus, threads);
}
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
{
- if (threads == NULL)
- return perf_evsel__open_per_cpu(evsel, cpus);
+ return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+}
- return perf_evsel__open_per_thread(evsel, threads);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+ return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a0ccd69..b2d755f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
struct cpu_map;
struct thread_map;
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 649083f..5cb6f4b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
return EVT_HANDLED_ALL;
}
+static int store_event_type(const char *orgname)
+{
+ char filename[PATH_MAX], *c;
+ FILE *file;
+ int id, n;
+
+ sprintf(filename, "%s/", debugfs_path);
+ strncat(filename, orgname, strlen(orgname));
+ strcat(filename, "/id");
+
+ c = strchr(filename, ':');
+ if (c)
+ *c = '/';
+
+ file = fopen(filename, "r");
+ if (!file)
+ return 0;
+ n = fscanf(file, "%i", &id);
+ fclose(file);
+ if (n < 1) {
+ pr_err("cannot store event ID\n");
+ return -EINVAL;
+ }
+ return perf_header__push_event(id, orgname);
+}
static enum event_result parse_tracepoint_event(const char **strp,
struct perf_event_attr *attr)
@@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp,
*strp += strlen(sys_name) + evt_length;
return parse_multiple_tracepoint_event(sys_name, evt_name,
flags);
- } else
+ } else {
+ if (store_event_type(evt_name) < 0)
+ return EVT_FAILED;
+
return parse_single_tracepoint_event(sys_name, evt_name,
evt_length, attr, strp);
+ }
}
static enum event_result
@@ -778,41 +807,11 @@ modifier:
return ret;
}
-static int store_event_type(const char *orgname)
-{
- char filename[PATH_MAX], *c;
- FILE *file;
- int id, n;
-
- sprintf(filename, "%s/", debugfs_path);
- strncat(filename, orgname, strlen(orgname));
- strcat(filename, "/id");
-
- c = strchr(filename, ':');
- if (c)
- *c = '/';
-
- file = fopen(filename, "r");
- if (!file)
- return 0;
- n = fscanf(file, "%i", &id);
- fclose(file);
- if (n < 1) {
- pr_err("cannot store event ID\n");
- return -EINVAL;
- }
- return perf_header__push_event(id, orgname);
-}
-
int parse_events(const struct option *opt __used, const char *str, int unset __used)
{
struct perf_event_attr attr;
enum event_result ret;
- if (strchr(str, ':'))
- if (store_event_type(str) < 0)
- return -1;
-
for (;;) {
memset(&attr, 0, sizeof(attr));
ret = parse_event_symbols(&str, &attr);
@@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
if (ret != EVT_HANDLED_ALL) {
struct perf_evsel *evsel;
- evsel = perf_evsel__new(attr.type, attr.config,
+ evsel = perf_evsel__new(&attr,
nr_counters);
if (evsel == NULL)
return -1;
@@ -1014,8 +1013,15 @@ void print_events(void)
int perf_evsel_list__create_default(void)
{
- struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
- PERF_COUNT_HW_CPU_CYCLES, 0);
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+
+ evsel = perf_evsel__new(&attr, 0);
+
if (evsel == NULL)
return -ENOMEM;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6fb4694..313dac2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ more:
if (size == 0)
size = 8;
- if (head + event->header.size >= mmap_size) {
+ if (head + event->header.size > mmap_size) {
if (mmaps[map_idx]) {
munmap(mmaps[map_idx], mmap_size);
mmaps[map_idx] = NULL;
OpenPOWER on IntegriCloud