diff options
107 files changed, 4606 insertions, 2216 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index e833c8c..d2af87b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -535,3 +535,13 @@ Why: Hareware scan is the prefer method for iwlwifi devices for Who: Wey-Yi Guy <wey-yi.w.guy@intel.com> ---------------------------- + +What: access to nfsd auth cache through sys_nfsservctl or '.' files + in the 'nfsd' filesystem. +When: 2.6.40 +Why: This is a legacy interface which have been replaced by a more + dynamic cache. Continuing to maintain this interface is an + unnecessary burden. +Who: NeilBrown <neilb@suse.de> + +---------------------------- diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 3225a56..a57e124 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -12,6 +12,8 @@ nfs-rdma.txt - how to install and setup the Linux NFS/RDMA client and server software nfsroot.txt - short guide on setting up a diskless box with NFS root filesystem. +pnfs.txt + - short explanation of some of the internals of the pnfs client code rpc-cache.txt - introduction to the caching mechanisms in the sunrpc layer. idmapper.txt diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt new file mode 100644 index 0000000..bc0b9cf --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -0,0 +1,48 @@ +Reference counting in pnfs: +========================== + +The are several inter-related caches. We have layouts which can +reference multiple devices, each of which can reference multiple data servers. +Each data server can be referenced by multiple devices. Each device +can be referenced by multiple layouts. To keep all of this straight, +we need to reference count. + + +struct pnfs_layout_hdr +---------------------- +The on-the-wire command LAYOUTGET corresponds to struct +pnfs_layout_segment, usually referred to by the variable name lseg. +Each nfs_inode may hold a pointer to a cache of of these layout +segments in nfsi->layout, of type struct pnfs_layout_hdr. + +We reference the header for the inode pointing to it, across each +outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, +LAYOUTCOMMIT), and for each lseg held within. + +Each header is also (when non-empty) put on a list associated with +struct nfs_client (cl_layouts). Being put on this list does not bump +the reference count, as the layout is kept around by the lseg that +keeps it in the list. + +deviceid_cache +-------------- +lsegs reference device ids, which are resolved per nfs_client and +layout driver type. The device ids are held in a RCU cache (struct +nfs4_deviceid_cache). The cache itself is referenced across each +mount. The entries (struct nfs4_deviceid) themselves are held across +the lifetime of each lseg referencing them. + +RCU is used because the deviceid is basically a write once, read many +data structure. The hlist size of 32 buckets needs better +justification, but seems reasonable given that we can have multiple +deviceid's per filesystem, and multiple filesystems per nfs_client. + +The hash code is copied from the nfsd code base. A discussion of +hashing and variations of this algorithm can be found at: +http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 + +data server cache +----------------- +file driver devices refer to data servers, which are kept in a module +level cache. Its reference is held over the lifetime of the deviceid +pointing to it. diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index bb78c15..c9e5d72 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -96,7 +96,7 @@ static int fbmem_region_reserved(unsigned long start, size_t size) * Get the region_idx`th region from board config/ATAG and convert it to * our internal format. */ -static int get_fbmem_region(int region_idx, struct omapfb_mem_region *rg) +static int __init get_fbmem_region(int region_idx, struct omapfb_mem_region *rg) { const struct omap_fbmem_config *conf; u32 paddr; @@ -128,7 +128,7 @@ static int set_fbmem_region_type(struct omapfb_mem_region *rg, int mem_type, * type = 0 && paddr = 0, a default don't care case maps to * the SDRAM type. */ - if (rg->type || (!rg->type && !rg->paddr)) + if (rg->type || !rg->paddr) return 0; if (ranges_overlap(rg->paddr, rg->size, mem_start, mem_size)) { rg->type = mem_type; @@ -260,7 +260,7 @@ void __init omapfb_reserve_sdram_memblock(void) * this point, since the driver built as a module would have problem with * freeing / reallocating the regions. */ -unsigned long omapfb_reserve_sram(unsigned long sram_pstart, +unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, unsigned long pstart_avail, @@ -334,7 +334,7 @@ void omapfb_set_ctrl_platform_data(void *data) omapfb_config.ctrl_platform_data = data; } -static inline int omap_init_fb(void) +static int __init omap_init_fb(void) { const struct omap_lcd_config *conf; @@ -379,7 +379,7 @@ void omapfb_set_platform_data(struct omapfb_platform_data *data) omapfb_config = *data; } -static inline int omap_init_fb(void) +static int __init omap_init_fb(void) { return platform_device_register(&omap_fb_device); } @@ -390,7 +390,7 @@ void omapfb_reserve_sdram_memblock(void) { } -unsigned long omapfb_reserve_sram(unsigned long sram_pstart, +unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, unsigned long start_avail, @@ -409,7 +409,7 @@ void omapfb_reserve_sdram_memblock(void) { } -unsigned long omapfb_reserve_sram(unsigned long sram_pstart, +unsigned long __init omapfb_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, unsigned long start_avail, diff --git a/arch/arm/plat-omap/include/plat/display.h b/arch/arm/plat-omap/include/plat/display.h index 8bd15bd..c915a66 100644 --- a/arch/arm/plat-omap/include/plat/display.h +++ b/arch/arm/plat-omap/include/plat/display.h @@ -81,37 +81,6 @@ enum omap_color_mode { OMAP_DSS_COLOR_ARGB32 = 1 << 11, /* ARGB32 */ OMAP_DSS_COLOR_RGBA32 = 1 << 12, /* RGBA32 */ OMAP_DSS_COLOR_RGBX32 = 1 << 13, /* RGBx32 */ - - OMAP_DSS_COLOR_GFX_OMAP2 = - OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 | - OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 | - OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 | - OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P, - - OMAP_DSS_COLOR_VID_OMAP2 = - OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | - OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 | - OMAP_DSS_COLOR_UYVY, - - OMAP_DSS_COLOR_GFX_OMAP3 = - OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 | - OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 | - OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 | - OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | - OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_ARGB32 | - OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32, - - OMAP_DSS_COLOR_VID1_OMAP3 = - OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 | - OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P | - OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_UYVY, - - OMAP_DSS_COLOR_VID2_OMAP3 = - OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 | - OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | - OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 | - OMAP_DSS_COLOR_UYVY | OMAP_DSS_COLOR_ARGB32 | - OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32, }; enum omap_lcd_display_type { diff --git a/arch/arm/plat-omap/include/plat/vrfb.h b/arch/arm/plat-omap/include/plat/vrfb.h index d8a03ce..3792bde 100644 --- a/arch/arm/plat-omap/include/plat/vrfb.h +++ b/arch/arm/plat-omap/include/plat/vrfb.h @@ -35,6 +35,7 @@ struct vrfb { bool yuv_mode; }; +#ifdef CONFIG_OMAP2_VRFB extern int omap_vrfb_request_ctx(struct vrfb *vrfb); extern void omap_vrfb_release_ctx(struct vrfb *vrfb); extern void omap_vrfb_adjust_size(u16 *width, u16 *height, @@ -47,4 +48,19 @@ extern void omap_vrfb_setup(struct vrfb *vrfb, unsigned long paddr, extern int omap_vrfb_map_angle(struct vrfb *vrfb, u16 height, u8 rot); extern void omap_vrfb_restore_context(void); +#else +static inline int omap_vrfb_request_ctx(struct vrfb *vrfb) { return 0; } +static inline void omap_vrfb_release_ctx(struct vrfb *vrfb) {} +static inline void omap_vrfb_adjust_size(u16 *width, u16 *height, + u8 bytespp) {} +static inline u32 omap_vrfb_min_phys_size(u16 width, u16 height, u8 bytespp) + { return 0; } +static inline u16 omap_vrfb_max_height(u32 phys_size, u16 width, u8 bytespp) + { return 0; } +static inline void omap_vrfb_setup(struct vrfb *vrfb, unsigned long paddr, + u16 width, u16 height, unsigned bytespp, bool yuv_mode) {} +static inline int omap_vrfb_map_angle(struct vrfb *vrfb, u16 height, u8 rot) + { return 0; } +static inline void omap_vrfb_restore_context(void) {} +#endif #endif /* __VRFB_H */ diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 733093d..141abeb 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = { * Detects nForce2 A2 and C1 stepping * */ -static unsigned int nforce2_detect_chipset(void) +static int nforce2_detect_chipset(void) { nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index fc09f14..d9f5136 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq; * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS * and MSR_TMTA_LONGRUN_CTRL */ -static void __init longrun_get_policy(struct cpufreq_policy *policy) +static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy) { u32 msr_lo, msr_hi; @@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu) * TMTA rules: * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) */ -static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, +static int __cpuinit longrun_determine_freqs(unsigned int *low_freq, unsigned int *high_freq) { u32 msr_lo, msr_hi; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 199dcb9..c63a438 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -918,8 +918,8 @@ static int cpufreq_add_dev_interface(unsigned int cpu, spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { - if (!cpu_online(j)) - continue; + if (!cpu_online(j)) + continue; per_cpu(cpufreq_cpu_data, j) = policy; per_cpu(cpufreq_policy_cpu, j) = policy->cpu; } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 7b5093664..c631f27 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -30,6 +30,8 @@ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) #define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) @@ -82,6 +84,7 @@ struct cpu_dbs_info_s { unsigned int freq_lo; unsigned int freq_lo_jiffies; unsigned int freq_hi_jiffies; + unsigned int rate_mult; int cpu; unsigned int sample_type:1; /* @@ -108,10 +111,12 @@ static struct dbs_tuners { unsigned int up_threshold; unsigned int down_differential; unsigned int ignore_nice; + unsigned int sampling_down_factor; unsigned int powersave_bias; unsigned int io_is_busy; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, .ignore_nice = 0, .powersave_bias = 0, @@ -259,6 +264,7 @@ static ssize_t show_##file_name \ show_one(sampling_rate, sampling_rate); show_one(io_is_busy, io_is_busy); show_one(up_threshold, up_threshold); +show_one(sampling_down_factor, sampling_down_factor); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); @@ -340,6 +346,29 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_sampling_down_factor(struct kobject *a, + struct attribute *b, const char *buf, size_t count) +{ + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + mutex_lock(&dbs_mutex); + dbs_tuners_ins.sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->rate_mult = 1; + } + mutex_unlock(&dbs_mutex); + + return count; +} + static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -401,6 +430,7 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, define_one_global_rw(sampling_rate); define_one_global_rw(io_is_busy); define_one_global_rw(up_threshold); +define_one_global_rw(sampling_down_factor); define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); @@ -409,6 +439,7 @@ static struct attribute *dbs_attributes[] = { &sampling_rate_min.attr, &sampling_rate.attr, &up_threshold.attr, + &sampling_down_factor.attr, &ignore_nice_load.attr, &powersave_bias.attr, &io_is_busy.attr, @@ -562,6 +593,10 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Check for frequency increase */ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + this_dbs_info->rate_mult = + dbs_tuners_ins.sampling_down_factor; dbs_freq_increase(policy, policy->max); return; } @@ -584,6 +619,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential); + /* No longer fully busy, reset rate_mult */ + this_dbs_info->rate_mult = 1; + if (freq_next < policy->min) freq_next = policy->min; @@ -607,7 +645,8 @@ static void do_dbs_timer(struct work_struct *work) int sample_type = dbs_info->sample_type; /* We want all CPUs to do sampling nearly on same jiffy */ - int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate + * dbs_info->rate_mult); if (num_online_cpus() > 1) delay -= jiffies % delay; @@ -711,6 +750,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, } } this_dbs_info->cpu = cpu; + this_dbs_info->rate_mult = 1; ondemand_powersave_bias_init_cpu(cpu); /* * Start the timerschedule work, when this governor diff --git a/drivers/video/omap/lcd_omap3beagle.c b/drivers/video/omap/lcd_omap3beagle.c index ca75cc2..d7c6c3e 100644 --- a/drivers/video/omap/lcd_omap3beagle.c +++ b/drivers/video/omap/lcd_omap3beagle.c @@ -25,8 +25,6 @@ #include <linux/gpio.h> #include <linux/i2c/twl.h> -#include <plat/mux.h> -#include <plat/mux.h> #include <asm/mach-types.h> #include "omapfb.h" diff --git a/drivers/video/omap2/displays/Kconfig b/drivers/video/omap2/displays/Kconfig index 881c9f7..12327bb 100644 --- a/drivers/video/omap2/displays/Kconfig +++ b/drivers/video/omap2/displays/Kconfig @@ -40,7 +40,7 @@ config PANEL_TPO_TD043MTEA1 config PANEL_ACX565AKM tristate "ACX565AKM Panel" - depends on OMAP2_DSS_SDI + depends on OMAP2_DSS_SDI && SPI select BACKLIGHT_CLASS_DEVICE help This is the LCD panel used on Nokia N900 diff --git a/drivers/video/omap2/displays/panel-acx565akm.c b/drivers/video/omap2/displays/panel-acx565akm.c index 07fbb8a..e773106 100644 --- a/drivers/video/omap2/displays/panel-acx565akm.c +++ b/drivers/video/omap2/displays/panel-acx565akm.c @@ -587,6 +587,9 @@ static int acx_panel_power_on(struct omap_dss_device *dssdev) dev_dbg(&dssdev->dev, "%s\n", __func__); + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + mutex_lock(&md->mutex); r = omapdss_sdi_display_enable(dssdev); @@ -644,6 +647,9 @@ static void acx_panel_power_off(struct omap_dss_device *dssdev) dev_dbg(&dssdev->dev, "%s\n", __func__); + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + mutex_lock(&md->mutex); if (!md->enabled) { diff --git a/drivers/video/omap2/displays/panel-generic.c b/drivers/video/omap2/displays/panel-generic.c index 300eff5..395a68d 100644 --- a/drivers/video/omap2/displays/panel-generic.c +++ b/drivers/video/omap2/displays/panel-generic.c @@ -39,6 +39,9 @@ static int generic_panel_power_on(struct omap_dss_device *dssdev) { int r; + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + r = omapdss_dpi_display_enable(dssdev); if (r) goto err0; @@ -58,6 +61,9 @@ err0: static void generic_panel_power_off(struct omap_dss_device *dssdev) { + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + if (dssdev->platform_disable) dssdev->platform_disable(dssdev); diff --git a/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c b/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c index 1026746..0c6896c 100644 --- a/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c +++ b/drivers/video/omap2/displays/panel-sharp-lq043t1dg01.c @@ -43,6 +43,9 @@ static int sharp_lq_panel_power_on(struct omap_dss_device *dssdev) { int r; + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + r = omapdss_dpi_display_enable(dssdev); if (r) goto err0; @@ -65,6 +68,9 @@ err0: static void sharp_lq_panel_power_off(struct omap_dss_device *dssdev) { + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + if (dssdev->platform_disable) dssdev->platform_disable(dssdev); diff --git a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c index 7d9eb2b..9a138f6 100644 --- a/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/omap2/displays/panel-sharp-ls037v7dw01.c @@ -135,6 +135,9 @@ static int sharp_ls_power_on(struct omap_dss_device *dssdev) { int r = 0; + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + r = omapdss_dpi_display_enable(dssdev); if (r) goto err0; @@ -157,6 +160,9 @@ err0: static void sharp_ls_power_off(struct omap_dss_device *dssdev) { + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + if (dssdev->platform_disable) dssdev->platform_disable(dssdev); diff --git a/drivers/video/omap2/displays/panel-toppoly-tdo35s.c b/drivers/video/omap2/displays/panel-toppoly-tdo35s.c index e320e67..526e906 100644 --- a/drivers/video/omap2/displays/panel-toppoly-tdo35s.c +++ b/drivers/video/omap2/displays/panel-toppoly-tdo35s.c @@ -46,6 +46,9 @@ static int toppoly_tdo_panel_power_on(struct omap_dss_device *dssdev) { int r; + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + r = omapdss_dpi_display_enable(dssdev); if (r) goto err0; @@ -65,6 +68,9 @@ err0: static void toppoly_tdo_panel_power_off(struct omap_dss_device *dssdev) { + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + if (dssdev->platform_disable) dssdev->platform_disable(dssdev); diff --git a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c index e866e76..dbe9d43 100644 --- a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c @@ -269,6 +269,9 @@ static int tpo_td043_power_on(struct omap_dss_device *dssdev) int nreset_gpio = dssdev->reset_gpio; int r; + if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) + return 0; + r = omapdss_dpi_display_enable(dssdev); if (r) goto err0; @@ -308,6 +311,9 @@ static void tpo_td043_power_off(struct omap_dss_device *dssdev) struct tpo_td043_device *tpo_td043 = dev_get_drvdata(&dssdev->dev); int nreset_gpio = dssdev->reset_gpio; + if (dssdev->state != OMAP_DSS_DISPLAY_ACTIVE) + return; + tpo_td043_write(tpo_td043->spi, 3, TPO_R03_VAL_STANDBY | TPO_R03_EN_PWM); diff --git a/drivers/video/omap2/dss/Makefile b/drivers/video/omap2/dss/Makefile index d71b5d9..7db17b5 100644 --- a/drivers/video/omap2/dss/Makefile +++ b/drivers/video/omap2/dss/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_OMAP2_DSS) += omapdss.o -omapdss-y := core.o dss.o dispc.o display.o manager.o overlay.o +omapdss-y := core.o dss.o dss_features.o dispc.o display.o manager.o overlay.o omapdss-$(CONFIG_OMAP2_DSS_DPI) += dpi.o omapdss-$(CONFIG_OMAP2_DSS_RFBI) += rfbi.o omapdss-$(CONFIG_OMAP2_DSS_VENC) += venc.o diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index b3a498f..8e89f60 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -37,6 +37,7 @@ #include <plat/clock.h> #include "dss.h" +#include "dss_features.h" static struct { struct platform_device *pdev; @@ -502,6 +503,8 @@ static int omap_dss_probe(struct platform_device *pdev) core.pdev = pdev; + dss_features_init(); + dss_init_overlay_managers(pdev); dss_init_overlays(pdev); diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c index 5ecdc00..fa40fa5 100644 --- a/drivers/video/omap2/dss/dispc.c +++ b/drivers/video/omap2/dss/dispc.c @@ -39,6 +39,7 @@ #include <plat/display.h> #include "dss.h" +#include "dss_features.h" /* DISPC */ #define DISPC_BASE 0x48050400 @@ -139,6 +140,22 @@ struct omap_dispc_isr_data { u32 mask; }; +struct dispc_h_coef { + s8 hc4; + s8 hc3; + u8 hc2; + s8 hc1; + s8 hc0; +}; + +struct dispc_v_coef { + s8 vc22; + s8 vc2; + u8 vc1; + s8 vc0; + s8 vc00; +}; + #define REG_GET(idx, start, end) \ FLD_GET(dispc_read_reg(idx), start, end) @@ -564,106 +581,77 @@ static void _dispc_set_scale_coef(enum omap_plane plane, int hscaleup, int vscaleup, int five_taps) { /* Coefficients for horizontal up-sampling */ - static const u32 coef_hup[8] = { - 0x00800000, - 0x0D7CF800, - 0x1E70F5FF, - 0x335FF5FE, - 0xF74949F7, - 0xF55F33FB, - 0xF5701EFE, - 0xF87C0DFF, + static const struct dispc_h_coef coef_hup[8] = { + { 0, 0, 128, 0, 0 }, + { -1, 13, 124, -8, 0 }, + { -2, 30, 112, -11, -1 }, + { -5, 51, 95, -11, -2 }, + { 0, -9, 73, 73, -9 }, + { -2, -11, 95, 51, -5 }, + { -1, -11, 112, 30, -2 }, + { 0, -8, 124, 13, -1 }, }; - /* Coefficients for horizontal down-sampling */ - static const u32 coef_hdown[8] = { - 0x24382400, - 0x28371FFE, - 0x2C361BFB, - 0x303516F9, - 0x11343311, - 0x1635300C, - 0x1B362C08, - 0x1F372804, + /* Coefficients for vertical up-sampling */ + static const struct dispc_v_coef coef_vup_3tap[8] = { + { 0, 0, 128, 0, 0 }, + { 0, 3, 123, 2, 0 }, + { 0, 12, 111, 5, 0 }, + { 0, 32, 89, 7, 0 }, + { 0, 0, 64, 64, 0 }, + { 0, 7, 89, 32, 0 }, + { 0, 5, 111, 12, 0 }, + { 0, 2, 123, 3, 0 }, }; - /* Coefficients for horizontal and vertical up-sampling */ - static const u32 coef_hvup[2][8] = { - { - 0x00800000, - 0x037B02FF, - 0x0C6F05FE, - 0x205907FB, - 0x00404000, - 0x075920FE, - 0x056F0CFF, - 0x027B0300, - }, - { - 0x00800000, - 0x0D7CF8FF, - 0x1E70F5FE, - 0x335FF5FB, - 0xF7404000, - 0xF55F33FE, - 0xF5701EFF, - 0xF87C0D00, - }, + static const struct dispc_v_coef coef_vup_5tap[8] = { + { 0, 0, 128, 0, 0 }, + { -1, 13, 124, -8, 0 }, + { -2, 30, 112, -11, -1 }, + { -5, 51, 95, -11, -2 }, + { 0, -9, 73, 73, -9 }, + { -2, -11, 95, 51, -5 }, + { -1, -11, 112, 30, -2 }, + { 0, -8, 124, 13, -1 }, }; - /* Coefficients for horizontal and vertical down-sampling */ - static const u32 coef_hvdown[2][8] = { - { - 0x24382400, - 0x28391F04, - 0x2D381B08, - 0x3237170C, - 0x123737F7, - 0x173732F9, - 0x1B382DFB, - 0x1F3928FE, - }, - { - 0x24382400, - 0x28371F04, - 0x2C361B08, - 0x3035160C, - 0x113433F7, - 0x163530F9, - 0x1B362CFB, - 0x1F3728FE, - }, + /* Coefficients for horizontal down-sampling */ + static const struct dispc_h_coef coef_hdown[8] = { + { 0, 36, 56, 36, 0 }, + { 4, 40, 55, 31, -2 }, + { 8, 44, 54, 27, -5 }, + { 12, 48, 53, 22, -7 }, + { -9, 17, 52, 51, 17 }, + { -7, 22, 53, 48, 12 }, + { -5, 27, 54, 44, 8 }, + { -2, 31, 55, 40, 4 }, }; - /* Coefficients for vertical up-sampling */ - static const u32 coef_vup[8] = { - 0x00000000, - 0x0000FF00, - 0x0000FEFF, - 0x0000FBFE, - 0x000000F7, - 0x0000FEFB, - 0x0000FFFE, - 0x000000FF, + /* Coefficients for vertical down-sampling */ + static const struct dispc_v_coef coef_vdown_3tap[8] = { + { 0, 36, 56, 36, 0 }, + { 0, 40, 57, 31, 0 }, + { 0, 45, 56, 27, 0 }, + { 0, 50, 55, 23, 0 }, + { 0, 18, 55, 55, 0 }, + { 0, 23, 55, 50, 0 }, + { 0, 27, 56, 45, 0 }, + { 0, 31, 57, 40, 0 }, }; - - /* Coefficients for vertical down-sampling */ - static const u32 coef_vdown[8] = { - 0x00000000, - 0x000004FE, - 0x000008FB, - 0x00000CF9, - 0x0000F711, - 0x0000F90C, - 0x0000FB08, - 0x0000FE04, + static const struct dispc_v_coef coef_vdown_5tap[8] = { + { 0, 36, 56, 36, 0 }, + { 4, 40, 55, 31, -2 }, + { 8, 44, 54, 27, -5 }, + { 12, 48, 53, 22, -7 }, + { -9, 17, 52, 51, 17 }, + { -7, 22, 53, 48, 12 }, + { -5, 27, 54, 44, 8 }, + { -2, 31, 55, 40, 4 }, }; - const u32 *h_coef; - const u32 *hv_coef; - const u32 *hv_coef_mod; - const u32 *v_coef; + const struct dispc_h_coef *h_coef; + const struct dispc_v_coef *v_coef; int i; if (hscaleup) @@ -671,47 +659,34 @@ static void _dispc_set_scale_coef(enum omap_plane plane, int hscaleup, else h_coef = coef_hdown; - if (vscaleup) { - hv_coef = coef_hvup[five_taps]; - v_coef = coef_vup; - - if (hscaleup) - hv_coef_mod = NULL; - else - hv_coef_mod = coef_hvdown[five_taps]; - } else { - hv_coef = coef_hvdown[five_taps]; - v_coef = coef_vdown; - - if (hscaleup) - hv_coef_mod = coef_hvup[five_taps]; - else - hv_coef_mod = NULL; - } + if (vscaleup) + v_coef = five_taps ? coef_vup_5tap : coef_vup_3tap; + else + v_coef = five_taps ? coef_vdown_5tap : coef_vdown_3tap; for (i = 0; i < 8; i++) { u32 h, hv; - h = h_coef[i]; - - hv = hv_coef[i]; - - if (hv_coef_mod) { - hv &= 0xffffff00; - hv |= (hv_coef_mod[i] & 0xff); - } + h = FLD_VAL(h_coef[i].hc0, 7, 0) + | FLD_VAL(h_coef[i].hc1, 15, 8) + | FLD_VAL(h_coef[i].hc2, 23, 16) + | FLD_VAL(h_coef[i].hc3, 31, 24); + hv = FLD_VAL(h_coef[i].hc4, 7, 0) + | FLD_VAL(v_coef[i].vc0, 15, 8) + | FLD_VAL(v_coef[i].vc1, 23, 16) + | FLD_VAL(v_coef[i].vc2, 31, 24); _dispc_write_firh_reg(plane, i, h); _dispc_write_firhv_reg(plane, i, hv); } - if (!five_taps) - return; - - for (i = 0; i < 8; i++) { - u32 v; - v = v_coef[i]; - _dispc_write_firv_reg(plane, i, v); + if (five_taps) { + for (i = 0; i < 8; i++) { + u32 v; + v = FLD_VAL(v_coef[i].vc00, 7, 0) + | FLD_VAL(v_coef[i].vc22, 15, 8); + _dispc_write_firv_reg(plane, i, v); + } } } @@ -800,12 +775,12 @@ static void _dispc_set_vid_size(enum omap_plane plane, int width, int height) static void _dispc_setup_global_alpha(enum omap_plane plane, u8 global_alpha) { - - BUG_ON(plane == OMAP_DSS_VIDEO1); - - if (cpu_is_omap24xx()) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA)) return; + BUG_ON(!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) && + plane == OMAP_DSS_VIDEO1); + if (plane == OMAP_DSS_GFX) REG_FLD_MOD(DISPC_GLOBAL_ALPHA, global_alpha, 7, 0); else if (plane == OMAP_DSS_VIDEO2) @@ -975,17 +950,14 @@ static void dispc_read_plane_fifo_sizes(void) DISPC_VID_FIFO_SIZE_STATUS(1) }; u32 size; int plane; + u8 start, end; enable_clocks(1); - for (plane = 0; plane < ARRAY_SIZE(dispc.fifo_size); ++plane) { - if (cpu_is_omap24xx()) - size = FLD_GET(dispc_read_reg(fsz_reg[plane]), 8, 0); - else if (cpu_is_omap34xx()) - size = FLD_GET(dispc_read_reg(fsz_reg[plane]), 10, 0); - else - BUG(); + dss_feat_get_reg_field(FEAT_REG_FIFOSIZE, &start, &end); + for (plane = 0; plane < ARRAY_SIZE(dispc.fifo_size); ++plane) { + size = FLD_GET(dispc_read_reg(fsz_reg[plane]), start, end); dispc.fifo_size[plane] = size; } @@ -1002,6 +974,8 @@ void dispc_setup_plane_fifo(enum omap_plane plane, u32 low, u32 high) const struct dispc_reg ftrs_reg[] = { DISPC_GFX_FIFO_THRESHOLD, DISPC_VID_FIFO_THRESHOLD(0), DISPC_VID_FIFO_THRESHOLD(1) }; + u8 hi_start, hi_end, lo_start, lo_end; + enable_clocks(1); DSSDBG("fifo(%d) low/high old %u/%u, new %u/%u\n", @@ -1010,12 +984,12 @@ void dispc_setup_plane_fifo(enum omap_plane plane, u32 low, u32 high) REG_GET(ftrs_reg[plane], 27, 16), low, high); - if (cpu_is_omap24xx()) - dispc_write_reg(ftrs_reg[plane], - FLD_VAL(high, 24, 16) | FLD_VAL(low, 8, 0)); - else - dispc_write_reg(ftrs_reg[plane], - FLD_VAL(high, 27, 16) | FLD_VAL(low, 11, 0)); + dss_feat_get_reg_field(FEAT_REG_FIFOHIGHTHRESHOLD, &hi_start, &hi_end); + dss_feat_get_reg_field(FEAT_REG_FIFOLOWTHRESHOLD, &lo_start, &lo_end); + + dispc_write_reg(ftrs_reg[plane], + FLD_VAL(high, hi_start, hi_end) | + FLD_VAL(low, lo_start, lo_end)); enable_clocks(0); } @@ -1035,13 +1009,16 @@ static void _dispc_set_fir(enum omap_plane plane, int hinc, int vinc) u32 val; const struct dispc_reg fir_reg[] = { DISPC_VID_FIR(0), DISPC_VID_FIR(1) }; + u8 hinc_start, hinc_end, vinc_start, vinc_end; BUG_ON(plane == OMAP_DSS_GFX); - if (cpu_is_omap24xx()) - val = FLD_VAL(vinc, 27, 16) | FLD_VAL(hinc, 11, 0); - else - val = FLD_VAL(vinc, 28, 16) | FLD_VAL(hinc, 12, 0); + dss_feat_get_reg_field(FEAT_REG_FIRHINC, &hinc_start, &hinc_end); + dss_feat_get_reg_field(FEAT_REG_FIRVINC, &vinc_start, &vinc_end); + + val = FLD_VAL(vinc, vinc_start, vinc_end) | + FLD_VAL(hinc, hinc_start, hinc_end); + dispc_write_reg(fir_reg[plane-1], val); } @@ -1567,6 +1544,8 @@ static int _dispc_setup_plane(enum omap_plane plane, case OMAP_DSS_COLOR_ARGB16: case OMAP_DSS_COLOR_ARGB32: case OMAP_DSS_COLOR_RGBA32: + if (!dss_has_feature(FEAT_GLOBAL_ALPHA)) + return -EINVAL; case OMAP_DSS_COLOR_RGBX32: if (cpu_is_omap24xx()) return -EINVAL; @@ -1607,9 +1586,10 @@ static int _dispc_setup_plane(enum omap_plane plane, case OMAP_DSS_COLOR_ARGB16: case OMAP_DSS_COLOR_ARGB32: case OMAP_DSS_COLOR_RGBA32: - if (cpu_is_omap24xx()) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA)) return -EINVAL; - if (plane == OMAP_DSS_VIDEO1) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) && + plane == OMAP_DSS_VIDEO1) return -EINVAL; break; @@ -2002,7 +1982,7 @@ void dispc_enable_trans_key(enum omap_channel ch, bool enable) } void dispc_enable_alpha_blending(enum omap_channel ch, bool enable) { - if (cpu_is_omap24xx()) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA)) return; enable_clocks(1); @@ -2016,7 +1996,7 @@ bool dispc_alpha_blending_enabled(enum omap_channel ch) { bool enabled; - if (cpu_is_omap24xx()) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA)) return false; enable_clocks(1); diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index b3fa3a7..aa4f7a5 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -3274,7 +3274,6 @@ int dsi_init(struct platform_device *pdev) dsi.vdds_dsi_reg = dss_get_vdds_dsi(); if (IS_ERR(dsi.vdds_dsi_reg)) { - iounmap(dsi.base); DSSERR("can't get VDDS_DSI regulator\n"); r = PTR_ERR(dsi.vdds_dsi_reg); goto err2; diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c new file mode 100644 index 0000000..867f68d --- /dev/null +++ b/drivers/video/omap2/dss/dss_features.c @@ -0,0 +1,191 @@ +/* + * linux/drivers/video/omap2/dss/dss_features.c + * + * Copyright (C) 2010 Texas Instruments + * Author: Archit Taneja <archit@ti.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/err.h> +#include <linux/slab.h> + +#include <plat/display.h> +#include <plat/cpu.h> + +#include "dss_features.h" + +/* Defines a generic omap register field */ +struct dss_reg_field { + enum dss_feat_reg_field id; + u8 start, end; +}; + +struct omap_dss_features { + const struct dss_reg_field *reg_fields; + const int num_reg_fields; + + const u32 has_feature; + + const int num_mgrs; + const int num_ovls; + const enum omap_display_type *supported_displays; + const enum omap_color_mode *supported_color_modes; +}; + +/* This struct is assigned to one of the below during initialization */ +static struct omap_dss_features *omap_current_dss_features; + +static const struct dss_reg_field omap2_dss_reg_fields[] = { + { FEAT_REG_FIRHINC, 11, 0 }, + { FEAT_REG_FIRVINC, 27, 16 }, + { FEAT_REG_FIFOLOWTHRESHOLD, 8, 0 }, + { FEAT_REG_FIFOHIGHTHRESHOLD, 24, 16 }, + { FEAT_REG_FIFOSIZE, 8, 0 }, +}; + +static const struct dss_reg_field omap3_dss_reg_fields[] = { + { FEAT_REG_FIRHINC, 12, 0 }, + { FEAT_REG_FIRVINC, 28, 16 }, + { FEAT_REG_FIFOLOWTHRESHOLD, 11, 0 }, + { FEAT_REG_FIFOHIGHTHRESHOLD, 27, 16 }, + { FEAT_REG_FIFOSIZE, 10, 0 }, +}; + +static const enum omap_display_type omap2_dss_supported_displays[] = { + /* OMAP_DSS_CHANNEL_LCD */ + OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI | + OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI, + + /* OMAP_DSS_CHANNEL_DIGIT */ + OMAP_DISPLAY_TYPE_VENC, +}; + +static const enum omap_display_type omap3_dss_supported_displays[] = { + /* OMAP_DSS_CHANNEL_LCD */ + OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI | + OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI, + + /* OMAP_DSS_CHANNEL_DIGIT */ + OMAP_DISPLAY_TYPE_VENC, +}; + +static const enum omap_color_mode omap2_dss_supported_color_modes[] = { + /* OMAP_DSS_GFX */ + OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 | + OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 | + OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 | + OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P, + + /* OMAP_DSS_VIDEO1 */ + OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | + OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 | + OMAP_DSS_COLOR_UYVY, + + /* OMAP_DSS_VIDEO2 */ + OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | + OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 | + OMAP_DSS_COLOR_UYVY, +}; + +static const enum omap_color_mode omap3_dss_supported_color_modes[] = { + /* OMAP_DSS_GFX */ + OMAP_DSS_COLOR_CLUT1 | OMAP_DSS_COLOR_CLUT2 | + OMAP_DSS_COLOR_CLUT4 | OMAP_DSS_COLOR_CLUT8 | + OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 | + OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | + OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_ARGB32 | + OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32, + + /* OMAP_DSS_VIDEO1 */ + OMAP_DSS_COLOR_RGB24U | OMAP_DSS_COLOR_RGB24P | + OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_RGB16 | + OMAP_DSS_COLOR_YUV2 | OMAP_DSS_COLOR_UYVY, + + /* OMAP_DSS_VIDEO2 */ + OMAP_DSS_COLOR_RGB12U | OMAP_DSS_COLOR_ARGB16 | + OMAP_DSS_COLOR_RGB16 | OMAP_DSS_COLOR_RGB24U | + OMAP_DSS_COLOR_RGB24P | OMAP_DSS_COLOR_YUV2 | + OMAP_DSS_COLOR_UYVY | OMAP_DSS_COLOR_ARGB32 | + OMAP_DSS_COLOR_RGBA32 | OMAP_DSS_COLOR_RGBX32, +}; + +/* OMAP2 DSS Features */ +static struct omap_dss_features omap2_dss_features = { + .reg_fields = omap2_dss_reg_fields, + .num_reg_fields = ARRAY_SIZE(omap2_dss_reg_fields), + + .num_mgrs = 2, + .num_ovls = 3, + .supported_displays = omap2_dss_supported_displays, + .supported_color_modes = omap2_dss_supported_color_modes, +}; + +/* OMAP3 DSS Features */ +static struct omap_dss_features omap3_dss_features = { + .reg_fields = omap3_dss_reg_fields, + .num_reg_fields = ARRAY_SIZE(omap3_dss_reg_fields), + + .has_feature = FEAT_GLOBAL_ALPHA, + + .num_mgrs = 2, + .num_ovls = 3, + .supported_displays = omap3_dss_supported_displays, + .supported_color_modes = omap3_dss_supported_color_modes, +}; + +/* Functions returning values related to a DSS feature */ +int dss_feat_get_num_mgrs(void) +{ + return omap_current_dss_features->num_mgrs; +} + +int dss_feat_get_num_ovls(void) +{ + return omap_current_dss_features->num_ovls; +} + +enum omap_display_type dss_feat_get_supported_displays(enum omap_channel channel) +{ + return omap_current_dss_features->supported_displays[channel]; +} + +enum omap_color_mode dss_feat_get_supported_color_modes(enum omap_plane plane) +{ + return omap_current_dss_features->supported_color_modes[plane]; +} + +/* DSS has_feature check */ +bool dss_has_feature(enum dss_feat_id id) +{ + return omap_current_dss_features->has_feature & id; +} + +void dss_feat_get_reg_field(enum dss_feat_reg_field id, u8 *start, u8 *end) +{ + if (id >= omap_current_dss_features->num_reg_fields) + BUG(); + + *start = omap_current_dss_features->reg_fields[id].start; + *end = omap_current_dss_features->reg_fields[id].end; +} + +void dss_features_init(void) +{ + if (cpu_is_omap24xx()) + omap_current_dss_features = &omap2_dss_features; + else + omap_current_dss_features = &omap3_dss_features; +} diff --git a/drivers/video/omap2/dss/dss_features.h b/drivers/video/omap2/dss/dss_features.h new file mode 100644 index 0000000..cb231ea --- /dev/null +++ b/drivers/video/omap2/dss/dss_features.h @@ -0,0 +1,50 @@ +/* + * linux/drivers/video/omap2/dss/dss_features.h + * + * Copyright (C) 2010 Texas Instruments + * Author: Archit Taneja <archit@ti.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __OMAP2_DSS_FEATURES_H +#define __OMAP2_DSS_FEATURES_H + +#define MAX_DSS_MANAGERS 2 +#define MAX_DSS_OVERLAYS 3 + +/* DSS has feature id */ +enum dss_feat_id { + FEAT_GLOBAL_ALPHA = 1 << 0, + FEAT_GLOBAL_ALPHA_VID1 = 1 << 1, +}; + +/* DSS register field id */ +enum dss_feat_reg_field { + FEAT_REG_FIRHINC, + FEAT_REG_FIRVINC, + FEAT_REG_FIFOHIGHTHRESHOLD, + FEAT_REG_FIFOLOWTHRESHOLD, + FEAT_REG_FIFOSIZE, +}; + +/* DSS Feature Functions */ +int dss_feat_get_num_mgrs(void); +int dss_feat_get_num_ovls(void); +enum omap_display_type dss_feat_get_supported_displays(enum omap_channel channel); +enum omap_color_mode dss_feat_get_supported_color_modes(enum omap_plane plane); + +bool dss_has_feature(enum dss_feat_id id); +void dss_feat_get_reg_field(enum dss_feat_reg_field id, u8 *start, u8 *end); +void dss_features_init(void); +#endif diff --git a/drivers/video/omap2/dss/manager.c b/drivers/video/omap2/dss/manager.c index 6a649ab..545e9b9 100644 --- a/drivers/video/omap2/dss/manager.c +++ b/drivers/video/omap2/dss/manager.c @@ -33,6 +33,7 @@ #include <plat/cpu.h> #include "dss.h" +#include "dss_features.h" static int num_managers; static struct list_head manager_list; @@ -448,8 +449,8 @@ struct manager_cache_data { static struct { spinlock_t lock; - struct overlay_cache_data overlay_cache[3]; - struct manager_cache_data manager_cache[2]; + struct overlay_cache_data overlay_cache[MAX_DSS_OVERLAYS]; + struct manager_cache_data manager_cache[MAX_DSS_MANAGERS]; bool irq_enabled; } dss_cache; @@ -882,12 +883,12 @@ static int configure_dispc(void) { struct overlay_cache_data *oc; struct manager_cache_data *mc; - const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); - const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); + const int num_ovls = dss_feat_get_num_ovls(); + const int num_mgrs = dss_feat_get_num_mgrs(); int i; int r; - bool mgr_busy[2]; - bool mgr_go[2]; + bool mgr_busy[MAX_DSS_MANAGERS]; + bool mgr_go[MAX_DSS_MANAGERS]; bool busy; r = 0; @@ -989,7 +990,7 @@ void dss_setup_partial_planes(struct omap_dss_device *dssdev, { struct overlay_cache_data *oc; struct manager_cache_data *mc; - const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); + const int num_ovls = dss_feat_get_num_ovls(); struct omap_overlay_manager *mgr; int i; u16 x, y, w, h; @@ -1121,8 +1122,8 @@ void dss_start_update(struct omap_dss_device *dssdev) { struct manager_cache_data *mc; struct overlay_cache_data *oc; - const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); - const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); + const int num_ovls = dss_feat_get_num_ovls(); + const int num_mgrs = dss_feat_get_num_mgrs(); struct omap_overlay_manager *mgr; int i; @@ -1151,10 +1152,10 @@ static void dss_apply_irq_handler(void *data, u32 mask) { struct manager_cache_data *mc; struct overlay_cache_data *oc; - const int num_ovls = ARRAY_SIZE(dss_cache.overlay_cache); - const int num_mgrs = ARRAY_SIZE(dss_cache.manager_cache); + const int num_ovls = dss_feat_get_num_ovls(); + const int num_mgrs = dss_feat_get_num_mgrs(); int i, r; - bool mgr_busy[2]; + bool mgr_busy[MAX_DSS_MANAGERS]; mgr_busy[0] = dispc_go_busy(0); mgr_busy[1] = dispc_go_busy(1); @@ -1461,7 +1462,7 @@ int dss_init_overlay_managers(struct platform_device *pdev) num_managers = 0; - for (i = 0; i < 2; ++i) { + for (i = 0; i < dss_feat_get_num_mgrs(); ++i) { struct omap_overlay_manager *mgr; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); @@ -1471,14 +1472,10 @@ int dss_init_overlay_managers(struct platform_device *pdev) case 0: mgr->name = "lcd"; mgr->id = OMAP_DSS_CHANNEL_LCD; - mgr->supported_displays = - OMAP_DISPLAY_TYPE_DPI | OMAP_DISPLAY_TYPE_DBI | - OMAP_DISPLAY_TYPE_SDI | OMAP_DISPLAY_TYPE_DSI; break; case 1: mgr->name = "tv"; mgr->id = OMAP_DSS_CHANNEL_DIGIT; - mgr->supported_displays = OMAP_DISPLAY_TYPE_VENC; break; } @@ -1494,6 +1491,8 @@ int dss_init_overlay_managers(struct platform_device *pdev) mgr->disable = &dss_mgr_disable; mgr->caps = OMAP_DSS_OVL_MGR_CAP_DISPC; + mgr->supported_displays = + dss_feat_get_supported_displays(mgr->id); dss_overlay_setup_dispc_manager(mgr); diff --git a/drivers/video/omap2/dss/overlay.c b/drivers/video/omap2/dss/overlay.c index 244dca8..75642c2 100644 --- a/drivers/video/omap2/dss/overlay.c +++ b/drivers/video/omap2/dss/overlay.c @@ -35,6 +35,7 @@ #include <plat/cpu.h> #include "dss.h" +#include "dss_features.h" static int num_overlays; static struct list_head overlay_list; @@ -237,7 +238,8 @@ static ssize_t overlay_global_alpha_store(struct omap_overlay *ovl, /* Video1 plane does not support global alpha * to always make it 255 completely opaque */ - if (ovl->id == OMAP_DSS_VIDEO1) + if (!dss_has_feature(FEAT_GLOBAL_ALPHA_VID1) && + ovl->id == OMAP_DSS_VIDEO1) info.global_alpha = 255; else info.global_alpha = simple_strtoul(buf, NULL, 10); @@ -510,11 +512,11 @@ static void omap_dss_add_overlay(struct omap_overlay *overlay) list_add_tail(&overlay->list, &overlay_list); } -static struct omap_overlay *dispc_overlays[3]; +static struct omap_overlay *dispc_overlays[MAX_DSS_OVERLAYS]; void dss_overlay_setup_dispc_manager(struct omap_overlay_manager *mgr) { - mgr->num_overlays = 3; + mgr->num_overlays = dss_feat_get_num_ovls(); mgr->overlays = dispc_overlays; } @@ -535,7 +537,7 @@ void dss_init_overlays(struct platform_device *pdev) num_overlays = 0; - for (i = 0; i < 3; ++i) { + for (i = 0; i < dss_feat_get_num_ovls(); ++i) { struct omap_overlay *ovl; ovl = kzalloc(sizeof(*ovl), GFP_KERNEL); @@ -545,18 +547,12 @@ void dss_init_overlays(struct platform_device *pdev) case 0: ovl->name = "gfx"; ovl->id = OMAP_DSS_GFX; - ovl->supported_modes = cpu_is_omap34xx() ? - OMAP_DSS_COLOR_GFX_OMAP3 : - OMAP_DSS_COLOR_GFX_OMAP2; ovl->caps = OMAP_DSS_OVL_CAP_DISPC; ovl->info.global_alpha = 255; break; case 1: ovl->name = "vid1"; ovl->id = OMAP_DSS_VIDEO1; - ovl->supported_modes = cpu_is_omap34xx() ? - OMAP_DSS_COLOR_VID1_OMAP3 : - OMAP_DSS_COLOR_VID_OMAP2; ovl->caps = OMAP_DSS_OVL_CAP_SCALE | OMAP_DSS_OVL_CAP_DISPC; ovl->info.global_alpha = 255; @@ -564,9 +560,6 @@ void dss_init_overlays(struct platform_device *pdev) case 2: ovl->name = "vid2"; ovl->id = OMAP_DSS_VIDEO2; - ovl->supported_modes = cpu_is_omap34xx() ? - OMAP_DSS_COLOR_VID2_OMAP3 : - OMAP_DSS_COLOR_VID_OMAP2; ovl->caps = OMAP_DSS_OVL_CAP_SCALE | OMAP_DSS_OVL_CAP_DISPC; ovl->info.global_alpha = 255; @@ -579,6 +572,9 @@ void dss_init_overlays(struct platform_device *pdev) ovl->get_overlay_info = &dss_ovl_get_overlay_info; ovl->wait_for_go = &dss_ovl_wait_for_go; + ovl->supported_modes = + dss_feat_get_supported_color_modes(ovl->id); + omap_dss_add_overlay(ovl); r = kobject_init_and_add(&ovl->kobj, &overlay_ktype, @@ -651,7 +647,7 @@ void dss_recheck_connections(struct omap_dss_device *dssdev, bool force) } if (mgr) { - for (i = 0; i < 3; i++) { + for (i = 0; i < dss_feat_get_num_ovls(); i++) { struct omap_overlay *ovl; ovl = omap_dss_get_overlay(i); if (!ovl->manager || force) { diff --git a/drivers/video/omap2/omapfb/Kconfig b/drivers/video/omap2/omapfb/Kconfig index 43496d6..65149b2 100644 --- a/drivers/video/omap2/omapfb/Kconfig +++ b/drivers/video/omap2/omapfb/Kconfig @@ -3,7 +3,7 @@ menuconfig FB_OMAP2 depends on FB && OMAP2_DSS select OMAP2_VRAM - select OMAP2_VRFB + select OMAP2_VRFB if ARCH_OMAP2 || ARCH_OMAP3 select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c index 04034d4..6a704f1 100644 --- a/drivers/video/omap2/omapfb/omapfb-main.c +++ b/drivers/video/omap2/omapfb/omapfb-main.c @@ -714,10 +714,10 @@ int check_fb_var(struct fb_info *fbi, struct fb_var_screeninfo *var) var->pixclock = timings.pixel_clock != 0 ? KHZ2PICOS(timings.pixel_clock) : 0; - var->left_margin = timings.hfp; - var->right_margin = timings.hbp; - var->upper_margin = timings.vfp; - var->lower_margin = timings.vbp; + var->left_margin = timings.hbp; + var->right_margin = timings.hfp; + var->upper_margin = timings.vbp; + var->lower_margin = timings.vfp; var->hsync_len = timings.hsw; var->vsync_len = timings.vsw; } else { @@ -2059,10 +2059,10 @@ static int omapfb_mode_to_timings(const char *mode_str, if (r != 0) { timings->pixel_clock = PICOS2KHZ(var.pixclock); - timings->hfp = var.left_margin; - timings->hbp = var.right_margin; - timings->vfp = var.upper_margin; - timings->vbp = var.lower_margin; + timings->hbp = var.left_margin; + timings->hfp = var.right_margin; + timings->vbp = var.upper_margin; + timings->vfp = var.lower_margin; timings->hsw = var.hsync_len; timings->vsw = var.vsync_len; timings->x_res = var.xres; @@ -2198,6 +2198,16 @@ static int omapfb_probe(struct platform_device *pdev) goto err0; } + /* TODO : Replace cpu check with omap_has_vrfb once HAS_FEATURE + * available for OMAP2 and OMAP3 + */ + if (def_vrfb && !cpu_is_omap24xx() && !cpu_is_omap34xx()) { + def_vrfb = 0; + dev_warn(&pdev->dev, "VRFB is not supported on this hardware, " + "ignoring the module parameter vrfb=y\n"); + } + + mutex_init(&fbdev->mtx); fbdev->dev = &pdev->dev; diff --git a/fs/Makefile b/fs/Makefile index e6ec1d3..26956fc 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o - -nfsd-$(CONFIG_NFSD) := nfsctl.o -obj-y += $(nfsd-y) $(nfsd-m) - +obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o diff --git a/fs/compat.c b/fs/compat.c index 0644a15..f03abda 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } #endif /* HAVE_SET_RESTORE_SIGMASK */ -#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) +#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) /* Stuff for NFS server syscalls... */ struct compat_nfsctl_svc { u16 svc32_port; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index bb464d1210..25e21e4 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host) .to_retries = 5U, }; struct rpc_create_args args = { + .net = &init_net, .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e301546..e0c9189 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void) .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; struct rpc_create_args args = { + .net = &init_net, .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f1bacf1..b13aabc 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, xprt = svc_find_xprt(serv, name, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, family, port, + return svc_create_xprt(serv, name, &init_net, family, port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 031c656..a336e83 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) static void nlm4svc_callback_release(void *data) { - lock_kernel(); nlm_release_call(data); - unlock_kernel(); } static const struct rpc_call_ops nlm4svc_callback_ops = { diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 84055d3..6f1ef00 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops; * The list of blocked locks to retry */ static LIST_HEAD(nlm_blocked); +static DEFINE_SPINLOCK(nlm_blocked_lock); /* * Insert a blocked lock into the global list */ static void -nlmsvc_insert_block(struct nlm_block *block, unsigned long when) +nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when) { struct nlm_block *b; struct list_head *pos; @@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when) block->b_when = when; } +static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when) +{ + spin_lock(&nlm_blocked_lock); + nlmsvc_insert_block_locked(block, when); + spin_unlock(&nlm_blocked_lock); +} + /* * Remove a block from the global list */ @@ -94,7 +102,9 @@ static inline void nlmsvc_remove_block(struct nlm_block *block) { if (!list_empty(&block->b_list)) { + spin_lock(&nlm_blocked_lock); list_del_init(&block->b_list); + spin_unlock(&nlm_blocked_lock); nlmsvc_release_block(block); } } @@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, struct nlm_block *block; int rc = -ENOENT; - lock_kernel(); + spin_lock(&nlm_blocked_lock); list_for_each_entry(block, &nlm_blocked, b_list) { if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", @@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, } else if (result == 0) block->b_granted = 1; - nlmsvc_insert_block(block, 0); + nlmsvc_insert_block_locked(block, 0); svc_wake_up(block->b_daemon); rc = 0; break; } } - unlock_kernel(); + spin_unlock(&nlm_blocked_lock); if (rc == -ENOENT) printk(KERN_WARNING "lockd: grant for unknown block\n"); return rc; @@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); - lock_kernel(); + spin_lock(&nlm_blocked_lock); /* if the block is not on a list at this point then it has * been invalidated. Don't try to requeue it. * @@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) /* Call was successful, now wait for client callback */ timeout = 60 * HZ; } - nlmsvc_insert_block(block, timeout); + nlmsvc_insert_block_locked(block, timeout); svc_wake_up(block->b_daemon); out: - unlock_kernel(); + spin_unlock(&nlm_blocked_lock); } +/* + * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an + * .rpc_release rpc_call_op + */ static void nlmsvc_grant_release(void *data) { struct nlm_rqst *call = data; - - lock_kernel(); nlmsvc_release_block(call->a_block); - unlock_kernel(); } static const struct rpc_call_ops nlmsvc_grant_ops = { diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 0f2ab74..c3069f3 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) static void nlmsvc_callback_release(void *data) { - lock_kernel(); nlm_release_call(data); - unlock_kernel(); } static const struct rpc_call_ops nlmsvc_callback_ops = { diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 5c55c26..fd66765 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -77,13 +77,17 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_V4 && EXPERIMENTAL + depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol - (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + (RFC 5661) in the kernel's NFS client. If unsure, say N. +config PNFS_FILE_LAYOUT + tristate + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index da7fda6..4776ff9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o +nfs-$(CONFIG_NFS_V4_1) += pnfs.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o + +obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o +nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index e17b49e..aeec017 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv) { int ret; - ret = svc_create_xprt(serv, "tcp", PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 930d10f..2950fca 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n if (delegation == NULL) return 0; - /* seqid is 4-bytes long */ - if (((u32 *) &stateid->data)[0] != 0) + if (stateid->stateid.seqid != 0) return 0; - if (memcmp(&delegation->stateid.data[4], &stateid->data[4], - sizeof(stateid->data)-4)) + if (memcmp(&delegation->stateid.stateid.other, + &stateid->stateid.other, + NFS4_STATEID_OTHER_SIZE)) return 0; return 1; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a882785..0870d0d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -48,6 +48,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; - +#if defined(CONFIG_NFS_V4_1) + INIT_LIST_HEAD(&clp->cl_layouts); +#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp) nfs_free_client(clp); } } +EXPORT_SYMBOL_GPL(nfs_put_client); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* @@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { + .net = &init_net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, @@ -900,6 +905,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + set_pnfs_layoutdriver(server, fsinfo->layouttype); + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); @@ -939,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str } fsinfo.fattr = fattr; + fsinfo.layouttype = 0; error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) goto out_error; @@ -1021,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server) { dprintk("--> nfs_free_server()\n"); + unset_pnfs_layoutdriver(server); spin_lock(&nfs_client_lock); list_del(&server->client_link); list_del(&server->master_link); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5..a6e711a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e18c31e0..e756075 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -36,6 +36,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); + pnfs_update_layout(mapping->host, + nfs_file_open_context(file), + IOMODE_RW); + start: /* * Prevent starvation issues if someone is doing a consistency diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6eec286..314f571 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -48,6 +48,7 @@ #include "internal.h" #include "fscache.h" #include "dns_resolve.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -1410,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); + pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ @@ -1447,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation = NULL; nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); + nfsi->layout = NULL; #endif } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d610203..eceafe7 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { + .net = &init_net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { + .net = &init_net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c new file mode 100644 index 0000000..2e92f0d --- /dev/null +++ b/fs/nfs/nfs4filelayout.c @@ -0,0 +1,280 @@ +/* + * Module for the pnfs nfs4 file layout driver. + * Defines all I/O and Policy interface operations, plus code + * to register itself with the pNFS client. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> + +#include "internal.h" +#include "nfs4filelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); +MODULE_DESCRIPTION("The NFSv4 file layout driver"); + +static int +filelayout_set_layoutdriver(struct nfs_server *nfss) +{ + int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, + nfs4_fl_free_deviceid_callback); + if (status) { + printk(KERN_WARNING "%s: deviceid cache could not be " + "initialized\n", __func__); + return status; + } + dprintk("%s: deviceid cache has been initialized successfully\n", + __func__); + return 0; +} + +/* Clear out the layout by destroying its device list */ +static int +filelayout_clear_layoutdriver(struct nfs_server *nfss) +{ + dprintk("--> %s\n", __func__); + + if (nfss->nfs_client->cl_devid_cache) + pnfs_put_deviceid_cache(nfss->nfs_client); + return 0; +} + +/* + * filelayout_check_layout() + * + * Make sure layout segment parameters are sane WRT the device. + * At this point no generic layer initialization of the lseg has occurred, + * and nothing has been added to the layout_hdr cache. + * + */ +static int +filelayout_check_layout(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id) +{ + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + struct nfs_server *nfss = NFS_SERVER(lo->inode); + + dprintk("--> %s\n", __func__); + + if (fl->pattern_offset > lgr->range.offset) { + dprintk("%s pattern_offset %lld to large\n", + __func__, fl->pattern_offset); + goto out; + } + + if (fl->stripe_unit % PAGE_SIZE) { + dprintk("%s Stripe unit (%u) not page aligned\n", + __func__, fl->stripe_unit); + goto out; + } + + /* find and reference the deviceid */ + dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); + if (dsaddr == NULL) { + dsaddr = get_device_info(lo->inode, id); + if (dsaddr == NULL) + goto out; + } + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index < 0 || + fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %d\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + + if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { + dprintk("%s Stripe unit (%u) not aligned with rsize %u " + "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, + nfss->wsize); + } + + status = 0; +out: + dprintk("--> %s returns %d\n", __func__, status); + return status; +out_put: + pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); + goto out; +} + +static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +{ + int i; + + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); + fl->fh_array = NULL; +} + +static void +_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) +{ + filelayout_free_fh_array(fl); + kfree(fl); +} + +static int +filelayout_decode_layout(struct pnfs_layout_hdr *flo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id) +{ + uint32_t *p = (uint32_t *)lgr->layout.buf; + uint32_t nfl_util; + int i; + + dprintk("%s: set_layout_map Begin\n", __func__); + + memcpy(id, p, sizeof(*id)); + p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); + print_deviceid(id); + + nfl_util = be32_to_cpup(p++); + if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) + fl->commit_through_mds = 1; + if (nfl_util & NFL4_UFLG_DENSE) + fl->stripe_type = STRIPE_DENSE; + else + fl->stripe_type = STRIPE_SPARSE; + fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; + + fl->first_stripe_index = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &fl->pattern_offset); + fl->num_fh = be32_to_cpup(p++); + + dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", + __func__, nfl_util, fl->num_fh, fl->first_stripe_index, + fl->pattern_offset); + + fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + GFP_KERNEL); + if (!fl->fh_array) + return -ENOMEM; + + for (i = 0; i < fl->num_fh; i++) { + /* Do we want to use a mempool here? */ + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + if (!fl->fh_array[i]) { + filelayout_free_fh_array(fl); + return -ENOMEM; + } + fl->fh_array[i]->size = be32_to_cpup(p++); + if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + printk(KERN_ERR "Too big fh %d received %d\n", + i, fl->fh_array[i]->size); + filelayout_free_fh_array(fl); + return -EIO; + } + memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); + p += XDR_QUADLEN(fl->fh_array[i]->size); + dprintk("DEBUG: %s: fh len %d\n", __func__, + fl->fh_array[i]->size); + } + + return 0; +} + +static struct pnfs_layout_segment * +filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, + struct nfs4_layoutget_res *lgr) +{ + struct nfs4_filelayout_segment *fl; + int rc; + struct nfs4_deviceid id; + + dprintk("--> %s\n", __func__); + fl = kzalloc(sizeof(*fl), GFP_KERNEL); + if (!fl) + return NULL; + + rc = filelayout_decode_layout(layoutid, fl, lgr, &id); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + _filelayout_free_lseg(fl); + return NULL; + } + return &fl->generic_hdr; +} + +static void +filelayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode); + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + + dprintk("--> %s\n", __func__); + pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, + &fl->dsaddr->deviceid); + _filelayout_free_lseg(fl); +} + +static struct pnfs_layoutdriver_type filelayout_type = { + .id = LAYOUT_NFSV4_1_FILES, + .name = "LAYOUT_NFSV4_1_FILES", + .owner = THIS_MODULE, + .set_layoutdriver = filelayout_set_layoutdriver, + .clear_layoutdriver = filelayout_clear_layoutdriver, + .alloc_lseg = filelayout_alloc_lseg, + .free_lseg = filelayout_free_lseg, +}; + +static int __init nfs4filelayout_init(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", + __func__); + return pnfs_register_layoutdriver(&filelayout_type); +} + +static void __exit nfs4filelayout_exit(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", + __func__); + pnfs_unregister_layoutdriver(&filelayout_type); +} + +module_init(nfs4filelayout_init); +module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h new file mode 100644 index 0000000..bbf60dd --- /dev/null +++ b/fs/nfs/nfs4filelayout.h @@ -0,0 +1,94 @@ +/* + * NFSv4 file layout driver data structures. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#ifndef FS_NFS_NFS4FILELAYOUT_H +#define FS_NFS_NFS4FILELAYOUT_H + +#include "pnfs.h" + +/* + * Field testing shows we need to support upto 4096 stripe indices. + * We store each index as a u8 (u32 on the wire) to keep the memory footprint + * reasonable. This in turn means we support a maximum of 256 + * RFC 5661 multipath_list4 structures. + */ +#define NFS4_PNFS_MAX_STRIPE_CNT 4096 +#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ + +enum stripetype4 { + STRIPE_SPARSE = 1, + STRIPE_DENSE = 2 +}; + +/* Individual ip address */ +struct nfs4_pnfs_ds { + struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + u32 ds_ip_addr; + u32 ds_port; + struct nfs_client *ds_clp; + atomic_t ds_count; +}; + +struct nfs4_file_layout_dsaddr { + struct pnfs_deviceid_node deviceid; + u32 stripe_count; + u8 *stripe_indices; + u32 ds_num; + struct nfs4_pnfs_ds *ds_list[1]; +}; + +struct nfs4_filelayout_segment { + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; +}; + +static inline struct nfs4_filelayout_segment * +FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, + struct nfs4_filelayout_segment, + generic_hdr); +} + +extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); +extern void print_ds(struct nfs4_pnfs_ds *ds); +extern void print_deviceid(struct nfs4_deviceid *dev_id); +extern struct nfs4_file_layout_dsaddr * +nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); +struct nfs4_file_layout_dsaddr * +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); + +#endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c new file mode 100644 index 0000000..51fe64ac --- /dev/null +++ b/fs/nfs/nfs4filelayoutdev.c @@ -0,0 +1,448 @@ +/* + * Device operations for the pnfs nfs4 file layout driver. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * Garth Goodson <Garth.Goodson@netapp.com> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> +#include <linux/vmalloc.h> + +#include "internal.h" +#include "nfs4filelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* + * Data server cache + * + * Data servers can be mapped to different device ids. + * nfs4_pnfs_ds reference counting + * - set to 1 on allocation + * - incremented when a device id maps a data server already in the cache. + * - decremented when deviceid is removed from the cache. + */ +DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static LIST_HEAD(nfs4_data_server_cache); + +/* Debug routines */ +void +print_ds(struct nfs4_pnfs_ds *ds) +{ + if (ds == NULL) { + printk("%s NULL device\n", __func__); + return; + } + printk(" ip_addr %x port %hu\n" + " ref count %d\n" + " client %p\n" + " cl_exchange_flags %x\n", + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + atomic_read(&ds->ds_count), ds->ds_clp, + ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); +} + +void +print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) +{ + int i; + + ifdebug(FACILITY) { + printk("%s dsaddr->ds_num %d\n", __func__, + dsaddr->ds_num); + for (i = 0; i < dsaddr->ds_num; i++) + print_ds(dsaddr->ds_list[i]); + } +} + +void print_deviceid(struct nfs4_deviceid *id) +{ + u32 *p = (u32 *)id; + + dprintk("%s: device id= [%x%x%x%x]\n", __func__, + p[0], p[1], p[2], p[3]); +} + +/* nfs4_ds_cache_lock is held */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(u32 ip_addr, u32 port) +{ + struct nfs4_pnfs_ds *ds; + + dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", + ntohl(ip_addr), ntohs(port)); + + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + if (ds->ds_ip_addr == ip_addr && + ds->ds_port == port) { + return ds; + } + } + return NULL; +} + +static void +destroy_ds(struct nfs4_pnfs_ds *ds) +{ + dprintk("--> %s\n", __func__); + ifdebug(FACILITY) + print_ds(ds); + + if (ds->ds_clp) + nfs_put_client(ds->ds_clp); + kfree(ds); +} + +static void +nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) +{ + struct nfs4_pnfs_ds *ds; + int i; + + print_deviceid(&dsaddr->deviceid.de_id); + + for (i = 0; i < dsaddr->ds_num; i++) { + ds = dsaddr->ds_list[i]; + if (ds != NULL) { + if (atomic_dec_and_lock(&ds->ds_count, + &nfs4_ds_cache_lock)) { + list_del_init(&ds->ds_node); + spin_unlock(&nfs4_ds_cache_lock); + destroy_ds(ds); + } + } + } + kfree(dsaddr->stripe_indices); + kfree(dsaddr); +} + +void +nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) +{ + struct nfs4_file_layout_dsaddr *dsaddr = + container_of(device, struct nfs4_file_layout_dsaddr, deviceid); + + nfs4_fl_free_deviceid(dsaddr); +} + +static struct nfs4_pnfs_ds * +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +{ + struct nfs4_pnfs_ds *tmp_ds, *ds; + + ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + if (!ds) + goto out; + + spin_lock(&nfs4_ds_cache_lock); + tmp_ds = _data_server_lookup_locked(ip_addr, port); + if (tmp_ds == NULL) { + ds->ds_ip_addr = ip_addr; + ds->ds_port = port; + atomic_set(&ds->ds_count, 1); + INIT_LIST_HEAD(&ds->ds_node); + ds->ds_clp = NULL; + list_add(&ds->ds_node, &nfs4_data_server_cache); + dprintk("%s add new data server ip 0x%x\n", __func__, + ds->ds_ip_addr); + } else { + kfree(ds); + atomic_inc(&tmp_ds->ds_count); + dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_ip_addr, + atomic_read(&tmp_ds->ds_count)); + ds = tmp_ds; + } + spin_unlock(&nfs4_ds_cache_lock); +out: + return ds; +} + +/* + * Currently only support ipv4, and one multi-path address. + */ +static struct nfs4_pnfs_ds * +decode_and_add_ds(__be32 **pp, struct inode *inode) +{ + struct nfs4_pnfs_ds *ds = NULL; + char *buf; + const char *ipend, *pstr; + u32 ip_addr, port; + int nlen, rlen, i; + int tmp[2]; + __be32 *r_netid, *r_addr, *p = *pp; + + /* r_netid */ + nlen = be32_to_cpup(p++); + r_netid = p; + p += XDR_QUADLEN(nlen); + + /* r_addr */ + rlen = be32_to_cpup(p++); + r_addr = p; + p += XDR_QUADLEN(rlen); + *pp = p; + + /* Check that netid is "tcp" */ + if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { + dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + goto out_err; + } + + /* ipv6 length plus port is legal */ + if (rlen > INET6_ADDRSTRLEN + 8) { + dprintk("%s Invalid address, length %d\n", __func__, + rlen); + goto out_err; + } + buf = kmalloc(rlen + 1, GFP_KERNEL); + buf[rlen] = '\0'; + memcpy(buf, r_addr, rlen); + + /* replace the port dots with dashes for the in4_pton() delimiter*/ + for (i = 0; i < 2; i++) { + char *res = strrchr(buf, '.'); + *res = '-'; + } + + /* Currently only support ipv4 address */ + if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { + dprintk("%s: Only ipv4 addresses supported\n", __func__); + goto out_free; + } + + /* port */ + pstr = ipend; + sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + port = htons((tmp[0] << 8) | (tmp[1])); + + ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + dprintk("%s Decoded address and port %s\n", __func__, buf); +out_free: + kfree(buf); +out_err: + return ds; +} + +/* Decode opaque device data and return the result */ +static struct nfs4_file_layout_dsaddr* +decode_device(struct inode *ino, struct pnfs_device *pdev) +{ + int i, dummy; + u32 cnt, num; + u8 *indexp; + __be32 *p = (__be32 *)pdev->area, *indicesp; + struct nfs4_file_layout_dsaddr *dsaddr; + + /* Get the stripe count (number of stripe index) */ + cnt = be32_to_cpup(p++); + dprintk("%s stripe count %d\n", __func__, cnt); + if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { + printk(KERN_WARNING "%s: stripe count %d greater than " + "supported maximum %d\n", __func__, + cnt, NFS4_PNFS_MAX_STRIPE_CNT); + goto out_err; + } + + /* Check the multipath list count */ + indicesp = p; + p += XDR_QUADLEN(cnt << 2); + num = be32_to_cpup(p++); + dprintk("%s ds_num %u\n", __func__, num); + if (num > NFS4_PNFS_MAX_MULTI_CNT) { + printk(KERN_WARNING "%s: multipath count %d greater than " + "supported maximum %d\n", __func__, + num, NFS4_PNFS_MAX_MULTI_CNT); + goto out_err; + } + dsaddr = kzalloc(sizeof(*dsaddr) + + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), + GFP_KERNEL); + if (!dsaddr) + goto out_err; + + dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); + if (!dsaddr->stripe_indices) + goto out_err_free; + + dsaddr->stripe_count = cnt; + dsaddr->ds_num = num; + + memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); + + /* Go back an read stripe indices */ + p = indicesp; + indexp = &dsaddr->stripe_indices[0]; + for (i = 0; i < dsaddr->stripe_count; i++) { + *indexp = be32_to_cpup(p++); + if (*indexp >= num) + goto out_err_free; + indexp++; + } + /* Skip already read multipath list count */ + p++; + + for (i = 0; i < dsaddr->ds_num; i++) { + int j; + + dummy = be32_to_cpup(p++); /* multipath count */ + if (dummy > 1) { + printk(KERN_WARNING + "%s: Multipath count %d not supported, " + "skipping all greater than 1\n", __func__, + dummy); + } + for (j = 0; j < dummy; j++) { + if (j == 0) { + dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); + if (dsaddr->ds_list[i] == NULL) + goto out_err_free; + } else { + u32 len; + /* skip extra multipath */ + len = be32_to_cpup(p++); + p += XDR_QUADLEN(len); + len = be32_to_cpup(p++); + p += XDR_QUADLEN(len); + continue; + } + } + } + return dsaddr; + +out_err_free: + nfs4_fl_free_deviceid(dsaddr); +out_err: + dprintk("%s ERROR: returning NULL\n", __func__); + return NULL; +} + +/* + * Decode the opaque device specified in 'dev' + * and add it to the list of available devices. + * If the deviceid is already cached, nfs4_add_deviceid will return + * a pointer to the cached struct and throw away the new. + */ +static struct nfs4_file_layout_dsaddr* +decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +{ + struct nfs4_file_layout_dsaddr *dsaddr; + struct pnfs_deviceid_node *d; + + dsaddr = decode_device(inode, dev); + if (!dsaddr) { + printk(KERN_WARNING "%s: Could not decode or add device\n", + __func__); + return NULL; + } + + d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, + &dsaddr->deviceid); + + return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); +} + +/* + * Retrieve the information for dev_id, add it to the list + * of available devices, and return it. + */ +struct nfs4_file_layout_dsaddr * +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +{ + struct pnfs_device *pdev = NULL; + u32 max_resp_sz; + int max_pages; + struct page **pages = NULL; + struct nfs4_file_layout_dsaddr *dsaddr = NULL; + int rc, i; + struct nfs_server *server = NFS_SERVER(inode); + + /* + * Use the session max response size as the basis for setting + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + max_pages = max_resp_sz >> PAGE_SHIFT; + dprintk("%s inode %p max_resp_sz %u max_pages %d\n", + __func__, inode, max_resp_sz, max_pages); + + pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + if (pdev == NULL) + return NULL; + + pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + kfree(pdev); + return NULL; + } + for (i = 0; i < max_pages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; + } + + /* set pdev->area */ + pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); + if (!pdev->area) + goto out_free; + + memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); + pdev->layout_type = LAYOUT_NFSV4_1_FILES; + pdev->pages = pages; + pdev->pgbase = 0; + pdev->pglen = PAGE_SIZE * max_pages; + pdev->mincount = 0; + + rc = nfs4_proc_getdeviceinfo(server, pdev); + dprintk("%s getdevice info returns %d\n", __func__, rc); + if (rc) + goto out_free; + + /* + * Found new device, need to decode it and then add it to the + * list of known devices for this mountpoint. + */ + dsaddr = decode_and_add_device(inode, pdev); +out_free: + if (pdev->area != NULL) + vunmap(pdev->area); + for (i = 0; i < max_pages; i++) + __free_page(pages[i]); + kfree(pages); + kfree(pdev); + dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); + return dsaddr; +} + +struct nfs4_file_layout_dsaddr * +nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) +{ + struct pnfs_deviceid_node *d; + + d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); + return (d == NULL) ? NULL : + container_of(d, struct nfs4_file_layout_dsaddr, deviceid); +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e87fe61..32c8758 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -55,6 +55,7 @@ #include "internal.h" #include "iostat.h" #include "callback.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -130,6 +131,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_LEASE_TIME, FATTR4_WORD1_TIME_DELTA + | FATTR4_WORD1_FS_LAYOUT_TYPES }; const u32 nfs4_fs_locations_bitmap[2] = { @@ -4840,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->bc_attrs.max_reqs); } -static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) +static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) { - if (rcvd <= sent) - return 0; - printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " - "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); - return -EINVAL; + struct nfs4_channel_attrs *sent = &args->fc_attrs; + struct nfs4_channel_attrs *rcvd = &session->fc_attrs; + + if (rcvd->headerpadsz > sent->headerpadsz) + return -EINVAL; + if (rcvd->max_resp_sz > sent->max_resp_sz) + return -EINVAL; + /* + * Our requested max_ops is the minimum we need; we're not + * prepared to break up compounds into smaller pieces than that. + * So, no point even trying to continue if the server won't + * cooperate: + */ + if (rcvd->max_ops < sent->max_ops) + return -EINVAL; + if (rcvd->max_reqs == 0) + return -EINVAL; + return 0; } -#define _verify_fore_channel_attr(_name_) \ - _verify_channel_attr("fore", #_name_, \ - args->fc_attrs._name_, \ - session->fc_attrs._name_) +static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) +{ + struct nfs4_channel_attrs *sent = &args->bc_attrs; + struct nfs4_channel_attrs *rcvd = &session->bc_attrs; -#define _verify_back_channel_attr(_name_) \ - _verify_channel_attr("back", #_name_, \ - args->bc_attrs._name_, \ - session->bc_attrs._name_) + if (rcvd->max_rqst_sz > sent->max_rqst_sz) + return -EINVAL; + if (rcvd->max_resp_sz < sent->max_resp_sz) + return -EINVAL; + if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) + return -EINVAL; + /* These would render the backchannel useless: */ + if (rcvd->max_ops == 0) + return -EINVAL; + if (rcvd->max_reqs == 0) + return -EINVAL; + return 0; +} -/* - * The server is not allowed to increase the fore channel header pad size, - * maximum response size, or maximum number of operations. - * - * The back channel attributes are only negotiatied down: We send what the - * (back channel) server insists upon. - */ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) { - int ret = 0; - - ret |= _verify_fore_channel_attr(headerpadsz); - ret |= _verify_fore_channel_attr(max_resp_sz); - ret |= _verify_fore_channel_attr(max_ops); - - ret |= _verify_back_channel_attr(headerpadsz); - ret |= _verify_back_channel_attr(max_rqst_sz); - ret |= _verify_back_channel_attr(max_resp_sz); - ret |= _verify_back_channel_attr(max_resp_sz_cached); - ret |= _verify_back_channel_attr(max_ops); - ret |= _verify_back_channel_attr(max_reqs); + int ret; - return ret; + ret = nfs4_verify_fore_channel_attrs(args, session); + if (ret) + return ret; + return nfs4_verify_back_channel_attrs(args, session); } static int _nfs4_proc_create_session(struct nfs_client *clp) @@ -5255,6 +5264,147 @@ out: dprintk("<-- %s status=%d\n", __func__, status); return status; } + +static void +nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + struct inode *ino = lgp->args.inode; + struct nfs_server *server = NFS_SERVER(ino); + + dprintk("--> %s\n", __func__); + if (nfs4_setup_sequence(server, &lgp->args.seq_args, + &lgp->res.seq_res, 0, task)) + return; + rpc_call_start(task); +} + +static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lgp->res.seq_res)) + return; + + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_LAYOUTTRYLATER: + case -NFS4ERR_RECALLCONFLICT: + task->tk_status = -NFS4ERR_DELAY; + /* Fall through */ + default: + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + rpc_restart_call_prepare(task); + return; + } + } + lgp->status = task->tk_status; + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutget_release(void *calldata) +{ + struct nfs4_layoutget *lgp = calldata; + + dprintk("--> %s\n", __func__); + put_layout_hdr(lgp->args.inode); + if (lgp->res.layout.buf != NULL) + free_page((unsigned long) lgp->res.layout.buf); + put_nfs_open_context(lgp->args.ctx); + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutget_call_ops = { + .rpc_call_prepare = nfs4_layoutget_prepare, + .rpc_call_done = nfs4_layoutget_done, + .rpc_release = nfs4_layoutget_release, +}; + +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +{ + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], + .rpc_argp = &lgp->args, + .rpc_resp = &lgp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutget_call_ops, + .callback_data = lgp, + .flags = RPC_TASK_ASYNC, + }; + int status = 0; + + dprintk("--> %s\n", __func__); + + lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); + if (lgp->res.layout.buf == NULL) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + + lgp->res.seq_res.sr_slot = NULL; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) + goto out; + status = lgp->status; + if (status != 0) + goto out; + status = pnfs_layout_process(lgp); +out: + rpc_put_task(task); + dprintk("<-- %s status=%d\n", __func__, status); + return status; +} + +static int +_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +{ + struct nfs4_getdeviceinfo_args args = { + .pdev = pdev, + }; + struct nfs4_getdeviceinfo_res res = { + .pdev = pdev, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + dprintk("--> %s\n", __func__); + status = nfs4_call_sync(server, &msg, &args, &res, 0); + dprintk("<-- %s status=%d\n", __func__, status); + + return status; +} + +int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_proc_getdeviceinfo(server, pdev), + &exception); + } while (exception.retry); + return err; +} +EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); + #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index aa0b02a..f575a31 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -54,6 +54,7 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "pnfs.h" #define OPENOWNER_POOL_SIZE 8 @@ -1475,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + pnfs_destroy_all_layouts(clp); } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index bd2101d..f313c4c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include <linux/nfs_idmap.h> #include "nfs4_fs.h" #include "internal.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) +#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ + XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) +#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ + 1 /* layout type */ + \ + 1 /* opaque devaddr4 length */ + \ + /* devaddr4 payload is read into page */ \ + 1 /* notification bitmap length */ + \ + 1 /* notification bitmap */) +#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ + encode_stateid_maxsz) +#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ + decode_stateid_maxsz + \ + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) +#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz +\ + encode_getdeviceinfo_maxsz) +#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_getdeviceinfo_maxsz) +#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutget_maxsz) +#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutget_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr, #endif /* CONFIG_NFS_V4_1 */ } +#ifdef CONFIG_NFS_V4_1 +static void +encode_getdeviceinfo(struct xdr_stream *xdr, + const struct nfs4_getdeviceinfo_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(OP_GETDEVICEINFO); + p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(args->pdev->layout_type); + *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ + *p++ = cpu_to_be32(0); /* bitmap length 0 */ + hdr->nops++; + hdr->replen += decode_getdeviceinfo_maxsz; +} + +static void +encode_layoutget(struct xdr_stream *xdr, + const struct nfs4_layoutget_args *args, + struct compound_hdr *hdr) +{ + nfs4_stateid stateid; + __be32 *p; + + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_LAYOUTGET); + *p++ = cpu_to_be32(0); /* Signal layout available */ + *p++ = cpu_to_be32(args->type); + *p++ = cpu_to_be32(args->range.iomode); + p = xdr_encode_hyper(p, args->range.offset); + p = xdr_encode_hyper(p, args->range.length); + p = xdr_encode_hyper(p, args->minlength); + pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, + args->ctx->state); + p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(args->maxcount); + + dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", + __func__, + args->type, + args->range.iomode, + (unsigned long)args->range.offset, + (unsigned long)args->range.length, + args->maxcount); + hdr->nops++; + hdr->replen += decode_layoutget_maxsz; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * END OF "GENERIC" ENCODE ROUTINES. */ @@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, return 0; } +/* + * Encode GETDEVICEINFO request + */ +static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p, + struct nfs4_getdeviceinfo_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_getdeviceinfo(&xdr, args, &hdr); + + /* set up reply kvec. Subtract notification bitmap max size (2) + * so that notification bitmap is put in xdr_buf tail */ + xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, + args->pdev->pages, args->pdev->pgbase, + args->pdev->pglen); + + encode_nops(&hdr); + return 0; +} + +/* + * Encode LAYOUTGET request + */ +static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p, + struct nfs4_layoutget_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, NFS_FH(args->inode), &hdr); + encode_layoutget(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -3978,6 +4103,61 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); } +/* + * Decode potentially multiple layout types. Currently we only support + * one layout driver per file system. + */ +static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, + uint32_t *layouttype) +{ + uint32_t *p; + int num; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num = be32_to_cpup(p); + + /* pNFS is not supported by the underlying file system */ + if (num == 0) { + *layouttype = 0; + return 0; + } + if (num > 1) + printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " + "per filesystem not supported\n", __func__); + + /* Decode and set first layout type, move xdr->p past unused types */ + p = xdr_inline_decode(xdr, num * 4); + if (unlikely(!p)) + goto out_overflow; + *layouttype = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +/* + * The type of file system exported. + * Note we must ensure that layouttype is set in any non-error case. + */ +static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *layouttype) +{ + int status = 0; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[1]); + if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) + return -EIO; + if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { + status = decode_first_pnfs_layout_type(xdr, layouttype); + bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; + } else + *layouttype = 0; + return status; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { __be32 *savep; @@ -4006,6 +4186,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); + if (status != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -4772,6 +4955,134 @@ out_overflow: #endif /* CONFIG_NFS_V4_1 */ } +#if defined(CONFIG_NFS_V4_1) + +static int decode_getdeviceinfo(struct xdr_stream *xdr, + struct pnfs_device *pdev) +{ + __be32 *p; + uint32_t len, type; + int status; + + status = decode_op_hdr(xdr, OP_GETDEVICEINFO); + if (status) { + if (status == -ETOOSMALL) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + pdev->mincount = be32_to_cpup(p); + dprintk("%s: Min count too small. mincnt = %u\n", + __func__, pdev->mincount); + } + return status; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + goto out_overflow; + type = be32_to_cpup(p++); + if (type != pdev->layout_type) { + dprintk("%s: layout mismatch req: %u pdev: %u\n", + __func__, pdev->layout_type, type); + return -EINVAL; + } + /* + * Get the length of the opaque device_addr4. xdr_read_pages places + * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages) + * and places the remaining xdr data in xdr_buf->tail + */ + pdev->mincount = be32_to_cpup(p); + xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ + + /* Parse notification bitmap, verifying that it is zero. */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p); + if (len) { + int i; + + p = xdr_inline_decode(xdr, 4 * len); + if (unlikely(!p)) + goto out_overflow; + for (i = 0; i < len; i++, p++) { + if (be32_to_cpup(p)) { + dprintk("%s: notifications not supported\n", + __func__); + return -EIO; + } + } + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs4_layoutget_res *res) +{ + __be32 *p; + int status; + u32 layout_count; + + status = decode_op_hdr(xdr, OP_LAYOUTGET); + if (status) + return status; + p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); + if (unlikely(!p)) + goto out_overflow; + res->return_on_close = be32_to_cpup(p++); + p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); + layout_count = be32_to_cpup(p); + if (!layout_count) { + dprintk("%s: server responded with empty layout array\n", + __func__); + return -EINVAL; + } + + p = xdr_inline_decode(xdr, 24); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &res->range.offset); + p = xdr_decode_hyper(p, &res->range.length); + res->range.iomode = be32_to_cpup(p++); + res->type = be32_to_cpup(p++); + + status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); + if (unlikely(status)) + return status; + + dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", + __func__, + (unsigned long)res->range.offset, + (unsigned long)res->range.length, + res->range.iomode, + res->type, + res->layout.len); + + /* nfs4_proc_layoutget allocated a single page */ + if (res->layout.len > PAGE_SIZE) + return -ENOMEM; + memcpy(res->layout.buf, p, res->layout.len); + + if (layout_count > 1) { + /* We only handle a length one array at the moment. Any + * further entries are just ignored. Note that this means + * the client may see a response that is less than the + * minimum it requested. + */ + dprintk("%s: server responded with %d layouts, dropping tail\n", + __func__, layout_count); + } + + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * END OF "GENERIC" DECODE ROUTINES. */ @@ -5799,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, status = decode_reclaim_complete(&xdr, (void *)NULL); return status; } + +/* + * Decode GETDEVINFO response + */ +static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_getdeviceinfo_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status != 0) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status != 0) + goto out; + status = decode_getdeviceinfo(&xdr, res->pdev); +out: + return status; +} + +/* + * Decode LAYOUTGET response + */ +static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_layoutget_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_layoutget(&xdr, rqstp, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, @@ -5990,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEQUENCE, enc_sequence, dec_sequence), PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), + PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), + PROC(LAYOUTGET, enc_layoutget, dec_layoutget), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c new file mode 100644 index 0000000..db77342 --- /dev/null +++ b/fs/nfs/pnfs.c @@ -0,0 +1,783 @@ +/* + * pNFS functions to call and manage layout drivers. + * + * Copyright (c) 2002 [year of first publication] + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include <linux/nfs_fs.h> +#include "internal.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS + +/* Locking: + * + * pnfs_spinlock: + * protects pnfs_modules_tbl. + */ +static DEFINE_SPINLOCK(pnfs_spinlock); + +/* + * pnfs_modules_tbl holds all pnfs modules + */ +static LIST_HEAD(pnfs_modules_tbl); + +/* Return the registered pnfs layout driver module matching given id */ +static struct pnfs_layoutdriver_type * +find_pnfs_driver_locked(u32 id) +{ + struct pnfs_layoutdriver_type *local; + + list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) + if (local->id == id) + goto out; + local = NULL; +out: + dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); + return local; +} + +static struct pnfs_layoutdriver_type * +find_pnfs_driver(u32 id) +{ + struct pnfs_layoutdriver_type *local; + + spin_lock(&pnfs_spinlock); + local = find_pnfs_driver_locked(id); + spin_unlock(&pnfs_spinlock); + return local; +} + +void +unset_pnfs_layoutdriver(struct nfs_server *nfss) +{ + if (nfss->pnfs_curr_ld) { + nfss->pnfs_curr_ld->clear_layoutdriver(nfss); + module_put(nfss->pnfs_curr_ld->owner); + } + nfss->pnfs_curr_ld = NULL; +} + +/* + * Try to set the server's pnfs module to the pnfs layout type specified by id. + * Currently only one pNFS layout driver per filesystem is supported. + * + * @id layout type. Zero (illegal layout type) indicates pNFS not in use. + */ +void +set_pnfs_layoutdriver(struct nfs_server *server, u32 id) +{ + struct pnfs_layoutdriver_type *ld_type = NULL; + + if (id == 0) + goto out_no_driver; + if (!(server->nfs_client->cl_exchange_flags & + (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { + printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, + id, server->nfs_client->cl_exchange_flags); + goto out_no_driver; + } + ld_type = find_pnfs_driver(id); + if (!ld_type) { + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); + ld_type = find_pnfs_driver(id); + if (!ld_type) { + dprintk("%s: No pNFS module found for %u.\n", + __func__, id); + goto out_no_driver; + } + } + if (!try_module_get(ld_type->owner)) { + dprintk("%s: Could not grab reference on module\n", __func__); + goto out_no_driver; + } + server->pnfs_curr_ld = ld_type; + if (ld_type->set_layoutdriver(server)) { + printk(KERN_ERR + "%s: Error initializing mount point for layout driver %u.\n", + __func__, id); + module_put(ld_type->owner); + goto out_no_driver; + } + dprintk("%s: pNFS module for %u set\n", __func__, id); + return; + +out_no_driver: + dprintk("%s: Using NFSv4 I/O\n", __func__); + server->pnfs_curr_ld = NULL; +} + +int +pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + int status = -EINVAL; + struct pnfs_layoutdriver_type *tmp; + + if (ld_type->id == 0) { + printk(KERN_ERR "%s id 0 is reserved\n", __func__); + return status; + } + if (!ld_type->alloc_lseg || !ld_type->free_lseg) { + printk(KERN_ERR "%s Layout driver must provide " + "alloc_lseg and free_lseg.\n", __func__); + return status; + } + + spin_lock(&pnfs_spinlock); + tmp = find_pnfs_driver_locked(ld_type->id); + if (!tmp) { + list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); + status = 0; + dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, + ld_type->name); + } else { + printk(KERN_ERR "%s Module with id %d already loaded!\n", + __func__, ld_type->id); + } + spin_unlock(&pnfs_spinlock); + + return status; +} +EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); + +void +pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) +{ + dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); + spin_lock(&pnfs_spinlock); + list_del(&ld_type->pnfs_tblid); + spin_unlock(&pnfs_spinlock); +} +EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); + +/* + * pNFS client layout cache + */ + +static void +get_layout_hdr_locked(struct pnfs_layout_hdr *lo) +{ + assert_spin_locked(&lo->inode->i_lock); + lo->refcount++; +} + +static void +put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +{ + assert_spin_locked(&lo->inode->i_lock); + BUG_ON(lo->refcount == 0); + + lo->refcount--; + if (!lo->refcount) { + dprintk("%s: freeing layout cache %p\n", __func__, lo); + BUG_ON(!list_empty(&lo->layouts)); + NFS_I(lo->inode)->layout = NULL; + kfree(lo); + } +} + +void +put_layout_hdr(struct inode *inode) +{ + spin_lock(&inode->i_lock); + put_layout_hdr_locked(NFS_I(inode)->layout); + spin_unlock(&inode->i_lock); +} + +static void +init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) +{ + INIT_LIST_HEAD(&lseg->fi_list); + kref_init(&lseg->kref); + lseg->layout = lo; +} + +/* Called without i_lock held, as the free_lseg call may sleep */ +static void +destroy_lseg(struct kref *kref) +{ + struct pnfs_layout_segment *lseg = + container_of(kref, struct pnfs_layout_segment, kref); + struct inode *ino = lseg->layout->inode; + + dprintk("--> %s\n", __func__); + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ + put_layout_hdr(ino); +} + +static void +put_lseg(struct pnfs_layout_segment *lseg) +{ + if (!lseg) + return; + + dprintk("%s: lseg %p ref %d\n", __func__, lseg, + atomic_read(&lseg->kref.refcount)); + kref_put(&lseg->kref, destroy_lseg); +} + +static void +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) +{ + struct pnfs_layout_segment *lseg, *next; + struct nfs_client *clp; + + dprintk("%s:Begin lo %p\n", __func__, lo); + + assert_spin_locked(&lo->inode->i_lock); + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { + dprintk("%s: freeing lseg %p\n", __func__, lseg); + list_move(&lseg->fi_list, tmp_list); + } + clp = NFS_SERVER(lo->inode)->nfs_client; + spin_lock(&clp->cl_lock); + /* List does not take a reference, so no need for put here */ + list_del_init(&lo->layouts); + spin_unlock(&clp->cl_lock); + write_seqlock(&lo->seqlock); + clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state); + write_sequnlock(&lo->seqlock); + + dprintk("%s:Return\n", __func__); +} + +static void +pnfs_free_lseg_list(struct list_head *tmp_list) +{ + struct pnfs_layout_segment *lseg; + + while (!list_empty(tmp_list)) { + lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, + fi_list); + dprintk("%s calling put_lseg on %p\n", __func__, lseg); + list_del(&lseg->fi_list); + put_lseg(lseg); + } +} + +void +pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo; + LIST_HEAD(tmp_list); + + spin_lock(&nfsi->vfs_inode.i_lock); + lo = nfsi->layout; + if (lo) { + pnfs_clear_lseg_list(lo, &tmp_list); + /* Matched by refcount set to 1 in alloc_init_layout_hdr */ + put_layout_hdr_locked(lo); + } + spin_unlock(&nfsi->vfs_inode.i_lock); + pnfs_free_lseg_list(&tmp_list); +} + +/* + * Called by the state manger to remove all layouts established under an + * expired lease. + */ +void +pnfs_destroy_all_layouts(struct nfs_client *clp) +{ + struct pnfs_layout_hdr *lo; + LIST_HEAD(tmp_list); + + spin_lock(&clp->cl_lock); + list_splice_init(&clp->cl_layouts, &tmp_list); + spin_unlock(&clp->cl_lock); + + while (!list_empty(&tmp_list)) { + lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, + layouts); + dprintk("%s freeing layout for inode %lu\n", __func__, + lo->inode->i_ino); + pnfs_destroy_layout(NFS_I(lo->inode)); + } +} + +/* update lo->stateid with new if is more recent + * + * lo->stateid could be the open stateid, in which case we just use what given. + */ +static void +pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, + const nfs4_stateid *new) +{ + nfs4_stateid *old = &lo->stateid; + bool overwrite = false; + + write_seqlock(&lo->seqlock); + if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || + memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) + overwrite = true; + else { + u32 oldseq, newseq; + + oldseq = be32_to_cpu(old->stateid.seqid); + newseq = be32_to_cpu(new->stateid.seqid); + if ((int)(newseq - oldseq) > 0) + overwrite = true; + } + if (overwrite) + memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); + write_sequnlock(&lo->seqlock); +} + +static void +pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, + struct nfs4_state *state) +{ + int seq; + + dprintk("--> %s\n", __func__); + write_seqlock(&lo->seqlock); + do { + seq = read_seqbegin(&state->seqlock); + memcpy(lo->stateid.data, state->stateid.data, + sizeof(state->stateid.data)); + } while (read_seqretry(&state->seqlock, seq)); + set_bit(NFS_LAYOUT_STATEID_SET, &lo->state); + write_sequnlock(&lo->seqlock); + dprintk("<-- %s\n", __func__); +} + +void +pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct nfs4_state *open_state) +{ + int seq; + + dprintk("--> %s\n", __func__); + do { + seq = read_seqbegin(&lo->seqlock); + if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { + /* This will trigger retry of the read */ + pnfs_layout_from_open_stateid(lo, open_state); + } else + memcpy(dst->data, lo->stateid.data, + sizeof(lo->stateid.data)); + } while (read_seqretry(&lo->seqlock, seq)); + dprintk("<-- %s\n", __func__); +} + +/* +* Get layout from server. +* for now, assume that whole file layouts are requested. +* arg->offset: 0 +* arg->length: all ones +*/ +static struct pnfs_layout_segment * +send_layoutget(struct pnfs_layout_hdr *lo, + struct nfs_open_context *ctx, + u32 iomode) +{ + struct inode *ino = lo->inode; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs4_layoutget *lgp; + struct pnfs_layout_segment *lseg = NULL; + + dprintk("--> %s\n", __func__); + + BUG_ON(ctx == NULL); + lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + if (lgp == NULL) { + put_layout_hdr(lo->inode); + return NULL; + } + lgp->args.minlength = NFS4_MAX_UINT64; + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range.iomode = iomode; + lgp->args.range.offset = 0; + lgp->args.range.length = NFS4_MAX_UINT64; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->lsegpp = &lseg; + + /* Synchronously retrieve layout information from server and + * store in lseg. + */ + nfs4_proc_layoutget(lgp); + if (!lseg) { + /* remember that LAYOUTGET failed and suspend trying */ + set_bit(lo_fail_bit(iomode), &lo->state); + } + return lseg; +} + +/* + * Compare two layout segments for sorting into layout cache. + * We want to preferentially return RW over RO layouts, so ensure those + * are seen first. + */ +static s64 +cmp_layout(u32 iomode1, u32 iomode2) +{ + /* read > read/write */ + return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); +} + +static void +pnfs_insert_layout(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_segment *lp; + int found = 0; + + dprintk("%s:Begin\n", __func__); + + assert_spin_locked(&lo->inode->i_lock); + if (list_empty(&lo->segs)) { + struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; + + spin_lock(&clp->cl_lock); + BUG_ON(!list_empty(&lo->layouts)); + list_add_tail(&lo->layouts, &clp->cl_layouts); + spin_unlock(&clp->cl_lock); + } + list_for_each_entry(lp, &lo->segs, fi_list) { + if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0) + continue; + list_add_tail(&lseg->fi_list, &lp->fi_list); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu before " + "lp %p iomode %d offset %llu length %llu\n", + __func__, lseg, lseg->range.iomode, + lseg->range.offset, lseg->range.length, + lp, lp->range.iomode, lp->range.offset, + lp->range.length); + found = 1; + break; + } + if (!found) { + list_add_tail(&lseg->fi_list, &lo->segs); + dprintk("%s: inserted lseg %p " + "iomode %d offset %llu length %llu at tail\n", + __func__, lseg, lseg->range.iomode, + lseg->range.offset, lseg->range.length); + } + get_layout_hdr_locked(lo); + + dprintk("%s:Return\n", __func__); +} + +static struct pnfs_layout_hdr * +alloc_init_layout_hdr(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + + lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + if (!lo) + return NULL; + lo->refcount = 1; + INIT_LIST_HEAD(&lo->layouts); + INIT_LIST_HEAD(&lo->segs); + seqlock_init(&lo->seqlock); + lo->inode = ino; + return lo; +} + +static struct pnfs_layout_hdr * +pnfs_find_alloc_layout(struct inode *ino) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *new = NULL; + + dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); + + assert_spin_locked(&ino->i_lock); + if (nfsi->layout) + return nfsi->layout; + + spin_unlock(&ino->i_lock); + new = alloc_init_layout_hdr(ino); + spin_lock(&ino->i_lock); + + if (likely(nfsi->layout == NULL)) /* Won the race? */ + nfsi->layout = new; + else + kfree(new); + return nfsi->layout; +} + +/* + * iomode matching rules: + * iomode lseg match + * ----- ----- ----- + * ANY READ true + * ANY RW true + * RW READ false + * RW RW true + * READ READ true + * READ RW true + */ +static int +is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) +{ + return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); +} + +/* + * lookup range in layout + */ +static struct pnfs_layout_segment * +pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) +{ + struct pnfs_layout_segment *lseg, *ret = NULL; + + dprintk("%s:Begin\n", __func__); + + assert_spin_locked(&lo->inode->i_lock); + list_for_each_entry(lseg, &lo->segs, fi_list) { + if (is_matching_lseg(lseg, iomode)) { + ret = lseg; + break; + } + if (cmp_layout(iomode, lseg->range.iomode) > 0) + break; + } + + dprintk("%s:Return lseg %p ref %d\n", + __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); + return ret; +} + +/* + * Layout segment is retreived from the server if not cached. + * The appropriate layout segment is referenced and returned to the caller. + */ +struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + enum pnfs_iomode iomode) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg = NULL; + + if (!pnfs_enabled_sb(NFS_SERVER(ino))) + return NULL; + spin_lock(&ino->i_lock); + lo = pnfs_find_alloc_layout(ino); + if (lo == NULL) { + dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); + goto out_unlock; + } + + /* Check to see if the layout for the given range already exists */ + lseg = pnfs_has_layout(lo, iomode); + if (lseg) { + dprintk("%s: Using cached lseg %p for iomode %d)\n", + __func__, lseg, iomode); + goto out_unlock; + } + + /* if LAYOUTGET already failed once we don't try again */ + if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) + goto out_unlock; + + get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ + spin_unlock(&ino->i_lock); + + lseg = send_layoutget(lo, ctx, iomode); +out: + dprintk("%s end, state 0x%lx lseg %p\n", __func__, + nfsi->layout->state, lseg); + return lseg; +out_unlock: + spin_unlock(&ino->i_lock); + goto out; +} + +int +pnfs_layout_process(struct nfs4_layoutget *lgp) +{ + struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; + struct nfs4_layoutget_res *res = &lgp->res; + struct pnfs_layout_segment *lseg; + struct inode *ino = lo->inode; + int status = 0; + + /* Inject layout blob into I/O device driver */ + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + if (!lseg || IS_ERR(lseg)) { + if (!lseg) + status = -ENOMEM; + else + status = PTR_ERR(lseg); + dprintk("%s: Could not allocate layout: error %d\n", + __func__, status); + goto out; + } + + spin_lock(&ino->i_lock); + init_lseg(lo, lseg); + lseg->range = res->range; + *lgp->lsegpp = lseg; + pnfs_insert_layout(lo, lseg); + + /* Done processing layoutget. Set the layout stateid */ + pnfs_set_layout_stateid(lo, &res->stateid); + spin_unlock(&ino->i_lock); +out: + return status; +} + +/* + * Device ID cache. Currently supports one layout type per struct nfs_client. + * Add layout type to the lookup key to expand to support multiple types. + */ +int +pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, + void (*free_callback)(struct pnfs_deviceid_node *)) +{ + struct pnfs_deviceid_cache *c; + + c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); + if (!c) + return -ENOMEM; + spin_lock(&clp->cl_lock); + if (clp->cl_devid_cache != NULL) { + atomic_inc(&clp->cl_devid_cache->dc_ref); + dprintk("%s [kref [%d]]\n", __func__, + atomic_read(&clp->cl_devid_cache->dc_ref)); + kfree(c); + } else { + /* kzalloc initializes hlists */ + spin_lock_init(&c->dc_lock); + atomic_set(&c->dc_ref, 1); + c->dc_free_callback = free_callback; + clp->cl_devid_cache = c; + dprintk("%s [new]\n", __func__); + } + spin_unlock(&clp->cl_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); + +/* + * Called from pnfs_layoutdriver_type->free_lseg + * last layout segment reference frees deviceid + */ +void +pnfs_put_deviceid(struct pnfs_deviceid_cache *c, + struct pnfs_deviceid_node *devid) +{ + struct nfs4_deviceid *id = &devid->de_id; + struct pnfs_deviceid_node *d; + struct hlist_node *n; + long h = nfs4_deviceid_hash(id); + + dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); + if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) + return; + + hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) + if (!memcmp(&d->de_id, id, sizeof(*id))) { + hlist_del_rcu(&d->de_node); + spin_unlock(&c->dc_lock); + synchronize_rcu(); + c->dc_free_callback(devid); + return; + } + spin_unlock(&c->dc_lock); + /* Why wasn't it found in the list? */ + BUG(); +} +EXPORT_SYMBOL_GPL(pnfs_put_deviceid); + +/* Find and reference a deviceid */ +struct pnfs_deviceid_node * +pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) +{ + struct pnfs_deviceid_node *d; + struct hlist_node *n; + long hash = nfs4_deviceid_hash(id); + + dprintk("--> %s hash %ld\n", __func__, hash); + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { + if (!memcmp(&d->de_id, id, sizeof(*id))) { + if (!atomic_inc_not_zero(&d->de_ref)) { + goto fail; + } else { + rcu_read_unlock(); + return d; + } + } + } +fail: + rcu_read_unlock(); + return NULL; +} +EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); + +/* + * Add a deviceid to the cache. + * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new + */ +struct pnfs_deviceid_node * +pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) +{ + struct pnfs_deviceid_node *d; + long hash = nfs4_deviceid_hash(&new->de_id); + + dprintk("--> %s hash %ld\n", __func__, hash); + spin_lock(&c->dc_lock); + d = pnfs_find_get_deviceid(c, &new->de_id); + if (d) { + spin_unlock(&c->dc_lock); + dprintk("%s [discard]\n", __func__); + c->dc_free_callback(new); + return d; + } + INIT_HLIST_NODE(&new->de_node); + atomic_set(&new->de_ref, 1); + hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); + spin_unlock(&c->dc_lock); + dprintk("%s [new]\n", __func__); + return new; +} +EXPORT_SYMBOL_GPL(pnfs_add_deviceid); + +void +pnfs_put_deviceid_cache(struct nfs_client *clp) +{ + struct pnfs_deviceid_cache *local = clp->cl_devid_cache; + + dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); + if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { + int i; + /* Verify cache is empty */ + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&local->dc_deviceids[i])); + clp->cl_devid_cache = NULL; + spin_unlock(&clp->cl_lock); + kfree(local); + } +} +EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h new file mode 100644 index 0000000..e12367d --- /dev/null +++ b/fs/nfs/pnfs.h @@ -0,0 +1,189 @@ +/* + * pNFS client data structures. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#ifndef FS_NFS_PNFS_H +#define FS_NFS_PNFS_H + +struct pnfs_layout_segment { + struct list_head fi_list; + struct pnfs_layout_range range; + struct kref kref; + struct pnfs_layout_hdr *layout; +}; + +#ifdef CONFIG_NFS_V4_1 + +#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" + +enum { + NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ + NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ + NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ +}; + +/* Per-layout driver specific registration structure */ +struct pnfs_layoutdriver_type { + struct list_head pnfs_tblid; + const u32 id; + const char *name; + struct module *owner; + int (*set_layoutdriver) (struct nfs_server *); + int (*clear_layoutdriver) (struct nfs_server *); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + void (*free_lseg) (struct pnfs_layout_segment *lseg); +}; + +struct pnfs_layout_hdr { + unsigned long refcount; + struct list_head layouts; /* other client layouts */ + struct list_head segs; /* layout segments list */ + seqlock_t seqlock; /* Protects the stateid */ + nfs4_stateid stateid; + unsigned long state; + struct inode *inode; +}; + +struct pnfs_device { + struct nfs4_deviceid dev_id; + unsigned int layout_type; + unsigned int mincount; + struct page **pages; + void *area; + unsigned int pgbase; + unsigned int pglen; +}; + +/* + * Device ID RCU cache. A device ID is unique per client ID and layout type. + */ +#define NFS4_DEVICE_ID_HASH_BITS 5 +#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) +#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) + +static inline u32 +nfs4_deviceid_hash(struct nfs4_deviceid *id) +{ + unsigned char *cptr = (unsigned char *)id->data; + unsigned int nbytes = NFS4_DEVICEID4_SIZE; + u32 x = 0; + + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x & NFS4_DEVICE_ID_HASH_MASK; +} + +struct pnfs_deviceid_node { + struct hlist_node de_node; + struct nfs4_deviceid de_id; + atomic_t de_ref; +}; + +struct pnfs_deviceid_cache { + spinlock_t dc_lock; + atomic_t dc_ref; + void (*dc_free_callback)(struct pnfs_deviceid_node *); + struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; +}; + +extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, + void (*free_callback)(struct pnfs_deviceid_node *)); +extern void pnfs_put_deviceid_cache(struct nfs_client *); +extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( + struct pnfs_deviceid_cache *, + struct nfs4_deviceid *); +extern struct pnfs_deviceid_node *pnfs_add_deviceid( + struct pnfs_deviceid_cache *, + struct pnfs_deviceid_node *); +extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, + struct pnfs_deviceid_node *devid); + +extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); +extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); + +/* nfs4proc.c */ +extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, + struct pnfs_device *dev); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); + +/* pnfs.c */ +struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + enum pnfs_iomode access_type); +void set_pnfs_layoutdriver(struct nfs_server *, u32 id); +void unset_pnfs_layoutdriver(struct nfs_server *); +int pnfs_layout_process(struct nfs4_layoutget *lgp); +void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_all_layouts(struct nfs_client *); +void put_layout_hdr(struct inode *inode); +void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct nfs4_state *open_state); + + +static inline int lo_fail_bit(u32 iomode) +{ + return iomode == IOMODE_RW ? + NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; +} + +/* Return true if a layout driver is being used for this mountpoint */ +static inline int pnfs_enabled_sb(struct nfs_server *nfss) +{ + return nfss->pnfs_curr_ld != NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) +{ +} + +static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ +} + +static inline struct pnfs_layout_segment * +pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + enum pnfs_iomode access_type) +{ + return NULL; +} + +static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) +{ +} + +static inline void unset_pnfs_layoutdriver(struct nfs_server *s) +{ +} + +#endif /* CONFIG_NFS_V4_1 */ + +#endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 79859c8..e4b62c6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -25,6 +25,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -120,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); + pnfs_update_layout(inode, ctx, IOMODE_READ); new = nfs_create_request(ctx, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); @@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ + pnfs_update_layout(inode, desc.ctx, IOMODE_READ); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 7cf4dda..31a78fc 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -29,6 +29,18 @@ config NFSD If unsure, say N. +config NFSD_DEPRECATED + bool "Include support for deprecated syscall interface to NFSD" + depends on NFSD + default y + help + The syscall interface to nfsd was obsoleted in 2.6.0 by a new + filesystem based interface. The old interface is due for removal + in 2.6.40. If you wish to remove the interface before then + say N. + + In unsure, say Y. + config NFSD_V2_ACL bool depends on NFSD diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71..c0fcb7a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -28,9 +28,6 @@ typedef struct auth_domain svc_client; typedef struct svc_export svc_export; -static void exp_do_unexport(svc_export *unexp); -static int exp_verify_string(char *cp, int max); - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) return ek; } +#ifdef CONFIG_NFSD_DEPRECATED static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, struct svc_export *exp) { @@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid) return exp_find_key(clp, FSID_NUM, fsidv, NULL); } +#endif static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, struct cache_req *reqp) @@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) return exp; } +#ifdef CONFIG_NFSD_DEPRECATED /* * Hashtable locking. Write locks are placed only by user processes * wanting to modify export information. @@ -925,6 +925,19 @@ exp_writeunlock(void) { up_write(&hash_sem); } +#else + +/* hash_sem not needed once deprecated interface is removed */ +void exp_readlock(void) {} +static inline void exp_writelock(void){} +void exp_readunlock(void) {} +static inline void exp_writeunlock(void){} + +#endif + +#ifdef CONFIG_NFSD_DEPRECATED +static void exp_do_unexport(svc_export *unexp); +static int exp_verify_string(char *cp, int max); static void exp_fsid_unhash(struct svc_export *exp) { @@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) @@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } /* @@ -1097,8 +1108,7 @@ out: static void exp_do_unexport(svc_export *unexp) { - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); + sunrpc_invalidate(&unexp->h, &svc_export_cache); exp_unhash(unexp); exp_fsid_unhash(unexp); } @@ -1150,6 +1160,7 @@ out_unlock: exp_writeunlock(); return err; } +#endif /* CONFIG_NFSD_DEPRECATED */ /* * Obtain the root fh on behalf of a client. @@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags) show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); } +static bool secinfo_flags_equal(int f, int g) +{ + f &= NFSEXP_SECINFO_FLAGS; + g &= NFSEXP_SECINFO_FLAGS; + return f == g; +} + +static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end) +{ + int flags; + + flags = (*fp)->flags; + seq_printf(m, ",sec=%d", (*fp)->pseudoflavor); + (*fp)++; + while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) { + seq_printf(m, ":%d", (*fp)->pseudoflavor); + (*fp)++; + } + return flags; +} + static void show_secinfo(struct seq_file *m, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; - int lastflags = 0, first = 0; + int flags; if (exp->ex_nflavors == 0) return; - for (f = exp->ex_flavors; f < end; f++) { - if (first || f->flags != lastflags) { - if (!first) - show_secinfo_flags(m, lastflags); - seq_printf(m, ",sec=%d", f->pseudoflavor); - lastflags = f->flags; - } else { - seq_printf(m, ":%d", f->pseudoflavor); - } + f = exp->ex_flavors; + flags = show_secinfo_run(m, &f, end); + if (!secinfo_flags_equal(flags, exp->ex_flags)) + show_secinfo_flags(m, flags); + while (f != end) { + flags = show_secinfo_run(m, &f, end); + show_secinfo_flags(m, flags); } - show_secinfo_flags(m, lastflags); } static void exp_flags(struct seq_file *m, int flag, int fsid, @@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; +#ifdef CONFIG_NFSD_DEPRECATED /* * Add or modify a client. * Change requests may involve the list of host addresses. The list of @@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp) /* Insert client into hashtable. */ for (i = 0; i < ncp->cl_naddr; i++) { ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); - auth_unix_add_addr(&addr6, dom); + auth_unix_add_addr(&init_net, &addr6, dom); } auth_unix_forget_old(dom); auth_domain_put(dom); @@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max) printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); return 0; } +#endif /* CONFIG_NFSD_DEPRECATED */ /* * Initialize the exports module. diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 988cbb3..143da2e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -41,7 +41,6 @@ #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 -#define NFS4_STATEID_SIZE 16 /* Index of predefined Linux callback client operations */ @@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, } static void -encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, +encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; + struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; if (hdr->minorversion == 0) return; @@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); WRITE32(OP_CB_SEQUENCE); - WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(args->cbs_clp->cl_cb_seq_nr); + WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(ses->se_cb_seq_nr); WRITE32(0); /* slotid, always 0 */ WRITE32(0); /* highest slotid always 0 */ WRITE32(0); /* cachethis always 0 */ @@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, - struct nfs4_rpc_args *rpc_args) + struct nfsd4_callback *cb) { struct xdr_stream xdr; - struct nfs4_delegation *args = rpc_args->args_op; + struct nfs4_delegation *args = cb->cb_op; struct nfs4_cb_compound_hdr hdr = { - .ident = args->dl_ident, - .minorversion = rpc_args->args_seq.cbs_minorversion, + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_minorversion, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); - encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); + encode_cb_sequence(&xdr, cb, &hdr); encode_cb_recall(&xdr, args, &hdr); encode_cb_nops(&hdr); return 0; @@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) * with a single slot. */ static int -decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, +decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, struct rpc_rqst *rqstp) { + struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; struct nfs4_sessionid id; int status; u32 dummy; __be32 *p; - if (res->cbs_minorversion == 0) + if (cb->cb_minorversion == 0) return 0; status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); @@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, - NFS4_MAX_SESSIONID_LEN)) { + if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out; } READ32(dummy); - if (dummy != res->cbs_clp->cl_cb_seq_nr) { + if (dummy != ses->se_cb_seq_nr) { dprintk("%s Invalid sequence number\n", __func__); goto out; } @@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, - struct nfsd4_cb_sequence *seq) + struct nfsd4_callback *cb) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; @@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, status = decode_cb_compound_hdr(&xdr, &hdr); if (status) goto out; - if (seq) { - status = decode_cb_sequence(&xdr, seq, rqstp); + if (cb) { + status = decode_cb_sequence(&xdr, cb, rqstp); if (status) goto out; } @@ -473,30 +473,34 @@ static int max_cb_time(void) /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cl_cb_set an atomic? */ -int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) +int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { struct rpc_timeout timeparms = { .to_initval = max_cb_time(), .to_retries = 0, }; struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_TCP, - .address = (struct sockaddr *) &cb->cb_addr, - .addrsize = cb->cb_addrlen, + .net = &init_net, + .address = (struct sockaddr *) &conn->cb_addr, + .addrsize = conn->cb_addrlen, .timeout = &timeparms, .program = &cb_program, - .prognumber = cb->cb_prog, .version = 0, .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), - .client_name = clp->cl_principal, }; struct rpc_clnt *client; - if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) - return -EINVAL; - if (cb->cb_minorversion) { - args.bc_xprt = cb->cb_xprt; + if (clp->cl_minorversion == 0) { + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + return -EINVAL; + args.client_name = clp->cl_principal; + args.prognumber = conn->cb_prog, + args.protocol = XPRT_TRANSPORT_TCP; + clp->cl_cb_ident = conn->cb_ident; + } else { + args.bc_xprt = conn->cb_xprt; + args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; } /* Create RPC client */ @@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) PTR_ERR(client)); return PTR_ERR(client); } - nfsd4_set_callback_client(clp, client); + clp->cl_cb_client = client; return 0; } @@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { - struct nfs4_client *clp = calldata; + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); if (task->tk_status) warn_no_callback_path(clp, task->tk_status); @@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) } static const struct rpc_call_ops nfsd4_cb_probe_ops = { + /* XXX: release method to ensure we set the cb channel down if + * necessary on early failure? */ .rpc_call_done = nfsd4_cb_probe_done, }; @@ -543,38 +549,42 @@ int set_callback_cred(void) return 0; } +static struct workqueue_struct *callback_wq; -void do_probe_callback(struct nfs4_client *clp) +static void do_probe_callback(struct nfs4_client *clp) { - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], - .rpc_argp = clp, - .rpc_cred = callback_cred - }; - int status; + struct nfsd4_callback *cb = &clp->cl_cb_null; - status = rpc_call_async(clp->cl_cb_client, &msg, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN, - &nfsd4_cb_probe_ops, (void *)clp); - if (status) - warn_no_callback_path(clp, status); + cb->cb_op = NULL; + cb->cb_clp = clp; + + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; + cb->cb_msg.rpc_argp = NULL; + cb->cb_msg.rpc_resp = NULL; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_probe_ops; + + queue_work(callback_wq, &cb->cb_work); } /* - * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... + * Poke the callback thread to process any updates to the callback + * parameters, and send a null probe. */ -void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) +void nfsd4_probe_callback(struct nfs4_client *clp) { - int status; + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + do_probe_callback(clp); +} +void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) +{ BUG_ON(atomic_read(&clp->cl_cb_set)); - status = setup_callback_client(clp, cb); - if (status) { - warn_no_callback_path(clp, status); - return; - } - do_probe_callback(clp); + spin_lock(&clp->cl_lock); + memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); + spin_unlock(&clp->cl_lock); } /* @@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, struct rpc_task *task) { - struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; - u32 *ptr = (u32 *)clp->cl_sessionid.data; + u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data; int status = 0; dprintk("%s: %u:%u:%u:%u\n", __func__, @@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, status = -EAGAIN; goto out; } - - /* - * We'll need the clp during XDR encoding and decoding, - * and the sequence during decoding to verify the reply - */ - args->args_seq.cbs_clp = clp; - task->tk_msg.rpc_resp = &args->args_seq; - out: dprintk("%s status=%d\n", __func__, status); return status; @@ -617,13 +618,13 @@ out: */ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; - struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; - u32 minorversion = clp->cl_cb_conn.cb_minorversion; + u32 minorversion = clp->cl_minorversion; int status = 0; - args->args_seq.cbs_minorversion = minorversion; + cb->cb_minorversion = minorversion; if (minorversion) { status = nfsd41_cb_setup_sequence(clp, task); if (status) { @@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; dprintk("%s: minorversion=%d\n", __func__, - clp->cl_cb_conn.cb_minorversion); + clp->cl_minorversion); - if (clp->cl_cb_conn.cb_minorversion) { + if (clp->cl_minorversion) { /* No need for lock, access serialized in nfsd4_cb_prepare */ - ++clp->cl_cb_seq_nr; + ++clp->cl_cb_session->se_cb_seq_nr; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, - clp->cl_cb_seq_nr); + clp->cl_cb_session->se_cb_seq_nr); /* We're done looking into the sequence information */ task->tk_msg.rpc_resp = NULL; @@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); struct nfs4_client *clp = dp->dl_client; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; @@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) static void nfsd4_cb_recall_release(void *calldata) { - struct nfs4_delegation *dp = calldata; + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); nfs4_put_delegation(dp); } @@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = { .rpc_release = nfsd4_cb_recall_release, }; -static struct workqueue_struct *callback_wq; - int nfsd4_create_callback_queue(void) { callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); @@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void) } /* must be called under the state lock */ -void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) +void nfsd4_shutdown_callback(struct nfs4_client *clp) { - struct rpc_clnt *old = clp->cl_cb_client; - - clp->cl_cb_client = new; + set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); /* - * After this, any work that saw the old value of cl_cb_client will - * be gone: + * Note this won't actually result in a null callback; + * instead, nfsd4_do_callback_rpc() will detect the killed + * client, destroy the rpc client, and stop: */ + do_probe_callback(clp); flush_workqueue(callback_wq); - /* So we can safely shut it down: */ - if (old) - rpc_shutdown_client(old); } -/* - * called with dp->dl_count inc'ed. - */ -static void _nfsd4_cb_recall(struct nfs4_delegation *dp) +void nfsd4_release_cb(struct nfsd4_callback *cb) { - struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_cb_client; - struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_cred = callback_cred - }; + if (cb->cb_ops->rpc_release) + cb->cb_ops->rpc_release(cb); +} - if (clnt == NULL) { - nfs4_put_delegation(dp); - return; /* Client is shutting down; give up. */ +void nfsd4_process_cb_update(struct nfsd4_callback *cb) +{ + struct nfs4_cb_conn conn; + struct nfs4_client *clp = cb->cb_clp; + int err; + + /* + * This is either an update, or the client dying; in either case, + * kill the old client: + */ + if (clp->cl_cb_client) { + rpc_shutdown_client(clp->cl_cb_client); + clp->cl_cb_client = NULL; } + if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) + return; + spin_lock(&clp->cl_lock); + /* + * Only serialized callback code is allowed to clear these + * flags; main nfsd code can only set them: + */ + BUG_ON(!clp->cl_cb_flags); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); + spin_unlock(&clp->cl_lock); - args->args_op = dp; - msg.rpc_argp = args; - dp->dl_retries = 1; - rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); + err = setup_callback_client(clp, &conn); + if (err) + warn_no_callback_path(clp, err); } void nfsd4_do_callback_rpc(struct work_struct *w) { - /* XXX: for now, just send off delegation recall. */ - /* In future, generalize to handle any sort of callback. */ - struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); - struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall); + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); + struct nfs4_client *clp = cb->cb_clp; + struct rpc_clnt *clnt; - _nfsd4_cb_recall(dp); -} + if (clp->cl_cb_flags) + nfsd4_process_cb_update(cb); + clnt = clp->cl_cb_client; + if (!clnt) { + /* Callback channel broken, or client killed; give up: */ + nfsd4_release_cb(cb); + return; + } + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + cb->cb_ops, cb); +} void nfsd4_cb_recall(struct nfs4_delegation *dp) { + struct nfsd4_callback *cb = &dp->dl_recall; + + dp->dl_retries = 1; + cb->cb_op = dp; + cb->cb_clp = dp->dl_client; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; + cb->cb_msg.rpc_argp = cb; + cb->cb_msg.rpc_resp = cb; + cb->cb_msg.rpc_cred = callback_cred; + + cb->cb_ops = &nfsd4_cb_recall_ops; + dp->dl_retries = 1; + queue_work(callback_wq, &dp->dl_recall.cb_work); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf4..f0695e8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void) cache_unregister(&nametoid_cache); } -/* - * Deferred request handling - */ - -struct idmap_defer_req { - struct cache_req req; - struct cache_deferred_req deferred_req; - wait_queue_head_t waitq; - atomic_t count; -}; - -static inline void -put_mdr(struct idmap_defer_req *mdr) -{ - if (atomic_dec_and_test(&mdr->count)) - kfree(mdr); -} - -static inline void -get_mdr(struct idmap_defer_req *mdr) -{ - atomic_inc(&mdr->count); -} - -static void -idmap_revisit(struct cache_deferred_req *dreq, int toomany) -{ - struct idmap_defer_req *mdr = - container_of(dreq, struct idmap_defer_req, deferred_req); - - wake_up(&mdr->waitq); - put_mdr(mdr); -} - -static struct cache_deferred_req * -idmap_defer(struct cache_req *req) -{ - struct idmap_defer_req *mdr = - container_of(req, struct idmap_defer_req, req); - - mdr->deferred_req.revisit = idmap_revisit; - get_mdr(mdr); - return (&mdr->deferred_req); -} - -static inline int -do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, - struct cache_detail *detail, struct ent **item, - struct idmap_defer_req *mdr) -{ - *item = lookup_fn(key); - if (!*item) - return -ENOMEM; - return cache_check(detail, &(*item)->h, &mdr->req); -} - -static inline int -do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), - struct ent *key, struct cache_detail *detail, - struct ent **item) -{ - int ret = -ENOMEM; - - *item = lookup_fn(key); - if (!*item) - goto out_err; - ret = -ETIMEDOUT; - if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() - || detail->flush_time > (*item)->h.last_refresh) - goto out_put; - ret = -ENOENT; - if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) - goto out_put; - return 0; -out_put: - cache_put(&(*item)->h, detail); -out_err: - *item = NULL; - return ret; -} - static int idmap_lookup(struct svc_rqst *rqstp, struct ent *(*lookup_fn)(struct ent *), struct ent *key, struct cache_detail *detail, struct ent **item) { - struct idmap_defer_req *mdr; int ret; - mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); - if (!mdr) + *item = lookup_fn(key); + if (!*item) return -ENOMEM; - atomic_set(&mdr->count, 1); - init_waitqueue_head(&mdr->waitq); - mdr->req.defer = idmap_defer; - ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); - if (ret == -EAGAIN) { - wait_event_interruptible_timeout(mdr->waitq, - test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); - ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); + retry: + ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle); + + if (ret == -ETIMEDOUT) { + struct ent *prev_item = *item; + *item = lookup_fn(key); + if (*item != prev_item) + goto retry; + cache_put(&(*item)->h, detail); } - put_mdr(mdr); return ret; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59ec449..0cdfd02 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, resp->cstate.session = NULL; fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); - /* Use the deferral mechanism only for NFSv4.0 compounds */ - rqstp->rq_usedeferral = (args->minorversion == 0); + /* + * Don't use the deferral mechanism for NFSv4; compounds make it + * too hard to avoid non-idempotency problems. + */ + rqstp->rq_usedeferral = 0; /* * According to RFC3010, this takes precedence over all other errors. diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a7292fc..9019e8e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); /* @@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f nfs4_file_get_access(fp, O_RDONLY); dp->dl_flock = NULL; dp->dl_type = type; - dp->dl_ident = cb->cb_ident; dp->dl_stateid.si_boot = boot_time; dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; @@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) +static void +free_session_slots(struct nfsd4_session *ses) +{ + int i; + + for (i = 0; i < ses->se_fchannel.maxreqs; i++) + kfree(ses->se_slots[i]); +} + /* - * Give the client the number of ca_maxresponsesize_cached slots it - * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, - * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more - * than NFSD_MAX_SLOTS_PER_SESSION. - * - * If we run out of reserved DRC memory we should (up to a point) + * We don't actually need to cache the rpc and session headers, so we + * can allocate a little less for each slot: + */ +static inline int slot_bytes(struct nfsd4_channel_attrs *ca) +{ + return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; +} + +static int nfsd4_sanitize_slot_size(u32 size) +{ + size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ + size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); + + return size; +} + +/* + * XXX: If we run out of reserved DRC memory we could (up to a point) * re-negotiate active sessions and reduce their slot usage to make * rooom for new connections. For now we just fail the create session. */ -static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) +static int nfsd4_get_drc_mem(int slotsize, u32 num) { - int mem, size = fchan->maxresp_cached; + int avail; - if (fchan->maxreqs < 1) - return nfserr_inval; + num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); - if (size < NFSD_MIN_HDR_SEQ_SZ) - size = NFSD_MIN_HDR_SEQ_SZ; - size -= NFSD_MIN_HDR_SEQ_SZ; - if (size > NFSD_SLOT_CACHE_SIZE) - size = NFSD_SLOT_CACHE_SIZE; - - /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ - mem = fchan->maxreqs * size; - if (mem > NFSD_MAX_MEM_PER_SESSION) { - fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; - if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - mem = fchan->maxreqs * size; - } + spin_lock(&nfsd_drc_lock); + avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); + num = min_t(int, num, avail / slotsize); + nfsd_drc_mem_used += num * slotsize; + spin_unlock(&nfsd_drc_lock); + return num; +} + +static void nfsd4_put_drc_mem(int slotsize, int num) +{ spin_lock(&nfsd_drc_lock); - /* bound the total session drc memory ussage */ - if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { - fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; - mem = fchan->maxreqs * size; - } - nfsd_drc_mem_used += mem; + nfsd_drc_mem_used -= slotsize * num; spin_unlock(&nfsd_drc_lock); +} - if (fchan->maxreqs == 0) - return nfserr_jukebox; +static struct nfsd4_session *alloc_session(int slotsize, int numslots) +{ + struct nfsd4_session *new; + int mem, i; - fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; - return 0; + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + mem = numslots * sizeof(struct nfsd4_slot *); + + new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + if (!new) + return NULL; + /* allocate each struct nfsd4_slot and data cache in one piece */ + for (i = 0; i < numslots; i++) { + mem = sizeof(struct nfsd4_slot) + slotsize; + new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + if (!new->se_slots[i]) + goto out_free; + } + return new; +out_free: + while (i--) + kfree(new->se_slots[i]); + kfree(new); + return NULL; } -/* - * fchan holds the client values on input, and the server values on output - * sv_max_mesg is the maximum payload plus one page for overhead. - */ -static int init_forechannel_attrs(struct svc_rqst *rqstp, - struct nfsd4_channel_attrs *session_fchan, - struct nfsd4_channel_attrs *fchan) +static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) { - int status = 0; - __u32 maxcount = nfsd_serv->sv_max_mesg; + u32 maxrpc = nfsd_serv->sv_max_mesg; - /* headerpadsz set to zero in encode routine */ + new->maxreqs = numslots; + new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; + new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); + new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); + new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); +} - /* Use the client's max request and max response size if possible */ - if (fchan->maxreq_sz > maxcount) - fchan->maxreq_sz = maxcount; - session_fchan->maxreq_sz = fchan->maxreq_sz; +static void free_conn(struct nfsd4_conn *c) +{ + svc_xprt_put(c->cn_xprt); + kfree(c); +} - if (fchan->maxresp_sz > maxcount) - fchan->maxresp_sz = maxcount; - session_fchan->maxresp_sz = fchan->maxresp_sz; +static void nfsd4_conn_lost(struct svc_xpt_user *u) +{ + struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); + struct nfs4_client *clp = c->cn_session->se_client; - /* Use the client's maxops if possible */ - if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) - fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; - session_fchan->maxops = fchan->maxops; + spin_lock(&clp->cl_lock); + if (!list_empty(&c->cn_persession)) { + list_del(&c->cn_persession); + free_conn(c); + } + spin_unlock(&clp->cl_lock); +} - /* FIXME: Error means no more DRC pages so the server should - * recover pages from existing sessions. For now fail session - * creation. - */ - status = set_forechannel_drc_size(fchan); +static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) +{ + struct nfsd4_conn *conn; - session_fchan->maxresp_cached = fchan->maxresp_cached; - session_fchan->maxreqs = fchan->maxreqs; + conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); + if (!conn) + return NULL; + svc_xprt_get(rqstp->rq_xprt); + conn->cn_xprt = rqstp->rq_xprt; + conn->cn_flags = flags; + INIT_LIST_HEAD(&conn->cn_xpt_user.list); + return conn; +} - dprintk("%s status %d\n", __func__, status); - return status; +static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + conn->cn_session = ses; + list_add(&conn->cn_persession, &ses->se_conns); } -static void -free_session_slots(struct nfsd4_session *ses) +static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { - int i; + struct nfs4_client *clp = ses->se_client; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) - kfree(ses->se_slots[i]); + spin_lock(&clp->cl_lock); + __nfsd4_hash_conn(conn, ses); + spin_unlock(&clp->cl_lock); } -/* - * We don't actually need to cache the rpc and session headers, so we - * can allocate a little less for each slot: - */ -static inline int slot_bytes(struct nfsd4_channel_attrs *ca) +static void nfsd4_register_conn(struct nfsd4_conn *conn) { - return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + conn->cn_xpt_user.callback = nfsd4_conn_lost; + register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); } -static int -alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, - struct nfsd4_create_session *cses) +static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) { - struct nfsd4_session *new, tmp; - struct nfsd4_slot *sp; - int idx, slotsize, cachesize, i; - int status; + struct nfsd4_conn *conn; + u32 flags = NFS4_CDFC4_FORE; - memset(&tmp, 0, sizeof(tmp)); + if (ses->se_flags & SESSION4_BACK_CHAN) + flags |= NFS4_CDFC4_BACK; + conn = alloc_conn(rqstp, flags); + if (!conn) + return nfserr_jukebox; + nfsd4_hash_conn(conn, ses); + nfsd4_register_conn(conn); + return nfs_ok; +} - /* FIXME: For now, we just accept the client back channel attributes. */ - tmp.se_bchannel = cses->back_channel; - status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, - &cses->fore_channel); - if (status) - goto out; +static void nfsd4_del_conns(struct nfsd4_session *s) +{ + struct nfs4_client *clp = s->se_client; + struct nfsd4_conn *c; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) - + sizeof(struct nfsd4_session) > PAGE_SIZE); + spin_lock(&clp->cl_lock); + while (!list_empty(&s->se_conns)) { + c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); + list_del_init(&c->cn_persession); + spin_unlock(&clp->cl_lock); - status = nfserr_jukebox; - /* allocate struct nfsd4_session and slot table pointers in one piece */ - slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); - new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); - if (!new) - goto out; + unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); + free_conn(c); - memcpy(new, &tmp, sizeof(*new)); + spin_lock(&clp->cl_lock); + } + spin_unlock(&clp->cl_lock); +} - /* allocate each struct nfsd4_slot and data cache in one piece */ - cachesize = slot_bytes(&new->se_fchannel); - for (i = 0; i < new->se_fchannel.maxreqs; i++) { - sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); - if (!sp) - goto out_free; - new->se_slots[i] = sp; +void free_session(struct kref *kref) +{ + struct nfsd4_session *ses; + int mem; + + ses = container_of(kref, struct nfsd4_session, se_ref); + nfsd4_del_conns(ses); + spin_lock(&nfsd_drc_lock); + mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); + nfsd_drc_mem_used -= mem; + spin_unlock(&nfsd_drc_lock); + free_session_slots(ses); + kfree(ses); +} + +static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +{ + struct nfsd4_session *new; + struct nfsd4_channel_attrs *fchan = &cses->fore_channel; + int numslots, slotsize; + int status; + int idx; + + /* + * Note decreasing slot size below client's request may + * make it difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. + */ + slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); + numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); + + new = alloc_session(slotsize, numslots); + if (!new) { + nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + return NULL; } + init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); new->se_client = clp; gen_sessionid(new); - idx = hash_sessionid(&new->se_sessionid); - memcpy(clp->cl_sessionid.data, new->se_sessionid.data, - NFS4_MAX_SESSIONID_LEN); + INIT_LIST_HEAD(&new->se_conns); + + new->se_cb_seq_nr = 1; new->se_flags = cses->flags; + new->se_cb_prog = cses->callback_prog; kref_init(&new->se_ref); + idx = hash_sessionid(&new->se_sessionid); spin_lock(&client_lock); list_add(&new->se_hash, &sessionid_hashtbl[idx]); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&client_lock); - status = nfs_ok; -out: - return status; -out_free: - free_session_slots(new); - kfree(new); - goto out; + status = nfsd4_new_conn(rqstp, new); + /* whoops: benny points out, status is ignored! (err, or bogus) */ + if (status) { + free_session(&new->se_ref); + return NULL; + } + if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { + struct sockaddr *sa = svc_addr(rqstp); + + clp->cl_cb_session = new; + clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt; + svc_xprt_get(rqstp->rq_xprt); + rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); + clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + nfsd4_probe_callback(clp); + } + return new; } /* caller must hold client_lock */ @@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses) list_del(&ses->se_perclnt); } -void -free_session(struct kref *kref) -{ - struct nfsd4_session *ses; - int mem; - - ses = container_of(kref, struct nfsd4_session, se_ref); - spin_lock(&nfsd_drc_lock); - mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); - nfsd_drc_mem_used -= mem; - spin_unlock(&nfsd_drc_lock); - free_session_slots(ses); - kfree(ses); -} - /* must be called under the client_lock */ static inline void renew_client_locked(struct nfs4_client *clp) @@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { + while (!list_empty(&clp->cl_sessions)) { + struct nfsd4_session *ses; + ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, + se_perclnt); + list_del(&ses->se_perclnt); + nfsd4_put_session(ses); + } if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session) static inline void unhash_client_locked(struct nfs4_client *clp) { + struct nfsd4_session *ses; + mark_client_expired(clp); list_del(&clp->cl_lru); - while (!list_empty(&clp->cl_sessions)) { - struct nfsd4_session *ses; - ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, - se_perclnt); - unhash_session(ses); - nfsd4_put_session(ses); - } + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + list_del_init(&ses->se_hash); } static void @@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_openowner(sop); } - nfsd4_set_callback_client(clp, NULL); + nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); @@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, if (clp == NULL) return NULL; + INIT_LIST_HEAD(&clp->cl_sessions); + princ = svc_gss_principal(rqstp); if (princ) { clp->cl_principal = kstrdup(princ, GFP_KERNEL); @@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_sessions); INIT_LIST_HEAD(&clp->cl_lru); + spin_lock_init(&clp->cl_lock); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); @@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, clp->cl_flavor = rqstp->rq_flavor; copy_cred(&clp->cl_cred, &rqstp->rq_cred); gen_confirm(clp); - + clp->cl_cb_session = NULL; return clp; } @@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { - struct nfs4_cb_conn *cb = &clp->cl_cb_conn; + struct nfs4_cb_conn *conn = &clp->cl_cb_conn; unsigned short expected_family; /* Currently, we only support tcp and tcp6 for the callback channel */ @@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) else goto out_err; - cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, se->se_callback_addr_len, - (struct sockaddr *) &cb->cb_addr, - sizeof(cb->cb_addr)); + (struct sockaddr *)&conn->cb_addr, + sizeof(conn->cb_addr)); - if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) + if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family) goto out_err; - if (cb->cb_addr.ss_family == AF_INET6) - ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + if (conn->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; - cb->cb_minorversion = 0; - cb->cb_prog = se->se_callback_prog; - cb->cb_ident = se->se_callback_ident; + conn->cb_prog = se->se_callback_prog; + conn->cb_ident = se->se_callback_ident; return; out_err: - cb->cb_addr.ss_family = AF_UNSPEC; - cb->cb_addrlen = 0; + conn->cb_addr.ss_family = AF_UNSPEC; + conn->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); @@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, { struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfsd4_session *new; struct nfsd4_clid_slot *cs_slot = NULL; + bool confirm_me = false; int status = 0; nfs4_lock_state(); @@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid, cr_ses->seqid); goto out; } - cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { @@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out_cache; + goto out; } - cs_slot->sl_seqid++; /* from 0 to 1 */ - move_to_confirmed(unconf); - - if (cr_ses->flags & SESSION4_BACK_CHAN) { - unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt; - svc_xprt_get(rqstp->rq_xprt); - rpc_copy_addr( - (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, - sa); - unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); - unconf->cl_cb_conn.cb_minorversion = - cstate->minorversion; - unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; - unconf->cl_cb_seq_nr = 1; - nfsd4_probe_callback(unconf, &unconf->cl_cb_conn); - } + confirm_me = true; conf = unconf; } else { status = nfserr_stale_clientid; @@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp, } /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + conf->cl_minorversion = 1; + /* * We do not support RDMA or persistent sessions */ cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; - status = alloc_init_session(rqstp, conf, cr_ses); - if (status) + status = nfserr_jukebox; + new = alloc_init_session(rqstp, conf, cr_ses); + if (!new) goto out; - - memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, + status = nfs_ok; + memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); + cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; -out_cache: /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); + if (confirm_me) + move_to_confirmed(conf); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r, nfs4_lock_state(); /* wait for callbacks */ - nfsd4_set_callback_client(ses->se_client, NULL); + nfsd4_shutdown_callback(ses->se_client); nfs4_unlock_state(); + + nfsd4_del_conns(ses); + nfsd4_put_session(ses); status = nfs_ok; out: @@ -1555,6 +1628,36 @@ out: return status; } +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; + } + } + return NULL; +} + +static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_conn *c; + + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(new->cn_xprt, ses); + if (c) { + spin_unlock(&clp->cl_lock); + free_conn(new); + return; + } + __nfsd4_hash_conn(new, ses); + spin_unlock(&clp->cl_lock); + nfsd4_register_conn(new); + return; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_session *session; struct nfsd4_slot *slot; + struct nfsd4_conn *conn; int status; if (resp->opcnt != 1) return nfserr_sequence_pos; + /* + * Will be either used or freed by nfsd4_sequence_check_conn + * below. + */ + conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); + if (!conn) + return nfserr_jukebox; + spin_lock(&client_lock); status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid); @@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out; + nfsd4_sequence_check_conn(conn, session); + conn = NULL; + /* Success! bump slot seqid */ slot->sl_inuse = true; slot->sl_seqid = seq->seqid; @@ -1613,6 +1728,7 @@ out: nfsd4_get_session(cstate->session); atomic_inc(&session->se_client->cl_refcount); } + kfree(conn); spin_unlock(&client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); return status; @@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; gen_clid(new); } + /* + * XXX: we should probably set this at creation time, and check + * for consistent minorversion use throughout: + */ + new->cl_minorversion = 0; gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; @@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; else { atomic_set(&conf->cl_cb_set, 0); - nfsd4_probe_callback(conf, &unconf->cl_cb_conn); + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); expire_client(unconf); status = nfs_ok; @@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } move_to_confirmed(unconf); conf = unconf; - nfsd4_probe_callback(conf, &conf->cl_cb_conn); + nfsd4_probe_callback(conf); status = nfs_ok; } } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) @@ -2944,7 +3066,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (STALE_STATEID(stateid)) goto out; - status = nfserr_bad_stateid; + /* + * We assume that any stateid that has the current boot time, + * but that we can't find, is expired: + */ + status = nfserr_expired; if (is_delegation_stateid(stateid)) { dp = find_delegation_stateid(ino, stateid); if (!dp) @@ -2964,6 +3090,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, stp = find_stateid(stateid, flags); if (!stp) goto out; + status = nfserr_bad_stateid; if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) @@ -3038,8 +3165,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, * a replayed close: */ sop = search_close_lru(stateid->si_stateownerid, flags); + /* It's not stale; let's assume it's expired: */ if (sop == NULL) - return nfserr_bad_stateid; + return nfserr_expired; *sopp = sop; goto check_replay; } @@ -3304,6 +3432,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; if (!is_delegation_stateid(stateid)) goto out; + status = nfserr_expired; dp = find_delegation_stateid(inode, stateid); if (!dp) goto out; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1a468bb..f35a94a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } } - if ((buflen -= 16) < 0) - goto out_resource; - if (unlikely(bmval2)) { + if (bmval2) { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(bmval0); WRITE32(bmval1); WRITE32(bmval2); - } else if (likely(bmval1)) { + } else if (bmval1) { + if ((buflen -= 12) < 0) + goto out_resource; WRITE32(2); WRITE32(bmval0); WRITE32(bmval1); } else { + if ((buflen -= 8) < 0) + goto out_resource; WRITE32(1); WRITE32(bmval0); } @@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, u32 word1 = nfsd_suppattrs1(minorversion); u32 word2 = nfsd_suppattrs2(minorversion); - if ((buflen -= 12) < 0) - goto out_resource; if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; if (!word2) { + if ((buflen -= 12) < 0) + goto out_resource; WRITE32(2); WRITE32(word0); WRITE32(word1); } else { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(word0); WRITE32(word1); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 06fa87e..d6dc3f6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -22,6 +22,7 @@ */ enum { NFSD_Root = 1, +#ifdef CONFIG_NFSD_DEPRECATED NFSD_Svc, NFSD_Add, NFSD_Del, @@ -29,6 +30,7 @@ enum { NFSD_Unexport, NFSD_Getfd, NFSD_Getfs, +#endif NFSD_List, NFSD_Export_features, NFSD_Fh, @@ -54,6 +56,7 @@ enum { /* * write() for these nodes. */ +#ifdef CONFIG_NFSD_DEPRECATED static ssize_t write_svc(struct file *file, char *buf, size_t size); static ssize_t write_add(struct file *file, char *buf, size_t size); static ssize_t write_del(struct file *file, char *buf, size_t size); @@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size); static ssize_t write_unexport(struct file *file, char *buf, size_t size); static ssize_t write_getfd(struct file *file, char *buf, size_t size); static ssize_t write_getfs(struct file *file, char *buf, size_t size); +#endif static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); @@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { +#ifdef CONFIG_NFSD_DEPRECATED [NFSD_Svc] = write_svc, [NFSD_Add] = write_add, [NFSD_Del] = write_del, @@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Unexport] = write_unexport, [NFSD_Getfd] = write_getfd, [NFSD_Getfs] = write_getfs, +#endif [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, @@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { + static int warned; + if (file->f_dentry->d_name.name[0] == '.' && !warned) { + printk(KERN_INFO + "Warning: \"%s\" uses deprecated NFSD interface: %s." + " This will be removed in 2.6.40\n", + current->comm, file->f_dentry->d_name.name); + warned = 1; + } if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return @@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = { * payload - write methods */ +#ifdef CONFIG_NFSD_DEPRECATED /** * write_svc - Start kernel's NFSD server * @@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - clp = auth_unix_lookup(&in6); + clp = auth_unix_lookup(&init_net, &in6); if (!clp) err = -EPERM; else { @@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - clp = auth_unix_lookup(&in6); + clp = auth_unix_lookup(&init_net, &in6); if (!clp) err = -EPERM; else { @@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) out: return err; } +#endif /* CONFIG_NFSD_DEPRECATED */ /** * write_unlock_ip - Release all locks used by a client @@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf) if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; @@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { +#ifdef CONFIG_NFSD_DEPRECATED [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, @@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index b76ac3a..6b641cf 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -249,7 +249,7 @@ extern time_t nfsd4_grace; #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ -#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ +#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ /* * The following attributes are currently not supported by the NFSv4 server: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e2c4346..2bae1d8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -16,6 +16,7 @@ #include <linux/lockd/bind.h> #include <linux/nfsacl.h> #include <linux/seq_file.h> +#include <net/net_namespace.h> #include "nfsd.h" #include "cache.h" #include "vfs.h" @@ -186,12 +187,12 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, + error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, + error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 322518c..39adc27 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -35,6 +35,7 @@ #ifndef _NFSD4_STATE_H #define _NFSD4_STATE_H +#include <linux/sunrpc/svc_xprt.h> #include <linux/nfsd/nfsfh.h> #include "nfsfh.h" @@ -64,19 +65,12 @@ typedef struct { (s)->si_fileid, \ (s)->si_generation -struct nfsd4_cb_sequence { - /* args/res */ - u32 cbs_minorversion; - struct nfs4_client *cbs_clp; -}; - -struct nfs4_rpc_args { - void *args_op; - struct nfsd4_cb_sequence args_seq; -}; - struct nfsd4_callback { - struct nfs4_rpc_args cb_args; + void *cb_op; + struct nfs4_client *cb_clp; + u32 cb_minorversion; + struct rpc_message cb_msg; + const struct rpc_call_ops *cb_ops; struct work_struct cb_work; }; @@ -91,7 +85,6 @@ struct nfs4_delegation { u32 dl_type; time_t dl_time; /* For recall: */ - u32 dl_ident; stateid_t dl_stateid; struct knfsd_fh dl_fh; int dl_retries; @@ -103,8 +96,8 @@ struct nfs4_cb_conn { /* SETCLIENTID info */ struct sockaddr_storage cb_addr; size_t cb_addrlen; - u32 cb_prog; - u32 cb_minorversion; + u32 cb_prog; /* used only in 4.0 case; + per-session otherwise */ u32 cb_ident; /* minorversion 0 only */ struct svc_xprt *cb_xprt; /* minorversion 1 only */ }; @@ -160,6 +153,15 @@ struct nfsd4_clid_slot { struct nfsd4_create_session sl_cr_ses; }; +struct nfsd4_conn { + struct list_head cn_persession; + struct svc_xprt *cn_xprt; + struct svc_xpt_user cn_xpt_user; + struct nfsd4_session *cn_session; +/* CDFC4_FORE, CDFC4_BACK: */ + unsigned char cn_flags; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -169,6 +171,9 @@ struct nfsd4_session { struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; + struct list_head se_conns; + u32 se_cb_prog; + u32 se_cb_seq_nr; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; @@ -221,24 +226,32 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_firststate; /* recovery dir creation */ + u32 cl_minorversion; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; +#define NFSD4_CLIENT_CB_UPDATE 1 +#define NFSD4_CLIENT_KILL 2 + unsigned long cl_cb_flags; struct rpc_clnt *cl_cb_client; + u32 cl_cb_ident; atomic_t cl_cb_set; + struct nfsd4_callback cl_cb_null; + struct nfsd4_session *cl_cb_session; + + /* for all client information that callback code might need: */ + spinlock_t cl_lock; /* for nfs41 */ struct list_head cl_sessions; struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; - struct nfs4_sessionid cl_sessionid; /* number of rpc's in progress over an associated session: */ atomic_t cl_refcount; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; - u32 cl_cb_seq_nr; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ }; @@ -440,12 +453,13 @@ extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void nfs4_free_stateowner(struct kref *kref); extern int set_callback_cred(void); -extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); -extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); +extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); extern void nfsd4_init_recdir(char *recdir_name); diff --git a/include/linux/net.h b/include/linux/net.h index dee0b11..16faa13 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -229,6 +229,8 @@ enum { extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(const struct net_proto_family *fam); extern void sock_unregister(int family); +extern int __sock_create(struct net *net, int family, int type, int proto, + struct socket **res, int kern); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 07e40c6..4925b22 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -17,7 +17,9 @@ #define NFS4_BITMAP_SIZE 2 #define NFS4_VERIFIER_SIZE 8 -#define NFS4_STATEID_SIZE 16 +#define NFS4_STATEID_SEQID_SIZE 4 +#define NFS4_STATEID_OTHER_SIZE 12 +#define NFS4_STATEID_SIZE (NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE) #define NFS4_FHSIZE 128 #define NFS4_MAXPATHLEN PATH_MAX #define NFS4_MAXNAMLEN NAME_MAX @@ -61,6 +63,9 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_CDFC4_FORE 0x1 +#define NFS4_CDFC4_BACK 0x2 + #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 @@ -167,7 +172,16 @@ struct nfs4_acl { }; typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; -typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid; + +struct nfs41_stateid { + __be32 seqid; + char other[NFS4_STATEID_OTHER_SIZE]; +} __attribute__ ((packed)); + +typedef union { + char data[NFS4_STATEID_SIZE]; + struct nfs41_stateid stateid; +} nfs4_stateid; enum nfs_opnum4 { OP_ACCESS = 3, @@ -471,6 +485,8 @@ enum lock_type4 { #define FATTR4_WORD1_TIME_MODIFY (1UL << 21) #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) +#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) +#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define NFSPROC4_NULL 0 #define NFSPROC4_COMPOUND 1 @@ -532,6 +548,8 @@ enum { NFSPROC4_CLNT_SEQUENCE, NFSPROC4_CLNT_GET_LEASE_TIME, NFSPROC4_CLNT_RECLAIM_COMPLETE, + NFSPROC4_CLNT_LAYOUTGET, + NFSPROC4_CLNT_GETDEVICEINFO, }; /* nfs41 types */ @@ -550,6 +568,49 @@ enum state_protect_how4 { SP4_SSV = 2 }; +enum pnfs_layouttype { + LAYOUT_NFSV4_1_FILES = 1, + LAYOUT_OSD2_OBJECTS = 2, + LAYOUT_BLOCK_VOLUME = 3, +}; + +/* used for both layout return and recall */ +enum pnfs_layoutreturn_type { + RETURN_FILE = 1, + RETURN_FSID = 2, + RETURN_ALL = 3 +}; + +enum pnfs_iomode { + IOMODE_READ = 1, + IOMODE_RW = 2, + IOMODE_ANY = 3, +}; + +enum pnfs_notify_deviceid_type4 { + NOTIFY_DEVICEID4_CHANGE = 1 << 1, + NOTIFY_DEVICEID4_DELETE = 1 << 2, +}; + +#define NFL4_UFLG_MASK 0x0000003F +#define NFL4_UFLG_DENSE 0x00000001 +#define NFL4_UFLG_COMMIT_THRU_MDS 0x00000002 +#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK 0xFFFFFFC0 + +/* Encoded in the loh_body field of type layouthint4 */ +enum filelayout_hint_care4 { + NFLH4_CARE_DENSE = NFL4_UFLG_DENSE, + NFLH4_CARE_COMMIT_THRU_MDS = NFL4_UFLG_COMMIT_THRU_MDS, + NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040, + NFLH4_CARE_STRIPE_COUNT = 0x00000080 +}; + +#define NFS4_DEVICEID4_SIZE 16 + +struct nfs4_deviceid { + char data[NFS4_DEVICEID4_SIZE]; +}; + #endif #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a46e430..bba2668 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -188,6 +188,9 @@ struct nfs_inode { struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; + + /* pNFS layout information */ + struct pnfs_layout_hdr *layout; #endif /* CONFIG_NFS_V4*/ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; @@ -615,6 +618,8 @@ nfs_fileid_to_ino_t(u64 fileid) #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 #define NFSDBG_FSCACHE 0x0800 +#define NFSDBG_PNFS 0x1000 +#define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5eef862..452d964 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -82,6 +82,8 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* sharred session */ + struct list_head cl_layouts; + struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_FSCACHE @@ -145,6 +147,7 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ + struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ #endif void (*destroy)(struct nfs_server *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index da7a130..ba6cc8f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -114,6 +114,7 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ + __u32 layouttype; /* supported pnfs layout driver */ }; struct nfs_fsstat { @@ -186,6 +187,55 @@ struct nfs4_get_lease_time_res { struct nfs4_sequence_res lr_seq_res; }; +#define PNFS_LAYOUT_MAXSIZE 4096 + +struct nfs4_layoutdriver_data { + __u32 len; + void *buf; +}; + +struct pnfs_layout_range { + u32 iomode; + u64 offset; + u64 length; +}; + +struct nfs4_layoutget_args { + __u32 type; + struct pnfs_layout_range range; + __u64 minlength; + __u32 maxcount; + struct inode *inode; + struct nfs_open_context *ctx; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutget_res { + __u32 return_on_close; + struct pnfs_layout_range range; + __u32 type; + nfs4_stateid stateid; + struct nfs4_layoutdriver_data layout; + struct nfs4_sequence_res seq_res; +}; + +struct nfs4_layoutget { + struct nfs4_layoutget_args args; + struct nfs4_layoutget_res res; + struct pnfs_layout_segment **lsegpp; + int status; +}; + +struct nfs4_getdeviceinfo_args { + struct pnfs_device *pdev; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_getdeviceinfo_res { + struct pnfs_device *pdev; + struct nfs4_sequence_res seq_res; +}; + /* * Arguments to the open call. */ diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5bbc447..b202475 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -122,8 +122,8 @@ extern const struct rpc_authops authnull_ops; int __init rpc_init_authunix(void); int __init rpc_init_generic_auth(void); int __init rpcauth_init_module(void); -void __exit rpcauth_remove_module(void); -void __exit rpc_destroy_generic_auth(void); +void rpcauth_remove_module(void); +void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84..6950c98 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,12 +125,15 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill */ struct cache_deferred_req { - struct list_head hash; /* on hash chain */ + struct hlist_node hash; /* on hash chain */ struct list_head recent; /* on fifo */ struct cache_head *item; /* cache item we wait on */ void *owner; /* we might need to discard all defered requests @@ -194,7 +197,9 @@ extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) extern void __init cache_initialize(void); extern int cache_register(struct cache_detail *cd); +extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister(struct cache_detail *cd); +extern void cache_unregister_net(struct cache_detail *cd, struct net *net); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, mode_t, struct cache_detail *); @@ -218,14 +223,42 @@ static inline int get_int(char **bpp, int *anint) return 0; } +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. + */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + static inline time_t get_expiry(char **bpp) { int rv; + struct timespec boot; + if (get_int(bpp, &rv)) return 0; if (rv < 0) return 0; - return rv; + getboottime(&boot); + return rv - boot.tv_sec; } +static inline void sunrpc_invalidate(struct cache_head *h, + struct cache_detail *detail) +{ + h->expiry_time = seconds_since_boot() - 1; + detail->nextcheck = seconds_since_boot(); +} #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c83df09..a5a55f2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -102,6 +102,7 @@ struct rpc_procinfo { #ifdef __KERNEL__ struct rpc_create_args { + struct net *net; int protocol; struct sockaddr *address; size_t addrsize; diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h deleted file mode 100644 index e3e6a34..0000000 --- a/include/linux/sunrpc/gss_spkm3.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_spkm3.h - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -#include <linux/sunrpc/auth_gss.h> -#include <linux/sunrpc/gss_err.h> -#include <linux/sunrpc/gss_asn1.h> - -struct spkm3_ctx { - struct xdr_netobj ctx_id; /* per message context id */ - int endtime; /* endtime of the context */ - struct xdr_netobj mech_used; - unsigned int ret_flags ; - struct xdr_netobj conf_alg; - struct xdr_netobj derived_conf_key; - struct xdr_netobj intg_alg; - struct xdr_netobj derived_integ_key; -}; - -/* OIDs declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */ -extern const struct xdr_netobj hmac_md5_oid; -extern const struct xdr_netobj cast5_cbc_oid; - -/* SPKM InnerContext Token types */ - -#define SPKM_ERROR_TOK 3 -#define SPKM_MIC_TOK 4 -#define SPKM_WRAP_TOK 5 -#define SPKM_DEL_TOK 6 - -u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype); - -u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); - -#define CKSUMTYPE_RSA_MD5 0x0007 -#define CKSUMTYPE_HMAC_MD5 0x0008 - -s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum); -void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); -int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, - int explen); -void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, - unsigned char *ctxhdr, int elen, int zbit); -void spkm3_make_mic_token(unsigned char **tokp, int toklen, - struct xdr_netobj *mic_hdr, - struct xdr_netobj *md5cksum, int md5elen, int md5zbit); -u32 spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, - unsigned char **cksum); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 5fa0f20..680471d 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -38,8 +38,21 @@ struct svc_stat { rpcbadclnt; }; -void rpc_proc_init(void); -void rpc_proc_exit(void); +struct net; +#ifdef CONFIG_PROC_FS +int rpc_proc_init(struct net *); +void rpc_proc_exit(struct net *); +#else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} + +static inline void rpc_proc_exit(struct net *net) +{ +} +#endif + #ifdef MODULE void rpc_modcount(struct inode *, int); #endif @@ -54,9 +67,6 @@ void svc_proc_unregister(const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); - -extern struct proc_dir_entry *proc_net_rpc; - #else static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } @@ -69,9 +79,6 @@ static inline void svc_proc_unregister(const char *p) {} static inline void svc_seq_show(struct seq_file *seq, const struct svc_stat *st) {} - -#define proc_net_rpc NULL - #endif #endif /* _LINUX_SUNRPC_STATS_H */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5f4e18b..bbdb680 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -12,6 +12,7 @@ struct svc_xprt_ops { struct svc_xprt *(*xpo_create)(struct svc_serv *, + struct net *net, struct sockaddr *, int, int); struct svc_xprt *(*xpo_accept)(struct svc_xprt *); @@ -32,6 +33,16 @@ struct svc_xprt_class { u32 xcl_max_payload; }; +/* + * This is embedded in an object that wants a callback before deleting + * an xprt; intended for use by NFSv4.1, which needs to know when a + * client's tcp connection (and hence possibly a backchannel) goes away. + */ +struct svc_xpt_user { + struct list_head list; + void (*callback)(struct svc_xpt_user *); +}; + struct svc_xprt { struct svc_xprt_class *xpt_class; struct svc_xprt_ops *xpt_ops; @@ -66,14 +77,31 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ + struct list_head xpt_users; /* callbacks on free */ + + struct net *xpt_net; }; +static inline void register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) +{ + spin_lock(&xpt->xpt_lock); + list_add(&u->list, &xpt->xpt_users); + spin_unlock(&xpt->xpt_lock); +} + +static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) +{ + spin_lock(&xpt->xpt_lock); + list_del_init(&u->list); + spin_unlock(&xpt->xpt_lock); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, const int, - const unsigned short, int); +int svc_create_xprt(struct svc_serv *, const char *, struct net *, + const int, const unsigned short, int); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index d39dbdc..25d333c 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -108,10 +108,15 @@ struct auth_ops { #define SVC_NEGATIVE 4 #define SVC_OK 5 #define SVC_DROP 6 -#define SVC_DENIED 7 -#define SVC_PENDING 8 -#define SVC_COMPLETE 9 +#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely + * lost so if there is a tcp connection, it + * should be closed + */ +#define SVC_DENIED 8 +#define SVC_PENDING 9 +#define SVC_COMPLETE 10 +struct svc_xprt; extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); extern int svc_authorise(struct svc_rqst *rqstp); @@ -121,13 +126,13 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom); +extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr); +extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); -extern void svcauth_unix_info_release(void *); +extern void svcauth_unix_info_release(struct svc_xprt *xpt); extern int svcauth_unix_set_client(struct svc_rqst *rqstp); static inline unsigned long hash_str(char *name, int bits) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ab91d86..498ab93 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -132,6 +132,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp) return p + 2; } +static inline __be32 * +xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len) +{ + memcpy(ptr, p, len); + return p + XDR_QUADLEN(len); +} + /* * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ff5a77b..89d10d2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -224,6 +224,7 @@ struct rpc_xprt { bklog_u; /* backlog queue utilization */ } stat; + struct net *xprt_net; const char *address_strings[RPC_DISPLAY_MAX]; }; @@ -249,6 +250,7 @@ static inline int bc_prealloc(struct rpc_rqst *req) struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ + struct net * net; struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; @@ -280,6 +282,8 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); +struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); +void xprt_free(struct rpc_xprt *); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { diff --git a/net/socket.c b/net/socket.c index abf3e25..7f67c07 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1145,7 +1145,7 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); -static int __sock_create(struct net *net, int family, int type, int protocol, +int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1257,6 +1257,7 @@ out_release: rcu_read_unlock(); goto out_sock_release; } +EXPORT_SYMBOL(__sock_create); int sock_create(int family, int type, int protocol, struct socket **res) { diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 3376d76..8873fd8 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5 Kerberos support should be installed. If unsure, say Y. - -config RPCSEC_GSS_SPKM3 - tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - select SUNRPC_GSS - select CRYPTO - select CRYPTO_MD5 - select CRYPTO_DES - select CRYPTO_CAST5 - select CRYPTO_CBC - help - Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechanism (RFC 2025). - - Secure RPC calls with SPKM3 require an auxiliary userspace - daemon which may be found in the Linux nfs-utils package - available from http://linux-nfs.org/. - - If unsure, say N. diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 68192e5..afe6784 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -658,7 +658,7 @@ out1: return err; } -void __exit rpcauth_remove_module(void) +void rpcauth_remove_module(void) { rpc_destroy_authunix(); rpc_destroy_generic_auth(); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 43162bb..e010a01 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void) return rpcauth_init_credcache(&generic_auth); } -void __exit rpc_destroy_generic_auth(void) +void rpc_destroy_generic_auth(void) { rpcauth_destroy_credcache(&generic_auth); } diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 74a2317..7350d86 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o - -obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o - -rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \ - gss_spkm3_token.o diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 778e5df..f375dec 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -427,7 +427,7 @@ static int context_derive_keys_rc4(struct krb5_ctx *ctx) { struct crypto_hash *hmac; - char sigkeyconstant[] = "signaturekey"; + static const char sigkeyconstant[] = "signaturekey"; int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ struct hash_desc desc; struct scatterlist sg[1]; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c deleted file mode 100644 index adade3d..0000000 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_mech.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - * J. Bruce Fields <bfields@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/sunrpc/auth.h> -#include <linux/in.h> -#include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/gss_spkm3.h> -#include <linux/sunrpc/xdr.h> -#include <linux/crypto.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -static const void * -simple_get_bytes(const void *p, const void *end, void *res, int len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) -{ - const void *q; - unsigned int len; - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - res->len = len; - if (len == 0) { - res->data = NULL; - return p; - } - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(res->data == NULL)) - return ERR_PTR(-ENOMEM); - return q; -} - -static int -gss_import_sec_context_spkm3(const void *p, size_t len, - struct gss_ctx *ctx_id, - gfp_t gfp_mask) -{ - const void *end = (const void *)((const char *)p + len); - struct spkm3_ctx *ctx; - int version; - - if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask))) - goto out_err; - - p = simple_get_bytes(p, end, &version, sizeof(version)); - if (IS_ERR(p)) - goto out_err_free_ctx; - if (version != 1) { - dprintk("RPC: unknown spkm3 token format: " - "obsolete nfs-utils?\n"); - p = ERR_PTR(-EINVAL); - goto out_err_free_ctx; - } - - p = simple_get_netobj(p, end, &ctx->ctx_id); - if (IS_ERR(p)) - goto out_err_free_ctx; - - p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); - if (IS_ERR(p)) - goto out_err_free_ctx_id; - - p = simple_get_netobj(p, end, &ctx->mech_used); - if (IS_ERR(p)) - goto out_err_free_ctx_id; - - p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); - if (IS_ERR(p)) - goto out_err_free_mech; - - p = simple_get_netobj(p, end, &ctx->conf_alg); - if (IS_ERR(p)) - goto out_err_free_mech; - - p = simple_get_netobj(p, end, &ctx->derived_conf_key); - if (IS_ERR(p)) - goto out_err_free_conf_alg; - - p = simple_get_netobj(p, end, &ctx->intg_alg); - if (IS_ERR(p)) - goto out_err_free_conf_key; - - p = simple_get_netobj(p, end, &ctx->derived_integ_key); - if (IS_ERR(p)) - goto out_err_free_intg_alg; - - if (p != end) { - p = ERR_PTR(-EFAULT); - goto out_err_free_intg_key; - } - - ctx_id->internal_ctx_id = ctx; - - dprintk("RPC: Successfully imported new spkm context.\n"); - return 0; - -out_err_free_intg_key: - kfree(ctx->derived_integ_key.data); -out_err_free_intg_alg: - kfree(ctx->intg_alg.data); -out_err_free_conf_key: - kfree(ctx->derived_conf_key.data); -out_err_free_conf_alg: - kfree(ctx->conf_alg.data); -out_err_free_mech: - kfree(ctx->mech_used.data); -out_err_free_ctx_id: - kfree(ctx->ctx_id.data); -out_err_free_ctx: - kfree(ctx); -out_err: - return PTR_ERR(p); -} - -static void -gss_delete_sec_context_spkm3(void *internal_ctx) -{ - struct spkm3_ctx *sctx = internal_ctx; - - kfree(sctx->derived_integ_key.data); - kfree(sctx->intg_alg.data); - kfree(sctx->derived_conf_key.data); - kfree(sctx->conf_alg.data); - kfree(sctx->mech_used.data); - kfree(sctx->ctx_id.data); - kfree(sctx); -} - -static u32 -gss_verify_mic_spkm3(struct gss_ctx *ctx, - struct xdr_buf *signbuf, - struct xdr_netobj *checksum) -{ - u32 maj_stat = 0; - struct spkm3_ctx *sctx = ctx->internal_ctx_id; - - maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); - - dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); - return maj_stat; -} - -static u32 -gss_get_mic_spkm3(struct gss_ctx *ctx, - struct xdr_buf *message_buffer, - struct xdr_netobj *message_token) -{ - u32 err = 0; - struct spkm3_ctx *sctx = ctx->internal_ctx_id; - - err = spkm3_make_token(sctx, message_buffer, - message_token, SPKM_MIC_TOK); - dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err); - return err; -} - -static const struct gss_api_ops gss_spkm3_ops = { - .gss_import_sec_context = gss_import_sec_context_spkm3, - .gss_get_mic = gss_get_mic_spkm3, - .gss_verify_mic = gss_verify_mic_spkm3, - .gss_delete_sec_context = gss_delete_sec_context_spkm3, -}; - -static struct pf_desc gss_spkm3_pfs[] = { - {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, - {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, -}; - -static struct gss_api_mech gss_spkm3_mech = { - .gm_name = "spkm3", - .gm_owner = THIS_MODULE, - .gm_oid = {7, "\053\006\001\005\005\001\003"}, - .gm_ops = &gss_spkm3_ops, - .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), - .gm_pfs = gss_spkm3_pfs, -}; - -static int __init init_spkm3_module(void) -{ - int status; - - status = gss_mech_register(&gss_spkm3_mech); - if (status) - printk("Failed to register spkm3 gss mechanism!\n"); - return status; -} - -static void __exit cleanup_spkm3_module(void) -{ - gss_mech_unregister(&gss_spkm3_mech); -} - -MODULE_LICENSE("GPL"); -module_init(init_spkm3_module); -module_exit(cleanup_spkm3_module); diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c deleted file mode 100644 index 5a3a65a..0000000 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_seal.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/types.h> -#include <linux/jiffies.h> -#include <linux/sunrpc/gss_spkm3.h> -#include <linux/random.h> -#include <linux/crypto.h> -#include <linux/pagemap.h> -#include <linux/scatterlist.h> -#include <linux/sunrpc/xdr.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"}; -const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"}; - -/* - * spkm3_make_token() - * - * Only SPKM_MIC_TOK with md5 intg-alg is supported - */ - -u32 -spkm3_make_token(struct spkm3_ctx *ctx, - struct xdr_buf * text, struct xdr_netobj * token, - int toktype) -{ - s32 checksum_type; - char tokhdrbuf[25]; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; - int tokenlen = 0; - unsigned char *ptr; - s32 now; - int ctxelen = 0, ctxzbit = 0; - int md5elen = 0, md5zbit = 0; - - now = jiffies; - - if (ctx->ctx_id.len != 16) { - dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", - ctx->ctx_id.len); - goto out_err; - } - - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG " - "algorithm. only support hmac-md5 I-ALG.\n"); - goto out_err; - } else - checksum_type = CKSUMTYPE_HMAC_MD5; - - if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported C-ALG " - "algorithm\n"); - goto out_err; - } - - if (toktype == SPKM_MIC_TOK) { - /* Calculate checksum over the mic-header */ - asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); - spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, - ctxelen, ctxzbit); - if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key, - (char *)mic_hdr.data, mic_hdr.len, - text, 0, &md5cksum)) - goto out_err; - - asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); - tokenlen = 10 + ctxelen + 1 + md5elen + 1; - - /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen + 2); - - ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr); - - spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); - } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ - dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK " - "not supported\n"); - goto out_err; - } - - /* XXX need to implement sequence numbers, and ctx->expired */ - - return GSS_S_COMPLETE; -out_err: - token->data = NULL; - token->len = 0; - return GSS_S_FAILURE; -} - -static int -spkm3_checksummer(struct scatterlist *sg, void *data) -{ - struct hash_desc *desc = data; - - return crypto_hash_update(desc, sg, sg->length); -} - -/* checksum the plaintext data and hdrlen bytes of the token header */ -s32 -make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum) -{ - char *cksumname; - struct hash_desc desc; /* XXX add to ctx? */ - struct scatterlist sg[1]; - int err; - - switch (cksumtype) { - case CKSUMTYPE_HMAC_MD5: - cksumname = "hmac(md5)"; - break; - default: - dprintk("RPC: spkm3_make_checksum:" - " unsupported checksum %d", cksumtype); - return GSS_S_FAILURE; - } - - if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE; - - desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) - return GSS_S_FAILURE; - cksum->len = crypto_hash_digestsize(desc.tfm); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - err = crypto_hash_setkey(desc.tfm, key->data, key->len); - if (err) - goto out; - - err = crypto_hash_init(&desc); - if (err) - goto out; - - sg_init_one(sg, header, hdrlen); - crypto_hash_update(&desc, sg, sg->length); - - xdr_process_buf(body, body_offset, body->len - body_offset, - spkm3_checksummer, &desc); - crypto_hash_final(&desc, cksum->data); - -out: - crypto_free_hash(desc.tfm); - - return err ? GSS_S_FAILURE : 0; -} diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c deleted file mode 100644 index a99825d..0000000 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_token.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/sunrpc/gss_spkm3.h> -#include <linux/random.h> -#include <linux/crypto.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -/* - * asn1_bitstring_len() - * - * calculate the asn1 bitstring length of the xdr_netobject - */ -void -asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) -{ - int i, zbit = 0,elen = in->len; - char *ptr; - - ptr = &in->data[in->len -1]; - - /* count trailing 0's */ - for(i = in->len; i > 0; i--) { - if (*ptr == 0) { - ptr--; - elen--; - } else - break; - } - - /* count number of 0 bits in final octet */ - ptr = &in->data[elen - 1]; - for(i = 0; i < 8; i++) { - short mask = 0x01; - - if (!((mask << i) & *ptr)) - zbit++; - else - break; - } - *enclen = elen; - *zerobits = zbit; -} - -/* - * decode_asn1_bitstring() - * - * decode a bitstring into a buffer of the expected length. - * enclen = bit string length - * explen = expected length (define in rfc) - */ -int -decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) -{ - if (!(out->data = kzalloc(explen,GFP_NOFS))) - return 0; - out->len = explen; - memcpy(out->data, in, enclen); - return 1; -} - -/* - * SPKMInnerContextToken choice SPKM_MIC asn1 token layout - * - * contextid is always 16 bytes plain data. max asn1 bitstring len = 17. - * - * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum) - * - * pos value - * ---------- - * [0] a4 SPKM-MIC tag - * [1] ?? innertoken length (max 44) - * - * - * tok_hdr piece of checksum data starts here - * - * the maximum mic-header len = 9 + 17 = 26 - * mic-header - * ---------- - * [2] 30 SEQUENCE tag - * [3] ?? mic-header length: (max 23) = TokenID + ContextID - * - * TokenID - all fields constant and can be hardcoded - * ------- - * [4] 02 Type 2 - * [5] 02 Length 2 - * [6][7] 01 01 TokenID (SPKM_MIC_TOK) - * - * ContextID - encoded length not constant, calculated - * --------- - * [8] 03 Type 3 - * [9] ?? encoded length - * [10] ?? ctxzbit - * [11] contextid - * - * mic_header piece of checksum data ends here. - * - * int-cksum - encoded length not constant, calculated - * --------- - * [??] 03 Type 3 - * [??] ?? encoded length - * [??] ?? md5zbit - * [??] int-cksum (NID_md5 = 16) - * - * maximum SPKM-MIC innercontext token length = - * 10 + encoded contextid_size(17 max) + 2 + encoded - * cksum_size (17 maxfor NID_md5) = 46 - */ - -/* - * spkm3_mic_header() - * - * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation - * elen: 16 byte context id asn1 bitstring encoded length - */ -void -spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit) -{ - char *hptr = *hdrbuf; - char *top = *hdrbuf; - - *(u8 *)hptr++ = 0x30; - *(u8 *)hptr++ = elen + 7; /* on the wire header length */ - - /* tokenid */ - *(u8 *)hptr++ = 0x02; - *(u8 *)hptr++ = 0x02; - *(u8 *)hptr++ = 0x01; - *(u8 *)hptr++ = 0x01; - - /* coniextid */ - *(u8 *)hptr++ = 0x03; - *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */ - *(u8 *)hptr++ = zbit; - memcpy(hptr, ctxdata, elen); - hptr += elen; - *hdrlen = hptr - top; -} - -/* - * spkm3_mic_innercontext_token() - * - * *tokp points to the beginning of the SPKM_MIC token described - * in rfc 2025, section 3.2.1: - * - * toklen is the inner token length - */ -void -spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) -{ - unsigned char *ict = *tokp; - - *(u8 *)ict++ = 0xa4; - *(u8 *)ict++ = toklen; - memcpy(ict, mic_hdr->data, mic_hdr->len); - ict += mic_hdr->len; - - *(u8 *)ict++ = 0x03; - *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */ - *(u8 *)ict++ = md5zbit; - memcpy(ict, md5cksum->data, md5elen); -} - -u32 -spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum) -{ - struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL}; - unsigned char *ptr = *tokp; - int ctxelen; - u32 ret = GSS_S_DEFECTIVE_TOKEN; - - /* spkm3 innercontext token preamble */ - if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) { - dprintk("RPC: BAD SPKM ictoken preamble\n"); - goto out; - } - - *mic_hdrlen = ptr[3]; - - /* token type */ - if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) { - dprintk("RPC: BAD asn1 SPKM3 token type\n"); - goto out; - } - - /* only support SPKM_MIC_TOK */ - if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { - dprintk("RPC: ERROR unsupported SPKM3 token\n"); - goto out; - } - - /* contextid */ - if (ptr[8] != 0x03) { - dprintk("RPC: BAD SPKM3 asn1 context-id type\n"); - goto out; - } - - ctxelen = ptr[9]; - if (ctxelen > 17) { /* length includes asn1 zbit octet */ - dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen); - goto out; - } - - /* ignore ptr[10] */ - - if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16)) - goto out; - - /* - * in the current implementation: the optional int-alg is not present - * so the default int-alg (md5) is used the optional snd-seq field is - * also not present - */ - - if (*mic_hdrlen != 6 + ctxelen) { - dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only " - "support default int-alg (should be absent) " - "and do not support snd-seq\n", *mic_hdrlen); - goto out; - } - /* checksum */ - *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */ - - ret = GSS_S_COMPLETE; -out: - kfree(spkm3_ctx_id.data); - return ret; -} - diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c deleted file mode 100644 index cc21ee8..0000000 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * linux/net/sunrpc/gss_spkm3_unseal.c - * - * Copyright (c) 2003 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/sunrpc/gss_spkm3.h> -#include <linux/crypto.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -/* - * spkm3_read_token() - * - * only SPKM_MIC_TOK with md5 intg-alg is supported - */ -u32 -spkm3_read_token(struct spkm3_ctx *ctx, - struct xdr_netobj *read_token, /* checksum */ - struct xdr_buf *message_buffer, /* signbuf */ - int toktype) -{ - s32 checksum_type; - s32 code; - struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; - char cksumdata[16]; - struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; - unsigned char *ptr = (unsigned char *)read_token->data; - unsigned char *cksum; - int bodysize, md5elen; - int mic_hdrlen; - u32 ret = GSS_S_DEFECTIVE_TOKEN; - - if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, - &bodysize, &ptr, read_token->len)) - goto out; - - /* decode the token */ - - if (toktype != SPKM_MIC_TOK) { - dprintk("RPC: BAD SPKM3 token type: %d\n", toktype); - goto out; - } - - if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) - goto out; - - if (*cksum++ != 0x03) { - dprintk("RPC: spkm3_read_token BAD checksum type\n"); - goto out; - } - md5elen = *cksum++; - cksum++; /* move past the zbit */ - - if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) - goto out; - - /* HARD CODED FOR MD5 */ - - /* compute the checksum of the message. - * ptr + 2 = start of header piece of checksum - * mic_hdrlen + 2 = length of header piece of checksum - */ - ret = GSS_S_DEFECTIVE_TOKEN; - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG " - "algorithm\n"); - goto out; - } - - checksum_type = CKSUMTYPE_HMAC_MD5; - - code = make_spkm3_checksum(checksum_type, - &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2, - message_buffer, 0, &md5cksum); - - if (code) - goto out; - - ret = GSS_S_BAD_SIG; - code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); - if (code) { - dprintk("RPC: bad MIC checksum\n"); - goto out; - } - - - /* XXX: need to add expiration and sequencing */ - ret = GSS_S_COMPLETE; -out: - kfree(wire_cksum.data); - return ret; -} diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index cc385b3..dec2a6f 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) if (rqstp->rq_gssclient == NULL) return SVC_DENIED; stat = svcauth_unix_set_client(rqstp); - if (stat == SVC_DROP) + if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; return SVC_OK; } @@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, return SVC_DENIED; memset(&rsikey, 0, sizeof(rsikey)); if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) - return SVC_DROP; + return SVC_CLOSE; *authp = rpc_autherr_badverf; if (svc_safe_getnetobj(argv, &tmpobj)) { kfree(rsikey.in_handle.data); @@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, } if (dup_netobj(&rsikey.in_token, &tmpobj)) { kfree(rsikey.in_handle.data); - return SVC_DROP; + return SVC_CLOSE; } /* Perform upcall, or find upcall result: */ rsip = rsi_lookup(&rsikey); rsi_free(&rsikey); if (!rsip) - return SVC_DROP; - switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: - case -ETIMEDOUT: - case -ENOENT: + return SVC_CLOSE; + if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ - return SVC_DROP; - case 0: - ret = SVC_DROP; - /* Got an answer to the upcall; use it: */ - if (gss_write_init_verf(rqstp, rsip)) - goto out; - if (resv->iov_len + 4 > PAGE_SIZE) - goto out; - svc_putnl(resv, RPC_SUCCESS); - if (svc_safe_putnetobj(resv, &rsip->out_handle)) - goto out; - if (resv->iov_len + 3 * 4 > PAGE_SIZE) - goto out; - svc_putnl(resv, rsip->major_status); - svc_putnl(resv, rsip->minor_status); - svc_putnl(resv, GSS_SEQ_WIN); - if (svc_safe_putnetobj(resv, &rsip->out_token)) - goto out; - } + return SVC_CLOSE; + + ret = SVC_CLOSE; + /* Got an answer to the upcall; use it: */ + if (gss_write_init_verf(rqstp, rsip)) + goto out; + if (resv->iov_len + 4 > PAGE_SIZE) + goto out; + svc_putnl(resv, RPC_SUCCESS); + if (svc_safe_putnetobj(resv, &rsip->out_handle)) + goto out; + if (resv->iov_len + 3 * 4 > PAGE_SIZE) + goto out; + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); + if (svc_safe_putnetobj(resv, &rsip->out_token)) + goto out; + ret = SVC_COMPLETE; out: cache_put(&rsip->h, &rsi_cache); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7dce81a..e433e75 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -33,15 +33,16 @@ #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include "netns.h" #define RPCDBG_FACILITY RPCDBG_CACHE -static int cache_defer_req(struct cache_req *req, struct cache_head *item); +static void cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { - time_t now = get_seconds(); + time_t now = seconds_since_boot(); h->next = NULL; h->flags = 0; kref_init(&h->ref); @@ -51,7 +52,7 @@ static void cache_init(struct cache_head *h) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < get_seconds()) || + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time > h->last_refresh); } @@ -126,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; - head->last_refresh = get_seconds(); + head->last_refresh = seconds_since_boot(); set_bit(CACHE_VALID, &head->flags); } @@ -237,7 +238,7 @@ int cache_check(struct cache_detail *detail, /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); - age = get_seconds() - h->last_refresh; + age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) @@ -252,7 +253,7 @@ int cache_check(struct cache_detail *detail, cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); cache_fresh_unlocked(h, detail); rv = -ENOENT; } @@ -267,7 +268,8 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) { - if (cache_defer_req(rqstp, h) < 0) { + cache_defer_req(rqstp, h); + if (!test_bit(CACHE_PENDING, &h->flags)) { /* Request is not deferred */ rv = cache_is_valid(detail, h); if (rv == -EAGAIN) @@ -387,11 +389,11 @@ static int cache_clean(void) return -1; } current_detail = list_entry(next, struct cache_detail, others); - if (current_detail->nextcheck > get_seconds()) + if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; - current_detail->nextcheck = get_seconds()+30*60; + current_detail->nextcheck = seconds_since_boot()+30*60; } } @@ -476,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { detail->flush_time = LONG_MAX; - detail->nextcheck = get_seconds(); + detail->nextcheck = seconds_since_boot(); cache_flush(); detail->flush_time = 1; } @@ -505,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge); static DEFINE_SPINLOCK(cache_defer_lock); static LIST_HEAD(cache_defer_list); -static struct list_head cache_defer_hash[DFR_HASHSIZE]; +static struct hlist_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -static int cache_defer_req(struct cache_req *req, struct cache_head *item) +static void __unhash_deferred_req(struct cache_deferred_req *dreq) +{ + hlist_del_init(&dreq->hash); + if (!list_empty(&dreq->recent)) { + list_del_init(&dreq->recent); + cache_defer_cnt--; + } +} + +static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) { - struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); - if (cache_defer_cnt >= DFR_MAX) { - /* too much in the cache, randomly drop this one, - * or continue and drop the oldest below - */ - if (net_random()&1) - return -ENOMEM; - } - dreq = req->defer(req); - if (dreq == NULL) - return -ENOMEM; + INIT_LIST_HEAD(&dreq->recent); + hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); +} + +static void setup_deferral(struct cache_deferred_req *dreq, + struct cache_head *item, + int count_me) +{ dreq->item = item; spin_lock(&cache_defer_lock); - list_add(&dreq->recent, &cache_defer_list); - - if (cache_defer_hash[hash].next == NULL) - INIT_LIST_HEAD(&cache_defer_hash[hash]); - list_add(&dreq->hash, &cache_defer_hash[hash]); + __hash_deferred_req(dreq, item); - /* it is in, now maybe clean up */ - discard = NULL; - if (++cache_defer_cnt > DFR_MAX) { - discard = list_entry(cache_defer_list.prev, - struct cache_deferred_req, recent); - list_del_init(&discard->recent); - list_del_init(&discard->hash); - cache_defer_cnt--; + if (count_me) { + cache_defer_cnt++; + list_add(&dreq->recent, &cache_defer_list); } + spin_unlock(&cache_defer_lock); +} + +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; + +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + +static void cache_wait_req(struct cache_req *req, struct cache_head *item) +{ + struct thread_deferred_req sleeper; + struct cache_deferred_req *dreq = &sleeper.handle; + + sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + + setup_deferral(dreq, item, 0); + + if (!test_bit(CACHE_PENDING, &item->flags) || + wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!hlist_unhashed(&sleeper.handle.hash)) { + __unhash_deferred_req(&sleeper.handle); + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } +} + +static void cache_limit_defers(void) +{ + /* Make sure we haven't exceed the limit of allowed deferred + * requests. + */ + struct cache_deferred_req *discard = NULL; + + if (cache_defer_cnt <= DFR_MAX) + return; + + spin_lock(&cache_defer_lock); + + /* Consider removing either the first or the last */ + if (cache_defer_cnt > DFR_MAX) { + if (net_random() & 1) + discard = list_entry(cache_defer_list.next, + struct cache_deferred_req, recent); + else + discard = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); + __unhash_deferred_req(discard); + } + spin_unlock(&cache_defer_lock); if (discard) - /* there was one too many */ discard->revisit(discard, 1); +} - if (!test_bit(CACHE_PENDING, &item->flags)) { - /* must have just been validated... */ - cache_revisit_request(item); - return -EAGAIN; +static void cache_defer_req(struct cache_req *req, struct cache_head *item) +{ + struct cache_deferred_req *dreq; + + if (req->thread_wait) { + cache_wait_req(req, item); + if (!test_bit(CACHE_PENDING, &item->flags)) + return; } - return 0; + dreq = req->defer(req); + if (dreq == NULL) + return; + setup_deferral(dreq, item, 1); + if (!test_bit(CACHE_PENDING, &item->flags)) + /* Bit could have been cleared before we managed to + * set up the deferral, so need to revisit just in case + */ + cache_revisit_request(item); + + cache_limit_defers(); } static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - - struct list_head *lp; + struct hlist_node *lp, *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - lp = cache_defer_hash[hash].next; - if (lp) { - while (lp != &cache_defer_hash[hash]) { - dreq = list_entry(lp, struct cache_deferred_req, hash); - lp = lp->next; - if (dreq->item == item) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; - } + hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + if (dreq->item == item) { + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); } - } + spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { @@ -600,9 +676,8 @@ void cache_clean_deferred(void *owner) list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { if (dreq->owner == owner) { - list_del_init(&dreq->hash); - list_move(&dreq->recent, &pending); - cache_defer_cnt--; + __unhash_deferred_req(dreq); + list_add(&dreq->recent, &pending); } } spin_unlock(&cache_defer_lock); @@ -901,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp, filp->private_data = NULL; kfree(rp); - cd->last_close = get_seconds(); + cd->last_close = seconds_since_boot(); atomic_dec(&cd->readers); } module_put(cd->owner); @@ -1014,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail) } } +static bool cache_listeners_exist(struct cache_detail *detail) +{ + if (atomic_read(&detail->readers)) + return true; + if (detail->last_close == 0) + /* This cache was never opened */ + return false; + if (detail->last_close < seconds_since_boot() - 30) + /* + * We allow for the possibility that someone might + * restart a userspace daemon without restarting the + * server; but after 30 seconds, we give up. + */ + return false; + return true; +} + /* * register an upcall request to user-space and queue it up for read() by the * upcall daemon. @@ -1032,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, char *bp; int len; - if (atomic_read(&detail->readers) == 0 && - detail->last_close < get_seconds() - 30) { - warn_no_listener(detail); - return -EINVAL; + if (!cache_listeners_exist(detail)) { + warn_no_listener(detail); + return -EINVAL; } buf = kmalloc(PAGE_SIZE, GFP_KERNEL); @@ -1094,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { - int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; - bp++; - byte <<= 4; - byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; - *dest++ = byte; - bp++; + while (len < bufsize) { + int h, l; + + h = hex_to_bin(bp[0]); + if (h < 0) + break; + + l = hex_to_bin(bp[1]); + if (l < 0) + break; + + *dest++ = (h << 4) | l; + bp += 2; len++; } } else { @@ -1218,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); + convert_to_wallclock(cp->expiry_time), + atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ @@ -1284,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", cd->flush_time); + sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; @@ -1302,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *ep; - long flushtime; + char *bp, *ep; + if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; - flushtime = simple_strtoul(tbuf, &ep, 0); + simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; - cd->flush_time = flushtime; - cd->nextcheck = get_seconds(); + bp = tbuf; + cd->flush_time = get_expiry(&bp); + cd->nextcheck = seconds_since_boot(); cache_flush(); *ppos += count; @@ -1438,8 +1537,10 @@ static const struct file_operations cache_flush_operations_procfs = { .llseek = no_llseek, }; -static void remove_cache_proc_entries(struct cache_detail *cd) +static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) { + struct sunrpc_net *sn; + if (cd->u.procfs.proc_ent == NULL) return; if (cd->u.procfs.flush_ent) @@ -1449,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd) if (cd->u.procfs.content_ent) remove_proc_entry("content", cd->u.procfs.proc_ent); cd->u.procfs.proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); + sn = net_generic(net, sunrpc_net_id); + remove_proc_entry(cd->name, sn->proc_net_rpc); } #ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) +static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; + struct sunrpc_net *sn; - cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + sn = net_generic(net, sunrpc_net_id); + cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->u.procfs.proc_ent == NULL) goto out_nomem; cd->u.procfs.channel_ent = NULL; @@ -1488,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } return 0; out_nomem: - remove_cache_proc_entries(cd); + remove_cache_proc_entries(cd, net); return -ENOMEM; } #else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) +static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { return 0; } @@ -1503,23 +1607,33 @@ void __init cache_initialize(void) INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); } -int cache_register(struct cache_detail *cd) +int cache_register_net(struct cache_detail *cd, struct net *net) { int ret; sunrpc_init_cache_detail(cd); - ret = create_cache_proc_entries(cd); + ret = create_cache_proc_entries(cd, net); if (ret) sunrpc_destroy_cache_detail(cd); return ret; } + +int cache_register(struct cache_detail *cd) +{ + return cache_register_net(cd, &init_net); +} EXPORT_SYMBOL_GPL(cache_register); -void cache_unregister(struct cache_detail *cd) +void cache_unregister_net(struct cache_detail *cd, struct net *net) { - remove_cache_proc_entries(cd); + remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } + +void cache_unregister(struct cache_detail *cd) +{ + cache_unregister_net(cd, &init_net); +} EXPORT_SYMBOL_GPL(cache_unregister); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cbc5b8c..9dab957 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) struct rpc_xprt *xprt; struct rpc_clnt *clnt; struct xprt_create xprtargs = { + .net = args->net, .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h new file mode 100644 index 0000000..d013bf2 --- /dev/null +++ b/net/sunrpc/netns.h @@ -0,0 +1,19 @@ +#ifndef __SUNRPC_NETNS_H__ +#define __SUNRPC_NETNS_H__ + +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +struct cache_detail; + +struct sunrpc_net { + struct proc_dir_entry *proc_net_rpc; + struct cache_detail *ip_map_cache; +}; + +extern int sunrpc_net_id; + +int ip_map_cache_create(struct net *); +void ip_map_cache_destroy(struct net *); + +#endif diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 63ec116..fa6d7ca 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { + .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), @@ -229,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version) { struct rpc_create_args args = { + .net = &init_net, .protocol = proto, .address = srvaddr, .addrsize = salen, @@ -248,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); break; default: - return NULL; + return ERR_PTR(-EAFNOSUPPORT); } return rpc_create(&args); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index ea1046f..f71a731 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -22,11 +22,10 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/metrics.h> -#include <net/net_namespace.h> -#define RPCDBG_FACILITY RPCDBG_MISC +#include "netns.h" -struct proc_dir_entry *proc_net_rpc = NULL; +#define RPCDBG_FACILITY RPCDBG_MISC /* * Get RPC client stats @@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); static inline struct proc_dir_entry * do_register(const char *name, void *data, const struct file_operations *fops) { - rpc_proc_init(); - dprintk("RPC: registering /proc/net/rpc/%s\n", name); + struct sunrpc_net *sn; - return proc_create_data(name, 0, proc_net_rpc, fops, data); + dprintk("RPC: registering /proc/net/rpc/%s\n", name); + sn = net_generic(&init_net, sunrpc_net_id); + return proc_create_data(name, 0, sn->proc_net_rpc, fops, data); } struct proc_dir_entry * @@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register); void rpc_proc_unregister(const char *name) { - remove_proc_entry(name, proc_net_rpc); + struct sunrpc_net *sn; + + sn = net_generic(&init_net, sunrpc_net_id); + remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(rpc_proc_unregister); @@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register); void svc_proc_unregister(const char *name) { - remove_proc_entry(name, proc_net_rpc); + struct sunrpc_net *sn; + + sn = net_generic(&init_net, sunrpc_net_id); + remove_proc_entry(name, sn->proc_net_rpc); } EXPORT_SYMBOL_GPL(svc_proc_unregister); -void -rpc_proc_init(void) +int rpc_proc_init(struct net *net) { + struct sunrpc_net *sn; + dprintk("RPC: registering /proc/net/rpc\n"); - if (!proc_net_rpc) - proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); + sn = net_generic(net, sunrpc_net_id); + sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net); + if (sn->proc_net_rpc == NULL) + return -ENOMEM; + + return 0; } -void -rpc_proc_exit(void) +void rpc_proc_exit(struct net *net) { dprintk("RPC: unregistering /proc/net/rpc\n"); - if (proc_net_rpc) { - proc_net_rpc = NULL; - remove_proc_entry("rpc", init_net.proc_net); - } + remove_proc_entry("rpc", net->proc_net); } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index c0d0850..9d08091 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -22,7 +22,44 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/xprtsock.h> -extern struct cache_detail ip_map_cache, unix_gid_cache; +#include "netns.h" + +int sunrpc_net_id; + +static __net_init int sunrpc_init_net(struct net *net) +{ + int err; + + err = rpc_proc_init(net); + if (err) + goto err_proc; + + err = ip_map_cache_create(net); + if (err) + goto err_ipmap; + + return 0; + +err_ipmap: + rpc_proc_exit(net); +err_proc: + return err; +} + +static __net_exit void sunrpc_exit_net(struct net *net) +{ + ip_map_cache_destroy(net); + rpc_proc_exit(net); +} + +static struct pernet_operations sunrpc_net_ops = { + .init = sunrpc_init_net, + .exit = sunrpc_exit_net, + .id = &sunrpc_net_id, + .size = sizeof(struct sunrpc_net), +}; + +extern struct cache_detail unix_gid_cache; extern void cleanup_rpcb_clnt(void); @@ -38,18 +75,22 @@ init_sunrpc(void) err = rpcauth_init_module(); if (err) goto out3; + + cache_initialize(); + + err = register_pernet_subsys(&sunrpc_net_ops); + if (err) + goto out4; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif -#ifdef CONFIG_PROC_FS - rpc_proc_init(); -#endif - cache_initialize(); - cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ return 0; + +out4: + rpcauth_remove_module(); out3: rpc_destroy_mempool(); out2: @@ -67,14 +108,11 @@ cleanup_sunrpc(void) svc_cleanup_xprt_sock(); unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&ip_map_cache); cache_unregister(&unix_gid_cache); + unregister_pernet_subsys(&sunrpc_net_ops); #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif -#ifdef CONFIG_PROC_FS - rpc_proc_exit(); -#endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d9017d6..6359c42 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_bad; case SVC_DENIED: goto err_bad_auth; + case SVC_CLOSE: + if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + svc_close_xprt(rqstp->rq_xprt); case SVC_DROP: goto dropit; case SVC_COMPLETE: diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc0849..c82fe73 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); */ int svc_print_xprts(char *buf, int maxlen) { - struct list_head *le; + struct svc_xprt_class *xcl; char tmpstr[80]; int len = 0; buf[0] = '\0'; spin_lock(&svc_xprt_class_lock); - list_for_each(le, &svc_xprt_class_list) { + list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { int slen; - struct svc_xprt_class *xcl = - list_entry(le, struct svc_xprt_class, xcl_list); sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); slen = strlen(tmpstr); @@ -128,9 +126,9 @@ static void svc_xprt_free(struct kref *kref) struct svc_xprt *xprt = container_of(kref, struct svc_xprt, xpt_ref); struct module *owner = xprt->xpt_class->xcl_owner; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && - xprt->xpt_auth_cache != NULL) - svcauth_unix_info_release(xprt->xpt_auth_cache); + if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) + svcauth_unix_info_release(xprt); + put_net(xprt->xpt_net); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } @@ -156,15 +154,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, INIT_LIST_HEAD(&xprt->xpt_list); INIT_LIST_HEAD(&xprt->xpt_ready); INIT_LIST_HEAD(&xprt->xpt_deferred); + INIT_LIST_HEAD(&xprt->xpt_users); mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); + xprt->xpt_net = get_net(&init_net); } EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, + struct net *net, const int family, const unsigned short port, int flags) @@ -199,12 +200,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return ERR_PTR(-EAFNOSUPPORT); } - return xcl->xcl_ops->xpo_create(serv, sap, len, flags); + return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); } int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, - const int family, const unsigned short port, - int flags) + struct net *net, const int family, + const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -220,7 +221,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, family, port, flags); + newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); @@ -329,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) "svc_xprt_enqueue: " "threads and transports both waiting??\n"); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { - /* Don't enqueue dead transports */ - dprintk("svc: transport %p is dead, not enqueued\n", xprt); - goto out_unlock; - } - pool->sp_stats.packets++; /* Mark transport as busy. It will remain in this state until @@ -651,6 +646,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +658,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queued, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -868,6 +874,19 @@ static void svc_age_temp_xprts(unsigned long closure) mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } +static void call_xpt_users(struct svc_xprt *xprt) +{ + struct svc_xpt_user *u; + + spin_lock(&xprt->xpt_lock); + while (!list_empty(&xprt->xpt_users)) { + u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); + list_del(&u->list); + u->callback(u); + } + spin_unlock(&xprt->xpt_lock); +} + /* * Remove a dead transport */ @@ -878,7 +897,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) /* Only do this once */ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) - return; + BUG(); dprintk("svc: svc_delete_xprt(%p)\n", xprt); xprt->xpt_ops->xpo_detach(xprt); @@ -900,6 +919,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); + call_xpt_users(xprt); svc_xprt_put(xprt); } @@ -910,10 +930,7 @@ void svc_close_xprt(struct svc_xprt *xprt) /* someone else will have to effect the close */ return; - svc_xprt_get(xprt); svc_delete_xprt(xprt); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_put(xprt); } EXPORT_SYMBOL_GPL(svc_close_xprt); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2073116..560677d 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -18,6 +18,8 @@ #include <linux/sunrpc/clnt.h> +#include "netns.h" + /* * AUTHUNIX and AUTHNULL credentials are both handled here. * AUTHNULL is treated just like AUTHUNIX except that the uid/gid @@ -92,7 +94,6 @@ struct ip_map { struct unix_domain *m_client; int m_add_change; }; -static struct cache_head *ip_table[IP_HASHMAX]; static void ip_map_put(struct kref *kref) { @@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); } -static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); -static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); +static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); +static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd, switch (address.sa.sa_family) { case AF_INET: /* Form a mapped IPv4 address in sin6 */ - memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; - sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; + ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr, + &sin6.sin6_addr); break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: @@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd, dom = NULL; /* IPv6 scope IDs are ignored for now */ - ipmp = ip_map_lookup(class, &sin6.sin6_addr); + ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr); if (ipmp) { - err = ip_map_update(ipmp, + err = __ip_map_update(cd, ipmp, container_of(dom, struct unix_domain, h), expiry); } else @@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m, } -struct cache_detail ip_map_cache = { - .owner = THIS_MODULE, - .hash_size = IP_HASHMAX, - .hash_table = ip_table, - .name = "auth.unix.ip", - .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, - .cache_parse = ip_map_parse, - .cache_show = ip_map_show, - .match = ip_map_match, - .init = ip_map_init, - .update = update, - .alloc = ip_map_alloc, -}; - -static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) +static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, + struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); ipv6_addr_copy(&ip.m_addr, addr); - ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, + ch = sunrpc_cache_lookup(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ hash_ip6(*addr)); @@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) return NULL; } -static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) +static inline struct ip_map *ip_map_lookup(struct net *net, char *class, + struct in6_addr *addr) +{ + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + return __ip_map_lookup(sn->ip_map_cache, class, addr); +} + +static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, + struct unix_domain *udom, time_t expiry) { struct ip_map ip; struct cache_head *ch; @@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ip.m_add_change++; } ip.h.expiry_time = expiry; - ch = sunrpc_cache_update(&ip_map_cache, - &ip.h, &ipm->h, + ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ hash_ip6(ipm->m_addr)); if (!ch) return -ENOMEM; - cache_put(ch, &ip_map_cache); + cache_put(ch, cd); return 0; } -int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) +static inline int ip_map_update(struct net *net, struct ip_map *ipm, + struct unix_domain *udom, time_t expiry) +{ + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); +} + +int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; struct ip_map *ipmp; @@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) if (dom->flavour != &svcauth_unix) return -EINVAL; udom = container_of(dom, struct unix_domain, h); - ipmp = ip_map_lookup("nfsd", addr); + ipmp = ip_map_lookup(net, "nfsd", addr); if (ipmp) - return ip_map_update(ipmp, udom, NEVER); + return ip_map_update(net, ipmp, udom, NEVER); else return -ENOMEM; } @@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom) } EXPORT_SYMBOL_GPL(auth_unix_forget_old); -struct auth_domain *auth_unix_lookup(struct in6_addr *addr) +struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; + struct sunrpc_net *sn; - ipm = ip_map_lookup("nfsd", addr); + sn = net_generic(net, sunrpc_net_id); + ipm = ip_map_lookup(net, "nfsd", addr); if (!ipm) return NULL; - if (cache_check(&ip_map_cache, &ipm->h, NULL)) + if (cache_check(sn->ip_map_cache, &ipm->h, NULL)) return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { @@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr) rv = &ipm->m_client->h; kref_get(&rv->ref); } - cache_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, sn->ip_map_cache); return rv; } EXPORT_SYMBOL_GPL(auth_unix_lookup); void svcauth_unix_purge(void) { - cache_purge(&ip_map_cache); + struct net *net; + + for_each_net(net) { + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + cache_purge(sn->ip_map_cache); + } } EXPORT_SYMBOL_GPL(svcauth_unix_purge); static inline struct ip_map * -ip_map_cached_get(struct svc_rqst *rqstp) +ip_map_cached_get(struct svc_xprt *xprt) { struct ip_map *ipm = NULL; - struct svc_xprt *xprt = rqstp->rq_xprt; + struct sunrpc_net *sn; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); @@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp) * remembered, e.g. by a second mount from the * same IP address. */ + sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); - cache_put(&ipm->h, &ip_map_cache); + cache_put(&ipm->h, sn->ip_map_cache); return NULL; } cache_get(&ipm->h); @@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp) } static inline void -ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) +ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm) { - struct svc_xprt *xprt = rqstp->rq_xprt; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); if (xprt->xpt_auth_cache == NULL) { @@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) } spin_unlock(&xprt->xpt_lock); } - if (ipm) - cache_put(&ipm->h, &ip_map_cache); + if (ipm) { + struct sunrpc_net *sn; + + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + cache_put(&ipm->h, sn->ip_map_cache); + } } void -svcauth_unix_info_release(void *info) +svcauth_unix_info_release(struct svc_xprt *xpt) { - struct ip_map *ipm = info; - cache_put(&ipm->h, &ip_map_cache); + struct ip_map *ipm; + + ipm = xpt->xpt_auth_cache; + if (ipm != NULL) { + struct sunrpc_net *sn; + + sn = net_generic(xpt->xpt_net, sunrpc_net_id); + cache_put(&ipm->h, sn->ip_map_cache); + } } /**************************************************************************** @@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp) switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); + case -ETIMEDOUT: + return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); cache_put(&ug->h, &unix_gid_cache); @@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) struct ip_map *ipm; struct group_info *gi; struct svc_cred *cred = &rqstp->rq_cred; + struct svc_xprt *xprt = rqstp->rq_xprt; + struct net *net = xprt->xpt_net; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); switch (rqstp->rq_addr.ss_family) { case AF_INET: @@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) if (rqstp->rq_proc == 0) return SVC_OK; - ipm = ip_map_cached_get(rqstp); + ipm = ip_map_cached_get(xprt); if (ipm == NULL) - ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, + ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class, &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { + switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { default: BUG(); - case -EAGAIN: case -ETIMEDOUT: + return SVC_CLOSE; + case -EAGAIN: return SVC_DROP; case -ENOENT: return SVC_DENIED; case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - ip_map_cached_put(rqstp, ipm); + ip_map_cached_put(xprt, ipm); break; } @@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) switch (PTR_ERR(gi)) { case -EAGAIN: return SVC_DROP; + case -ESHUTDOWN: + return SVC_CLOSE; case -ENOENT: break; default: @@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) cred->cr_gid = (gid_t) -1; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) - return SVC_DROP; /* kmalloc failure - client must retry */ + return SVC_CLOSE; /* kmalloc failure - client must retry */ /* Put NULL verifier */ svc_putnl(resv, RPC_AUTH_NULL); @@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) - return SVC_DROP; + return SVC_CLOSE; for (i = 0; i < slen; i++) GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { @@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; +int ip_map_cache_create(struct net *net) +{ + int err = -ENOMEM; + struct cache_detail *cd; + struct cache_head **tbl; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + goto err_cd; + + tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL); + if (tbl == NULL) + goto err_tbl; + + cd->owner = THIS_MODULE, + cd->hash_size = IP_HASHMAX, + cd->hash_table = tbl, + cd->name = "auth.unix.ip", + cd->cache_put = ip_map_put, + cd->cache_upcall = ip_map_upcall, + cd->cache_parse = ip_map_parse, + cd->cache_show = ip_map_show, + cd->match = ip_map_match, + cd->init = ip_map_init, + cd->update = update, + cd->alloc = ip_map_alloc, + + err = cache_register_net(cd, net); + if (err) + goto err_reg; + + sn->ip_map_cache = cd; + return 0; + +err_reg: + kfree(tbl); +err_tbl: + kfree(cd); +err_cd: + return err; +} + +void ip_map_cache_destroy(struct net *net) +{ + struct sunrpc_net *sn; + + sn = net_generic(net, sunrpc_net_id); + cache_purge(sn->ip_map_cache); + cache_unregister_net(sn->ip_map_cache, net); + kfree(sn->ip_map_cache->hash_table); + kfree(sn->ip_map_cache); +} diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7e534dd..07919e1 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *); static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, - struct sockaddr *, int, int); + struct net *, struct sockaddr *, + int, int); #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; static struct lock_class_key svc_slock_key[2]; @@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) } static struct svc_xprt *svc_udp_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { - return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); + return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags); } static struct svc_xprt_ops svc_udp_ops = { @@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) reclen = htonl(0x80000000|((xbufp->len ) - 4)); memcpy(xbufp->head[0].iov_base, &reclen, 4); - if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) - return -ENOTCONN; - sent = svc_sendto(rqstp, &rqstp->rq_res); if (sent != xbufp->len) { printk(KERN_NOTICE @@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { - return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); + return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } static struct svc_xprt_ops svc_tcp_ops = { @@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv) * The number of server threads has changed. Update * rcvbuf and sndbuf accordingly on all sockets */ - struct list_head *le; + struct svc_sock *svsk; spin_lock_bh(&serv->sv_lock); - list_for_each(le, &serv->sv_permsocks) { - struct svc_sock *svsk = - list_entry(le, struct svc_sock, sk_xprt.xpt_list); + list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - } - list_for_each(le, &serv->sv_tempsocks) { - struct svc_sock *svsk = - list_entry(le, struct svc_sock, sk_xprt.xpt_list); + list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list) set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - } spin_unlock_bh(&serv->sv_lock); } EXPORT_SYMBOL_GPL(svc_sock_update_bufs); @@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock); */ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, int protocol, + struct net *net, struct sockaddr *sin, int len, int flags) { @@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, return ERR_PTR(-EINVAL); } - error = sock_create_kern(family, type, protocol, &sock); + error = __sock_create(net, family, type, protocol, &sock, 1); if (error < 0) return ERR_PTR(error); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 970fb00..4c8f18a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; - if (task == NULL) - return 0; goto out_sleep; } xprt->snd_task = task; @@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task) */ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { - struct list_head *pos; + struct rpc_rqst *entry; - list_for_each(pos, &xprt->recv) { - struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); + list_for_each_entry(entry, &xprt->recv, rq_list) if (entry->rq_xid == xid) return entry; - } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); @@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } +struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) +{ + struct rpc_xprt *xprt; + + xprt = kzalloc(size, GFP_KERNEL); + if (xprt == NULL) + goto out; + + xprt->max_reqs = max_req; + xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) + goto out_free; + + xprt->xprt_net = get_net(net); + return xprt; + +out_free: + kfree(xprt); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(xprt_alloc); + +void xprt_free(struct rpc_xprt *xprt) +{ + put_net(xprt->xprt_net); + kfree(xprt->slot); + kfree(xprt); +} +EXPORT_SYMBOL_GPL(xprt_free); + /** * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index d718b8f..09af4fa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -43,6 +43,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/sysctl.h> +#include <linux/workqueue.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svc_rdma.h> @@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod; struct kmem_cache *svc_rdma_map_cachep; struct kmem_cache *svc_rdma_ctxt_cachep; +struct workqueue_struct *svc_rdma_wq; + /* * This function implements reading and resetting an atomic_t stat * variable through read/write to a proc file. Any write to the file @@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = { void svc_rdma_cleanup(void) { dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); - flush_scheduled_work(); + destroy_workqueue(svc_rdma_wq); if (svcrdma_table_header) { unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; @@ -249,6 +252,11 @@ int svc_rdma_init(void) dprintk("\tsq_depth : %d\n", svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); + + svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); + if (!svc_rdma_wq) + return -ENOMEM; + if (!svcrdma_table_header) svcrdma_table_header = register_sysctl_table(svcrdma_root_table); @@ -283,6 +291,7 @@ int svc_rdma_init(void) kmem_cache_destroy(svc_rdma_map_cachep); err0: unregister_sysctl_table(svcrdma_table_header); + destroy_workqueue(svc_rdma_wq); return -ENOMEM; } MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0194de8..df67211 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; for (page_no = 0; page_no < frmr->page_list_len; page_no++) { frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + rqstp->rq_arg.pages[page_no], 0, + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, int count) { int i; + unsigned long off; ctxt->count = count; ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { ctxt->sge[i].length = 0; /* in case map fails */ if (!frmr) { + BUG_ON(0 == virt_to_page(vec[i].iov_base)); + off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; ctxt->sge[i].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - vec[i].iov_base, - vec[i].iov_len, - DMA_FROM_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + virt_to_page(vec[i].iov_base), + off, + vec[i].iov_len, + DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[i].addr)) return -EINVAL; @@ -491,6 +495,7 @@ next_sge: printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", err); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); goto out; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b15e1eb..249a835 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -70,8 +70,8 @@ * on extra page for the RPCRMDA header. */ static int fast_reg_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { int sge_no; u32 sge_bytes; @@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, vec->count = 2; sge_no++; - /* Build the FRMR */ + /* Map the XDR head */ frmr->kva = frva; frmr->direction = DMA_TO_DEVICE; frmr->access_flags = 0; frmr->map_len = PAGE_SIZE; frmr->page_list_len = 1; + page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - (void *)xdr->head[0].iov_base, - PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + virt_to_page(xdr->head[0].iov_base), + page_off, + PAGE_SIZE - page_off, + DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; atomic_inc(&xprt->sc_dma_used); + /* Map the XDR page list */ page_off = xdr->page_base; page_bytes = xdr->page_len + page_off; if (!page_bytes) @@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, - page_address(page), - PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, + page, page_off, + sge_bytes, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; frmr->page_list->page_list[page_no] = - ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, - DMA_TO_DEVICE); + ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), + page_off, + PAGE_SIZE, + DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; @@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt, return 0; } +static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + u32 xdr_off, size_t len, int dir) +{ + struct page *page; + dma_addr_t dma_addr; + if (xdr_off < xdr->head[0].iov_len) { + /* This offset is in the head */ + xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; + page = virt_to_page(xdr->head[0].iov_base); + } else { + xdr_off -= xdr->head[0].iov_len; + if (xdr_off < xdr->page_len) { + /* This offset is in the page list */ + page = xdr->pages[xdr_off >> PAGE_SHIFT]; + xdr_off &= ~PAGE_MASK; + } else { + /* This offset is in the tail */ + xdr_off -= xdr->page_len; + xdr_off += (unsigned long) + xdr->tail[0].iov_base & ~PAGE_MASK; + page = virt_to_page(xdr->tail[0].iov_base); + } + } + dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off, + min_t(size_t, PAGE_SIZE, len), dir); + return dma_addr; +} + /* Assumptions: * - We are using FRMR * - or - @@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge[sge_no].length = sge_bytes; if (!vec->frmr) { sge[sge_no].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - (void *) - vec->sge[xdr_sge_no].iov_base + sge_off, - sge_bytes, DMA_TO_DEVICE); + dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge[sge_no].addr)) goto err; @@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, goto err; return 0; err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_frmr(xprt, vec->frmr); svc_rdma_put_context(ctxt, 0); /* Fatal error, close transport */ return -EIO; @@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, * In all three cases, this function prepares the RPCRDMA header in * sge[0], the 'type' parameter indicates the type to place in the * RPCRDMA header, and the 'byte_count' field indicates how much of - * the XDR to include in this RDMA_SEND. + * the XDR to include in this RDMA_SEND. NB: The offset of the payload + * to send is zero in the XDR. */ static int send_reply(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, @@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[0].lkey = rdma->sc_dma_lkey; ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), - ctxt->sge[0].length, DMA_TO_DEVICE); + ib_dma_map_page(rdma->sc_cm_id->device, page, 0, + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - /* Determine how many of our SGE are to be transmitted */ + /* Map the payload indicated by 'byte_count' */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { + int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; if (!vec->frmr) { ctxt->sge[sge_no].addr = - ib_dma_map_single(rdma->sc_cm_id->device, - vec->sge[sge_no].iov_base, - sge_bytes, DMA_TO_DEVICE); + dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[sge_no].addr)) goto err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index edea15a..9df1ead 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -45,6 +45,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <linux/sunrpc/svc_rdma.h> @@ -52,6 +53,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); @@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = { /* WR context cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_ctxt_cachep; +/* Workqueue created in svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; @@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) */ if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(xprt->sc_cm_id->device, + ib_dma_unmap_page(xprt->sc_cm_id->device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); @@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_single(xprt->sc_cm_id->device, - page_address(page), PAGE_SIZE, + pa = ib_dma_map_page(xprt->sc_cm_id->device, + page, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; + ctxt->count = sge_no + 1; buflen += PAGE_SIZE; } - ctxt->count = sge_no; recv_wr.next = NULL; recv_wr.sg_list = &ctxt->sge[0]; recv_wr.num_sge = ctxt->count; @@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) return ret; err_put_ctxt: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); return -ENOMEM; } @@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, * Create a listening RDMA service endpoint. */ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, + struct net *net, struct sockaddr *sa, int salen, int flags) { @@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt, if (ib_dma_mapping_error(frmr->mr->device, addr)) continue; atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); + ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); } } @@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt) struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); INIT_WORK(&rdma->sc_work, __svc_rdma_free); - schedule_work(&rdma->sc_work); + queue_work(svc_rdma_wq, &rdma->sc_work); } static int svc_rdma_has_wspace(struct svc_xprt *xprt) @@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth); if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) - return 0; + return -ENOTCONN; continue; } /* Take a transport ref for each WR posted */ @@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, enum rpcrdma_errcode err) { struct ib_send_wr err_wr; - struct ib_sge sge; struct page *p; struct svc_rdma_op_ctxt *ctxt; u32 *va; @@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* XDR encode error */ length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); + ctxt = svc_rdma_get_context(xprt); + ctxt->direction = DMA_FROM_DEVICE; + ctxt->count = 1; + ctxt->pages[0] = p; + /* Prepare SGE for local address */ - sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, - page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { + ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device, + p, 0, length, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); return; } atomic_inc(&xprt->sc_dma_used); - sge.lkey = xprt->sc_dma_lkey; - sge.length = length; - - ctxt = svc_rdma_get_context(xprt); - ctxt->count = 1; - ctxt->pages[0] = p; + ctxt->sge[0].lkey = xprt->sc_dma_lkey; + ctxt->sge[0].length = length; /* Prepare SEND WR */ memset(&err_wr, 0, sizeof err_wr); ctxt->wr_op = IB_WR_SEND; err_wr.wr_id = (unsigned long)ctxt; - err_wr.sg_list = &sge; + err_wr.sg_list = ctxt->sge; err_wr.num_sge = 1; err_wr.opcode = IB_WR_SEND; err_wr.send_flags = IB_SEND_SIGNALED; @@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_single(xprt->sc_cm_id->device, - sge.addr, PAGE_SIZE, - DMA_FROM_DEVICE); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a85e866..0867070 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) dprintk("RPC: %s: called\n", __func__); - cancel_delayed_work(&r_xprt->rdma_connect); - flush_scheduled_work(); + cancel_delayed_work_sync(&r_xprt->rdma_connect); xprt_clear_connected(xprt); @@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) xprt_rdma_free_addresses(xprt); - kfree(xprt->slot); - xprt->slot = NULL; - kfree(xprt); + xprt_free(xprt); dprintk("RPC: %s: returning\n", __func__); @@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(-EBADF); } - xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); return ERR_PTR(-ENOMEM); } - xprt->max_reqs = xprt_rdma_slot_table_entries; - xprt->slot = kcalloc(xprt->max_reqs, - sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - dprintk("RPC: %s: couldn't allocate %d slots\n", - __func__, xprt->max_reqs); - kfree(xprt); - return ERR_PTR(-ENOMEM); - } - /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; xprt->bind_timeout = (60U * HZ); @@ -410,8 +398,7 @@ out3: out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ERR_PTR(rc); } @@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task) } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); + flush_delayed_work(&r_xprt->rdma_connect); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fe9306b..dfcab5a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_close(xprt); xs_free_peer_addresses(xprt); - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); module_put(THIS_MODULE); } @@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) xs_update_peer_port(xprt); } -static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) +static unsigned short xs_get_srcport(struct sock_xprt *transport) { unsigned short port = transport->srcport; @@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket return port; } -static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) +static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { if (transport->srcport != 0) transport->srcport = 0; @@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket return xprt_max_resvport; return --port; } - -static int xs_bind4(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { - struct sockaddr_in myaddr = { - .sin_family = AF_INET, - }; - struct sockaddr_in *sa; + struct sockaddr_storage myaddr; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport, sock); + unsigned short port = xs_get_srcport(transport); unsigned short last; - sa = (struct sockaddr_in *)&transport->srcaddr; - myaddr.sin_addr = sa->sin_addr; + memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { - myaddr.sin_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); + rpc_set_port((struct sockaddr *)&myaddr, port); + err = kernel_bind(sock, (struct sockaddr *)&myaddr, + transport->xprt.addrlen); if (port == 0) break; if (err == 0) { @@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) break; } last = port; - port = xs_next_srcport(transport, sock, port); + port = xs_next_srcport(transport, port); if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: %s %pI4:%u: %s (%d)\n", - __func__, &myaddr.sin_addr, - port, err ? "failed" : "ok", err); - return err; -} - -static int xs_bind6(struct sock_xprt *transport, struct socket *sock) -{ - struct sockaddr_in6 myaddr = { - .sin6_family = AF_INET6, - }; - struct sockaddr_in6 *sa; - int err, nloop = 0; - unsigned short port = xs_get_srcport(transport, sock); - unsigned short last; - sa = (struct sockaddr_in6 *)&transport->srcaddr; - myaddr.sin6_addr = sa->sin6_addr; - do { - myaddr.sin6_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); - if (port == 0) - break; - if (err == 0) { - transport->srcport = port; - break; - } - last = port; - port = xs_next_srcport(transport, sock, port); - if (port > last) - nloop++; - } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", - &myaddr.sin6_addr, port, err ? "failed" : "ok", err); + if (myaddr.ss_family == AF_INET) + dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__, + &((struct sockaddr_in *)&myaddr)->sin_addr, + port, err ? "failed" : "ok", err); + else + dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__, + &((struct sockaddr_in6 *)&myaddr)->sin6_addr, + port, err ? "failed" : "ok", err); return err; } + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; @@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock) sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } + +static inline void xs_reclassify_socket(int family, struct socket *sock) +{ + switch (family) { + case AF_INET: + xs_reclassify_socket4(sock); + break; + case AF_INET6: + xs_reclassify_socket6(sock); + break; + } +} #else static inline void xs_reclassify_socket4(struct socket *sock) { @@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock) static inline void xs_reclassify_socket6(struct socket *sock) { } + +static inline void xs_reclassify_socket(int family, struct socket *sock) +{ +} #endif +static struct socket *xs_create_sock(struct rpc_xprt *xprt, + struct sock_xprt *transport, int family, int type, int protocol) +{ + struct socket *sock; + int err; + + err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1); + if (err < 0) { + dprintk("RPC: can't create %d transport socket (%d).\n", + protocol, -err); + goto out; + } + xs_reclassify_socket(family, sock); + + if (xs_bind(transport, sock)) { + sock_release(sock); + goto out; + } + + return sock; +out: + return ERR_PTR(err); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_udp_do_set_buffer_size(xprt); } -/** - * xs_udp_connect_worker4 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_udp_connect_worker4(struct work_struct *work) +static void xs_udp_setup_socket(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; /* Start by resetting any existing state */ xs_reset_transport(transport); - - err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); - if (err < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + sock = xs_create_sock(xprt, transport, + xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); + if (IS_ERR(sock)) goto out; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock)) { - sock_release(sock); - goto out; - } - - dprintk("RPC: worker connecting xprt %p via %s to " - "%s (port %s)\n", xprt, - xprt->address_strings[RPC_DISPLAY_PROTO], - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - - xs_udp_finish_connecting(xprt, sock); - status = 0; -out: - xprt_clear_connecting(xprt); - xprt_wake_pending_tasks(xprt, status); -} - -/** - * xs_udp_connect_worker6 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_udp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - - if (xprt->shutdown) - goto out; - - /* Start by resetting any existing state */ - xs_reset_transport(transport); - - err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); - if (err < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); - - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } dprintk("RPC: worker connecting xprt %p via %s to " "%s (port %s)\n", xprt, @@ -1755,12 +1705,12 @@ out: * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +static void xs_abort_connection(struct sock_xprt *transport) { int result; struct sockaddr any; - dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); + dprintk("RPC: disconnecting xprt %p to reuse port\n", transport); /* * Disconnect the transport socket by doing a connect operation @@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); if (!result) - xs_sock_mark_closed(xprt); + xs_sock_mark_closed(&transport->xprt); else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +static void xs_tcp_reuse_connection(struct sock_xprt *transport) { unsigned int state = transport->inet->sk_state; @@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra "sk_shutdown set to %d\n", __func__, transport->inet->sk_shutdown); } - xs_abort_connection(xprt, transport); + xs_abort_connection(transport); } static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * * Invoked by a work queue tasklet. */ -static void xs_tcp_setup_socket(struct rpc_xprt *xprt, - struct sock_xprt *transport, - struct socket *(*create_sock)(struct rpc_xprt *, - struct sock_xprt *)) +static void xs_tcp_setup_socket(struct work_struct *work) { + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); struct socket *sock = transport->sock; + struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; if (xprt->shutdown) @@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - sock = create_sock(xprt, transport); + sock = xs_create_sock(xprt, transport, + xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; @@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt, transport); + xs_tcp_reuse_connection(transport); if (abort_and_exit) goto out_eagain; @@ -1925,84 +1876,6 @@ out: xprt_wake_pending_tasks(xprt, status); } -static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - struct socket *sock; - int err; - - /* start from scratch */ - err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); - if (err < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", - -err); - goto out_err; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); - goto out_err; - } - return sock; -out_err: - return ERR_PTR(-EIO); -} - -/** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_tcp_connect_worker4(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - - xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); -} - -static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, - struct sock_xprt *transport) -{ - struct socket *sock; - int err; - - /* start from scratch */ - err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); - if (err < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", - -err); - goto out_err; - } - xs_reclassify_socket6(sock); - - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out_err; - } - return sock; -out_err: - return ERR_PTR(-EIO); -} - -/** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_tcp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - - xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); -} - /** * xs_connect - connect a socket to a remote endpoint * @task: address of RPC task that manages state of connect request @@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = { .print_stats = xs_tcp_print_stats, }; +static int xs_init_anyaddr(const int family, struct sockaddr *sap) +{ + static const struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + static const struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + }; + + switch (family) { + case AF_INET: + memcpy(sap, &sin, sizeof(sin)); + break; + case AF_INET6: + memcpy(sap, &sin6, sizeof(sin6)); + break; + default: + dprintk("RPC: %s: Bad address family\n", __func__); + return -EAFNOSUPPORT; + } + return 0; +} + static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, unsigned int slot_table_size) { @@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) { + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); + if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); return ERR_PTR(-ENOMEM); } - xprt = &new->xprt; - - xprt->max_reqs = slot_table_size; - xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - dprintk("RPC: xs_setup_xprt: couldn't allocate slot " - "table\n"); - return ERR_PTR(-ENOMEM); - } + new = container_of(xprt, struct sock_xprt, xprt); memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) memcpy(&new->srcaddr, args->srcaddr, args->addrlen); + else { + int err; + err = xs_init_anyaddr(args->dstaddr->sa_family, + (struct sockaddr *)&new->srcaddr); + if (err != 0) + return ERR_PTR(err); + } return xprt; } @@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker4); + xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: @@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker6); + xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: @@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } @@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker4); + xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: @@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker6); + xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: @@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } @@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) goto out_err; } - if (xprt_bound(xprt)) - dprintk("RPC: set up xprt to %s (port %s) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - else - dprintk("RPC: set up xprt to %s (autobind) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PROTO]); + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); /* * Since we don't want connections for the backchannel, we set @@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) return xprt; ret = ERR_PTR(-EINVAL); out_err: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ret; } |