diff options
70 files changed, 4909 insertions, 1035 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index fe23269..66bd97a 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -50,6 +50,13 @@ specify the GFP_ flags (see kmalloc) for the allocation (the implementation may choose to ignore flags that affect the location of the returned memory, like GFP_DMA). +void * +dma_zalloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) + +Wraps dma_alloc_coherent() and also zeroes the returned memory if the +allocation attempt succeeded. + void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 06eb6d9..cc0ebc5 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -418,7 +418,6 @@ total_unevictable - sum of all children's "unevictable" # The following additional stats are dependent on CONFIG_DEBUG_VM. -inactive_ratio - VM internal parameter. (see mm/page_alloc.c) recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) recent_rotated_file - VM internal parameter. (see mm/vmscan.c) recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 7c799fc..3d84912 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -133,41 +133,6 @@ Who: Pavel Machek <pavel@ucw.cz> --------------------------- -What: sys_sysctl -When: September 2010 -Option: CONFIG_SYSCTL_SYSCALL -Why: The same information is available in a more convenient from - /proc/sys, and none of the sysctl variables appear to be - important performance wise. - - Binary sysctls are a long standing source of subtle kernel - bugs and security issues. - - When I looked several months ago all I could find after - searching several distributions were 5 user space programs and - glibc (which falls back to /proc/sys) using this syscall. - - The man page for sysctl(2) documents it as unusable for user - space programs. - - sysctl(2) is not generally ABI compatible to a 32bit user - space application on a 64bit and a 32bit kernel. - - For the last several months the policy has been no new binary - sysctls and no one has put forward an argument to use them. - - Binary sysctls issues seem to keep happening appearing so - properly deprecating them (with a warning to user space) and a - 2 year grace warning period will mean eventually we can kill - them and end the pain. - - In the mean time individual binary sysctls can be dealt with - in a piecewise fashion. - -Who: Eric Biederman <ebiederm@xmission.com> - ---------------------------- - What: /proc/<pid>/oom_adj When: August 2012 Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/rapidio/rapidio.txt index be70ee1..c75694b 100644 --- a/Documentation/rapidio/rapidio.txt +++ b/Documentation/rapidio/rapidio.txt @@ -144,7 +144,7 @@ and the default device ID in order to access the device on the active port. After the host has completed enumeration of the entire network it releases devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint -in the system, it sets the Master Enable bit in the Port General Control CSR +in the system, it sets the Discovered bit in the Port General Control CSR to indicate that enumeration is completed and agents are allowed to execute passive discovery of the network. diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt new file mode 100644 index 0000000..335f3c6 --- /dev/null +++ b/Documentation/rapidio/tsi721.txt @@ -0,0 +1,49 @@ +RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge. +========================================================================= + +I. Overview + +This driver implements all currently defined RapidIO mport callback functions. +It supports maintenance read and write operations, inbound and outbound RapidIO +doorbells, inbound maintenance port-writes and RapidIO messaging. + +To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA +channels. This mechanism provides access to larger range of hop counts and +destination IDs without need for changes in outbound window translation. + +RapidIO messaging support uses dedicated messaging channels for each mailbox. +For inbound messages this driver uses destination ID matching to forward messages +into the corresponding message queue. Messaging callbacks are implemented to be +fully compatible with RIONET driver (Ethernet over RapidIO messaging services). + +II. Known problems + + None. + +III. To do + + Add DMA data transfers (non-messaging). + Add inbound region (SRIO-to-PCIe) mapping. + +IV. Version History + + 1.0.0 - Initial driver release. + +V. License +----------------------------------------------- + + Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index fec1320..d7efdbf 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -16,16 +16,6 @@ #ifdef __HAVE_ARCH_PTE_SPECIAL -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); -} - /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -57,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, put_page(page); return 0; } - if (PageTail(page)) - get_huge_page_tail(page); pages[*nr] = page; (*nr)++; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0b9a5c1..da5eb38 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -390,7 +390,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add { unsigned long mask; unsigned long pte_end; - struct page *head, *page; + struct page *head, *page, *tail; pte_t pte; int refs; @@ -413,6 +413,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add head = pte_page(pte); page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -428,10 +429,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add if (unlikely(pte_val(pte) != pte_val(*ptep))) { /* Could be optimized better */ - while (*nr) { - put_page(page); - (*nr)--; - } + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index c65f75a..22ffccd 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1608,6 +1608,7 @@ int fsl_rio_setup(struct platform_device *dev) return 0; err: iounmap(priv->regs_win); + release_resource(&port->iores); err_res: kfree(priv); err_priv: diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405c..65cb06e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; result = write ? 0 : _SEGMENT_ENTRY_RO; @@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index a986b5d..42c55df 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -56,6 +56,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, put_page(head); return 0; } + if (head != page) + get_huge_page_tail(page); pages[*nr] = page; (*nr)++; diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dbe34b9..ea30585 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); -} - static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 053d36c..cd41d40 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -151,7 +151,7 @@ MODULE_LICENSE("GPL"); struct vmballoon_stats { unsigned int timer; - /* allocation statustics */ + /* allocation statistics */ unsigned int alloc; unsigned int alloc_fail; unsigned int sleep_alloc; @@ -412,6 +412,7 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) gfp_t flags; unsigned int hv_status; bool locked = false; + flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP; do { if (!can_sleep) @@ -419,7 +420,6 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) else STATS_INC(b->stats.sleep_alloc); - flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP; page = alloc_page(flags); if (!page) { if (!can_sleep) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 3bb1311..7145714 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -88,8 +88,8 @@ static struct rio_dev **rionet_active; #define dev_rionet_capable(dev) \ is_rionet_capable(dev->src_ops, dev->dst_ops) -#define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001) -#define RIONET_GET_DESTID(x) (*(u16 *)(x + 4)) +#define RIONET_MAC_MATCH(x) (!memcmp((x), "\00\01\00\01", 4)) +#define RIONET_GET_DESTID(x) ((*((u8 *)x + 4) << 8) | *((u8 *)x + 5)) static int rionet_rx_clean(struct net_device *ndev) { diff --git a/drivers/power/ds2780_battery.c b/drivers/power/ds2780_battery.c index 1fefe82..91a783d 100644 --- a/drivers/power/ds2780_battery.c +++ b/drivers/power/ds2780_battery.c @@ -39,6 +39,7 @@ struct ds2780_device_info { struct device *dev; struct power_supply bat; struct device *w1_dev; + struct task_struct *mutex_holder; }; enum current_types { @@ -49,8 +50,8 @@ enum current_types { static const char model[] = "DS2780"; static const char manufacturer[] = "Maxim/Dallas"; -static inline struct ds2780_device_info *to_ds2780_device_info( - struct power_supply *psy) +static inline struct ds2780_device_info * +to_ds2780_device_info(struct power_supply *psy) { return container_of(psy, struct ds2780_device_info, bat); } @@ -60,17 +61,28 @@ static inline struct power_supply *to_power_supply(struct device *dev) return dev_get_drvdata(dev); } -static inline int ds2780_read8(struct device *dev, u8 *val, int addr) +static inline int ds2780_battery_io(struct ds2780_device_info *dev_info, + char *buf, int addr, size_t count, int io) { - return w1_ds2780_io(dev, val, addr, sizeof(u8), 0); + if (dev_info->mutex_holder == current) + return w1_ds2780_io_nolock(dev_info->w1_dev, buf, addr, count, io); + else + return w1_ds2780_io(dev_info->w1_dev, buf, addr, count, io); +} + +static inline int ds2780_read8(struct ds2780_device_info *dev_info, u8 *val, + int addr) +{ + return ds2780_battery_io(dev_info, val, addr, sizeof(u8), 0); } -static int ds2780_read16(struct device *dev, s16 *val, int addr) +static int ds2780_read16(struct ds2780_device_info *dev_info, s16 *val, + int addr) { int ret; u8 raw[2]; - ret = w1_ds2780_io(dev, raw, addr, sizeof(u8) * 2, 0); + ret = ds2780_battery_io(dev_info, raw, addr, sizeof(raw), 0); if (ret < 0) return ret; @@ -79,16 +91,16 @@ static int ds2780_read16(struct device *dev, s16 *val, int addr) return 0; } -static inline int ds2780_read_block(struct device *dev, u8 *val, int addr, - size_t count) +static inline int ds2780_read_block(struct ds2780_device_info *dev_info, + u8 *val, int addr, size_t count) { - return w1_ds2780_io(dev, val, addr, count, 0); + return ds2780_battery_io(dev_info, val, addr, count, 0); } -static inline int ds2780_write(struct device *dev, u8 *val, int addr, - size_t count) +static inline int ds2780_write(struct ds2780_device_info *dev_info, u8 *val, + int addr, size_t count) { - return w1_ds2780_io(dev, val, addr, count, 1); + return ds2780_battery_io(dev_info, val, addr, count, 1); } static inline int ds2780_store_eeprom(struct device *dev, int addr) @@ -122,7 +134,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info, { int ret; - ret = ds2780_write(dev_info->w1_dev, &conductance, + ret = ds2780_write(dev_info, &conductance, DS2780_RSNSP_REG, sizeof(u8)); if (ret < 0) return ret; @@ -134,7 +146,7 @@ static int ds2780_set_sense_register(struct ds2780_device_info *dev_info, static int ds2780_get_rsgain_register(struct ds2780_device_info *dev_info, u16 *rsgain) { - return ds2780_read16(dev_info->w1_dev, rsgain, DS2780_RSGAIN_MSB_REG); + return ds2780_read16(dev_info, rsgain, DS2780_RSGAIN_MSB_REG); } /* Set RSGAIN value from 0 to 1.999 in steps of 0.001 */ @@ -144,8 +156,8 @@ static int ds2780_set_rsgain_register(struct ds2780_device_info *dev_info, int ret; u8 raw[] = {rsgain >> 8, rsgain & 0xFF}; - ret = ds2780_write(dev_info->w1_dev, raw, - DS2780_RSGAIN_MSB_REG, sizeof(u8) * 2); + ret = ds2780_write(dev_info, raw, + DS2780_RSGAIN_MSB_REG, sizeof(raw)); if (ret < 0) return ret; @@ -167,7 +179,7 @@ static int ds2780_get_voltage(struct ds2780_device_info *dev_info, * Bits 2 - 0 of the voltage value are in bits 7 - 5 of the * voltage LSB register */ - ret = ds2780_read16(dev_info->w1_dev, &voltage_raw, + ret = ds2780_read16(dev_info, &voltage_raw, DS2780_VOLT_MSB_REG); if (ret < 0) return ret; @@ -196,7 +208,7 @@ static int ds2780_get_temperature(struct ds2780_device_info *dev_info, * Bits 2 - 0 of the temperature value are in bits 7 - 5 of the * temperature LSB register */ - ret = ds2780_read16(dev_info->w1_dev, &temperature_raw, + ret = ds2780_read16(dev_info, &temperature_raw, DS2780_TEMP_MSB_REG); if (ret < 0) return ret; @@ -222,13 +234,13 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info, * The units of measurement for current are dependent on the value of * the sense resistor. */ - ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG); + ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG); if (ret < 0) return ret; if (sense_res_raw == 0) { dev_err(dev_info->dev, "sense resistor value is 0\n"); - return -ENXIO; + return -EINVAL; } sense_res = 1000 / sense_res_raw; @@ -248,7 +260,7 @@ static int ds2780_get_current(struct ds2780_device_info *dev_info, * Bits 7 - 0 of the current value are in bits 7 - 0 of the current * LSB register */ - ret = ds2780_read16(dev_info->w1_dev, ¤t_raw, reg_msb); + ret = ds2780_read16(dev_info, ¤t_raw, reg_msb); if (ret < 0) return ret; @@ -267,7 +279,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info, * The units of measurement for accumulated current are dependent on * the value of the sense resistor. */ - ret = ds2780_read8(dev_info->w1_dev, &sense_res_raw, DS2780_RSNSP_REG); + ret = ds2780_read8(dev_info, &sense_res_raw, DS2780_RSNSP_REG); if (ret < 0) return ret; @@ -285,7 +297,7 @@ static int ds2780_get_accumulated_current(struct ds2780_device_info *dev_info, * Bits 7 - 0 of the ACR value are in bits 7 - 0 of the ACR * LSB register */ - ret = ds2780_read16(dev_info->w1_dev, ¤t_raw, DS2780_ACR_MSB_REG); + ret = ds2780_read16(dev_info, ¤t_raw, DS2780_ACR_MSB_REG); if (ret < 0) return ret; @@ -299,7 +311,7 @@ static int ds2780_get_capacity(struct ds2780_device_info *dev_info, int ret; u8 raw; - ret = ds2780_read8(dev_info->w1_dev, &raw, DS2780_RARC_REG); + ret = ds2780_read8(dev_info, &raw, DS2780_RARC_REG); if (ret < 0) return ret; @@ -345,7 +357,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info, * Bits 7 - 0 of the RAAC value are in bits 7 - 0 of the RAAC * LSB register */ - ret = ds2780_read16(dev_info->w1_dev, &charge_raw, DS2780_RAAC_MSB_REG); + ret = ds2780_read16(dev_info, &charge_raw, DS2780_RAAC_MSB_REG); if (ret < 0) return ret; @@ -356,7 +368,7 @@ static int ds2780_get_charge_now(struct ds2780_device_info *dev_info, static int ds2780_get_control_register(struct ds2780_device_info *dev_info, u8 *control_reg) { - return ds2780_read8(dev_info->w1_dev, control_reg, DS2780_CONTROL_REG); + return ds2780_read8(dev_info, control_reg, DS2780_CONTROL_REG); } static int ds2780_set_control_register(struct ds2780_device_info *dev_info, @@ -364,7 +376,7 @@ static int ds2780_set_control_register(struct ds2780_device_info *dev_info, { int ret; - ret = ds2780_write(dev_info->w1_dev, &control_reg, + ret = ds2780_write(dev_info, &control_reg, DS2780_CONTROL_REG, sizeof(u8)); if (ret < 0) return ret; @@ -503,7 +515,7 @@ static ssize_t ds2780_get_sense_resistor_value(struct device *dev, struct power_supply *psy = to_power_supply(dev); struct ds2780_device_info *dev_info = to_ds2780_device_info(psy); - ret = ds2780_read8(dev_info->w1_dev, &sense_resistor, DS2780_RSNSP_REG); + ret = ds2780_read8(dev_info, &sense_resistor, DS2780_RSNSP_REG); if (ret < 0) return ret; @@ -584,7 +596,7 @@ static ssize_t ds2780_get_pio_pin(struct device *dev, struct power_supply *psy = to_power_supply(dev); struct ds2780_device_info *dev_info = to_ds2780_device_info(psy); - ret = ds2780_read8(dev_info->w1_dev, &sfr, DS2780_SFR_REG); + ret = ds2780_read8(dev_info, &sfr, DS2780_SFR_REG); if (ret < 0) return ret; @@ -611,7 +623,7 @@ static ssize_t ds2780_set_pio_pin(struct device *dev, return -EINVAL; } - ret = ds2780_write(dev_info->w1_dev, &new_setting, + ret = ds2780_write(dev_info, &new_setting, DS2780_SFR_REG, sizeof(u8)); if (ret < 0) return ret; @@ -632,7 +644,7 @@ static ssize_t ds2780_read_param_eeprom_bin(struct file *filp, DS2780_EEPROM_BLOCK1_END - DS2780_EEPROM_BLOCK1_START + 1 - off); - return ds2780_read_block(dev_info->w1_dev, buf, + return ds2780_read_block(dev_info, buf, DS2780_EEPROM_BLOCK1_START + off, count); } @@ -650,7 +662,7 @@ static ssize_t ds2780_write_param_eeprom_bin(struct file *filp, DS2780_EEPROM_BLOCK1_END - DS2780_EEPROM_BLOCK1_START + 1 - off); - ret = ds2780_write(dev_info->w1_dev, buf, + ret = ds2780_write(dev_info, buf, DS2780_EEPROM_BLOCK1_START + off, count); if (ret < 0) return ret; @@ -685,9 +697,8 @@ static ssize_t ds2780_read_user_eeprom_bin(struct file *filp, DS2780_EEPROM_BLOCK0_END - DS2780_EEPROM_BLOCK0_START + 1 - off); - return ds2780_read_block(dev_info->w1_dev, buf, + return ds2780_read_block(dev_info, buf, DS2780_EEPROM_BLOCK0_START + off, count); - } static ssize_t ds2780_write_user_eeprom_bin(struct file *filp, @@ -704,7 +715,7 @@ static ssize_t ds2780_write_user_eeprom_bin(struct file *filp, DS2780_EEPROM_BLOCK0_END - DS2780_EEPROM_BLOCK0_START + 1 - off); - ret = ds2780_write(dev_info->w1_dev, buf, + ret = ds2780_write(dev_info, buf, DS2780_EEPROM_BLOCK0_START + off, count); if (ret < 0) return ret; @@ -768,6 +779,7 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev) dev_info->bat.properties = ds2780_battery_props; dev_info->bat.num_properties = ARRAY_SIZE(ds2780_battery_props); dev_info->bat.get_property = ds2780_battery_get_property; + dev_info->mutex_holder = current; ret = power_supply_register(&pdev->dev, &dev_info->bat); if (ret) { @@ -797,6 +809,8 @@ static int __devinit ds2780_battery_probe(struct platform_device *pdev) goto fail_remove_bin_file; } + dev_info->mutex_holder = NULL; + return 0; fail_remove_bin_file: @@ -816,6 +830,8 @@ static int __devexit ds2780_battery_remove(struct platform_device *pdev) { struct ds2780_device_info *dev_info = platform_get_drvdata(pdev); + dev_info->mutex_holder = current; + /* remove attributes */ sysfs_remove_group(&dev_info->bat.dev->kobj, &ds2780_attr_group); diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig index 8520a7f..445197d 100644 --- a/drivers/pps/clients/Kconfig +++ b/drivers/pps/clients/Kconfig @@ -29,4 +29,13 @@ config PPS_CLIENT_PARPORT If you say yes here you get support for a PPS source connected with the interrupt pin of your parallel port. +config PPS_CLIENT_GPIO + tristate "PPS client using GPIO" + depends on PPS && GENERIC_HARDIRQS + help + If you say yes here you get support for a PPS source using + GPIO. To be useful you must also register a platform device + specifying the GPIO pin and other options, usually in your board + setup. + endif diff --git a/drivers/pps/clients/Makefile b/drivers/pps/clients/Makefile index 4feb7e9..a461d15 100644 --- a/drivers/pps/clients/Makefile +++ b/drivers/pps/clients/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_PPS_CLIENT_KTIMER) += pps-ktimer.o obj-$(CONFIG_PPS_CLIENT_LDISC) += pps-ldisc.o obj-$(CONFIG_PPS_CLIENT_PARPORT) += pps_parport.o +obj-$(CONFIG_PPS_CLIENT_GPIO) += pps-gpio.o ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c new file mode 100644 index 0000000..6550555 --- /dev/null +++ b/drivers/pps/clients/pps-gpio.c @@ -0,0 +1,227 @@ +/* + * pps-gpio.c -- PPS client driver using GPIO + * + * + * Copyright (C) 2010 Ricardo Martins <rasm@fe.up.pt> + * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define PPS_GPIO_NAME "pps-gpio" +#define pr_fmt(fmt) PPS_GPIO_NAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/pps_kernel.h> +#include <linux/pps-gpio.h> +#include <linux/gpio.h> +#include <linux/list.h> + +/* Info for each registered platform device */ +struct pps_gpio_device_data { + int irq; /* IRQ used as PPS source */ + struct pps_device *pps; /* PPS source device */ + struct pps_source_info info; /* PPS source information */ + const struct pps_gpio_platform_data *pdata; +}; + +/* + * Report the PPS event + */ + +static irqreturn_t pps_gpio_irq_handler(int irq, void *data) +{ + const struct pps_gpio_device_data *info; + struct pps_event_time ts; + int rising_edge; + + /* Get the time stamp first */ + pps_get_ts(&ts); + + info = data; + + rising_edge = gpio_get_value(info->pdata->gpio_pin); + if ((rising_edge && !info->pdata->assert_falling_edge) || + (!rising_edge && info->pdata->assert_falling_edge)) + pps_event(info->pps, &ts, PPS_CAPTUREASSERT, NULL); + else if (info->pdata->capture_clear && + ((rising_edge && info->pdata->assert_falling_edge) || + (!rising_edge && !info->pdata->assert_falling_edge))) + pps_event(info->pps, &ts, PPS_CAPTURECLEAR, NULL); + + return IRQ_HANDLED; +} + +static int pps_gpio_setup(struct platform_device *pdev) +{ + int ret; + const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; + + ret = gpio_request(pdata->gpio_pin, pdata->gpio_label); + if (ret) { + pr_warning("failed to request GPIO %u\n", pdata->gpio_pin); + return -EINVAL; + } + + ret = gpio_direction_input(pdata->gpio_pin); + if (ret) { + pr_warning("failed to set pin direction\n"); + gpio_free(pdata->gpio_pin); + return -EINVAL; + } + + return 0; +} + +static unsigned long +get_irqf_trigger_flags(const struct pps_gpio_platform_data *pdata) +{ + unsigned long flags = pdata->assert_falling_edge ? + IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING; + + if (pdata->capture_clear) { + flags |= ((flags & IRQF_TRIGGER_RISING) ? + IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING); + } + + return flags; +} + +static int pps_gpio_probe(struct platform_device *pdev) +{ + struct pps_gpio_device_data *data; + int irq; + int ret; + int err; + int pps_default_params; + const struct pps_gpio_platform_data *pdata = pdev->dev.platform_data; + + + /* GPIO setup */ + ret = pps_gpio_setup(pdev); + if (ret) + return -EINVAL; + + /* IRQ setup */ + irq = gpio_to_irq(pdata->gpio_pin); + if (irq < 0) { + pr_err("failed to map GPIO to IRQ: %d\n", irq); + err = -EINVAL; + goto return_error; + } + + /* allocate space for device info */ + data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL); + if (data == NULL) { + err = -ENOMEM; + goto return_error; + } + + /* initialize PPS specific parts of the bookkeeping data structure. */ + data->info.mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | + PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC; + if (pdata->capture_clear) + data->info.mode |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR | + PPS_ECHOCLEAR; + data->info.owner = THIS_MODULE; + snprintf(data->info.name, PPS_MAX_NAME_LEN - 1, "%s.%d", + pdev->name, pdev->id); + + /* register PPS source */ + pps_default_params = PPS_CAPTUREASSERT | PPS_OFFSETASSERT; + if (pdata->capture_clear) + pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR; + data->pps = pps_register_source(&data->info, pps_default_params); + if (data->pps == NULL) { + kfree(data); + pr_err("failed to register IRQ %d as PPS source\n", irq); + err = -EINVAL; + goto return_error; + } + + data->irq = irq; + data->pdata = pdata; + + /* register IRQ interrupt handler */ + ret = request_irq(irq, pps_gpio_irq_handler, + get_irqf_trigger_flags(pdata), data->info.name, data); + if (ret) { + pps_unregister_source(data->pps); + kfree(data); + pr_err("failed to acquire IRQ %d\n", irq); + err = -EINVAL; + goto return_error; + } + + platform_set_drvdata(pdev, data); + dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", irq); + + return 0; + +return_error: + gpio_free(pdata->gpio_pin); + return err; +} + +static int pps_gpio_remove(struct platform_device *pdev) +{ + struct pps_gpio_device_data *data = platform_get_drvdata(pdev); + const struct pps_gpio_platform_data *pdata = data->pdata; + + platform_set_drvdata(pdev, NULL); + free_irq(data->irq, data); + gpio_free(pdata->gpio_pin); + pps_unregister_source(data->pps); + pr_info("removed IRQ %d as PPS source\n", data->irq); + kfree(data); + return 0; +} + +static struct platform_driver pps_gpio_driver = { + .probe = pps_gpio_probe, + .remove = __devexit_p(pps_gpio_remove), + .driver = { + .name = PPS_GPIO_NAME, + .owner = THIS_MODULE + }, +}; + +static int __init pps_gpio_init(void) +{ + int ret = platform_driver_register(&pps_gpio_driver); + if (ret < 0) + pr_err("failed to register platform driver\n"); + return ret; +} + +static void __exit pps_gpio_exit(void) +{ + platform_driver_unregister(&pps_gpio_driver); + pr_debug("unregistered platform driver\n"); +} + +module_init(pps_gpio_init); +module_exit(pps_gpio_exit); + +MODULE_AUTHOR("Ricardo Martins <rasm@fe.up.pt>"); +MODULE_AUTHOR("James Nuss <jamesnuss@nanometrics.ca>"); +MODULE_DESCRIPTION("Use GPIO pin as PPS source"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c index 82583b0..436b4e4 100644 --- a/drivers/pps/clients/pps-ktimer.c +++ b/drivers/pps/clients/pps-ktimer.c @@ -52,17 +52,6 @@ static void pps_ktimer_event(unsigned long ptr) } /* - * The echo function - */ - -static void pps_ktimer_echo(struct pps_device *pps, int event, void *data) -{ - dev_info(pps->dev, "echo %s %s\n", - event & PPS_CAPTUREASSERT ? "assert" : "", - event & PPS_CAPTURECLEAR ? "clear" : ""); -} - -/* * The PPS info struct */ @@ -72,7 +61,6 @@ static struct pps_source_info pps_ktimer_info = { .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC, - .echo = pps_ktimer_echo, .owner = THIS_MODULE, }; diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c index c571d6d..e1b4705 100644 --- a/drivers/pps/clients/pps_parport.c +++ b/drivers/pps/clients/pps_parport.c @@ -133,14 +133,6 @@ out_both: return; } -/* the PPS echo function */ -static void pps_echo(struct pps_device *pps, int event, void *data) -{ - dev_info(pps->dev, "echo %s %s\n", - event & PPS_CAPTUREASSERT ? "assert" : "", - event & PPS_CAPTURECLEAR ? "clear" : ""); -} - static void parport_attach(struct parport *port) { struct pps_client_pp *device; @@ -151,7 +143,6 @@ static void parport_attach(struct parport *port) PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \ PPS_ECHOASSERT | PPS_ECHOCLEAR | \ PPS_CANWAIT | PPS_TSFMT_TSPEC, - .echo = pps_echo, .owner = THIS_MODULE, .dev = NULL }; diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index a4e8eb9..f197e8e 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -52,6 +52,14 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset) ts->sec += offset->sec; } +static void pps_echo_client_default(struct pps_device *pps, int event, + void *data) +{ + dev_info(pps->dev, "echo %s %s\n", + event & PPS_CAPTUREASSERT ? "assert" : "", + event & PPS_CAPTURECLEAR ? "clear" : ""); +} + /* * Exported functions */ @@ -80,13 +88,6 @@ struct pps_device *pps_register_source(struct pps_source_info *info, err = -EINVAL; goto pps_register_source_exit; } - if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 && - info->echo == NULL) { - pr_err("%s: echo function is not defined\n", - info->name); - err = -EINVAL; - goto pps_register_source_exit; - } if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) { pr_err("%s: unspecified time format\n", info->name); @@ -108,6 +109,11 @@ struct pps_device *pps_register_source(struct pps_source_info *info, pps->params.mode = default_params; pps->info = *info; + /* check for default echo function */ + if ((pps->info.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) && + pps->info.echo == NULL) + pps->info.echo = pps_echo_client_default; + init_waitqueue_head(&pps->queue); spin_lock_init(&pps->lock); diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index 070211a..bc87192 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -1,6 +1,8 @@ # # RapidIO configuration # +source "drivers/rapidio/devices/Kconfig" + config RAPIDIO_DISC_TIMEOUT int "Discovery timeout duration (seconds)" depends on RAPIDIO @@ -20,8 +22,6 @@ config RAPIDIO_ENABLE_RX_TX_PORTS ports for Input/Output direction to allow other traffic than Maintenance transfers. -source "drivers/rapidio/switches/Kconfig" - config RAPIDIO_DEBUG bool "RapidIO subsystem debug messages" depends on RAPIDIO @@ -32,3 +32,5 @@ config RAPIDIO_DEBUG going on. If you are unsure about this, say N here. + +source "drivers/rapidio/switches/Kconfig" diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile index 89b8eca..ec3fb81 100644 --- a/drivers/rapidio/Makefile +++ b/drivers/rapidio/Makefile @@ -4,5 +4,6 @@ obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o obj-$(CONFIG_RAPIDIO) += switches/ +obj-$(CONFIG_RAPIDIO) += devices/ subdir-ccflags-$(CONFIG_RAPIDIO_DEBUG) := -DDEBUG diff --git a/drivers/rapidio/devices/Kconfig b/drivers/rapidio/devices/Kconfig new file mode 100644 index 0000000..12a9d7f --- /dev/null +++ b/drivers/rapidio/devices/Kconfig @@ -0,0 +1,10 @@ +# +# RapidIO master port configuration +# + +config RAPIDIO_TSI721 + bool "IDT Tsi721 PCI Express SRIO Controller support" + depends on RAPIDIO && PCIEPORTBUS + default "n" + ---help--- + Include support for IDT Tsi721 PCI Express Serial RapidIO controller. diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile new file mode 100644 index 0000000..3b7b4e2 --- /dev/null +++ b/drivers/rapidio/devices/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for RapidIO devices +# + +obj-$(CONFIG_RAPIDIO_TSI721) += tsi721.o diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c new file mode 100644 index 0000000..5225930 --- /dev/null +++ b/drivers/rapidio/devices/tsi721.c @@ -0,0 +1,2360 @@ +/* + * RapidIO mport driver for Tsi721 PCIExpress-to-SRIO bridge + * + * Copyright 2011 Integrated Device Technology, Inc. + * Alexandre Bounine <alexandre.bounine@idt.com> + * Chul Kim <chul.kim@idt.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <linux/io.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/rio.h> +#include <linux/rio_drv.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/kfifo.h> +#include <linux/delay.h> + +#include "tsi721.h" + +#define DEBUG_PW /* Inbound Port-Write debugging */ + +static void tsi721_omsg_handler(struct tsi721_device *priv, int ch); +static void tsi721_imsg_handler(struct tsi721_device *priv, int ch); + +/** + * tsi721_lcread - read from local SREP config space + * @mport: RapidIO master port info + * @index: ID of RapdiIO interface + * @offset: Offset into configuration space + * @len: Length (in bytes) of the maintenance transaction + * @data: Value to be read into + * + * Generates a local SREP space read. Returns %0 on + * success or %-EINVAL on failure. + */ +static int tsi721_lcread(struct rio_mport *mport, int index, u32 offset, + int len, u32 *data) +{ + struct tsi721_device *priv = mport->priv; + + if (len != sizeof(u32)) + return -EINVAL; /* only 32-bit access is supported */ + + *data = ioread32(priv->regs + offset); + + return 0; +} + +/** + * tsi721_lcwrite - write into local SREP config space + * @mport: RapidIO master port info + * @index: ID of RapdiIO interface + * @offset: Offset into configuration space + * @len: Length (in bytes) of the maintenance transaction + * @data: Value to be written + * + * Generates a local write into SREP configuration space. Returns %0 on + * success or %-EINVAL on failure. + */ +static int tsi721_lcwrite(struct rio_mport *mport, int index, u32 offset, + int len, u32 data) +{ + struct tsi721_device *priv = mport->priv; + + if (len != sizeof(u32)) + return -EINVAL; /* only 32-bit access is supported */ + + iowrite32(data, priv->regs + offset); + + return 0; +} + +/** + * tsi721_maint_dma - Helper function to generate RapidIO maintenance + * transactions using designated Tsi721 DMA channel. + * @priv: pointer to tsi721 private data + * @sys_size: RapdiIO transport system size + * @destid: Destination ID of transaction + * @hopcount: Number of hops to target device + * @offset: Offset into configuration space + * @len: Length (in bytes) of the maintenance transaction + * @data: Location to be read from or write into + * @do_wr: Operation flag (1 == MAINT_WR) + * + * Generates a RapidIO maintenance transaction (Read or Write). + * Returns %0 on success and %-EINVAL or %-EFAULT on failure. + */ +static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size, + u16 destid, u8 hopcount, u32 offset, int len, + u32 *data, int do_wr) +{ + struct tsi721_dma_desc *bd_ptr; + u32 rd_count, swr_ptr, ch_stat; + int i, err = 0; + u32 op = do_wr ? MAINT_WR : MAINT_RD; + + if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32))) + return -EINVAL; + + bd_ptr = priv->bdma[TSI721_DMACH_MAINT].bd_base; + + rd_count = ioread32( + priv->regs + TSI721_DMAC_DRDCNT(TSI721_DMACH_MAINT)); + + /* Initialize DMA descriptor */ + bd_ptr[0].type_id = cpu_to_le32((DTYPE2 << 29) | (op << 19) | destid); + bd_ptr[0].bcount = cpu_to_le32((sys_size << 26) | 0x04); + bd_ptr[0].raddr_lo = cpu_to_le32((hopcount << 24) | offset); + bd_ptr[0].raddr_hi = 0; + if (do_wr) + bd_ptr[0].data[0] = cpu_to_be32p(data); + else + bd_ptr[0].data[0] = 0xffffffff; + + mb(); + + /* Start DMA operation */ + iowrite32(rd_count + 2, + priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT)); + ioread32(priv->regs + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT)); + i = 0; + + /* Wait until DMA transfer is finished */ + while ((ch_stat = ioread32(priv->regs + + TSI721_DMAC_STS(TSI721_DMACH_MAINT))) & TSI721_DMAC_STS_RUN) { + udelay(1); + if (++i >= 5000000) { + dev_dbg(&priv->pdev->dev, + "%s : DMA[%d] read timeout ch_status=%x\n", + __func__, TSI721_DMACH_MAINT, ch_stat); + if (!do_wr) + *data = 0xffffffff; + err = -EIO; + goto err_out; + } + } + + if (ch_stat & TSI721_DMAC_STS_ABORT) { + /* If DMA operation aborted due to error, + * reinitialize DMA channel + */ + dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n", + __func__, ch_stat); + dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n", + do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset); + iowrite32(TSI721_DMAC_INT_ALL, + priv->regs + TSI721_DMAC_INT(TSI721_DMACH_MAINT)); + iowrite32(TSI721_DMAC_CTL_INIT, + priv->regs + TSI721_DMAC_CTL(TSI721_DMACH_MAINT)); + udelay(10); + iowrite32(0, priv->regs + + TSI721_DMAC_DWRCNT(TSI721_DMACH_MAINT)); + udelay(1); + if (!do_wr) + *data = 0xffffffff; + err = -EIO; + goto err_out; + } + + if (!do_wr) + *data = be32_to_cpu(bd_ptr[0].data[0]); + + /* + * Update descriptor status FIFO RD pointer. + * NOTE: Skipping check and clear FIFO entries because we are waiting + * for transfer to be completed. + */ + swr_ptr = ioread32(priv->regs + TSI721_DMAC_DSWP(TSI721_DMACH_MAINT)); + iowrite32(swr_ptr, priv->regs + TSI721_DMAC_DSRP(TSI721_DMACH_MAINT)); +err_out: + + return err; +} + +/** + * tsi721_cread_dma - Generate a RapidIO maintenance read transaction + * using Tsi721 BDMA engine. + * @mport: RapidIO master port control structure + * @index: ID of RapdiIO interface + * @destid: Destination ID of transaction + * @hopcount: Number of hops to target device + * @offset: Offset into configuration space + * @len: Length (in bytes) of the maintenance transaction + * @val: Location to be read into + * + * Generates a RapidIO maintenance read transaction. + * Returns %0 on success and %-EINVAL or %-EFAULT on failure. + */ +static int tsi721_cread_dma(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 *data) +{ + struct tsi721_device *priv = mport->priv; + + return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount, + offset, len, data, 0); +} + +/** + * tsi721_cwrite_dma - Generate a RapidIO maintenance write transaction + * using Tsi721 BDMA engine + * @mport: RapidIO master port control structure + * @index: ID of RapdiIO interface + * @destid: Destination ID of transaction + * @hopcount: Number of hops to target device + * @offset: Offset into configuration space + * @len: Length (in bytes) of the maintenance transaction + * @val: Value to be written + * + * Generates a RapidIO maintenance write transaction. + * Returns %0 on success and %-EINVAL or %-EFAULT on failure. + */ +static int tsi721_cwrite_dma(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 data) +{ + struct tsi721_device *priv = mport->priv; + u32 temp = data; + + return tsi721_maint_dma(priv, mport->sys_size, destid, hopcount, + offset, len, &temp, 1); +} + +/** + * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler + * @mport: RapidIO master port structure + * + * Handles inbound port-write interrupts. Copies PW message from an internal + * buffer into PW message FIFO and schedules deferred routine to process + * queued messages. + */ +static int +tsi721_pw_handler(struct rio_mport *mport) +{ + struct tsi721_device *priv = mport->priv; + u32 pw_stat; + u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)]; + + + pw_stat = ioread32(priv->regs + TSI721_RIO_PW_RX_STAT); + + if (pw_stat & TSI721_RIO_PW_RX_STAT_PW_VAL) { + pw_buf[0] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(0)); + pw_buf[1] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(1)); + pw_buf[2] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(2)); + pw_buf[3] = ioread32(priv->regs + TSI721_RIO_PW_RX_CAPT(3)); + + /* Queue PW message (if there is room in FIFO), + * otherwise discard it. + */ + spin_lock(&priv->pw_fifo_lock); + if (kfifo_avail(&priv->pw_fifo) >= TSI721_RIO_PW_MSG_SIZE) + kfifo_in(&priv->pw_fifo, pw_buf, + TSI721_RIO_PW_MSG_SIZE); + else + priv->pw_discard_count++; + spin_unlock(&priv->pw_fifo_lock); + } + + /* Clear pending PW interrupts */ + iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL, + priv->regs + TSI721_RIO_PW_RX_STAT); + + schedule_work(&priv->pw_work); + + return 0; +} + +static void tsi721_pw_dpc(struct work_struct *work) +{ + struct tsi721_device *priv = container_of(work, struct tsi721_device, + pw_work); + u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message + buffer for RIO layer */ + + /* + * Process port-write messages + */ + while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer, + TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) { + /* Process one message */ +#ifdef DEBUG_PW + { + u32 i; + pr_debug("%s : Port-Write Message:", __func__); + for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) { + pr_debug("0x%02x: %08x %08x %08x %08x", i*4, + msg_buffer[i], msg_buffer[i + 1], + msg_buffer[i + 2], msg_buffer[i + 3]); + i += 4; + } + pr_debug("\n"); + } +#endif + /* Pass the port-write message to RIO core for processing */ + rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer); + } +} + +/** + * tsi721_pw_enable - enable/disable port-write interface init + * @mport: Master port implementing the port write unit + * @enable: 1=enable; 0=disable port-write message handling + */ +static int tsi721_pw_enable(struct rio_mport *mport, int enable) +{ + struct tsi721_device *priv = mport->priv; + u32 rval; + + rval = ioread32(priv->regs + TSI721_RIO_EM_INT_ENABLE); + + if (enable) + rval |= TSI721_RIO_EM_INT_ENABLE_PW_RX; + else + rval &= ~TSI721_RIO_EM_INT_ENABLE_PW_RX; + + /* Clear pending PW interrupts */ + iowrite32(TSI721_RIO_PW_RX_STAT_PW_DISC | TSI721_RIO_PW_RX_STAT_PW_VAL, + priv->regs + TSI721_RIO_PW_RX_STAT); + /* Update enable bits */ + iowrite32(rval, priv->regs + TSI721_RIO_EM_INT_ENABLE); + + return 0; +} + +/** + * tsi721_dsend - Send a RapidIO doorbell + * @mport: RapidIO master port info + * @index: ID of RapidIO interface + * @destid: Destination ID of target device + * @data: 16-bit info field of RapidIO doorbell + * + * Sends a RapidIO doorbell message. Always returns %0. + */ +static int tsi721_dsend(struct rio_mport *mport, int index, + u16 destid, u16 data) +{ + struct tsi721_device *priv = mport->priv; + u32 offset; + + offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) | + (destid << 2); + + dev_dbg(&priv->pdev->dev, + "Send Doorbell 0x%04x to destID 0x%x\n", data, destid); + iowrite16be(data, priv->odb_base + offset); + + return 0; +} + +/** + * tsi721_dbell_handler - Tsi721 doorbell interrupt handler + * @mport: RapidIO master port structure + * + * Handles inbound doorbell interrupts. Copies doorbell entry from an internal + * buffer into DB message FIFO and schedules deferred routine to process + * queued DBs. + */ +static int +tsi721_dbell_handler(struct rio_mport *mport) +{ + struct tsi721_device *priv = mport->priv; + u32 regval; + + /* Disable IDB interrupts */ + regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); + regval &= ~TSI721_SR_CHINT_IDBQRCV; + iowrite32(regval, + priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); + + schedule_work(&priv->idb_work); + + return 0; +} + +static void tsi721_db_dpc(struct work_struct *work) +{ + struct tsi721_device *priv = container_of(work, struct tsi721_device, + idb_work); + struct rio_mport *mport; + struct rio_dbell *dbell; + int found = 0; + u32 wr_ptr, rd_ptr; + u64 *idb_entry; + u32 regval; + union { + u64 msg; + u8 bytes[8]; + } idb; + + /* + * Process queued inbound doorbells + */ + mport = priv->mport; + + wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)); + rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE)); + + while (wr_ptr != rd_ptr) { + idb_entry = (u64 *)(priv->idb_base + + (TSI721_IDB_ENTRY_SIZE * rd_ptr)); + rd_ptr++; + idb.msg = *idb_entry; + *idb_entry = 0; + + /* Process one doorbell */ + list_for_each_entry(dbell, &mport->dbells, node) { + if ((dbell->res->start <= DBELL_INF(idb.bytes)) && + (dbell->res->end >= DBELL_INF(idb.bytes))) { + found = 1; + break; + } + } + + if (found) { + dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes), + DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); + } else { + dev_dbg(&priv->pdev->dev, + "spurious inb doorbell, sid %2.2x tid %2.2x" + " info %4.4x\n", DBELL_SID(idb.bytes), + DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); + } + } + + iowrite32(rd_ptr & (IDB_QSIZE - 1), + priv->regs + TSI721_IDQ_RP(IDB_QUEUE)); + + /* Re-enable IDB interrupts */ + regval = ioread32(priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); + regval |= TSI721_SR_CHINT_IDBQRCV; + iowrite32(regval, + priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); +} + +/** + * tsi721_irqhandler - Tsi721 interrupt handler + * @irq: Linux interrupt number + * @ptr: Pointer to interrupt-specific data (mport structure) + * + * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported + * interrupt events and calls an event-specific handler(s). + */ +static irqreturn_t tsi721_irqhandler(int irq, void *ptr) +{ + struct rio_mport *mport = (struct rio_mport *)ptr; + struct tsi721_device *priv = mport->priv; + u32 dev_int; + u32 dev_ch_int; + u32 intval; + u32 ch_inte; + + dev_int = ioread32(priv->regs + TSI721_DEV_INT); + if (!dev_int) + return IRQ_NONE; + + dev_ch_int = ioread32(priv->regs + TSI721_DEV_CHAN_INT); + + if (dev_int & TSI721_DEV_INT_SR2PC_CH) { + /* Service SR2PC Channel interrupts */ + if (dev_ch_int & TSI721_INT_SR2PC_CHAN(IDB_QUEUE)) { + /* Service Inbound Doorbell interrupt */ + intval = ioread32(priv->regs + + TSI721_SR_CHINT(IDB_QUEUE)); + if (intval & TSI721_SR_CHINT_IDBQRCV) + tsi721_dbell_handler(mport); + else + dev_info(&priv->pdev->dev, + "Unsupported SR_CH_INT %x\n", intval); + + /* Clear interrupts */ + iowrite32(intval, + priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + } + } + + if (dev_int & TSI721_DEV_INT_SMSG_CH) { + int ch; + + /* + * Service channel interrupts from Messaging Engine + */ + + if (dev_ch_int & TSI721_INT_IMSG_CHAN_M) { /* Inbound Msg */ + /* Disable signaled OB MSG Channel interrupts */ + ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + ch_inte &= ~(dev_ch_int & TSI721_INT_IMSG_CHAN_M); + iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE); + + /* + * Process Inbound Message interrupt for each MBOX + */ + for (ch = 4; ch < RIO_MAX_MBOX + 4; ch++) { + if (!(dev_ch_int & TSI721_INT_IMSG_CHAN(ch))) + continue; + tsi721_imsg_handler(priv, ch); + } + } + + if (dev_ch_int & TSI721_INT_OMSG_CHAN_M) { /* Outbound Msg */ + /* Disable signaled OB MSG Channel interrupts */ + ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + ch_inte &= ~(dev_ch_int & TSI721_INT_OMSG_CHAN_M); + iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE); + + /* + * Process Outbound Message interrupts for each MBOX + */ + + for (ch = 0; ch < RIO_MAX_MBOX; ch++) { + if (!(dev_ch_int & TSI721_INT_OMSG_CHAN(ch))) + continue; + tsi721_omsg_handler(priv, ch); + } + } + } + + if (dev_int & TSI721_DEV_INT_SRIO) { + /* Service SRIO MAC interrupts */ + intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT); + if (intval & TSI721_RIO_EM_INT_STAT_PW_RX) + tsi721_pw_handler(mport); + } + + return IRQ_HANDLED; +} + +static void tsi721_interrupts_init(struct tsi721_device *priv) +{ + u32 intr; + + /* Enable IDB interrupts */ + iowrite32(TSI721_SR_CHINT_ALL, + priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + iowrite32(TSI721_SR_CHINT_IDBQRCV, + priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); + iowrite32(TSI721_INT_SR2PC_CHAN(IDB_QUEUE), + priv->regs + TSI721_DEV_CHAN_INTE); + + /* Enable SRIO MAC interrupts */ + iowrite32(TSI721_RIO_EM_DEV_INT_EN_INT, + priv->regs + TSI721_RIO_EM_DEV_INT_EN); + + if (priv->flags & TSI721_USING_MSIX) + intr = TSI721_DEV_INT_SRIO; + else + intr = TSI721_DEV_INT_SR2PC_CH | TSI721_DEV_INT_SRIO | + TSI721_DEV_INT_SMSG_CH; + + iowrite32(intr, priv->regs + TSI721_DEV_INTE); + ioread32(priv->regs + TSI721_DEV_INTE); +} + +#ifdef CONFIG_PCI_MSI +/** + * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging + * @irq: Linux interrupt number + * @ptr: Pointer to interrupt-specific data (mport structure) + * + * Handles outbound messaging interrupts signaled using MSI-X. + */ +static irqreturn_t tsi721_omsg_msix(int irq, void *ptr) +{ + struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv; + int mbox; + + mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX; + tsi721_omsg_handler(priv, mbox); + return IRQ_HANDLED; +} + +/** + * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging + * @irq: Linux interrupt number + * @ptr: Pointer to interrupt-specific data (mport structure) + * + * Handles inbound messaging interrupts signaled using MSI-X. + */ +static irqreturn_t tsi721_imsg_msix(int irq, void *ptr) +{ + struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv; + int mbox; + + mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX; + tsi721_imsg_handler(priv, mbox + 4); + return IRQ_HANDLED; +} + +/** + * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler + * @irq: Linux interrupt number + * @ptr: Pointer to interrupt-specific data (mport structure) + * + * Handles Tsi721 interrupts from SRIO MAC. + */ +static irqreturn_t tsi721_srio_msix(int irq, void *ptr) +{ + struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv; + u32 srio_int; + + /* Service SRIO MAC interrupts */ + srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT); + if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX) + tsi721_pw_handler((struct rio_mport *)ptr); + + return IRQ_HANDLED; +} + +/** + * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler + * @irq: Linux interrupt number + * @ptr: Pointer to interrupt-specific data (mport structure) + * + * Handles Tsi721 interrupts from SR2PC Channel. + * NOTE: At this moment services only one SR2PC channel associated with inbound + * doorbells. + */ +static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr) +{ + struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv; + u32 sr_ch_int; + + /* Service Inbound DB interrupt from SR2PC channel */ + sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV) + tsi721_dbell_handler((struct rio_mport *)ptr); + + /* Clear interrupts */ + iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + /* Read back to ensure that interrupt was cleared */ + sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE)); + + return IRQ_HANDLED; +} + +/** + * tsi721_request_msix - register interrupt service for MSI-X mode. + * @mport: RapidIO master port structure + * + * Registers MSI-X interrupt service routines for interrupts that are active + * immediately after mport initialization. Messaging interrupt service routines + * should be registered during corresponding open requests. + */ +static int tsi721_request_msix(struct rio_mport *mport) +{ + struct tsi721_device *priv = mport->priv; + int err = 0; + + err = request_irq(priv->msix[TSI721_VECT_IDB].vector, + tsi721_sr2pc_ch_msix, 0, + priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport); + if (err) + goto out; + + err = request_irq(priv->msix[TSI721_VECT_PWRX].vector, + tsi721_srio_msix, 0, + priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport); + if (err) + free_irq( + priv->msix[TSI721_VECT_IDB].vector, + (void *)mport); +out: + return err; +} + +/** + * tsi721_enable_msix - Attempts to enable MSI-X support for Tsi721. + * @priv: pointer to tsi721 private data + * + * Configures MSI-X support for Tsi721. Supports only an exact number + * of requested vectors. + */ +static int tsi721_enable_msix(struct tsi721_device *priv) +{ + struct msix_entry entries[TSI721_VECT_MAX]; + int err; + int i; + + entries[TSI721_VECT_IDB].entry = TSI721_MSIX_SR2PC_IDBQ_RCV(IDB_QUEUE); + entries[TSI721_VECT_PWRX].entry = TSI721_MSIX_SRIO_MAC_INT; + + /* + * Initialize MSI-X entries for Messaging Engine: + * this driver supports four RIO mailboxes (inbound and outbound) + * NOTE: Inbound message MBOX 0...4 use IB channels 4...7. Therefore + * offset +4 is added to IB MBOX number. + */ + for (i = 0; i < RIO_MAX_MBOX; i++) { + entries[TSI721_VECT_IMB0_RCV + i].entry = + TSI721_MSIX_IMSG_DQ_RCV(i + 4); + entries[TSI721_VECT_IMB0_INT + i].entry = + TSI721_MSIX_IMSG_INT(i + 4); + entries[TSI721_VECT_OMB0_DONE + i].entry = + TSI721_MSIX_OMSG_DONE(i); + entries[TSI721_VECT_OMB0_INT + i].entry = + TSI721_MSIX_OMSG_INT(i); + } + + err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries)); + if (err) { + if (err > 0) + dev_info(&priv->pdev->dev, + "Only %d MSI-X vectors available, " + "not using MSI-X\n", err); + return err; + } + + /* + * Copy MSI-X vector information into tsi721 private structure + */ + priv->msix[TSI721_VECT_IDB].vector = entries[TSI721_VECT_IDB].vector; + snprintf(priv->msix[TSI721_VECT_IDB].irq_name, IRQ_DEVICE_NAME_MAX, + DRV_NAME "-idb@pci:%s", pci_name(priv->pdev)); + priv->msix[TSI721_VECT_PWRX].vector = entries[TSI721_VECT_PWRX].vector; + snprintf(priv->msix[TSI721_VECT_PWRX].irq_name, IRQ_DEVICE_NAME_MAX, + DRV_NAME "-pwrx@pci:%s", pci_name(priv->pdev)); + + for (i = 0; i < RIO_MAX_MBOX; i++) { + priv->msix[TSI721_VECT_IMB0_RCV + i].vector = + entries[TSI721_VECT_IMB0_RCV + i].vector; + snprintf(priv->msix[TSI721_VECT_IMB0_RCV + i].irq_name, + IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbr%d@pci:%s", + i, pci_name(priv->pdev)); + + priv->msix[TSI721_VECT_IMB0_INT + i].vector = + entries[TSI721_VECT_IMB0_INT + i].vector; + snprintf(priv->msix[TSI721_VECT_IMB0_INT + i].irq_name, + IRQ_DEVICE_NAME_MAX, DRV_NAME "-imbi%d@pci:%s", + i, pci_name(priv->pdev)); + + priv->msix[TSI721_VECT_OMB0_DONE + i].vector = + entries[TSI721_VECT_OMB0_DONE + i].vector; + snprintf(priv->msix[TSI721_VECT_OMB0_DONE + i].irq_name, + IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombd%d@pci:%s", + i, pci_name(priv->pdev)); + + priv->msix[TSI721_VECT_OMB0_INT + i].vector = + entries[TSI721_VECT_OMB0_INT + i].vector; + snprintf(priv->msix[TSI721_VECT_OMB0_INT + i].irq_name, + IRQ_DEVICE_NAME_MAX, DRV_NAME "-ombi%d@pci:%s", + i, pci_name(priv->pdev)); + } + + return 0; +} +#endif /* CONFIG_PCI_MSI */ + +static int tsi721_request_irq(struct rio_mport *mport) +{ + struct tsi721_device *priv = mport->priv; + int err; + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) + err = tsi721_request_msix(mport); + else +#endif + err = request_irq(priv->pdev->irq, tsi721_irqhandler, + (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED, + DRV_NAME, (void *)mport); + + if (err) + dev_err(&priv->pdev->dev, + "Unable to allocate interrupt, Error: %d\n", err); + + return err; +} + +/** + * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO) + * translation regions. + * @priv: pointer to tsi721 private data + * + * Disables SREP translation regions. + */ +static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv) +{ + int i; + + /* Disable all PC2SR translation windows */ + for (i = 0; i < TSI721_OBWIN_NUM; i++) + iowrite32(0, priv->regs + TSI721_OBWINLB(i)); +} + +/** + * tsi721_init_sr2pc_mapping - initializes inbound (SRIO->PCIe) + * translation regions. + * @priv: pointer to tsi721 private data + * + * Disables inbound windows. + */ +static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv) +{ + int i; + + /* Disable all SR2PC inbound windows */ + for (i = 0; i < TSI721_IBWIN_NUM; i++) + iowrite32(0, priv->regs + TSI721_IBWINLB(i)); +} + +/** + * tsi721_port_write_init - Inbound port write interface init + * @priv: pointer to tsi721 private data + * + * Initializes inbound port write handler. + * Returns %0 on success or %-ENOMEM on failure. + */ +static int tsi721_port_write_init(struct tsi721_device *priv) +{ + priv->pw_discard_count = 0; + INIT_WORK(&priv->pw_work, tsi721_pw_dpc); + spin_lock_init(&priv->pw_fifo_lock); + if (kfifo_alloc(&priv->pw_fifo, + TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) { + dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n"); + return -ENOMEM; + } + + /* Use reliable port-write capture mode */ + iowrite32(TSI721_RIO_PW_CTL_PWC_REL, priv->regs + TSI721_RIO_PW_CTL); + return 0; +} + +static int tsi721_doorbell_init(struct tsi721_device *priv) +{ + /* Outbound Doorbells do not require any setup. + * Tsi721 uses dedicated PCI BAR1 to generate doorbells. + * That BAR1 was mapped during the probe routine. + */ + + /* Initialize Inbound Doorbell processing DPC and queue */ + priv->db_discard_count = 0; + INIT_WORK(&priv->idb_work, tsi721_db_dpc); + + /* Allocate buffer for inbound doorbells queue */ + priv->idb_base = dma_alloc_coherent(&priv->pdev->dev, + IDB_QSIZE * TSI721_IDB_ENTRY_SIZE, + &priv->idb_dma, GFP_KERNEL); + if (!priv->idb_base) + return -ENOMEM; + + memset(priv->idb_base, 0, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE); + + dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n", + priv->idb_base, (unsigned long long)priv->idb_dma); + + iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE), + priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE)); + iowrite32(((u64)priv->idb_dma >> 32), + priv->regs + TSI721_IDQ_BASEU(IDB_QUEUE)); + iowrite32(((u64)priv->idb_dma & TSI721_IDQ_BASEL_ADDR), + priv->regs + TSI721_IDQ_BASEL(IDB_QUEUE)); + /* Enable accepting all inbound doorbells */ + iowrite32(0, priv->regs + TSI721_IDQ_MASK(IDB_QUEUE)); + + iowrite32(TSI721_IDQ_INIT, priv->regs + TSI721_IDQ_CTL(IDB_QUEUE)); + + iowrite32(0, priv->regs + TSI721_IDQ_RP(IDB_QUEUE)); + + return 0; +} + +static void tsi721_doorbell_free(struct tsi721_device *priv) +{ + if (priv->idb_base == NULL) + return; + + /* Free buffer allocated for inbound doorbell queue */ + dma_free_coherent(&priv->pdev->dev, IDB_QSIZE * TSI721_IDB_ENTRY_SIZE, + priv->idb_base, priv->idb_dma); + priv->idb_base = NULL; +} + +static int tsi721_bdma_ch_init(struct tsi721_device *priv, int chnum) +{ + struct tsi721_dma_desc *bd_ptr; + u64 *sts_ptr; + dma_addr_t bd_phys, sts_phys; + int sts_size; + int bd_num = priv->bdma[chnum].bd_num; + + dev_dbg(&priv->pdev->dev, "Init Block DMA Engine, CH%d\n", chnum); + + /* + * Initialize DMA channel for maintenance requests + */ + + /* Allocate space for DMA descriptors */ + bd_ptr = dma_alloc_coherent(&priv->pdev->dev, + bd_num * sizeof(struct tsi721_dma_desc), + &bd_phys, GFP_KERNEL); + if (!bd_ptr) + return -ENOMEM; + + priv->bdma[chnum].bd_phys = bd_phys; + priv->bdma[chnum].bd_base = bd_ptr; + + memset(bd_ptr, 0, bd_num * sizeof(struct tsi721_dma_desc)); + + dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n", + bd_ptr, (unsigned long long)bd_phys); + + /* Allocate space for descriptor status FIFO */ + sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ? + bd_num : TSI721_DMA_MINSTSSZ; + sts_size = roundup_pow_of_two(sts_size); + sts_ptr = dma_alloc_coherent(&priv->pdev->dev, + sts_size * sizeof(struct tsi721_dma_sts), + &sts_phys, GFP_KERNEL); + if (!sts_ptr) { + /* Free space allocated for DMA descriptors */ + dma_free_coherent(&priv->pdev->dev, + bd_num * sizeof(struct tsi721_dma_desc), + bd_ptr, bd_phys); + priv->bdma[chnum].bd_base = NULL; + return -ENOMEM; + } + + priv->bdma[chnum].sts_phys = sts_phys; + priv->bdma[chnum].sts_base = sts_ptr; + priv->bdma[chnum].sts_size = sts_size; + + memset(sts_ptr, 0, sts_size); + + dev_dbg(&priv->pdev->dev, + "desc status FIFO @ %p (phys = %llx) size=0x%x\n", + sts_ptr, (unsigned long long)sts_phys, sts_size); + + /* Initialize DMA descriptors ring */ + bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29); + bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys & + TSI721_DMAC_DPTRL_MASK); + bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32); + + /* Setup DMA descriptor pointers */ + iowrite32(((u64)bd_phys >> 32), + priv->regs + TSI721_DMAC_DPTRH(chnum)); + iowrite32(((u64)bd_phys & TSI721_DMAC_DPTRL_MASK), + priv->regs + TSI721_DMAC_DPTRL(chnum)); + + /* Setup descriptor status FIFO */ + iowrite32(((u64)sts_phys >> 32), + priv->regs + TSI721_DMAC_DSBH(chnum)); + iowrite32(((u64)sts_phys & TSI721_DMAC_DSBL_MASK), + priv->regs + TSI721_DMAC_DSBL(chnum)); + iowrite32(TSI721_DMAC_DSSZ_SIZE(sts_size), + priv->regs + TSI721_DMAC_DSSZ(chnum)); + + /* Clear interrupt bits */ + iowrite32(TSI721_DMAC_INT_ALL, + priv->regs + TSI721_DMAC_INT(chnum)); + + ioread32(priv->regs + TSI721_DMAC_INT(chnum)); + + /* Toggle DMA channel initialization */ + iowrite32(TSI721_DMAC_CTL_INIT, priv->regs + TSI721_DMAC_CTL(chnum)); + ioread32(priv->regs + TSI721_DMAC_CTL(chnum)); + udelay(10); + + return 0; +} + +static int tsi721_bdma_ch_free(struct tsi721_device *priv, int chnum) +{ + u32 ch_stat; + + if (priv->bdma[chnum].bd_base == NULL) + return 0; + + /* Check if DMA channel still running */ + ch_stat = ioread32(priv->regs + TSI721_DMAC_STS(chnum)); + if (ch_stat & TSI721_DMAC_STS_RUN) + return -EFAULT; + + /* Put DMA channel into init state */ + iowrite32(TSI721_DMAC_CTL_INIT, + priv->regs + TSI721_DMAC_CTL(chnum)); + + /* Free space allocated for DMA descriptors */ + dma_free_coherent(&priv->pdev->dev, + priv->bdma[chnum].bd_num * sizeof(struct tsi721_dma_desc), + priv->bdma[chnum].bd_base, priv->bdma[chnum].bd_phys); + priv->bdma[chnum].bd_base = NULL; + + /* Free space allocated for status FIFO */ + dma_free_coherent(&priv->pdev->dev, + priv->bdma[chnum].sts_size * sizeof(struct tsi721_dma_sts), + priv->bdma[chnum].sts_base, priv->bdma[chnum].sts_phys); + priv->bdma[chnum].sts_base = NULL; + return 0; +} + +static int tsi721_bdma_init(struct tsi721_device *priv) +{ + /* Initialize BDMA channel allocated for RapidIO maintenance read/write + * request generation + */ + priv->bdma[TSI721_DMACH_MAINT].bd_num = 2; + if (tsi721_bdma_ch_init(priv, TSI721_DMACH_MAINT)) { + dev_err(&priv->pdev->dev, "Unable to initialize maintenance DMA" + " channel %d, aborting\n", TSI721_DMACH_MAINT); + return -ENOMEM; + } + + return 0; +} + +static void tsi721_bdma_free(struct tsi721_device *priv) +{ + tsi721_bdma_ch_free(priv, TSI721_DMACH_MAINT); +} + +/* Enable Inbound Messaging Interrupts */ +static void +tsi721_imsg_interrupt_enable(struct tsi721_device *priv, int ch, + u32 inte_mask) +{ + u32 rval; + + if (!inte_mask) + return; + + /* Clear pending Inbound Messaging interrupts */ + iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch)); + + /* Enable Inbound Messaging interrupts */ + rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch)); + iowrite32(rval | inte_mask, priv->regs + TSI721_IBDMAC_INTE(ch)); + + if (priv->flags & TSI721_USING_MSIX) + return; /* Finished if we are in MSI-X mode */ + + /* + * For MSI and INTA interrupt signalling we need to enable next levels + */ + + /* Enable Device Channel Interrupt */ + rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + iowrite32(rval | TSI721_INT_IMSG_CHAN(ch), + priv->regs + TSI721_DEV_CHAN_INTE); +} + +/* Disable Inbound Messaging Interrupts */ +static void +tsi721_imsg_interrupt_disable(struct tsi721_device *priv, int ch, + u32 inte_mask) +{ + u32 rval; + + if (!inte_mask) + return; + + /* Clear pending Inbound Messaging interrupts */ + iowrite32(inte_mask, priv->regs + TSI721_IBDMAC_INT(ch)); + + /* Disable Inbound Messaging interrupts */ + rval = ioread32(priv->regs + TSI721_IBDMAC_INTE(ch)); + rval &= ~inte_mask; + iowrite32(rval, priv->regs + TSI721_IBDMAC_INTE(ch)); + + if (priv->flags & TSI721_USING_MSIX) + return; /* Finished if we are in MSI-X mode */ + + /* + * For MSI and INTA interrupt signalling we need to disable next levels + */ + + /* Disable Device Channel Interrupt */ + rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + rval &= ~TSI721_INT_IMSG_CHAN(ch); + iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE); +} + +/* Enable Outbound Messaging interrupts */ +static void +tsi721_omsg_interrupt_enable(struct tsi721_device *priv, int ch, + u32 inte_mask) +{ + u32 rval; + + if (!inte_mask) + return; + + /* Clear pending Outbound Messaging interrupts */ + iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch)); + + /* Enable Outbound Messaging channel interrupts */ + rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch)); + iowrite32(rval | inte_mask, priv->regs + TSI721_OBDMAC_INTE(ch)); + + if (priv->flags & TSI721_USING_MSIX) + return; /* Finished if we are in MSI-X mode */ + + /* + * For MSI and INTA interrupt signalling we need to enable next levels + */ + + /* Enable Device Channel Interrupt */ + rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + iowrite32(rval | TSI721_INT_OMSG_CHAN(ch), + priv->regs + TSI721_DEV_CHAN_INTE); +} + +/* Disable Outbound Messaging interrupts */ +static void +tsi721_omsg_interrupt_disable(struct tsi721_device *priv, int ch, + u32 inte_mask) +{ + u32 rval; + + if (!inte_mask) + return; + + /* Clear pending Outbound Messaging interrupts */ + iowrite32(inte_mask, priv->regs + TSI721_OBDMAC_INT(ch)); + + /* Disable Outbound Messaging interrupts */ + rval = ioread32(priv->regs + TSI721_OBDMAC_INTE(ch)); + rval &= ~inte_mask; + iowrite32(rval, priv->regs + TSI721_OBDMAC_INTE(ch)); + + if (priv->flags & TSI721_USING_MSIX) + return; /* Finished if we are in MSI-X mode */ + + /* + * For MSI and INTA interrupt signalling we need to disable next levels + */ + + /* Disable Device Channel Interrupt */ + rval = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + rval &= ~TSI721_INT_OMSG_CHAN(ch); + iowrite32(rval, priv->regs + TSI721_DEV_CHAN_INTE); +} + +/** + * tsi721_add_outb_message - Add message to the Tsi721 outbound message queue + * @mport: Master port with outbound message queue + * @rdev: Target of outbound message + * @mbox: Outbound mailbox + * @buffer: Message to add to outbound queue + * @len: Length of message + */ +static int +tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, + void *buffer, size_t len) +{ + struct tsi721_device *priv = mport->priv; + struct tsi721_omsg_desc *desc; + u32 tx_slot; + + if (!priv->omsg_init[mbox] || + len > TSI721_MSG_MAX_SIZE || len < 8) + return -EINVAL; + + tx_slot = priv->omsg_ring[mbox].tx_slot; + + /* Copy copy message into transfer buffer */ + memcpy(priv->omsg_ring[mbox].omq_base[tx_slot], buffer, len); + + if (len & 0x7) + len += 8; + + /* Build descriptor associated with buffer */ + desc = priv->omsg_ring[mbox].omd_base; + desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid); + if (tx_slot % 4 == 0) + desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF); + + desc[tx_slot].msg_info = + cpu_to_le32((mport->sys_size << 26) | (mbox << 22) | + (0xe << 12) | (len & 0xff8)); + desc[tx_slot].bufptr_lo = + cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] & + 0xffffffff); + desc[tx_slot].bufptr_hi = + cpu_to_le32((u64)priv->omsg_ring[mbox].omq_phys[tx_slot] >> 32); + + priv->omsg_ring[mbox].wr_count++; + + /* Go to next descriptor */ + if (++priv->omsg_ring[mbox].tx_slot == priv->omsg_ring[mbox].size) { + priv->omsg_ring[mbox].tx_slot = 0; + /* Move through the ring link descriptor at the end */ + priv->omsg_ring[mbox].wr_count++; + } + + mb(); + + /* Set new write count value */ + iowrite32(priv->omsg_ring[mbox].wr_count, + priv->regs + TSI721_OBDMAC_DWRCNT(mbox)); + ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox)); + + return 0; +} + +/** + * tsi721_omsg_handler - Outbound Message Interrupt Handler + * @priv: pointer to tsi721 private data + * @ch: number of OB MSG channel to service + * + * Services channel interrupts from outbound messaging engine. + */ +static void tsi721_omsg_handler(struct tsi721_device *priv, int ch) +{ + u32 omsg_int; + + spin_lock(&priv->omsg_ring[ch].lock); + + omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch)); + + if (omsg_int & TSI721_OBDMAC_INT_ST_FULL) + dev_info(&priv->pdev->dev, + "OB MBOX%d: Status FIFO is full\n", ch); + + if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) { + u32 srd_ptr; + u64 *sts_ptr, last_ptr = 0, prev_ptr = 0; + int i, j; + u32 tx_slot; + + /* + * Find last successfully processed descriptor + */ + + /* Check and clear descriptor status FIFO entries */ + srd_ptr = priv->omsg_ring[ch].sts_rdptr; + sts_ptr = priv->omsg_ring[ch].sts_base; + j = srd_ptr * 8; + while (sts_ptr[j]) { + for (i = 0; i < 8 && sts_ptr[j]; i++, j++) { + prev_ptr = last_ptr; + last_ptr = le64_to_cpu(sts_ptr[j]); + sts_ptr[j] = 0; + } + + ++srd_ptr; + srd_ptr %= priv->omsg_ring[ch].sts_size; + j = srd_ptr * 8; + } + + if (last_ptr == 0) + goto no_sts_update; + + priv->omsg_ring[ch].sts_rdptr = srd_ptr; + iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch)); + + if (!priv->mport->outb_msg[ch].mcback) + goto no_sts_update; + + /* Inform upper layer about transfer completion */ + + tx_slot = (last_ptr - (u64)priv->omsg_ring[ch].omd_phys)/ + sizeof(struct tsi721_omsg_desc); + + /* + * Check if this is a Link Descriptor (LD). + * If yes, ignore LD and use descriptor processed + * before LD. + */ + if (tx_slot == priv->omsg_ring[ch].size) { + if (prev_ptr) + tx_slot = (prev_ptr - + (u64)priv->omsg_ring[ch].omd_phys)/ + sizeof(struct tsi721_omsg_desc); + else + goto no_sts_update; + } + + /* Move slot index to the next message to be sent */ + ++tx_slot; + if (tx_slot == priv->omsg_ring[ch].size) + tx_slot = 0; + BUG_ON(tx_slot >= priv->omsg_ring[ch].size); + priv->mport->outb_msg[ch].mcback(priv->mport, + priv->omsg_ring[ch].dev_id, ch, + tx_slot); + } + +no_sts_update: + + if (omsg_int & TSI721_OBDMAC_INT_ERROR) { + /* + * Outbound message operation aborted due to error, + * reinitialize OB MSG channel + */ + + dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n", + ioread32(priv->regs + TSI721_OBDMAC_STS(ch))); + + iowrite32(TSI721_OBDMAC_INT_ERROR, + priv->regs + TSI721_OBDMAC_INT(ch)); + iowrite32(TSI721_OBDMAC_CTL_INIT, + priv->regs + TSI721_OBDMAC_CTL(ch)); + ioread32(priv->regs + TSI721_OBDMAC_CTL(ch)); + + /* Inform upper level to clear all pending tx slots */ + if (priv->mport->outb_msg[ch].mcback) + priv->mport->outb_msg[ch].mcback(priv->mport, + priv->omsg_ring[ch].dev_id, ch, + priv->omsg_ring[ch].tx_slot); + /* Synch tx_slot tracking */ + iowrite32(priv->omsg_ring[ch].tx_slot, + priv->regs + TSI721_OBDMAC_DRDCNT(ch)); + ioread32(priv->regs + TSI721_OBDMAC_DRDCNT(ch)); + priv->omsg_ring[ch].wr_count = priv->omsg_ring[ch].tx_slot; + priv->omsg_ring[ch].sts_rdptr = 0; + } + + /* Clear channel interrupts */ + iowrite32(omsg_int, priv->regs + TSI721_OBDMAC_INT(ch)); + + if (!(priv->flags & TSI721_USING_MSIX)) { + u32 ch_inte; + + /* Re-enable channel interrupts */ + ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + ch_inte |= TSI721_INT_OMSG_CHAN(ch); + iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE); + } + + spin_unlock(&priv->omsg_ring[ch].lock); +} + +/** + * tsi721_open_outb_mbox - Initialize Tsi721 outbound mailbox + * @mport: Master port implementing Outbound Messaging Engine + * @dev_id: Device specific pointer to pass on event + * @mbox: Mailbox to open + * @entries: Number of entries in the outbound mailbox ring + */ +static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id, + int mbox, int entries) +{ + struct tsi721_device *priv = mport->priv; + struct tsi721_omsg_desc *bd_ptr; + int i, rc = 0; + + if ((entries < TSI721_OMSGD_MIN_RING_SIZE) || + (entries > (TSI721_OMSGD_RING_SIZE)) || + (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) { + rc = -EINVAL; + goto out; + } + + priv->omsg_ring[mbox].dev_id = dev_id; + priv->omsg_ring[mbox].size = entries; + priv->omsg_ring[mbox].sts_rdptr = 0; + spin_lock_init(&priv->omsg_ring[mbox].lock); + + /* Outbound Msg Buffer allocation based on + the number of maximum descriptor entries */ + for (i = 0; i < entries; i++) { + priv->omsg_ring[mbox].omq_base[i] = + dma_alloc_coherent( + &priv->pdev->dev, TSI721_MSG_BUFFER_SIZE, + &priv->omsg_ring[mbox].omq_phys[i], + GFP_KERNEL); + if (priv->omsg_ring[mbox].omq_base[i] == NULL) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate OB MSG data buffer for" + " MBOX%d\n", mbox); + rc = -ENOMEM; + goto out_buf; + } + } + + /* Outbound message descriptor allocation */ + priv->omsg_ring[mbox].omd_base = dma_alloc_coherent( + &priv->pdev->dev, + (entries + 1) * sizeof(struct tsi721_omsg_desc), + &priv->omsg_ring[mbox].omd_phys, GFP_KERNEL); + if (priv->omsg_ring[mbox].omd_base == NULL) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate OB MSG descriptor memory " + "for MBOX%d\n", mbox); + rc = -ENOMEM; + goto out_buf; + } + + priv->omsg_ring[mbox].tx_slot = 0; + + /* Outbound message descriptor status FIFO allocation */ + priv->omsg_ring[mbox].sts_size = roundup_pow_of_two(entries + 1); + priv->omsg_ring[mbox].sts_base = dma_alloc_coherent(&priv->pdev->dev, + priv->omsg_ring[mbox].sts_size * + sizeof(struct tsi721_dma_sts), + &priv->omsg_ring[mbox].sts_phys, GFP_KERNEL); + if (priv->omsg_ring[mbox].sts_base == NULL) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate OB MSG descriptor status FIFO " + "for MBOX%d\n", mbox); + rc = -ENOMEM; + goto out_desc; + } + + memset(priv->omsg_ring[mbox].sts_base, 0, + entries * sizeof(struct tsi721_dma_sts)); + + /* + * Configure Outbound Messaging Engine + */ + + /* Setup Outbound Message descriptor pointer */ + iowrite32(((u64)priv->omsg_ring[mbox].omd_phys >> 32), + priv->regs + TSI721_OBDMAC_DPTRH(mbox)); + iowrite32(((u64)priv->omsg_ring[mbox].omd_phys & + TSI721_OBDMAC_DPTRL_MASK), + priv->regs + TSI721_OBDMAC_DPTRL(mbox)); + + /* Setup Outbound Message descriptor status FIFO */ + iowrite32(((u64)priv->omsg_ring[mbox].sts_phys >> 32), + priv->regs + TSI721_OBDMAC_DSBH(mbox)); + iowrite32(((u64)priv->omsg_ring[mbox].sts_phys & + TSI721_OBDMAC_DSBL_MASK), + priv->regs + TSI721_OBDMAC_DSBL(mbox)); + iowrite32(TSI721_DMAC_DSSZ_SIZE(priv->omsg_ring[mbox].sts_size), + priv->regs + (u32)TSI721_OBDMAC_DSSZ(mbox)); + + /* Enable interrupts */ + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + /* Request interrupt service if we are in MSI-X mode */ + rc = request_irq( + priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector, + tsi721_omsg_msix, 0, + priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name, + (void *)mport); + + if (rc) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate MSI-X interrupt for " + "OBOX%d-DONE\n", mbox); + goto out_stat; + } + + rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector, + tsi721_omsg_msix, 0, + priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name, + (void *)mport); + + if (rc) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate MSI-X interrupt for " + "MBOX%d-INT\n", mbox); + free_irq( + priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector, + (void *)mport); + goto out_stat; + } + } +#endif /* CONFIG_PCI_MSI */ + + tsi721_omsg_interrupt_enable(priv, mbox, TSI721_OBDMAC_INT_ALL); + + /* Initialize Outbound Message descriptors ring */ + bd_ptr = priv->omsg_ring[mbox].omd_base; + bd_ptr[entries].type_id = cpu_to_le32(DTYPE5 << 29); + bd_ptr[entries].msg_info = 0; + bd_ptr[entries].next_lo = + cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys & + TSI721_OBDMAC_DPTRL_MASK); + bd_ptr[entries].next_hi = + cpu_to_le32((u64)priv->omsg_ring[mbox].omd_phys >> 32); + priv->omsg_ring[mbox].wr_count = 0; + mb(); + + /* Initialize Outbound Message engine */ + iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox)); + ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox)); + udelay(10); + + priv->omsg_init[mbox] = 1; + + return 0; + +#ifdef CONFIG_PCI_MSI +out_stat: + dma_free_coherent(&priv->pdev->dev, + priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts), + priv->omsg_ring[mbox].sts_base, + priv->omsg_ring[mbox].sts_phys); + + priv->omsg_ring[mbox].sts_base = NULL; +#endif /* CONFIG_PCI_MSI */ + +out_desc: + dma_free_coherent(&priv->pdev->dev, + (entries + 1) * sizeof(struct tsi721_omsg_desc), + priv->omsg_ring[mbox].omd_base, + priv->omsg_ring[mbox].omd_phys); + + priv->omsg_ring[mbox].omd_base = NULL; + +out_buf: + for (i = 0; i < priv->omsg_ring[mbox].size; i++) { + if (priv->omsg_ring[mbox].omq_base[i]) { + dma_free_coherent(&priv->pdev->dev, + TSI721_MSG_BUFFER_SIZE, + priv->omsg_ring[mbox].omq_base[i], + priv->omsg_ring[mbox].omq_phys[i]); + + priv->omsg_ring[mbox].omq_base[i] = NULL; + } + } + +out: + return rc; +} + +/** + * tsi721_close_outb_mbox - Close Tsi721 outbound mailbox + * @mport: Master port implementing the outbound message unit + * @mbox: Mailbox to close + */ +static void tsi721_close_outb_mbox(struct rio_mport *mport, int mbox) +{ + struct tsi721_device *priv = mport->priv; + u32 i; + + if (!priv->omsg_init[mbox]) + return; + priv->omsg_init[mbox] = 0; + + /* Disable Interrupts */ + + tsi721_omsg_interrupt_disable(priv, mbox, TSI721_OBDMAC_INT_ALL); + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector, + (void *)mport); + free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector, + (void *)mport); + } +#endif /* CONFIG_PCI_MSI */ + + /* Free OMSG Descriptor Status FIFO */ + dma_free_coherent(&priv->pdev->dev, + priv->omsg_ring[mbox].sts_size * sizeof(struct tsi721_dma_sts), + priv->omsg_ring[mbox].sts_base, + priv->omsg_ring[mbox].sts_phys); + + priv->omsg_ring[mbox].sts_base = NULL; + + /* Free OMSG descriptors */ + dma_free_coherent(&priv->pdev->dev, + (priv->omsg_ring[mbox].size + 1) * + sizeof(struct tsi721_omsg_desc), + priv->omsg_ring[mbox].omd_base, + priv->omsg_ring[mbox].omd_phys); + + priv->omsg_ring[mbox].omd_base = NULL; + + /* Free message buffers */ + for (i = 0; i < priv->omsg_ring[mbox].size; i++) { + if (priv->omsg_ring[mbox].omq_base[i]) { + dma_free_coherent(&priv->pdev->dev, + TSI721_MSG_BUFFER_SIZE, + priv->omsg_ring[mbox].omq_base[i], + priv->omsg_ring[mbox].omq_phys[i]); + + priv->omsg_ring[mbox].omq_base[i] = NULL; + } + } +} + +/** + * tsi721_imsg_handler - Inbound Message Interrupt Handler + * @priv: pointer to tsi721 private data + * @ch: inbound message channel number to service + * + * Services channel interrupts from inbound messaging engine. + */ +static void tsi721_imsg_handler(struct tsi721_device *priv, int ch) +{ + u32 mbox = ch - 4; + u32 imsg_int; + + spin_lock(&priv->imsg_ring[mbox].lock); + + imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch)); + + if (imsg_int & TSI721_IBDMAC_INT_SRTO) + dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n", + mbox); + + if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR) + dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n", + mbox); + + if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW) + dev_info(&priv->pdev->dev, + "IB MBOX%d IB free queue low\n", mbox); + + /* Clear IB channel interrupts */ + iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch)); + + /* If an IB Msg is received notify the upper layer */ + if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV && + priv->mport->inb_msg[mbox].mcback) + priv->mport->inb_msg[mbox].mcback(priv->mport, + priv->imsg_ring[mbox].dev_id, mbox, -1); + + if (!(priv->flags & TSI721_USING_MSIX)) { + u32 ch_inte; + + /* Re-enable channel interrupts */ + ch_inte = ioread32(priv->regs + TSI721_DEV_CHAN_INTE); + ch_inte |= TSI721_INT_IMSG_CHAN(ch); + iowrite32(ch_inte, priv->regs + TSI721_DEV_CHAN_INTE); + } + + spin_unlock(&priv->imsg_ring[mbox].lock); +} + +/** + * tsi721_open_inb_mbox - Initialize Tsi721 inbound mailbox + * @mport: Master port implementing the Inbound Messaging Engine + * @dev_id: Device specific pointer to pass on event + * @mbox: Mailbox to open + * @entries: Number of entries in the inbound mailbox ring + */ +static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id, + int mbox, int entries) +{ + struct tsi721_device *priv = mport->priv; + int ch = mbox + 4; + int i; + u64 *free_ptr; + int rc = 0; + + if ((entries < TSI721_IMSGD_MIN_RING_SIZE) || + (entries > TSI721_IMSGD_RING_SIZE) || + (!is_power_of_2(entries)) || mbox >= RIO_MAX_MBOX) { + rc = -EINVAL; + goto out; + } + + /* Initialize IB Messaging Ring */ + priv->imsg_ring[mbox].dev_id = dev_id; + priv->imsg_ring[mbox].size = entries; + priv->imsg_ring[mbox].rx_slot = 0; + priv->imsg_ring[mbox].desc_rdptr = 0; + priv->imsg_ring[mbox].fq_wrptr = 0; + for (i = 0; i < priv->imsg_ring[mbox].size; i++) + priv->imsg_ring[mbox].imq_base[i] = NULL; + spin_lock_init(&priv->imsg_ring[mbox].lock); + + /* Allocate buffers for incoming messages */ + priv->imsg_ring[mbox].buf_base = + dma_alloc_coherent(&priv->pdev->dev, + entries * TSI721_MSG_BUFFER_SIZE, + &priv->imsg_ring[mbox].buf_phys, + GFP_KERNEL); + + if (priv->imsg_ring[mbox].buf_base == NULL) { + dev_err(&priv->pdev->dev, + "Failed to allocate buffers for IB MBOX%d\n", mbox); + rc = -ENOMEM; + goto out; + } + + /* Allocate memory for circular free list */ + priv->imsg_ring[mbox].imfq_base = + dma_alloc_coherent(&priv->pdev->dev, + entries * 8, + &priv->imsg_ring[mbox].imfq_phys, + GFP_KERNEL); + + if (priv->imsg_ring[mbox].imfq_base == NULL) { + dev_err(&priv->pdev->dev, + "Failed to allocate free queue for IB MBOX%d\n", mbox); + rc = -ENOMEM; + goto out_buf; + } + + /* Allocate memory for Inbound message descriptors */ + priv->imsg_ring[mbox].imd_base = + dma_alloc_coherent(&priv->pdev->dev, + entries * sizeof(struct tsi721_imsg_desc), + &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL); + + if (priv->imsg_ring[mbox].imd_base == NULL) { + dev_err(&priv->pdev->dev, + "Failed to allocate descriptor memory for IB MBOX%d\n", + mbox); + rc = -ENOMEM; + goto out_dma; + } + + /* Fill free buffer pointer list */ + free_ptr = priv->imsg_ring[mbox].imfq_base; + for (i = 0; i < entries; i++) + free_ptr[i] = cpu_to_le64( + (u64)(priv->imsg_ring[mbox].buf_phys) + + i * 0x1000); + + mb(); + + /* + * For mapping of inbound SRIO Messages into appropriate queues we need + * to set Inbound Device ID register in the messaging engine. We do it + * once when first inbound mailbox is requested. + */ + if (!(priv->flags & TSI721_IMSGID_SET)) { + iowrite32((u32)priv->mport->host_deviceid, + priv->regs + TSI721_IB_DEVID); + priv->flags |= TSI721_IMSGID_SET; + } + + /* + * Configure Inbound Messaging channel (ch = mbox + 4) + */ + + /* Setup Inbound Message free queue */ + iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys >> 32), + priv->regs + TSI721_IBDMAC_FQBH(ch)); + iowrite32(((u64)priv->imsg_ring[mbox].imfq_phys & + TSI721_IBDMAC_FQBL_MASK), + priv->regs+TSI721_IBDMAC_FQBL(ch)); + iowrite32(TSI721_DMAC_DSSZ_SIZE(entries), + priv->regs + TSI721_IBDMAC_FQSZ(ch)); + + /* Setup Inbound Message descriptor queue */ + iowrite32(((u64)priv->imsg_ring[mbox].imd_phys >> 32), + priv->regs + TSI721_IBDMAC_DQBH(ch)); + iowrite32(((u32)priv->imsg_ring[mbox].imd_phys & + (u32)TSI721_IBDMAC_DQBL_MASK), + priv->regs+TSI721_IBDMAC_DQBL(ch)); + iowrite32(TSI721_DMAC_DSSZ_SIZE(entries), + priv->regs + TSI721_IBDMAC_DQSZ(ch)); + + /* Enable interrupts */ + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + /* Request interrupt service if we are in MSI-X mode */ + rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector, + tsi721_imsg_msix, 0, + priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name, + (void *)mport); + + if (rc) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate MSI-X interrupt for " + "IBOX%d-DONE\n", mbox); + goto out_desc; + } + + rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector, + tsi721_imsg_msix, 0, + priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name, + (void *)mport); + + if (rc) { + dev_dbg(&priv->pdev->dev, + "Unable to allocate MSI-X interrupt for " + "IBOX%d-INT\n", mbox); + free_irq( + priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector, + (void *)mport); + goto out_desc; + } + } +#endif /* CONFIG_PCI_MSI */ + + tsi721_imsg_interrupt_enable(priv, ch, TSI721_IBDMAC_INT_ALL); + + /* Initialize Inbound Message Engine */ + iowrite32(TSI721_IBDMAC_CTL_INIT, priv->regs + TSI721_IBDMAC_CTL(ch)); + ioread32(priv->regs + TSI721_IBDMAC_CTL(ch)); + udelay(10); + priv->imsg_ring[mbox].fq_wrptr = entries - 1; + iowrite32(entries - 1, priv->regs + TSI721_IBDMAC_FQWP(ch)); + + priv->imsg_init[mbox] = 1; + return 0; + +#ifdef CONFIG_PCI_MSI +out_desc: + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc), + priv->imsg_ring[mbox].imd_base, + priv->imsg_ring[mbox].imd_phys); + + priv->imsg_ring[mbox].imd_base = NULL; +#endif /* CONFIG_PCI_MSI */ + +out_dma: + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * 8, + priv->imsg_ring[mbox].imfq_base, + priv->imsg_ring[mbox].imfq_phys); + + priv->imsg_ring[mbox].imfq_base = NULL; + +out_buf: + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE, + priv->imsg_ring[mbox].buf_base, + priv->imsg_ring[mbox].buf_phys); + + priv->imsg_ring[mbox].buf_base = NULL; + +out: + return rc; +} + +/** + * tsi721_close_inb_mbox - Shut down Tsi721 inbound mailbox + * @mport: Master port implementing the Inbound Messaging Engine + * @mbox: Mailbox to close + */ +static void tsi721_close_inb_mbox(struct rio_mport *mport, int mbox) +{ + struct tsi721_device *priv = mport->priv; + u32 rx_slot; + int ch = mbox + 4; + + if (!priv->imsg_init[mbox]) /* mbox isn't initialized yet */ + return; + priv->imsg_init[mbox] = 0; + + /* Disable Inbound Messaging Engine */ + + /* Disable Interrupts */ + tsi721_imsg_interrupt_disable(priv, ch, TSI721_OBDMAC_INT_MASK); + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector, + (void *)mport); + free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector, + (void *)mport); + } +#endif /* CONFIG_PCI_MSI */ + + /* Clear Inbound Buffer Queue */ + for (rx_slot = 0; rx_slot < priv->imsg_ring[mbox].size; rx_slot++) + priv->imsg_ring[mbox].imq_base[rx_slot] = NULL; + + /* Free memory allocated for message buffers */ + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * TSI721_MSG_BUFFER_SIZE, + priv->imsg_ring[mbox].buf_base, + priv->imsg_ring[mbox].buf_phys); + + priv->imsg_ring[mbox].buf_base = NULL; + + /* Free memory allocated for free pointr list */ + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * 8, + priv->imsg_ring[mbox].imfq_base, + priv->imsg_ring[mbox].imfq_phys); + + priv->imsg_ring[mbox].imfq_base = NULL; + + /* Free memory allocated for RX descriptors */ + dma_free_coherent(&priv->pdev->dev, + priv->imsg_ring[mbox].size * sizeof(struct tsi721_imsg_desc), + priv->imsg_ring[mbox].imd_base, + priv->imsg_ring[mbox].imd_phys); + + priv->imsg_ring[mbox].imd_base = NULL; +} + +/** + * tsi721_add_inb_buffer - Add buffer to the Tsi721 inbound message queue + * @mport: Master port implementing the Inbound Messaging Engine + * @mbox: Inbound mailbox number + * @buf: Buffer to add to inbound queue + */ +static int tsi721_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf) +{ + struct tsi721_device *priv = mport->priv; + u32 rx_slot; + int rc = 0; + + rx_slot = priv->imsg_ring[mbox].rx_slot; + if (priv->imsg_ring[mbox].imq_base[rx_slot]) { + dev_err(&priv->pdev->dev, + "Error adding inbound buffer %d, buffer exists\n", + rx_slot); + rc = -EINVAL; + goto out; + } + + priv->imsg_ring[mbox].imq_base[rx_slot] = buf; + + if (++priv->imsg_ring[mbox].rx_slot == priv->imsg_ring[mbox].size) + priv->imsg_ring[mbox].rx_slot = 0; + +out: + return rc; +} + +/** + * tsi721_get_inb_message - Fetch inbound message from the Tsi721 MSG Queue + * @mport: Master port implementing the Inbound Messaging Engine + * @mbox: Inbound mailbox number + * + * Returns pointer to the message on success or NULL on failure. + */ +static void *tsi721_get_inb_message(struct rio_mport *mport, int mbox) +{ + struct tsi721_device *priv = mport->priv; + struct tsi721_imsg_desc *desc; + u32 rx_slot; + void *rx_virt = NULL; + u64 rx_phys; + void *buf = NULL; + u64 *free_ptr; + int ch = mbox + 4; + int msg_size; + + if (!priv->imsg_init[mbox]) + return NULL; + + desc = priv->imsg_ring[mbox].imd_base; + desc += priv->imsg_ring[mbox].desc_rdptr; + + if (!(le32_to_cpu(desc->msg_info) & TSI721_IMD_HO)) + goto out; + + rx_slot = priv->imsg_ring[mbox].rx_slot; + while (priv->imsg_ring[mbox].imq_base[rx_slot] == NULL) { + if (++rx_slot == priv->imsg_ring[mbox].size) + rx_slot = 0; + } + + rx_phys = ((u64)le32_to_cpu(desc->bufptr_hi) << 32) | + le32_to_cpu(desc->bufptr_lo); + + rx_virt = priv->imsg_ring[mbox].buf_base + + (rx_phys - (u64)priv->imsg_ring[mbox].buf_phys); + + buf = priv->imsg_ring[mbox].imq_base[rx_slot]; + msg_size = le32_to_cpu(desc->msg_info) & TSI721_IMD_BCOUNT; + if (msg_size == 0) + msg_size = RIO_MAX_MSG_SIZE; + + memcpy(buf, rx_virt, msg_size); + priv->imsg_ring[mbox].imq_base[rx_slot] = NULL; + + desc->msg_info &= cpu_to_le32(~TSI721_IMD_HO); + if (++priv->imsg_ring[mbox].desc_rdptr == priv->imsg_ring[mbox].size) + priv->imsg_ring[mbox].desc_rdptr = 0; + + iowrite32(priv->imsg_ring[mbox].desc_rdptr, + priv->regs + TSI721_IBDMAC_DQRP(ch)); + + /* Return free buffer into the pointer list */ + free_ptr = priv->imsg_ring[mbox].imfq_base; + free_ptr[priv->imsg_ring[mbox].fq_wrptr] = cpu_to_le64(rx_phys); + + if (++priv->imsg_ring[mbox].fq_wrptr == priv->imsg_ring[mbox].size) + priv->imsg_ring[mbox].fq_wrptr = 0; + + iowrite32(priv->imsg_ring[mbox].fq_wrptr, + priv->regs + TSI721_IBDMAC_FQWP(ch)); +out: + return buf; +} + +/** + * tsi721_messages_init - Initialization of Messaging Engine + * @priv: pointer to tsi721 private data + * + * Configures Tsi721 messaging engine. + */ +static int tsi721_messages_init(struct tsi721_device *priv) +{ + int ch; + + iowrite32(0, priv->regs + TSI721_SMSG_ECC_LOG); + iowrite32(0, priv->regs + TSI721_RETRY_GEN_CNT); + iowrite32(0, priv->regs + TSI721_RETRY_RX_CNT); + + /* Set SRIO Message Request/Response Timeout */ + iowrite32(TSI721_RQRPTO_VAL, priv->regs + TSI721_RQRPTO); + + /* Initialize Inbound Messaging Engine Registers */ + for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++) { + /* Clear interrupt bits */ + iowrite32(TSI721_IBDMAC_INT_MASK, + priv->regs + TSI721_IBDMAC_INT(ch)); + /* Clear Status */ + iowrite32(0, priv->regs + TSI721_IBDMAC_STS(ch)); + + iowrite32(TSI721_SMSG_ECC_COR_LOG_MASK, + priv->regs + TSI721_SMSG_ECC_COR_LOG(ch)); + iowrite32(TSI721_SMSG_ECC_NCOR_MASK, + priv->regs + TSI721_SMSG_ECC_NCOR(ch)); + } + + return 0; +} + +/** + * tsi721_disable_ints - disables all device interrupts + * @priv: pointer to tsi721 private data + */ +static void tsi721_disable_ints(struct tsi721_device *priv) +{ + int ch; + + /* Disable all device level interrupts */ + iowrite32(0, priv->regs + TSI721_DEV_INTE); + + /* Disable all Device Channel interrupts */ + iowrite32(0, priv->regs + TSI721_DEV_CHAN_INTE); + + /* Disable all Inbound Msg Channel interrupts */ + for (ch = 0; ch < TSI721_IMSG_CHNUM; ch++) + iowrite32(0, priv->regs + TSI721_IBDMAC_INTE(ch)); + + /* Disable all Outbound Msg Channel interrupts */ + for (ch = 0; ch < TSI721_OMSG_CHNUM; ch++) + iowrite32(0, priv->regs + TSI721_OBDMAC_INTE(ch)); + + /* Disable all general messaging interrupts */ + iowrite32(0, priv->regs + TSI721_SMSG_INTE); + + /* Disable all BDMA Channel interrupts */ + for (ch = 0; ch < TSI721_DMA_MAXCH; ch++) + iowrite32(0, priv->regs + TSI721_DMAC_INTE(ch)); + + /* Disable all general BDMA interrupts */ + iowrite32(0, priv->regs + TSI721_BDMA_INTE); + + /* Disable all SRIO Channel interrupts */ + for (ch = 0; ch < TSI721_SRIO_MAXCH; ch++) + iowrite32(0, priv->regs + TSI721_SR_CHINTE(ch)); + + /* Disable all general SR2PC interrupts */ + iowrite32(0, priv->regs + TSI721_SR2PC_GEN_INTE); + + /* Disable all PC2SR interrupts */ + iowrite32(0, priv->regs + TSI721_PC2SR_INTE); + + /* Disable all I2C interrupts */ + iowrite32(0, priv->regs + TSI721_I2C_INT_ENABLE); + + /* Disable SRIO MAC interrupts */ + iowrite32(0, priv->regs + TSI721_RIO_EM_INT_ENABLE); + iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN); +} + +/** + * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port + * @priv: pointer to tsi721 private data + * + * Configures Tsi721 as RapidIO master port. + */ +static int __devinit tsi721_setup_mport(struct tsi721_device *priv) +{ + struct pci_dev *pdev = priv->pdev; + int err = 0; + struct rio_ops *ops; + + struct rio_mport *mport; + + ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL); + if (!ops) { + dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n"); + return -ENOMEM; + } + + ops->lcread = tsi721_lcread; + ops->lcwrite = tsi721_lcwrite; + ops->cread = tsi721_cread_dma; + ops->cwrite = tsi721_cwrite_dma; + ops->dsend = tsi721_dsend; + ops->open_inb_mbox = tsi721_open_inb_mbox; + ops->close_inb_mbox = tsi721_close_inb_mbox; + ops->open_outb_mbox = tsi721_open_outb_mbox; + ops->close_outb_mbox = tsi721_close_outb_mbox; + ops->add_outb_message = tsi721_add_outb_message; + ops->add_inb_buffer = tsi721_add_inb_buffer; + ops->get_inb_message = tsi721_get_inb_message; + + mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL); + if (!mport) { + kfree(ops); + dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n"); + return -ENOMEM; + } + + mport->ops = ops; + mport->index = 0; + mport->sys_size = 0; /* small system */ + mport->phy_type = RIO_PHY_SERIAL; + mport->priv = (void *)priv; + mport->phys_efptr = 0x100; + + INIT_LIST_HEAD(&mport->dbells); + + rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff); + rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0); + rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0); + strcpy(mport->name, "Tsi721 mport"); + + /* Hook up interrupt handler */ + +#ifdef CONFIG_PCI_MSI + if (!tsi721_enable_msix(priv)) + priv->flags |= TSI721_USING_MSIX; + else if (!pci_enable_msi(pdev)) + priv->flags |= TSI721_USING_MSI; + else + dev_info(&pdev->dev, + "MSI/MSI-X is not available. Using legacy INTx.\n"); +#endif /* CONFIG_PCI_MSI */ + + err = tsi721_request_irq(mport); + + if (!err) { + tsi721_interrupts_init(priv); + ops->pwenable = tsi721_pw_enable; + } else + dev_err(&pdev->dev, "Unable to get assigned PCI IRQ " + "vector %02X err=0x%x\n", pdev->irq, err); + + /* Enable SRIO link */ + iowrite32(ioread32(priv->regs + TSI721_DEVCTL) | + TSI721_DEVCTL_SRBOOT_CMPL, + priv->regs + TSI721_DEVCTL); + + rio_register_mport(mport); + priv->mport = mport; + + if (mport->host_deviceid >= 0) + iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER | + RIO_PORT_GEN_DISCOVERED, + priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR)); + else + iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR)); + + return 0; +} + +static int __devinit tsi721_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct tsi721_device *priv; + int i; + int err; + u32 regval; + + priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL); + if (priv == NULL) { + dev_err(&pdev->dev, "Failed to allocate memory for device\n"); + err = -ENOMEM; + goto err_exit; + } + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + goto err_clean; + } + + priv->pdev = pdev; + +#ifdef DEBUG + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", + i, (unsigned long long)pci_resource_start(pdev, i), + (unsigned long)pci_resource_len(pdev, i), + pci_resource_flags(pdev, i)); + } +#endif + /* + * Verify BAR configuration + */ + + /* BAR_0 (registers) must be 512KB+ in 32-bit address space */ + if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) || + pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 || + pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) { + dev_err(&pdev->dev, + "Missing or misconfigured CSR BAR0, aborting.\n"); + err = -ENODEV; + goto err_disable_pdev; + } + + /* BAR_1 (outbound doorbells) must be 16MB+ in 32-bit address space */ + if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) || + pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 || + pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) { + dev_err(&pdev->dev, + "Missing or misconfigured Doorbell BAR1, aborting.\n"); + err = -ENODEV; + goto err_disable_pdev; + } + + /* + * BAR_2 and BAR_4 (outbound translation) must be in 64-bit PCIe address + * space. + * NOTE: BAR_2 and BAR_4 are not used by this version of driver. + * It may be a good idea to keep them disabled using HW configuration + * to save PCI memory space. + */ + if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) && + (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) { + dev_info(&pdev->dev, "Outbound BAR2 is not used but enabled.\n"); + } + + if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) && + (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) { + dev_info(&pdev->dev, "Outbound BAR4 is not used but enabled.\n"); + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, " + "aborting.\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + priv->regs = pci_ioremap_bar(pdev, BAR_0); + if (!priv->regs) { + dev_err(&pdev->dev, + "Unable to map device registers space, aborting\n"); + err = -ENOMEM; + goto err_free_res; + } + + priv->odb_base = pci_ioremap_bar(pdev, BAR_1); + if (!priv->odb_base) { + dev_err(&pdev->dev, + "Unable to map outbound doorbells space, aborting\n"); + err = -ENOMEM; + goto err_unmap_bars; + } + + /* Configure DMA attributes. */ + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + dev_info(&pdev->dev, "Unable to set DMA mask\n"); + goto err_unmap_bars; + } + + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) + dev_info(&pdev->dev, "Unable to set consistent DMA mask\n"); + } else { + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + dev_info(&pdev->dev, "Unable to set consistent DMA mask\n"); + } + + /* Clear "no snoop" and "relaxed ordering" bits. */ + pci_read_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, ®val); + regval &= ~(PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN); + pci_write_config_dword(pdev, 0x40 + PCI_EXP_DEVCTL, regval); + + /* + * FIXUP: correct offsets of MSI-X tables in the MSI-X Capability Block + */ + pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0x01); + pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXTBL, + TSI721_MSIXTBL_OFFSET); + pci_write_config_dword(pdev, TSI721_PCIECFG_MSIXPBA, + TSI721_MSIXPBA_OFFSET); + pci_write_config_dword(pdev, TSI721_PCIECFG_EPCTL, 0); + /* End of FIXUP */ + + tsi721_disable_ints(priv); + + tsi721_init_pc2sr_mapping(priv); + tsi721_init_sr2pc_mapping(priv); + + if (tsi721_bdma_init(priv)) { + dev_err(&pdev->dev, "BDMA initialization failed, aborting\n"); + err = -ENOMEM; + goto err_unmap_bars; + } + + err = tsi721_doorbell_init(priv); + if (err) + goto err_free_bdma; + + tsi721_port_write_init(priv); + + err = tsi721_messages_init(priv); + if (err) + goto err_free_consistent; + + err = tsi721_setup_mport(priv); + if (err) + goto err_free_consistent; + + return 0; + +err_free_consistent: + tsi721_doorbell_free(priv); +err_free_bdma: + tsi721_bdma_free(priv); +err_unmap_bars: + if (priv->regs) + iounmap(priv->regs); + if (priv->odb_base) + iounmap(priv->odb_base); +err_free_res: + pci_release_regions(pdev); + pci_clear_master(pdev); +err_disable_pdev: + pci_disable_device(pdev); +err_clean: + kfree(priv); +err_exit: + return err; +} + +static DEFINE_PCI_DEVICE_TABLE(tsi721_pci_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) }, + { 0, } /* terminate list */ +}; + +MODULE_DEVICE_TABLE(pci, tsi721_pci_tbl); + +static struct pci_driver tsi721_driver = { + .name = "tsi721", + .id_table = tsi721_pci_tbl, + .probe = tsi721_probe, +}; + +static int __init tsi721_init(void) +{ + return pci_register_driver(&tsi721_driver); +} + +static void __exit tsi721_exit(void) +{ + pci_unregister_driver(&tsi721_driver); +} + +device_initcall(tsi721_init); diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h new file mode 100644 index 0000000..58be4de --- /dev/null +++ b/drivers/rapidio/devices/tsi721.h @@ -0,0 +1,766 @@ +/* + * Tsi721 PCIExpress-to-SRIO bridge definitions + * + * Copyright 2011, Integrated Device Technology, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __TSI721_H +#define __TSI721_H + +#define DRV_NAME "tsi721" + +#define DEFAULT_HOPCOUNT 0xff +#define DEFAULT_DESTID 0xff + +/* PCI device ID */ +#define PCI_DEVICE_ID_TSI721 0x80ab + +#define BAR_0 0 +#define BAR_1 1 +#define BAR_2 2 +#define BAR_4 4 + +#define TSI721_PC2SR_BARS 2 +#define TSI721_PC2SR_WINS 8 +#define TSI721_PC2SR_ZONES 8 +#define TSI721_MAINT_WIN 0 /* Window for outbound maintenance requests */ +#define IDB_QUEUE 0 /* Inbound Doorbell Queue to use */ +#define IDB_QSIZE 512 /* Inbound Doorbell Queue size */ + +/* Memory space sizes */ +#define TSI721_REG_SPACE_SIZE (512 * 1024) /* 512K */ +#define TSI721_DB_WIN_SIZE (16 * 1024 * 1024) /* 16MB */ + +#define RIO_TT_CODE_8 0x00000000 +#define RIO_TT_CODE_16 0x00000001 + +#define TSI721_DMA_MAXCH 8 +#define TSI721_DMA_MINSTSSZ 32 +#define TSI721_DMA_STSBLKSZ 8 + +#define TSI721_SRIO_MAXCH 8 + +#define DBELL_SID(buf) (((u8)buf[2] << 8) | (u8)buf[3]) +#define DBELL_TID(buf) (((u8)buf[4] << 8) | (u8)buf[5]) +#define DBELL_INF(buf) (((u8)buf[0] << 8) | (u8)buf[1]) + +#define TSI721_RIO_PW_MSG_SIZE 16 /* Tsi721 saves only 16 bytes of PW msg */ + +/* Register definitions */ + +/* + * Registers in PCIe configuration space + */ + +#define TSI721_PCIECFG_MSIXTBL 0x0a4 +#define TSI721_MSIXTBL_OFFSET 0x2c000 +#define TSI721_PCIECFG_MSIXPBA 0x0a8 +#define TSI721_MSIXPBA_OFFSET 0x2a000 +#define TSI721_PCIECFG_EPCTL 0x400 + +/* + * Event Management Registers + */ + +#define TSI721_RIO_EM_INT_STAT 0x10910 +#define TSI721_RIO_EM_INT_STAT_PW_RX 0x00010000 + +#define TSI721_RIO_EM_INT_ENABLE 0x10914 +#define TSI721_RIO_EM_INT_ENABLE_PW_RX 0x00010000 + +#define TSI721_RIO_EM_DEV_INT_EN 0x10930 +#define TSI721_RIO_EM_DEV_INT_EN_INT 0x00000001 + +/* + * Port-Write Block Registers + */ + +#define TSI721_RIO_PW_CTL 0x10a04 +#define TSI721_RIO_PW_CTL_PW_TIMER 0xf0000000 +#define TSI721_RIO_PW_CTL_PWT_DIS (0 << 28) +#define TSI721_RIO_PW_CTL_PWT_103 (1 << 28) +#define TSI721_RIO_PW_CTL_PWT_205 (1 << 29) +#define TSI721_RIO_PW_CTL_PWT_410 (1 << 30) +#define TSI721_RIO_PW_CTL_PWT_820 (1 << 31) +#define TSI721_RIO_PW_CTL_PWC_MODE 0x01000000 +#define TSI721_RIO_PW_CTL_PWC_CONT 0x00000000 +#define TSI721_RIO_PW_CTL_PWC_REL 0x01000000 + +#define TSI721_RIO_PW_RX_STAT 0x10a10 +#define TSI721_RIO_PW_RX_STAT_WR_SIZE 0x0000f000 +#define TSI_RIO_PW_RX_STAT_WDPTR 0x00000100 +#define TSI721_RIO_PW_RX_STAT_PW_SHORT 0x00000008 +#define TSI721_RIO_PW_RX_STAT_PW_TRUNC 0x00000004 +#define TSI721_RIO_PW_RX_STAT_PW_DISC 0x00000002 +#define TSI721_RIO_PW_RX_STAT_PW_VAL 0x00000001 + +#define TSI721_RIO_PW_RX_CAPT(x) (0x10a20 + (x)*4) + +/* + * Inbound Doorbells + */ + +#define TSI721_IDB_ENTRY_SIZE 64 + +#define TSI721_IDQ_CTL(x) (0x20000 + (x) * 1000) +#define TSI721_IDQ_SUSPEND 0x00000002 +#define TSI721_IDQ_INIT 0x00000001 + +#define TSI721_IDQ_STS(x) (0x20004 + (x) * 1000) +#define TSI721_IDQ_RUN 0x00200000 + +#define TSI721_IDQ_MASK(x) (0x20008 + (x) * 1000) +#define TSI721_IDQ_MASK_MASK 0xffff0000 +#define TSI721_IDQ_MASK_PATT 0x0000ffff + +#define TSI721_IDQ_RP(x) (0x2000c + (x) * 1000) +#define TSI721_IDQ_RP_PTR 0x0007ffff + +#define TSI721_IDQ_WP(x) (0x20010 + (x) * 1000) +#define TSI721_IDQ_WP_PTR 0x0007ffff + +#define TSI721_IDQ_BASEL(x) (0x20014 + (x) * 1000) +#define TSI721_IDQ_BASEL_ADDR 0xffffffc0 +#define TSI721_IDQ_BASEU(x) (0x20018 + (x) * 1000) +#define TSI721_IDQ_SIZE(x) (0x2001c + (x) * 1000) +#define TSI721_IDQ_SIZE_VAL(size) (__fls(size) - 4) +#define TSI721_IDQ_SIZE_MIN 512 +#define TSI721_IDQ_SIZE_MAX (512 * 1024) + +#define TSI721_SR_CHINT(x) (0x20040 + (x) * 1000) +#define TSI721_SR_CHINTE(x) (0x20044 + (x) * 1000) +#define TSI721_SR_CHINTSET(x) (0x20048 + (x) * 1000) +#define TSI721_SR_CHINT_ODBOK 0x00000020 +#define TSI721_SR_CHINT_IDBQRCV 0x00000010 +#define TSI721_SR_CHINT_SUSP 0x00000008 +#define TSI721_SR_CHINT_ODBTO 0x00000004 +#define TSI721_SR_CHINT_ODBRTRY 0x00000002 +#define TSI721_SR_CHINT_ODBERR 0x00000001 +#define TSI721_SR_CHINT_ALL 0x0000003f + +#define TSI721_IBWIN_NUM 8 + +#define TSI721_IBWINLB(x) (0x29000 + (x) * 20) +#define TSI721_IBWINLB_BA 0xfffff000 +#define TSI721_IBWINLB_WEN 0x00000001 + +#define TSI721_SR2PC_GEN_INTE 0x29800 +#define TSI721_SR2PC_PWE 0x29804 +#define TSI721_SR2PC_GEN_INT 0x29808 + +#define TSI721_DEV_INTE 0x29840 +#define TSI721_DEV_INT 0x29844 +#define TSI721_DEV_INTSET 0x29848 +#define TSI721_DEV_INT_SMSG_CH 0x00000800 +#define TSI721_DEV_INT_SMSG_NCH 0x00000400 +#define TSI721_DEV_INT_SR2PC_CH 0x00000200 +#define TSI721_DEV_INT_SRIO 0x00000020 + +#define TSI721_DEV_CHAN_INTE 0x2984c +#define TSI721_DEV_CHAN_INT 0x29850 + +#define TSI721_INT_SR2PC_CHAN_M 0xff000000 +#define TSI721_INT_SR2PC_CHAN(x) (1 << (24 + (x))) +#define TSI721_INT_IMSG_CHAN_M 0x00ff0000 +#define TSI721_INT_IMSG_CHAN(x) (1 << (16 + (x))) +#define TSI721_INT_OMSG_CHAN_M 0x0000ff00 +#define TSI721_INT_OMSG_CHAN(x) (1 << (8 + (x))) + +/* + * PC2SR block registers + */ +#define TSI721_OBWIN_NUM TSI721_PC2SR_WINS + +#define TSI721_OBWINLB(x) (0x40000 + (x) * 20) +#define TSI721_OBWINLB_BA 0xffff8000 +#define TSI721_OBWINLB_WEN 0x00000001 + +#define TSI721_OBWINUB(x) (0x40004 + (x) * 20) + +#define TSI721_OBWINSZ(x) (0x40008 + (x) * 20) +#define TSI721_OBWINSZ_SIZE 0x00001f00 +#define TSI721_OBWIN_SIZE(size) (__fls(size) - 15) + +#define TSI721_ZONE_SEL 0x41300 +#define TSI721_ZONE_SEL_RD_WRB 0x00020000 +#define TSI721_ZONE_SEL_GO 0x00010000 +#define TSI721_ZONE_SEL_WIN 0x00000038 +#define TSI721_ZONE_SEL_ZONE 0x00000007 + +#define TSI721_LUT_DATA0 0x41304 +#define TSI721_LUT_DATA0_ADD 0xfffff000 +#define TSI721_LUT_DATA0_RDTYPE 0x00000f00 +#define TSI721_LUT_DATA0_NREAD 0x00000100 +#define TSI721_LUT_DATA0_MNTRD 0x00000200 +#define TSI721_LUT_DATA0_RDCRF 0x00000020 +#define TSI721_LUT_DATA0_WRCRF 0x00000010 +#define TSI721_LUT_DATA0_WRTYPE 0x0000000f +#define TSI721_LUT_DATA0_NWR 0x00000001 +#define TSI721_LUT_DATA0_MNTWR 0x00000002 +#define TSI721_LUT_DATA0_NWR_R 0x00000004 + +#define TSI721_LUT_DATA1 0x41308 + +#define TSI721_LUT_DATA2 0x4130c +#define TSI721_LUT_DATA2_HC 0xff000000 +#define TSI721_LUT_DATA2_ADD65 0x000c0000 +#define TSI721_LUT_DATA2_TT 0x00030000 +#define TSI721_LUT_DATA2_DSTID 0x0000ffff + +#define TSI721_PC2SR_INTE 0x41310 + +#define TSI721_DEVCTL 0x48004 +#define TSI721_DEVCTL_SRBOOT_CMPL 0x00000004 + +#define TSI721_I2C_INT_ENABLE 0x49120 + +/* + * Block DMA Engine Registers + * x = 0..7 + */ + +#define TSI721_DMAC_DWRCNT(x) (0x51000 + (x) * 0x1000) +#define TSI721_DMAC_DRDCNT(x) (0x51004 + (x) * 0x1000) + +#define TSI721_DMAC_CTL(x) (0x51008 + (x) * 0x1000) +#define TSI721_DMAC_CTL_SUSP 0x00000002 +#define TSI721_DMAC_CTL_INIT 0x00000001 + +#define TSI721_DMAC_INT(x) (0x5100c + (x) * 0x1000) +#define TSI721_DMAC_INT_STFULL 0x00000010 +#define TSI721_DMAC_INT_DONE 0x00000008 +#define TSI721_DMAC_INT_SUSP 0x00000004 +#define TSI721_DMAC_INT_ERR 0x00000002 +#define TSI721_DMAC_INT_IOFDONE 0x00000001 +#define TSI721_DMAC_INT_ALL 0x0000001f + +#define TSI721_DMAC_INTSET(x) (0x51010 + (x) * 0x1000) + +#define TSI721_DMAC_STS(x) (0x51014 + (x) * 0x1000) +#define TSI721_DMAC_STS_ABORT 0x00400000 +#define TSI721_DMAC_STS_RUN 0x00200000 +#define TSI721_DMAC_STS_CS 0x001f0000 + +#define TSI721_DMAC_INTE(x) (0x51018 + (x) * 0x1000) + +#define TSI721_DMAC_DPTRL(x) (0x51024 + (x) * 0x1000) +#define TSI721_DMAC_DPTRL_MASK 0xffffffe0 + +#define TSI721_DMAC_DPTRH(x) (0x51028 + (x) * 0x1000) + +#define TSI721_DMAC_DSBL(x) (0x5102c + (x) * 0x1000) +#define TSI721_DMAC_DSBL_MASK 0xffffffc0 + +#define TSI721_DMAC_DSBH(x) (0x51030 + (x) * 0x1000) + +#define TSI721_DMAC_DSSZ(x) (0x51034 + (x) * 0x1000) +#define TSI721_DMAC_DSSZ_SIZE_M 0x0000000f +#define TSI721_DMAC_DSSZ_SIZE(size) (__fls(size) - 4) + + +#define TSI721_DMAC_DSRP(x) (0x51038 + (x) * 0x1000) +#define TSI721_DMAC_DSRP_MASK 0x0007ffff + +#define TSI721_DMAC_DSWP(x) (0x5103c + (x) * 0x1000) +#define TSI721_DMAC_DSWP_MASK 0x0007ffff + +#define TSI721_BDMA_INTE 0x5f000 + +/* + * Messaging definitions + */ +#define TSI721_MSG_BUFFER_SIZE RIO_MAX_MSG_SIZE +#define TSI721_MSG_MAX_SIZE RIO_MAX_MSG_SIZE +#define TSI721_IMSG_MAXCH 8 +#define TSI721_IMSG_CHNUM TSI721_IMSG_MAXCH +#define TSI721_IMSGD_MIN_RING_SIZE 32 +#define TSI721_IMSGD_RING_SIZE 512 + +#define TSI721_OMSG_CHNUM 4 /* One channel per MBOX */ +#define TSI721_OMSGD_MIN_RING_SIZE 32 +#define TSI721_OMSGD_RING_SIZE 512 + +/* + * Outbound Messaging Engine Registers + * x = 0..7 + */ + +#define TSI721_OBDMAC_DWRCNT(x) (0x61000 + (x) * 0x1000) + +#define TSI721_OBDMAC_DRDCNT(x) (0x61004 + (x) * 0x1000) + +#define TSI721_OBDMAC_CTL(x) (0x61008 + (x) * 0x1000) +#define TSI721_OBDMAC_CTL_MASK 0x00000007 +#define TSI721_OBDMAC_CTL_RETRY_THR 0x00000004 +#define TSI721_OBDMAC_CTL_SUSPEND 0x00000002 +#define TSI721_OBDMAC_CTL_INIT 0x00000001 + +#define TSI721_OBDMAC_INT(x) (0x6100c + (x) * 0x1000) +#define TSI721_OBDMAC_INTSET(x) (0x61010 + (x) * 0x1000) +#define TSI721_OBDMAC_INTE(x) (0x61018 + (x) * 0x1000) +#define TSI721_OBDMAC_INT_MASK 0x0000001F +#define TSI721_OBDMAC_INT_ST_FULL 0x00000010 +#define TSI721_OBDMAC_INT_DONE 0x00000008 +#define TSI721_OBDMAC_INT_SUSPENDED 0x00000004 +#define TSI721_OBDMAC_INT_ERROR 0x00000002 +#define TSI721_OBDMAC_INT_IOF_DONE 0x00000001 +#define TSI721_OBDMAC_INT_ALL TSI721_OBDMAC_INT_MASK + +#define TSI721_OBDMAC_STS(x) (0x61014 + (x) * 0x1000) +#define TSI721_OBDMAC_STS_MASK 0x007f0000 +#define TSI721_OBDMAC_STS_ABORT 0x00400000 +#define TSI721_OBDMAC_STS_RUN 0x00200000 +#define TSI721_OBDMAC_STS_CS 0x001f0000 + +#define TSI721_OBDMAC_PWE(x) (0x6101c + (x) * 0x1000) +#define TSI721_OBDMAC_PWE_MASK 0x00000002 +#define TSI721_OBDMAC_PWE_ERROR_EN 0x00000002 + +#define TSI721_OBDMAC_DPTRL(x) (0x61020 + (x) * 0x1000) +#define TSI721_OBDMAC_DPTRL_MASK 0xfffffff0 + +#define TSI721_OBDMAC_DPTRH(x) (0x61024 + (x) * 0x1000) +#define TSI721_OBDMAC_DPTRH_MASK 0xffffffff + +#define TSI721_OBDMAC_DSBL(x) (0x61040 + (x) * 0x1000) +#define TSI721_OBDMAC_DSBL_MASK 0xffffffc0 + +#define TSI721_OBDMAC_DSBH(x) (0x61044 + (x) * 0x1000) +#define TSI721_OBDMAC_DSBH_MASK 0xffffffff + +#define TSI721_OBDMAC_DSSZ(x) (0x61048 + (x) * 0x1000) +#define TSI721_OBDMAC_DSSZ_MASK 0x0000000f + +#define TSI721_OBDMAC_DSRP(x) (0x6104c + (x) * 0x1000) +#define TSI721_OBDMAC_DSRP_MASK 0x0007ffff + +#define TSI721_OBDMAC_DSWP(x) (0x61050 + (x) * 0x1000) +#define TSI721_OBDMAC_DSWP_MASK 0x0007ffff + +#define TSI721_RQRPTO 0x60010 +#define TSI721_RQRPTO_MASK 0x00ffffff +#define TSI721_RQRPTO_VAL 400 /* Response TO value */ + +/* + * Inbound Messaging Engine Registers + * x = 0..7 + */ + +#define TSI721_IB_DEVID_GLOBAL 0xffff +#define TSI721_IBDMAC_FQBL(x) (0x61200 + (x) * 0x1000) +#define TSI721_IBDMAC_FQBL_MASK 0xffffffc0 + +#define TSI721_IBDMAC_FQBH(x) (0x61204 + (x) * 0x1000) +#define TSI721_IBDMAC_FQBH_MASK 0xffffffff + +#define TSI721_IBDMAC_FQSZ_ENTRY_INX TSI721_IMSGD_RING_SIZE +#define TSI721_IBDMAC_FQSZ(x) (0x61208 + (x) * 0x1000) +#define TSI721_IBDMAC_FQSZ_MASK 0x0000000f + +#define TSI721_IBDMAC_FQRP(x) (0x6120c + (x) * 0x1000) +#define TSI721_IBDMAC_FQRP_MASK 0x0007ffff + +#define TSI721_IBDMAC_FQWP(x) (0x61210 + (x) * 0x1000) +#define TSI721_IBDMAC_FQWP_MASK 0x0007ffff + +#define TSI721_IBDMAC_FQTH(x) (0x61214 + (x) * 0x1000) +#define TSI721_IBDMAC_FQTH_MASK 0x0007ffff + +#define TSI721_IB_DEVID 0x60020 +#define TSI721_IB_DEVID_MASK 0x0000ffff + +#define TSI721_IBDMAC_CTL(x) (0x61240 + (x) * 0x1000) +#define TSI721_IBDMAC_CTL_MASK 0x00000003 +#define TSI721_IBDMAC_CTL_SUSPEND 0x00000002 +#define TSI721_IBDMAC_CTL_INIT 0x00000001 + +#define TSI721_IBDMAC_STS(x) (0x61244 + (x) * 0x1000) +#define TSI721_IBDMAC_STS_MASK 0x007f0000 +#define TSI721_IBSMAC_STS_ABORT 0x00400000 +#define TSI721_IBSMAC_STS_RUN 0x00200000 +#define TSI721_IBSMAC_STS_CS 0x001f0000 + +#define TSI721_IBDMAC_INT(x) (0x61248 + (x) * 0x1000) +#define TSI721_IBDMAC_INTSET(x) (0x6124c + (x) * 0x1000) +#define TSI721_IBDMAC_INTE(x) (0x61250 + (x) * 0x1000) +#define TSI721_IBDMAC_INT_MASK 0x0000100f +#define TSI721_IBDMAC_INT_SRTO 0x00001000 +#define TSI721_IBDMAC_INT_SUSPENDED 0x00000008 +#define TSI721_IBDMAC_INT_PC_ERROR 0x00000004 +#define TSI721_IBDMAC_INT_FQ_LOW 0x00000002 +#define TSI721_IBDMAC_INT_DQ_RCV 0x00000001 +#define TSI721_IBDMAC_INT_ALL TSI721_IBDMAC_INT_MASK + +#define TSI721_IBDMAC_PWE(x) (0x61254 + (x) * 0x1000) +#define TSI721_IBDMAC_PWE_MASK 0x00001700 +#define TSI721_IBDMAC_PWE_SRTO 0x00001000 +#define TSI721_IBDMAC_PWE_ILL_FMT 0x00000400 +#define TSI721_IBDMAC_PWE_ILL_DEC 0x00000200 +#define TSI721_IBDMAC_PWE_IMP_SP 0x00000100 + +#define TSI721_IBDMAC_DQBL(x) (0x61300 + (x) * 0x1000) +#define TSI721_IBDMAC_DQBL_MASK 0xffffffc0 +#define TSI721_IBDMAC_DQBL_ADDR 0xffffffc0 + +#define TSI721_IBDMAC_DQBH(x) (0x61304 + (x) * 0x1000) +#define TSI721_IBDMAC_DQBH_MASK 0xffffffff + +#define TSI721_IBDMAC_DQRP(x) (0x61308 + (x) * 0x1000) +#define TSI721_IBDMAC_DQRP_MASK 0x0007ffff + +#define TSI721_IBDMAC_DQWR(x) (0x6130c + (x) * 0x1000) +#define TSI721_IBDMAC_DQWR_MASK 0x0007ffff + +#define TSI721_IBDMAC_DQSZ(x) (0x61314 + (x) * 0x1000) +#define TSI721_IBDMAC_DQSZ_MASK 0x0000000f + +/* + * Messaging Engine Interrupts + */ + +#define TSI721_SMSG_PWE 0x6a004 + +#define TSI721_SMSG_INTE 0x6a000 +#define TSI721_SMSG_INT 0x6a008 +#define TSI721_SMSG_INTSET 0x6a010 +#define TSI721_SMSG_INT_MASK 0x0086ffff +#define TSI721_SMSG_INT_UNS_RSP 0x00800000 +#define TSI721_SMSG_INT_ECC_NCOR 0x00040000 +#define TSI721_SMSG_INT_ECC_COR 0x00020000 +#define TSI721_SMSG_INT_ECC_NCOR_CH 0x0000ff00 +#define TSI721_SMSG_INT_ECC_COR_CH 0x000000ff + +#define TSI721_SMSG_ECC_LOG 0x6a014 +#define TSI721_SMSG_ECC_LOG_MASK 0x00070007 +#define TSI721_SMSG_ECC_LOG_ECC_NCOR_M 0x00070000 +#define TSI721_SMSG_ECC_LOG_ECC_COR_M 0x00000007 + +#define TSI721_RETRY_GEN_CNT 0x6a100 +#define TSI721_RETRY_GEN_CNT_MASK 0xffffffff + +#define TSI721_RETRY_RX_CNT 0x6a104 +#define TSI721_RETRY_RX_CNT_MASK 0xffffffff + +#define TSI721_SMSG_ECC_COR_LOG(x) (0x6a300 + (x) * 4) +#define TSI721_SMSG_ECC_COR_LOG_MASK 0x000000ff + +#define TSI721_SMSG_ECC_NCOR(x) (0x6a340 + (x) * 4) +#define TSI721_SMSG_ECC_NCOR_MASK 0x000000ff + +/* + * Block DMA Descriptors + */ + +struct tsi721_dma_desc { + __le32 type_id; + +#define TSI721_DMAD_DEVID 0x0000ffff +#define TSI721_DMAD_CRF 0x00010000 +#define TSI721_DMAD_PRIO 0x00060000 +#define TSI721_DMAD_RTYPE 0x00780000 +#define TSI721_DMAD_IOF 0x08000000 +#define TSI721_DMAD_DTYPE 0xe0000000 + + __le32 bcount; + +#define TSI721_DMAD_BCOUNT1 0x03ffffff /* if DTYPE == 1 */ +#define TSI721_DMAD_BCOUNT2 0x0000000f /* if DTYPE == 2 */ +#define TSI721_DMAD_TT 0x0c000000 +#define TSI721_DMAD_RADDR0 0xc0000000 + + union { + __le32 raddr_lo; /* if DTYPE == (1 || 2) */ + __le32 next_lo; /* if DTYPE == 3 */ + }; + +#define TSI721_DMAD_CFGOFF 0x00ffffff +#define TSI721_DMAD_HOPCNT 0xff000000 + + union { + __le32 raddr_hi; /* if DTYPE == (1 || 2) */ + __le32 next_hi; /* if DTYPE == 3 */ + }; + + union { + struct { /* if DTYPE == 1 */ + __le32 bufptr_lo; + __le32 bufptr_hi; + __le32 s_dist; + __le32 s_size; + } t1; + __le32 data[4]; /* if DTYPE == 2 */ + u32 reserved[4]; /* if DTYPE == 3 */ + }; +} __aligned(32); + +/* + * Inbound Messaging Descriptor + */ +struct tsi721_imsg_desc { + __le32 type_id; + +#define TSI721_IMD_DEVID 0x0000ffff +#define TSI721_IMD_CRF 0x00010000 +#define TSI721_IMD_PRIO 0x00060000 +#define TSI721_IMD_TT 0x00180000 +#define TSI721_IMD_DTYPE 0xe0000000 + + __le32 msg_info; + +#define TSI721_IMD_BCOUNT 0x00000ff8 +#define TSI721_IMD_SSIZE 0x0000f000 +#define TSI721_IMD_LETER 0x00030000 +#define TSI721_IMD_XMBOX 0x003c0000 +#define TSI721_IMD_MBOX 0x00c00000 +#define TSI721_IMD_CS 0x78000000 +#define TSI721_IMD_HO 0x80000000 + + __le32 bufptr_lo; + __le32 bufptr_hi; + u32 reserved[12]; + +} __aligned(64); + +/* + * Outbound Messaging Descriptor + */ +struct tsi721_omsg_desc { + __le32 type_id; + +#define TSI721_OMD_DEVID 0x0000ffff +#define TSI721_OMD_CRF 0x00010000 +#define TSI721_OMD_PRIO 0x00060000 +#define TSI721_OMD_IOF 0x08000000 +#define TSI721_OMD_DTYPE 0xe0000000 +#define TSI721_OMD_RSRVD 0x17f80000 + + __le32 msg_info; + +#define TSI721_OMD_BCOUNT 0x00000ff8 +#define TSI721_OMD_SSIZE 0x0000f000 +#define TSI721_OMD_LETER 0x00030000 +#define TSI721_OMD_XMBOX 0x003c0000 +#define TSI721_OMD_MBOX 0x00c00000 +#define TSI721_OMD_TT 0x0c000000 + + union { + __le32 bufptr_lo; /* if DTYPE == 4 */ + __le32 next_lo; /* if DTYPE == 5 */ + }; + + union { + __le32 bufptr_hi; /* if DTYPE == 4 */ + __le32 next_hi; /* if DTYPE == 5 */ + }; + +} __aligned(16); + +struct tsi721_dma_sts { + __le64 desc_sts[8]; +} __aligned(64); + +struct tsi721_desc_sts_fifo { + union { + __le64 da64; + struct { + __le32 lo; + __le32 hi; + } da32; + } stat[8]; +} __aligned(64); + +/* Descriptor types for BDMA and Messaging blocks */ +enum dma_dtype { + DTYPE1 = 1, /* Data Transfer DMA Descriptor */ + DTYPE2 = 2, /* Immediate Data Transfer DMA Descriptor */ + DTYPE3 = 3, /* Block Pointer DMA Descriptor */ + DTYPE4 = 4, /* Outbound Msg DMA Descriptor */ + DTYPE5 = 5, /* OB Messaging Block Pointer Descriptor */ + DTYPE6 = 6 /* Inbound Messaging Descriptor */ +}; + +enum dma_rtype { + NREAD = 0, + LAST_NWRITE_R = 1, + ALL_NWRITE = 2, + ALL_NWRITE_R = 3, + MAINT_RD = 4, + MAINT_WR = 5 +}; + +/* + * mport Driver Definitions + */ +#define TSI721_DMA_CHNUM TSI721_DMA_MAXCH + +#define TSI721_DMACH_MAINT 0 /* DMA channel for maint requests */ +#define TSI721_DMACH_MAINT_NBD 32 /* Number of BDs for maint requests */ + +#define MSG_DMA_ENTRY_INX_TO_SIZE(x) ((0x10 << (x)) & 0xFFFF0) + +enum tsi721_smsg_int_flag { + SMSG_INT_NONE = 0x00000000, + SMSG_INT_ECC_COR_CH = 0x000000ff, + SMSG_INT_ECC_NCOR_CH = 0x0000ff00, + SMSG_INT_ECC_COR = 0x00020000, + SMSG_INT_ECC_NCOR = 0x00040000, + SMSG_INT_UNS_RSP = 0x00800000, + SMSG_INT_ALL = 0x0006ffff +}; + +/* Structures */ + +struct tsi721_bdma_chan { + int bd_num; /* number of buffer descriptors */ + void *bd_base; /* start of DMA descriptors */ + dma_addr_t bd_phys; + void *sts_base; /* start of DMA BD status FIFO */ + dma_addr_t sts_phys; + int sts_size; +}; + +struct tsi721_imsg_ring { + u32 size; + /* VA/PA of data buffers for incoming messages */ + void *buf_base; + dma_addr_t buf_phys; + /* VA/PA of circular free buffer list */ + void *imfq_base; + dma_addr_t imfq_phys; + /* VA/PA of Inbound message descriptors */ + void *imd_base; + dma_addr_t imd_phys; + /* Inbound Queue buffer pointers */ + void *imq_base[TSI721_IMSGD_RING_SIZE]; + + u32 rx_slot; + void *dev_id; + u32 fq_wrptr; + u32 desc_rdptr; + spinlock_t lock; +}; + +struct tsi721_omsg_ring { + u32 size; + /* VA/PA of OB Msg descriptors */ + void *omd_base; + dma_addr_t omd_phys; + /* VA/PA of OB Msg data buffers */ + void *omq_base[TSI721_OMSGD_RING_SIZE]; + dma_addr_t omq_phys[TSI721_OMSGD_RING_SIZE]; + /* VA/PA of OB Msg descriptor status FIFO */ + void *sts_base; + dma_addr_t sts_phys; + u32 sts_size; /* # of allocated status entries */ + u32 sts_rdptr; + + u32 tx_slot; + void *dev_id; + u32 wr_count; + spinlock_t lock; +}; + +enum tsi721_flags { + TSI721_USING_MSI = (1 << 0), + TSI721_USING_MSIX = (1 << 1), + TSI721_IMSGID_SET = (1 << 2), +}; + +#ifdef CONFIG_PCI_MSI +/* + * MSI-X Table Entries (0 ... 69) + */ +#define TSI721_MSIX_DMACH_DONE(x) (0 + (x)) +#define TSI721_MSIX_DMACH_INT(x) (8 + (x)) +#define TSI721_MSIX_BDMA_INT 16 +#define TSI721_MSIX_OMSG_DONE(x) (17 + (x)) +#define TSI721_MSIX_OMSG_INT(x) (25 + (x)) +#define TSI721_MSIX_IMSG_DQ_RCV(x) (33 + (x)) +#define TSI721_MSIX_IMSG_INT(x) (41 + (x)) +#define TSI721_MSIX_MSG_INT 49 +#define TSI721_MSIX_SR2PC_IDBQ_RCV(x) (50 + (x)) +#define TSI721_MSIX_SR2PC_CH_INT(x) (58 + (x)) +#define TSI721_MSIX_SR2PC_INT 66 +#define TSI721_MSIX_PC2SR_INT 67 +#define TSI721_MSIX_SRIO_MAC_INT 68 +#define TSI721_MSIX_I2C_INT 69 + +/* MSI-X vector and init table entry indexes */ +enum tsi721_msix_vect { + TSI721_VECT_IDB, + TSI721_VECT_PWRX, /* PW_RX is part of SRIO MAC Interrupt reporting */ + TSI721_VECT_OMB0_DONE, + TSI721_VECT_OMB1_DONE, + TSI721_VECT_OMB2_DONE, + TSI721_VECT_OMB3_DONE, + TSI721_VECT_OMB0_INT, + TSI721_VECT_OMB1_INT, + TSI721_VECT_OMB2_INT, + TSI721_VECT_OMB3_INT, + TSI721_VECT_IMB0_RCV, + TSI721_VECT_IMB1_RCV, + TSI721_VECT_IMB2_RCV, + TSI721_VECT_IMB3_RCV, + TSI721_VECT_IMB0_INT, + TSI721_VECT_IMB1_INT, + TSI721_VECT_IMB2_INT, + TSI721_VECT_IMB3_INT, + TSI721_VECT_MAX +}; + +#define IRQ_DEVICE_NAME_MAX 64 + +struct msix_irq { + u16 vector; + char irq_name[IRQ_DEVICE_NAME_MAX]; +}; +#endif /* CONFIG_PCI_MSI */ + +struct tsi721_device { + struct pci_dev *pdev; + struct rio_mport *mport; + u32 flags; + void __iomem *regs; +#ifdef CONFIG_PCI_MSI + struct msix_irq msix[TSI721_VECT_MAX]; +#endif + /* Doorbells */ + void __iomem *odb_base; + void *idb_base; + dma_addr_t idb_dma; + struct work_struct idb_work; + u32 db_discard_count; + + /* Inbound Port-Write */ + struct work_struct pw_work; + struct kfifo pw_fifo; + spinlock_t pw_fifo_lock; + u32 pw_discard_count; + + /* BDMA Engine */ + struct tsi721_bdma_chan bdma[TSI721_DMA_CHNUM]; + + /* Inbound Messaging */ + int imsg_init[TSI721_IMSG_CHNUM]; + struct tsi721_imsg_ring imsg_ring[TSI721_IMSG_CHNUM]; + + /* Outbound Messaging */ + int omsg_init[TSI721_OMSG_CHNUM]; + struct tsi721_omsg_ring omsg_ring[TSI721_OMSG_CHNUM]; +}; + +#endif diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index ebe77dd..2bebd79 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -516,7 +516,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, return rdev; cleanup: - if (rio_is_switch(rdev)) + if (rswitch) kfree(rswitch->route_table); kfree(rdev); @@ -923,7 +923,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, * rio_enum_complete- Tests if enumeration of a network is complete * @port: Master port to send transaction * - * Tests the Component Tag CSR for non-zero value (enumeration + * Tests the PGCCSR discovered bit for non-zero value (enumeration * complete flag). Return %1 if enumeration is complete or %0 if * enumeration is incomplete. */ @@ -933,7 +933,7 @@ static int rio_enum_complete(struct rio_mport *port) rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR, ®val); - return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0; + return (regval & RIO_PORT_GEN_DISCOVERED) ? 1 : 0; } /** diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 01a7df53..e8326f2 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -21,16 +21,13 @@ #include "rtc-core.h" -static DEFINE_IDR(rtc_idr); -static DEFINE_MUTEX(idr_lock); +static DEFINE_IDA(rtc_ida); struct class *rtc_class; static void rtc_device_release(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); - mutex_lock(&idr_lock); - idr_remove(&rtc_idr, rtc->id); - mutex_unlock(&idr_lock); + ida_simple_remove(&rtc_ida, rtc->id); kfree(rtc); } @@ -146,25 +143,16 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, struct rtc_wkalrm alrm; int id, err; - if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) { - err = -ENOMEM; + id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + err = id; goto exit; } - - mutex_lock(&idr_lock); - err = idr_get_new(&rtc_idr, NULL, &id); - mutex_unlock(&idr_lock); - - if (err < 0) - goto exit; - - id = id & MAX_ID_MASK; - rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL); if (rtc == NULL) { err = -ENOMEM; - goto exit_idr; + goto exit_ida; } rtc->id = id; @@ -222,10 +210,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, exit_kfree: kfree(rtc); -exit_idr: - mutex_lock(&idr_lock); - idr_remove(&rtc_idr, id); - mutex_unlock(&idr_lock); +exit_ida: + ida_simple_remove(&rtc_ida, id); exit: dev_err(dev, "rtc core: unable to register %s, err = %d\n", @@ -276,7 +262,7 @@ static void __exit rtc_exit(void) { rtc_dev_exit(); class_destroy(rtc_class); - idr_destroy(&rtc_idr); + ida_destroy(&rtc_ida); } subsys_initcall(rtc_init); diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index b2005b4..62b0763 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -34,6 +34,7 @@ enum ds_type { ds_1388, ds_3231, m41t00, + mcp7941x, rx_8025, // rs5c372 too? different address... }; @@ -43,6 +44,7 @@ enum ds_type { #define DS1307_REG_SECS 0x00 /* 00-59 */ # define DS1307_BIT_CH 0x80 # define DS1340_BIT_nEOSC 0x80 +# define MCP7941X_BIT_ST 0x80 #define DS1307_REG_MIN 0x01 /* 00-59 */ #define DS1307_REG_HOUR 0x02 /* 00-23, or 1-12{am,pm} */ # define DS1307_BIT_12HR 0x40 /* in REG_HOUR */ @@ -50,6 +52,7 @@ enum ds_type { # define DS1340_BIT_CENTURY_EN 0x80 /* in REG_HOUR */ # define DS1340_BIT_CENTURY 0x40 /* in REG_HOUR */ #define DS1307_REG_WDAY 0x03 /* 01-07 */ +# define MCP7941X_BIT_VBATEN 0x08 #define DS1307_REG_MDAY 0x04 /* 01-31 */ #define DS1307_REG_MONTH 0x05 /* 01-12 */ # define DS1337_BIT_CENTURY 0x80 /* in REG_MONTH */ @@ -137,6 +140,8 @@ static const struct chip_desc chips[] = { }, [m41t00] = { }, +[mcp7941x] = { +}, [rx_8025] = { }, }; @@ -149,6 +154,7 @@ static const struct i2c_device_id ds1307_id[] = { { "ds1340", ds_1340 }, { "ds3231", ds_3231 }, { "m41t00", m41t00 }, + { "mcp7941x", mcp7941x }, { "pt7c4338", ds_1307 }, { "rx8025", rx_8025 }, { } @@ -365,6 +371,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN | DS1340_BIT_CENTURY; break; + case mcp7941x: + buf[DS1307_REG_SECS] |= MCP7941X_BIT_ST; + buf[DS1307_REG_WDAY] |= MCP7941X_BIT_VBATEN; + break; default: break; } @@ -809,6 +819,23 @@ read_rtc: dev_warn(&client->dev, "SET TIME!\n"); } break; + case mcp7941x: + /* make sure that the backup battery is enabled */ + if (!(ds1307->regs[DS1307_REG_WDAY] & MCP7941X_BIT_VBATEN)) { + i2c_smbus_write_byte_data(client, DS1307_REG_WDAY, + ds1307->regs[DS1307_REG_WDAY] + | MCP7941X_BIT_VBATEN); + } + + /* clock halted? turn it on, so clock can tick. */ + if (!(tmp & MCP7941X_BIT_ST)) { + i2c_smbus_write_byte_data(client, DS1307_REG_SECS, + MCP7941X_BIT_ST); + dev_warn(&client->dev, "SET TIME!\n"); + goto read_rtc; + } + + break; case rx_8025: case ds_1337: case ds_1339: diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index a1a278b..9d0c3b4 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -309,7 +309,7 @@ static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev) return IRQ_HANDLED; } -static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev) +static int __init mc13xxx_rtc_probe(struct platform_device *pdev) { int ret; struct mc13xxx_rtc *priv; @@ -378,7 +378,7 @@ err_reset_irq_request: return ret; } -static int __devexit mc13xxx_rtc_remove(struct platform_device *pdev) +static int __exit mc13xxx_rtc_remove(struct platform_device *pdev) { struct mc13xxx_rtc *priv = platform_get_drvdata(pdev); @@ -410,7 +410,7 @@ const struct platform_device_id mc13xxx_rtc_idtable[] = { static struct platform_driver mc13xxx_rtc_driver = { .id_table = mc13xxx_rtc_idtable, - .remove = __devexit_p(mc13xxx_rtc_remove), + .remove = __exit_p(mc13xxx_rtc_remove), .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c index 483d451..5754c9a 100644 --- a/drivers/w1/slaves/w1_ds2760.c +++ b/drivers/w1/slaves/w1_ds2760.c @@ -114,43 +114,7 @@ static struct bin_attribute w1_ds2760_bin_attr = { .read = w1_ds2760_read_bin, }; -static DEFINE_IDR(bat_idr); -static DEFINE_MUTEX(bat_idr_lock); - -static int new_bat_id(void) -{ - int ret; - - while (1) { - int id; - - ret = idr_pre_get(&bat_idr, GFP_KERNEL); - if (ret == 0) - return -ENOMEM; - - mutex_lock(&bat_idr_lock); - ret = idr_get_new(&bat_idr, NULL, &id); - mutex_unlock(&bat_idr_lock); - - if (ret == 0) { - ret = id & MAX_ID_MASK; - break; - } else if (ret == -EAGAIN) { - continue; - } else { - break; - } - } - - return ret; -} - -static void release_bat_id(int id) -{ - mutex_lock(&bat_idr_lock); - idr_remove(&bat_idr, id); - mutex_unlock(&bat_idr_lock); -} +static DEFINE_IDA(bat_ida); static int w1_ds2760_add_slave(struct w1_slave *sl) { @@ -158,7 +122,7 @@ static int w1_ds2760_add_slave(struct w1_slave *sl) int id; struct platform_device *pdev; - id = new_bat_id(); + id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL); if (id < 0) { ret = id; goto noid; @@ -187,7 +151,7 @@ bin_attr_failed: pdev_add_failed: platform_device_unregister(pdev); pdev_alloc_failed: - release_bat_id(id); + ida_simple_remove(&bat_ida, id); noid: success: return ret; @@ -199,7 +163,7 @@ static void w1_ds2760_remove_slave(struct w1_slave *sl) int id = pdev->id; platform_device_unregister(pdev); - release_bat_id(id); + ida_simple_remove(&bat_ida, id); sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2760_bin_attr); } @@ -217,14 +181,14 @@ static int __init w1_ds2760_init(void) { printk(KERN_INFO "1-Wire driver for the DS2760 battery monitor " " chip - (c) 2004-2005, Szabolcs Gyurko\n"); - idr_init(&bat_idr); + ida_init(&bat_ida); return w1_register_family(&w1_ds2760_family); } static void __exit w1_ds2760_exit(void) { w1_unregister_family(&w1_ds2760_family); - idr_destroy(&bat_idr); + ida_destroy(&bat_ida); } EXPORT_SYMBOL(w1_ds2760_read); diff --git a/drivers/w1/slaves/w1_ds2780.c b/drivers/w1/slaves/w1_ds2780.c index 274c8f3..39f78c0 100644 --- a/drivers/w1/slaves/w1_ds2780.c +++ b/drivers/w1/slaves/w1_ds2780.c @@ -26,20 +26,14 @@ #include "../w1_family.h" #include "w1_ds2780.h" -int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, - int io) +static int w1_ds2780_do_io(struct device *dev, char *buf, int addr, + size_t count, int io) { struct w1_slave *sl = container_of(dev, struct w1_slave, dev); - if (!dev) - return -ENODEV; + if (addr > DS2780_DATA_SIZE || addr < 0) + return 0; - mutex_lock(&sl->master->mutex); - - if (addr > DS2780_DATA_SIZE || addr < 0) { - count = 0; - goto out; - } count = min_t(int, count, DS2780_DATA_SIZE - addr); if (w1_reset_select_slave(sl) == 0) { @@ -47,7 +41,6 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, w1_write_8(sl->master, W1_DS2780_WRITE_DATA); w1_write_8(sl->master, addr); w1_write_block(sl->master, buf, count); - /* XXX w1_write_block returns void, not n_written */ } else { w1_write_8(sl->master, W1_DS2780_READ_DATA); w1_write_8(sl->master, addr); @@ -55,13 +48,42 @@ int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, } } -out: + return count; +} + +int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, + int io) +{ + struct w1_slave *sl = container_of(dev, struct w1_slave, dev); + int ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&sl->master->mutex); + + ret = w1_ds2780_do_io(dev, buf, addr, count, io); + mutex_unlock(&sl->master->mutex); - return count; + return ret; } EXPORT_SYMBOL(w1_ds2780_io); +int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr, size_t count, + int io) +{ + int ret; + + if (!dev) + return -ENODEV; + + ret = w1_ds2780_do_io(dev, buf, addr, count, io); + + return ret; +} +EXPORT_SYMBOL(w1_ds2780_io_nolock); + int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd) { struct w1_slave *sl = container_of(dev, struct w1_slave, dev); @@ -99,43 +121,7 @@ static struct bin_attribute w1_ds2780_bin_attr = { .read = w1_ds2780_read_bin, }; -static DEFINE_IDR(bat_idr); -static DEFINE_MUTEX(bat_idr_lock); - -static int new_bat_id(void) -{ - int ret; - - while (1) { - int id; - - ret = idr_pre_get(&bat_idr, GFP_KERNEL); - if (ret == 0) - return -ENOMEM; - - mutex_lock(&bat_idr_lock); - ret = idr_get_new(&bat_idr, NULL, &id); - mutex_unlock(&bat_idr_lock); - - if (ret == 0) { - ret = id & MAX_ID_MASK; - break; - } else if (ret == -EAGAIN) { - continue; - } else { - break; - } - } - - return ret; -} - -static void release_bat_id(int id) -{ - mutex_lock(&bat_idr_lock); - idr_remove(&bat_idr, id); - mutex_unlock(&bat_idr_lock); -} +static DEFINE_IDA(bat_ida); static int w1_ds2780_add_slave(struct w1_slave *sl) { @@ -143,7 +129,7 @@ static int w1_ds2780_add_slave(struct w1_slave *sl) int id; struct platform_device *pdev; - id = new_bat_id(); + id = ida_simple_get(&bat_ida, 0, 0, GFP_KERNEL); if (id < 0) { ret = id; goto noid; @@ -172,7 +158,7 @@ bin_attr_failed: pdev_add_failed: platform_device_unregister(pdev); pdev_alloc_failed: - release_bat_id(id); + ida_simple_remove(&bat_ida, id); noid: return ret; } @@ -183,7 +169,7 @@ static void w1_ds2780_remove_slave(struct w1_slave *sl) int id = pdev->id; platform_device_unregister(pdev); - release_bat_id(id); + ida_simple_remove(&bat_ida, id); sysfs_remove_bin_file(&sl->dev.kobj, &w1_ds2780_bin_attr); } @@ -199,14 +185,14 @@ static struct w1_family w1_ds2780_family = { static int __init w1_ds2780_init(void) { - idr_init(&bat_idr); + ida_init(&bat_ida); return w1_register_family(&w1_ds2780_family); } static void __exit w1_ds2780_exit(void) { w1_unregister_family(&w1_ds2780_family); - idr_destroy(&bat_idr); + ida_destroy(&bat_ida); } module_init(w1_ds2780_init); diff --git a/drivers/w1/slaves/w1_ds2780.h b/drivers/w1/slaves/w1_ds2780.h index a1fba79..7373793 100644 --- a/drivers/w1/slaves/w1_ds2780.h +++ b/drivers/w1/slaves/w1_ds2780.h @@ -124,6 +124,8 @@ extern int w1_ds2780_io(struct device *dev, char *buf, int addr, size_t count, int io); +extern int w1_ds2780_io_nolock(struct device *dev, char *buf, int addr, + size_t count, int io); extern int w1_ds2780_eeprom_cmd(struct device *dev, int addr, int cmd); #endif /* !_W1_DS2780_H */ diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index d220bce..f79e62e 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -78,6 +78,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, memcpy(&dev->dev, device, sizeof(struct device)); dev_set_name(&dev->dev, "w1_bus_master%u", dev->id); snprintf(dev->name, sizeof(dev->name), "w1_bus_master%u", dev->id); + dev->dev.init_name = dev->name; dev->driver = driver; diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c index 765b37b..3135b2c 100644 --- a/drivers/w1/w1_io.c +++ b/drivers/w1/w1_io.c @@ -158,13 +158,18 @@ EXPORT_SYMBOL_GPL(w1_write_8); static u8 w1_read_bit(struct w1_master *dev) { int result; + unsigned long flags; + /* sample timing is critical here */ + local_irq_save(flags); dev->bus_master->write_bit(dev->bus_master->data, 0); w1_delay(6); dev->bus_master->write_bit(dev->bus_master->data, 1); w1_delay(9); result = dev->bus_master->read_bit(dev->bus_master->data); + local_irq_restore(flags); + w1_delay(55); return result & 0x1; @@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm) static struct kiocb *__aio_get_req(struct kioctx *ctx) { struct kiocb *req = NULL; - struct aio_ring *ring; - int okay = 0; req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); if (unlikely(!req)) @@ -459,39 +457,114 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx) INIT_LIST_HEAD(&req->ki_run_list); req->ki_eventfd = NULL; - /* Check if the completion queue has enough free space to - * accept an event from this io. - */ + return req; +} + +/* + * struct kiocb's are allocated in batches to reduce the number of + * times the ctx lock is acquired and released. + */ +#define KIOCB_BATCH_SIZE 32L +struct kiocb_batch { + struct list_head head; + long count; /* number of requests left to allocate */ +}; + +static void kiocb_batch_init(struct kiocb_batch *batch, long total) +{ + INIT_LIST_HEAD(&batch->head); + batch->count = total; +} + +static void kiocb_batch_free(struct kiocb_batch *batch) +{ + struct kiocb *req, *n; + + list_for_each_entry_safe(req, n, &batch->head, ki_batch) { + list_del(&req->ki_batch); + kmem_cache_free(kiocb_cachep, req); + } +} + +/* + * Allocate a batch of kiocbs. This avoids taking and dropping the + * context lock a lot during setup. + */ +static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) +{ + unsigned short allocated, to_alloc; + long avail; + bool called_fput = false; + struct kiocb *req, *n; + struct aio_ring *ring; + + to_alloc = min(batch->count, KIOCB_BATCH_SIZE); + for (allocated = 0; allocated < to_alloc; allocated++) { + req = __aio_get_req(ctx); + if (!req) + /* allocation failed, go with what we've got */ + break; + list_add(&req->ki_batch, &batch->head); + } + + if (allocated == 0) + goto out; + +retry: spin_lock_irq(&ctx->ctx_lock); - ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); - if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { + ring = kmap_atomic(ctx->ring_info.ring_pages[0]); + + avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; + BUG_ON(avail < 0); + if (avail == 0 && !called_fput) { + /* + * Handle a potential starvation case. It is possible that + * we hold the last reference on a struct file, causing us + * to delay the final fput to non-irq context. In this case, + * ctx->reqs_active is artificially high. Calling the fput + * routine here may free up a slot in the event completion + * ring, allowing this allocation to succeed. + */ + kunmap_atomic(ring); + spin_unlock_irq(&ctx->ctx_lock); + aio_fput_routine(NULL); + called_fput = true; + goto retry; + } + + if (avail < allocated) { + /* Trim back the number of requests. */ + list_for_each_entry_safe(req, n, &batch->head, ki_batch) { + list_del(&req->ki_batch); + kmem_cache_free(kiocb_cachep, req); + if (--allocated <= avail) + break; + } + } + + batch->count -= allocated; + list_for_each_entry(req, &batch->head, ki_batch) { list_add(&req->ki_list, &ctx->active_reqs); ctx->reqs_active++; - okay = 1; } - kunmap_atomic(ring, KM_USER0); - spin_unlock_irq(&ctx->ctx_lock); - if (!okay) { - kmem_cache_free(kiocb_cachep, req); - req = NULL; - } + kunmap_atomic(ring); + spin_unlock_irq(&ctx->ctx_lock); - return req; +out: + return allocated; } -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct kiocb *aio_get_req(struct kioctx *ctx, + struct kiocb_batch *batch) { struct kiocb *req; - /* Handle a potential starvation case -- should be exceedingly rare as - * requests will be stuck on fput_head only if the aio_fput_routine is - * delayed and the requests were the last user of the struct file. - */ - req = __aio_get_req(ctx); - if (unlikely(NULL == req)) { - aio_fput_routine(NULL); - req = __aio_get_req(ctx); - } + + if (list_empty(&batch->head)) + if (kiocb_batch_refill(ctx, batch) == 0) + return NULL; + req = list_first_entry(&batch->head, struct kiocb, ki_batch); + list_del(&req->ki_batch); return req; } @@ -1515,7 +1588,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb, bool compat) + struct iocb *iocb, struct kiocb_batch *batch, + bool compat) { struct kiocb *req; struct file *file; @@ -1541,7 +1615,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!file)) return -EBADF; - req = aio_get_req(ctx); /* returns with 2 references to req */ + req = aio_get_req(ctx, batch); /* returns with 2 references to req */ if (unlikely(!req)) { fput(file); return -EAGAIN; @@ -1621,8 +1695,9 @@ long do_io_submit(aio_context_t ctx_id, long nr, { struct kioctx *ctx; long ret = 0; - int i; + int i = 0; struct blk_plug plug; + struct kiocb_batch batch; if (unlikely(nr < 0)) return -EINVAL; @@ -1639,6 +1714,8 @@ long do_io_submit(aio_context_t ctx_id, long nr, return -EINVAL; } + kiocb_batch_init(&batch, nr); + blk_start_plug(&plug); /* @@ -1659,12 +1736,13 @@ long do_io_submit(aio_context_t ctx_id, long nr, break; } - ret = io_submit_one(ctx, user_iocb, &tmp, compat); + ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat); if (ret) break; } blk_finish_plug(&plug); + kiocb_batch_free(&batch); put_ioctx(ctx); return i ? i : ret; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dd0fdfc..21ac5ee 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * might try to exec. This is because the brk will * follow the loader, and is not movable. */ #if defined(CONFIG_X86) || defined(CONFIG_ARM) - load_bias = 0; + /* Memory randomization might have been switched off + * in runtime via sysctl. + * If that is the case, retain the original non-zero + * load_bias value in order to establish proper + * non-randomized mappings. + */ + if (current->flags & PF_RANDOMIZE) + load_bias = 0; + else + load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); #else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); #endif diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 3ebc437..1cbdeea 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke case HFS_EXT_CNID: hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); + if (HFS_I(tree->inode)->alloc_blocks > + HFS_I(tree->inode)->first_blocks) { + printk(KERN_ERR "hfs: invalid btree extent records\n"); + unlock_new_inode(tree->inode); + goto free_inode; + } + tree->inode->i_mapping->a_ops = &hfs_btree_aops; break; case HFS_CAT_CNID: hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize, mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); + + if (!HFS_I(tree->inode)->first_blocks) { + printk(KERN_ERR "hfs: invalid btree extent records " + "(0 size).\n"); + unlock_new_inode(tree->inode); + goto free_inode; + } + tree->inode->i_mapping->a_ops = &hfs_btree_aops; break; default: @@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke } unlock_new_inode(tree->inode); - if (!HFS_I(tree->inode)->first_blocks) { - printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n"); - goto free_inode; - } - mapping = tree->inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 562adab..f950059 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -20,6 +20,7 @@ #include <linux/statfs.h> #include <linux/cdrom.h> #include <linux/parser.h> +#include <linux/mpage.h> #include "isofs.h" #include "zisofs.h" @@ -1148,7 +1149,13 @@ struct buffer_head *isofs_bread(struct inode *inode, sector_t block) static int isofs_readpage(struct file *file, struct page *page) { - return block_read_full_page(page,isofs_get_block); + return mpage_readpage(page, isofs_get_block); +} + +static int isofs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, isofs_get_block); } static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) @@ -1158,6 +1165,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) static const struct address_space_operations isofs_aops = { .readpage = isofs_readpage, + .readpages = isofs_readpages, .bmap = _isofs_bmap }; diff --git a/fs/proc/base.c b/fs/proc/base.c index 851ba3d..2db1bd3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1652,12 +1652,46 @@ out: return error; } +static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *task = get_proc_task(inode); + int rc; + + if (task == NULL) + return -ESRCH; + + rc = -EACCES; + if (lock_trace(task)) + goto out_task; + + generic_fillattr(inode, stat); + unlock_trace(task); + rc = 0; +out_task: + put_task_struct(task); + return rc; +} + static const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .follow_link = proc_pid_follow_link, .setattr = proc_setattr, }; +static const struct inode_operations proc_fdinfo_link_inode_operations = { + .setattr = proc_setattr, + .getattr = proc_pid_fd_link_getattr, +}; + +static const struct inode_operations proc_fd_link_inode_operations = { + .readlink = proc_pid_readlink, + .follow_link = proc_pid_follow_link, + .setattr = proc_setattr, + .getattr = proc_pid_fd_link_getattr, +}; + /* building an inode */ @@ -1889,49 +1923,61 @@ out: static int proc_fd_info(struct inode *inode, struct path *path, char *info) { - struct task_struct *task = get_proc_task(inode); - struct files_struct *files = NULL; + struct task_struct *task; + struct files_struct *files; struct file *file; int fd = proc_fd(inode); + int rc; - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (file) { - unsigned int f_flags; - struct fdtable *fdt; - - fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; - if (FD_ISSET(fd, fdt->close_on_exec)) - f_flags |= O_CLOEXEC; - - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; + task = get_proc_task(inode); + if (!task) + return -ENOENT; + + rc = -EACCES; + if (lock_trace(task)) + goto out_task; + + rc = -ENOENT; + files = get_files_struct(task); + if (files == NULL) + goto out_unlock; + + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + unsigned int f_flags; + struct fdtable *fdt; + + fdt = files_fdtable(files); + f_flags = file->f_flags & ~O_CLOEXEC; + if (FD_ISSET(fd, fdt->close_on_exec)) + f_flags |= O_CLOEXEC; + + if (path) { + *path = file->f_path; + path_get(&file->f_path); } - spin_unlock(&files->file_lock); - put_files_struct(files); - } - return -ENOENT; + if (info) + snprintf(info, PROC_FDINFO_MAX, + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, + f_flags); + rc = 0; + } else + rc = -ENOENT; + spin_unlock(&files->file_lock); + put_files_struct(files); + +out_unlock: + unlock_trace(task); +out_task: + put_task_struct(task); + return rc; } static int proc_fd_link(struct inode *inode, struct path *path) @@ -2026,7 +2072,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, spin_unlock(&files->file_lock); put_files_struct(files); - inode->i_op = &proc_pid_link_inode_operations; + inode->i_op = &proc_fd_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; d_set_d_op(dentry, &tid_fd_dentry_operations); @@ -2058,7 +2104,12 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, if (fd == ~0U) goto out; + result = ERR_PTR(-EACCES); + if (lock_trace(task)) + goto out; + result = instantiate(dir, dentry, task, &fd); + unlock_trace(task); out: put_task_struct(task); out_no_task: @@ -2078,23 +2129,28 @@ static int proc_readfd_common(struct file * filp, void * dirent, retval = -ENOENT; if (!p) goto out_no_task; + + retval = -EACCES; + if (lock_trace(p)) + goto out; + retval = 0; fd = filp->f_pos; switch (fd) { case 0: if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) - goto out; + goto out_unlock; filp->f_pos++; case 1: ino = parent_ino(dentry); if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out; + goto out_unlock; filp->f_pos++; default: files = get_files_struct(p); if (!files) - goto out; + goto out_unlock; rcu_read_lock(); for (fd = filp->f_pos-2; fd < files_fdtable(files)->max_fds; @@ -2118,6 +2174,9 @@ static int proc_readfd_common(struct file * filp, void * dirent, rcu_read_unlock(); put_files_struct(files); } + +out_unlock: + unlock_trace(p); out: put_task_struct(p); out_no_task: @@ -2195,6 +2254,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; + inode->i_op = &proc_fdinfo_link_inode_operations; d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b441132..a6b6217 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -3,6 +3,7 @@ */ #include <linux/init.h> #include <linux/sysctl.h> +#include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/namei.h> @@ -14,6 +15,15 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +void proc_sys_poll_notify(struct ctl_table_poll *poll) +{ + if (!poll) + return; + + atomic_inc(&poll->event); + wake_up_interruptible(&poll->wait); +} + static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -176,6 +186,39 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); } +static int proc_sys_open(struct inode *inode, struct file *filp) +{ + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + + if (table->poll) + filp->private_data = proc_sys_poll_event(table->poll); + + return 0; +} + +static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + unsigned long event = (unsigned long)filp->private_data; + unsigned int ret = DEFAULT_POLLMASK; + + if (!table->proc_handler) + goto out; + + if (!table->poll) + goto out; + + poll_wait(filp, &table->poll->wait, wait); + + if (event != atomic_read(&table->poll->event)) { + filp->private_data = proc_sys_poll_event(table->poll); + ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI; + } + +out: + return ret; +} static int proc_sys_fill_cache(struct file *filp, void *dirent, filldir_t filldir, @@ -364,12 +407,15 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct } static const struct file_operations proc_sys_file_operations = { + .open = proc_sys_open, + .poll = proc_sys_poll, .read = proc_sys_read, .write = proc_sys_write, .llseek = default_llseek, }; static const struct file_operations proc_sys_dir_file_operations = { + .read = generic_read_dir, .readdir = proc_sys_readdir, .llseek = generic_file_llseek, }; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index eacb166..462ceb3 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -23,7 +23,6 @@ * caches is sufficient. */ -#include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> @@ -288,14 +287,7 @@ static int __init init_ramfs_fs(void) { return register_filesystem(&ramfs_fs_type); } - -static void __exit exit_ramfs_fs(void) -{ - unregister_filesystem(&ramfs_fs_type); -} - module_init(init_ramfs_fs) -module_exit(exit_ramfs_fs) int __init init_rootfs(void) { @@ -311,5 +303,3 @@ int __init init_rootfs(void) return err; } - -MODULE_LICENSE("GPL"); diff --git a/include/linux/aio.h b/include/linux/aio.h index 2dcb72b..2314ad8 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -117,6 +117,7 @@ struct kiocb { struct list_head ki_list; /* the aio core uses this * for cancellation */ + struct list_head ki_batch; /* batch allocation */ /* * If the aio_resfd field of the userspace iocb is not zero, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index da7e4bc..1b7f9d5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -516,7 +516,7 @@ struct cgroup_subsys { struct list_head sibling; /* used when use_id == true */ struct idr idr; - spinlock_t id_lock; + rwlock_t id_lock; /* should be defined only by modular subsystems */ struct module *module; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 347fdc3..be86ae1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -1,6 +1,7 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H +#include <linux/string.h> #include <linux/device.h> #include <linux/err.h> #include <linux/dma-attrs.h> @@ -117,6 +118,15 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) return -EIO; } +static inline void *dma_zalloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + void *ret = dma_alloc_coherent(dev, size, dma_handle, flag); + if (ret) + memset(ret, 0, size); + return ret; +} + #ifdef CONFIG_HAS_DMA static inline int dma_get_cache_alignment(void) { diff --git a/include/linux/magic.h b/include/linux/magic.h index 1e5df2af..2d4beab 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -30,11 +30,11 @@ #define ANON_INODE_FS_MAGIC 0x09041934 #define PSTOREFS_MAGIC 0x6165676C -#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ -#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ -#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ -#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ -#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */ +#define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ +#define MINIX2_SUPER_MAGIC 0x2468 /* minix v2 fs, 14 char names */ +#define MINIX2_SUPER_MAGIC2 0x2478 /* minix v2 fs, 30 char names */ +#define MINIX3_SUPER_MAGIC 0x4d5a /* minix v3 fs, 60 char names */ #define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ #define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ac797fa..b87068a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -78,8 +78,8 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); -int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask); +int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); @@ -88,26 +88,28 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { - struct mem_cgroup *mem; + struct mem_cgroup *memcg; rcu_read_lock(); - mem = mem_cgroup_from_task(rcu_dereference((mm)->owner)); + memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); rcu_read_unlock(); - return cgroup == mem; + return cgroup == memcg; } -extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); +extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); extern int mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask); -extern void mem_cgroup_end_migration(struct mem_cgroup *mem, +extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok); /* * For memory reclaim. */ -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); +int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, + struct zone *zone); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lrumask); @@ -148,7 +150,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, unsigned long *total_scanned); -u64 mem_cgroup_get_limit(struct mem_cgroup *mem); +u64 mem_cgroup_get_limit(struct mem_cgroup *memcg); void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -244,18 +246,20 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm return NULL; } -static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) +static inline int mm_match_cgroup(struct mm_struct *mm, + struct mem_cgroup *memcg) { return 1; } static inline int task_in_mem_cgroup(struct task_struct *task, - const struct mem_cgroup *mem) + const struct mem_cgroup *memcg) { return 1; } -static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) +static inline struct cgroup_subsys_state + *mem_cgroup_css(struct mem_cgroup *memcg) { return NULL; } @@ -267,22 +271,22 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage, return 0; } -static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, +static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok) { } -static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) +static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg) { return 0; } -static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, +static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *memcg, int priority) { } -static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, +static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *memcg, int priority) { } @@ -293,13 +297,13 @@ static inline bool mem_cgroup_disabled(void) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) { return 1; } static inline int -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) +mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) { return 1; } @@ -348,7 +352,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, } static inline -u64 mem_cgroup_get_limit(struct mem_cgroup *mem) +u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) { return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 3b3e3b8..3dc3a8c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -356,36 +356,50 @@ static inline struct page *compound_head(struct page *page) return page; } +/* + * The atomic page->_mapcount, starts from -1: so that transitions + * both from it and to it can be tracked, using atomic_inc_and_test + * and atomic_add_negative(-1). + */ +static inline void reset_page_mapcount(struct page *page) +{ + atomic_set(&(page)->_mapcount, -1); +} + +static inline int page_mapcount(struct page *page) +{ + return atomic_read(&(page)->_mapcount) + 1; +} + static inline int page_count(struct page *page) { return atomic_read(&compound_head(page)->_count); } +static inline void get_huge_page_tail(struct page *page) +{ + /* + * __split_huge_page_refcount() cannot run + * from under us. + */ + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); +} + +extern bool __get_page_tail(struct page *page); + static inline void get_page(struct page *page) { + if (unlikely(PageTail(page))) + if (likely(__get_page_tail(page))) + return; /* * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_count. Only if - * we're getting a tail page, the elevated page->_count is - * required only in the head page, so for tail pages the - * bugcheck only verifies that the page->_count isn't - * negative. + * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); + VM_BUG_ON(atomic_read(&page->_count) <= 0); atomic_inc(&page->_count); - /* - * Getting a tail page will elevate both the head and tail - * page->_count(s). - */ - if (unlikely(PageTail(page))) { - /* - * This is safe only because - * __split_huge_page_refcount can't run under - * get_page(). - */ - VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); - atomic_inc(&page->first_page->_count); - } } static inline struct page *virt_to_head_page(const void *x) @@ -804,21 +818,6 @@ static inline pgoff_t page_index(struct page *page) } /* - * The atomic page->_mapcount, like _count, starts from -1: - * so that transitions both from it and to it can be tracked, - * using atomic_inc_and_test and atomic_add_negative(-1). - */ -static inline void reset_page_mapcount(struct page *page) -{ - atomic_set(&(page)->_mapcount, -1); -} - -static inline int page_mapcount(struct page *page) -{ - return atomic_read(&(page)->_mapcount) + 1; -} - -/* * Return true if this page is mapped into pagetables. */ static inline int page_mapped(struct page *page) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3e01a19..5b42f1b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -62,10 +62,23 @@ struct page { struct { union { - atomic_t _mapcount; /* Count of ptes mapped in mms, - * to show when page is mapped - * & limit reverse map searches. - */ + /* + * Count of ptes mapped in + * mms, to show when page is + * mapped & limit reverse map + * searches. + * + * Used also for tail pages + * refcounting instead of + * _count. Tail pages cannot + * be mapped and keeping the + * tail page _count zero at + * all times guarantees + * get_page_unless_zero() will + * never succeed on tail + * pages. + */ + atomic_t _mapcount; struct { unsigned inuse:16; diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h new file mode 100644 index 0000000..0035abe --- /dev/null +++ b/include/linux/pps-gpio.h @@ -0,0 +1,32 @@ +/* + * pps-gpio.h -- PPS client for GPIOs + * + * + * Copyright (C) 2011 James Nuss <jamesnuss@nanometrics.ca> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PPS_GPIO_H +#define _PPS_GPIO_H + +struct pps_gpio_platform_data { + bool assert_falling_edge; + bool capture_clear; + unsigned int gpio_pin; + const char *gpio_label; +}; + +#endif diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 0cee015..b66d13d 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -39,5 +39,6 @@ #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 +#define RIO_DID_TSI721 0x80ab #endif /* LINUX_RIO_IDS_H */ diff --git a/include/linux/sem.h b/include/linux/sem.h index 1feb2de..10d6b22 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -83,13 +83,6 @@ struct seminfo { struct task_struct; -/* One semaphore structure for each semaphore in the system. */ -struct sem { - int semval; /* current value */ - int sempid; /* pid of last operation */ - struct list_head sem_pending; /* pending single-sop operations */ -}; - /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm ____cacheline_aligned_in_smp @@ -103,51 +96,21 @@ struct sem_array { int complex_count; /* pending complex operations */ }; -/* One queue for each sleeping process in the system. */ -struct sem_queue { - struct list_head simple_list; /* queue of pending operations */ - struct list_head list; /* queue of pending operations */ - struct task_struct *sleeper; /* this process */ - struct sem_undo *undo; /* undo structure */ - int pid; /* process id of requesting process */ - int status; /* completion status of operation */ - struct sembuf *sops; /* array of pending operations */ - int nsops; /* number of operations */ - int alter; /* does the operation alter the array? */ -}; - -/* Each task has a list of undo requests. They are executed automatically - * when the process exits. - */ -struct sem_undo { - struct list_head list_proc; /* per-process list: all undos from one process. */ - /* rcu protected */ - struct rcu_head rcu; /* rcu struct for sem_undo() */ - struct sem_undo_list *ulp; /* sem_undo_list for the process */ - struct list_head list_id; /* per semaphore array list: all undos for one array */ - int semid; /* semaphore set identifier */ - short * semadj; /* array of adjustments, one per semaphore */ -}; - -/* sem_undo_list controls shared access to the list of sem_undo structures - * that may be shared among all a CLONE_SYSVSEM task group. - */ -struct sem_undo_list { - atomic_t refcnt; - spinlock_t lock; - struct list_head list_proc; -}; +#ifdef CONFIG_SYSVIPC struct sysv_sem { struct sem_undo_list *undo_list; }; -#ifdef CONFIG_SYSVIPC - extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); extern void exit_sem(struct task_struct *tsk); #else + +struct sysv_sem { + /* empty */ +}; + static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) { return 0; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 9a1ec10..703cfa3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -931,6 +931,7 @@ enum #ifdef __KERNEL__ #include <linux/list.h> #include <linux/rcupdate.h> +#include <linux/wait.h> /* For the /proc/sys support */ struct ctl_table; @@ -1011,6 +1012,26 @@ extern int proc_do_large_bitmap(struct ctl_table *, int, * cover common cases. */ +/* Support for userspace poll() to watch for changes */ +struct ctl_table_poll { + atomic_t event; + wait_queue_head_t wait; +}; + +static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) +{ + return (void *)(unsigned long)atomic_read(&poll->event); +} + +void proc_sys_poll_notify(struct ctl_table_poll *poll); + +#define __CTL_TABLE_POLL_INITIALIZER(name) { \ + .event = ATOMIC_INIT(0), \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } + +#define DEFINE_CTL_TABLE_POLL(name) \ + struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) + /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { @@ -1021,6 +1042,7 @@ struct ctl_table struct ctl_table *child; struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ + struct ctl_table_poll *poll; void *extra1; void *extra2; }; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 4e5b021..c714ed7 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -37,6 +37,14 @@ struct new_utsname { #include <linux/nsproxy.h> #include <linux/err.h> +enum uts_proc { + UTS_PROC_OSTYPE, + UTS_PROC_OSRELEASE, + UTS_PROC_VERSION, + UTS_PROC_HOSTNAME, + UTS_PROC_DOMAINNAME, +}; + struct user_namespace; extern struct user_namespace init_user_ns; @@ -80,6 +88,14 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags, } #endif +#ifdef CONFIG_PROC_SYSCTL +extern void uts_proc_notify(enum uts_proc proc); +#else +static inline void uts_proc_notify(enum uts_proc proc) +{ +} +#endif + static inline struct new_utsname *utsname(void) { return ¤t->nsproxy->uts_ns->name; diff --git a/init/Kconfig b/init/Kconfig index 31ba0fd..43298f9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -947,7 +947,7 @@ config UID16 config SYSCTL_SYSCALL bool "Sysctl syscall support" if EXPERT depends on PROC_SYSCTL - default y + default n select SYSCTL ---help--- sys_sysctl uses binary paths that have been found challenging @@ -959,7 +959,7 @@ config SYSCTL_SYSCALL trying to save some space it is probably safe to disable this, making your kernel marginally smaller. - If unsure say Y here. + If unsure say N here. config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EXPERT diff --git a/init/do_mounts.c b/init/do_mounts.c index c0851a8..0f6e1d9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -28,7 +28,7 @@ int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ int root_mountflags = MS_RDONLY | MS_SILENT; static char * __initdata root_device_name; static char __initdata saved_root_name[64]; -static int __initdata root_wait; +static int root_wait; dev_t ROOT_DEV; @@ -85,12 +85,15 @@ no_match: /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID - * @uuid: 36 byte char array containing a hex ascii UUID + * @uuid: min 36 byte char array containing a hex ascii UUID * * The function will return the first partition which contains a matching * UUID value in its partition_meta_info struct. This does not search * by filesystem UUIDs. * + * If @uuid is followed by a "/PARTNROFF=%d", then the number will be + * extracted and used as an offset from the partition identified by the UUID. + * * Returns the matching dev_t on success or 0 on failure. */ static dev_t devt_from_partuuid(char *uuid_str) @@ -98,6 +101,28 @@ static dev_t devt_from_partuuid(char *uuid_str) dev_t res = 0; struct device *dev = NULL; u8 uuid[16]; + struct gendisk *disk; + struct hd_struct *part; + int offset = 0; + + if (strlen(uuid_str) < 36) + goto done; + + /* Check for optional partition number offset attributes. */ + if (uuid_str[36]) { + char c = 0; + /* Explicitly fail on poor PARTUUID syntax. */ + if (sscanf(&uuid_str[36], + "/PARTNROFF=%d%c", &offset, &c) != 1) { + printk(KERN_ERR "VFS: PARTUUID= is invalid.\n" + "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n"); + if (root_wait) + printk(KERN_ERR + "Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + goto done; + } + } /* Pack the requested UUID in the expected format. */ part_pack_uuid(uuid_str, uuid); @@ -107,8 +132,21 @@ static dev_t devt_from_partuuid(char *uuid_str) goto done; res = dev->devt; - put_device(dev); + /* Attempt to find the partition by offset. */ + if (!offset) + goto no_offset; + + res = 0; + disk = part_to_disk(dev_to_part(dev)); + part = disk_get_part(disk, dev_to_part(dev)->partno + offset); + if (part) { + res = part_devt(part); + put_device(part_to_dev(part)); + } + +no_offset: + put_device(dev); done: return res; } @@ -126,6 +164,8 @@ done: * used when disk name of partitioned disk ends on a digit. * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the * unique id of a partition if the partition table provides it. + * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to + * a partition with a known unique id. * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -143,8 +183,6 @@ dev_t name_to_dev_t(char *name) #ifdef CONFIG_BLOCK if (strncmp(name, "PARTUUID=", 9) == 0) { name += 9; - if (strlen(name) != 36) - goto fail; res = devt_from_partuuid(name); if (!res) goto fail; diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index fe9acb0..887629e 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -120,6 +120,20 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) } /* + * Read 512 bytes further to check if cramfs is padded + */ + sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); + sys_read(fd, buf, size); + + if (cramfsb->magic == CRAMFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: cramfs filesystem found at block %d\n", + start_block); + nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; + goto done; + } + + /* * Read block 1 to test for minix and ext2 superblock */ sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); @@ -90,6 +90,52 @@ #include <asm/uaccess.h> #include "util.h" +/* One semaphore structure for each semaphore in the system. */ +struct sem { + int semval; /* current value */ + int sempid; /* pid of last operation */ + struct list_head sem_pending; /* pending single-sop operations */ +}; + +/* One queue for each sleeping process in the system. */ +struct sem_queue { + struct list_head simple_list; /* queue of pending operations */ + struct list_head list; /* queue of pending operations */ + struct task_struct *sleeper; /* this process */ + struct sem_undo *undo; /* undo structure */ + int pid; /* process id of requesting process */ + int status; /* completion status of operation */ + struct sembuf *sops; /* array of pending operations */ + int nsops; /* number of operations */ + int alter; /* does *sops alter the array? */ +}; + +/* Each task has a list of undo requests. They are executed automatically + * when the process exits. + */ +struct sem_undo { + struct list_head list_proc; /* per-process list: * + * all undos from one process + * rcu protected */ + struct rcu_head rcu; /* rcu struct for sem_undo */ + struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ + struct list_head list_id; /* per semaphore array list: + * all undos for one array */ + int semid; /* semaphore set identifier */ + short *semadj; /* array of adjustments */ + /* one per semaphore */ +}; + +/* sem_undo_list controls shared access to the list of sem_undo structures + * that may be shared among all a CLONE_SYSVSEM task group. + */ +struct sem_undo_list { + atomic_t refcnt; + spinlock_t lock; + struct list_head list_proc; +}; + + #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) @@ -1426,6 +1472,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, queue.status = -EINTR; queue.sleeper = current; + +sleep_again: current->state = TASK_INTERRUPTIBLE; sem_unlock(sma); @@ -1460,7 +1508,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, * Array removed? If yes, leave without sem_unlock(). */ if (IS_ERR(sma)) { - error = -EIDRM; goto out_free; } @@ -1479,6 +1526,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, */ if (timeout && jiffies_left == 0) error = -EAGAIN; + + /* + * If the wakeup was spurious, just retry + */ + if (error == -EINTR && !signal_pending(current)) + goto sleep_again; + unlink_queue(sma, &queue); out_unlock_free: diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 453100a..d9d5648 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2027,7 +2027,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) goto out_free_group_list; /* prevent changes to the threadgroup list while we take a snapshot. */ - rcu_read_lock(); + read_lock(&tasklist_lock); if (!thread_group_leader(leader)) { /* * a race with de_thread from another thread's exec() may strip @@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * throw this task away and try again (from cgroup_procs_write); * this is "double-double-toil-and-trouble-check locking". */ - rcu_read_unlock(); + read_unlock(&tasklist_lock); retval = -EAGAIN; goto out_free_group_list; } @@ -2057,7 +2057,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) } while_each_thread(leader, tsk); /* remember the number of threads in the array for later. */ group_size = i; - rcu_read_unlock(); + read_unlock(&tasklist_lock); /* * step 1: check that we can legitimately attach to the cgroup. @@ -2135,14 +2135,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) oldcgrp = task_cgroup_from_root(tsk, root); if (cgrp == oldcgrp) continue; - /* attach each task to each subsystem */ - for_each_subsys(root, ss) { - if (ss->attach_task) - ss->attach_task(cgrp, tsk); - } /* if the thread is PF_EXITING, it can just get skipped. */ retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); - BUG_ON(retval != 0 && retval != -ESRCH); + if (retval == 0) { + /* attach each task to each subsystem */ + for_each_subsys(root, ss) { + if (ss->attach_task) + ss->attach_task(cgrp, tsk); + } + } else { + BUG_ON(retval != -ESRCH); + } } /* nothing is sensitive to fork() after this point. */ @@ -4880,9 +4883,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) rcu_assign_pointer(id->css, NULL); rcu_assign_pointer(css->id, NULL); - spin_lock(&ss->id_lock); + write_lock(&ss->id_lock); idr_remove(&ss->idr, id->id); - spin_unlock(&ss->id_lock); + write_unlock(&ss->id_lock); kfree_rcu(id, rcu_head); } EXPORT_SYMBOL_GPL(free_css_id); @@ -4908,10 +4911,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) error = -ENOMEM; goto err_out; } - spin_lock(&ss->id_lock); + write_lock(&ss->id_lock); /* Don't use 0. allocates an ID of 1-65535 */ error = idr_get_new_above(&ss->idr, newid, 1, &myid); - spin_unlock(&ss->id_lock); + write_unlock(&ss->id_lock); /* Returns error when there are no free spaces for new ID.*/ if (error) { @@ -4926,9 +4929,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) return newid; remove_idr: error = -ENOSPC; - spin_lock(&ss->id_lock); + write_lock(&ss->id_lock); idr_remove(&ss->idr, myid); - spin_unlock(&ss->id_lock); + write_unlock(&ss->id_lock); err_out: kfree(newid); return ERR_PTR(error); @@ -4940,7 +4943,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, { struct css_id *newid; - spin_lock_init(&ss->id_lock); + rwlock_init(&ss->id_lock); idr_init(&ss->idr); newid = get_new_cssid(ss, 0); @@ -5035,9 +5038,9 @@ css_get_next(struct cgroup_subsys *ss, int id, * scan next entry from bitmap(tree), tmpid is updated after * idr_get_next(). */ - spin_lock(&ss->id_lock); + read_lock(&ss->id_lock); tmp = idr_get_next(&ss->idr, &tmpid); - spin_unlock(&ss->id_lock); + read_unlock(&ss->id_lock); if (!tmp) break; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10131fd..ed0ff44 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -949,6 +949,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { + bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed); + repeat: /* * Allow tasks that have access to memory reserves because they have @@ -963,7 +965,6 @@ repeat: nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); - /* * ensure checking ->mems_allowed_change_disable after setting all new * allowed nodes. @@ -980,9 +981,11 @@ repeat: /* * Allocation of memory is very fast, we needn't sleep when waiting - * for the read-side. + * for the read-side. No wait is necessary, however, if at least one + * node remains unchanged. */ - while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { + while (masks_disjoint && + ACCESS_ONCE(tsk->mems_allowed_change_disable)) { task_unlock(tsk); if (!task_curr(tsk)) yield(); diff --git a/kernel/sys.c b/kernel/sys.c index 5845950..d06c091 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1286,6 +1286,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; } + uts_proc_notify(UTS_PROC_HOSTNAME); up_write(&uts_sem); return errno; } @@ -1336,6 +1337,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; } + uts_proc_notify(UTS_PROC_DOMAINNAME); up_write(&uts_sem); return errno; } diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index a2cd77e..3b0d48e 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -13,6 +13,7 @@ #include <linux/uts.h> #include <linux/utsname.h> #include <linux/sysctl.h> +#include <linux/wait.h> static void *get_uts(ctl_table *table, int write) { @@ -51,12 +52,19 @@ static int proc_do_uts_string(ctl_table *table, int write, uts_table.data = get_uts(table, write); r = proc_dostring(&uts_table,write,buffer,lenp, ppos); put_uts(table, write, uts_table.data); + + if (write) + proc_sys_poll_notify(table->poll); + return r; } #else #define proc_do_uts_string NULL #endif +static DEFINE_CTL_TABLE_POLL(hostname_poll); +static DEFINE_CTL_TABLE_POLL(domainname_poll); + static struct ctl_table uts_kern_table[] = { { .procname = "ostype", @@ -85,6 +93,7 @@ static struct ctl_table uts_kern_table[] = { .maxlen = sizeof(init_uts_ns.name.nodename), .mode = 0644, .proc_handler = proc_do_uts_string, + .poll = &hostname_poll, }, { .procname = "domainname", @@ -92,6 +101,7 @@ static struct ctl_table uts_kern_table[] = { .maxlen = sizeof(init_uts_ns.name.domainname), .mode = 0644, .proc_handler = proc_do_uts_string, + .poll = &domainname_poll, }, {} }; @@ -105,6 +115,19 @@ static struct ctl_table uts_root_table[] = { {} }; +#ifdef CONFIG_PROC_SYSCTL +/* + * Notify userspace about a change in a certain entry of uts_kern_table, + * identified by the parameter proc. + */ +void uts_proc_notify(enum uts_proc proc) +{ + struct ctl_table *table = &uts_kern_table[proc]; + + proc_sys_poll_notify(table->poll); +} +#endif + static int __init utsname_sysctl_init(void) { register_sysctl_table(uts_root_table); @@ -944,6 +944,7 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, { int ret, id; unsigned int max; + unsigned long flags; BUG_ON((int)start < 0); BUG_ON((int)end < 0); @@ -959,7 +960,7 @@ again: if (!ida_pre_get(ida, gfp_mask)) return -ENOMEM; - spin_lock(&simple_ida_lock); + spin_lock_irqsave(&simple_ida_lock, flags); ret = ida_get_new_above(ida, start, &id); if (!ret) { if (id > max) { @@ -969,7 +970,7 @@ again: ret = id; } } - spin_unlock(&simple_ida_lock); + spin_unlock_irqrestore(&simple_ida_lock, flags); if (unlikely(ret == -EAGAIN)) goto again; @@ -985,10 +986,12 @@ EXPORT_SYMBOL(ida_simple_get); */ void ida_simple_remove(struct ida *ida, unsigned int id) { + unsigned long flags; + BUG_ON((int)id < 0); - spin_lock(&simple_ida_lock); + spin_lock_irqsave(&simple_ida_lock, flags); ida_remove(ida, id); - spin_unlock(&simple_ida_lock); + spin_unlock_irqrestore(&simple_ida_lock, flags); } EXPORT_SYMBOL(ida_simple_remove); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 860ec21..4298aba 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -990,7 +990,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm, page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; VM_BUG_ON(!PageCompound(page)); if (flags & FOLL_GET) - get_page(page); + get_page_foll(page); out: return page; @@ -1202,6 +1202,7 @@ static void __split_huge_page_refcount(struct page *page) unsigned long head_index = page->index; struct zone *zone = page_zone(page); int zonestat; + int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); @@ -1210,11 +1211,27 @@ static void __split_huge_page_refcount(struct page *page) for (i = 1; i < HPAGE_PMD_NR; i++) { struct page *page_tail = page + i; - /* tail_page->_count cannot change */ - atomic_sub(atomic_read(&page_tail->_count), &page->_count); - BUG_ON(page_count(page) <= 0); - atomic_add(page_mapcount(page) + 1, &page_tail->_count); - BUG_ON(atomic_read(&page_tail->_count) <= 0); + /* tail_page->_mapcount cannot change */ + BUG_ON(page_mapcount(page_tail) < 0); + tail_count += page_mapcount(page_tail); + /* check for overflow */ + BUG_ON(tail_count < 0); + BUG_ON(atomic_read(&page_tail->_count) != 0); + /* + * tail_page->_count is zero and not changing from + * under us. But get_page_unless_zero() may be running + * from under us on the tail_page. If we used + * atomic_set() below instead of atomic_add(), we + * would then run atomic_set() concurrently with + * get_page_unless_zero(), and atomic_set() is + * implemented in C not using locked ops. spin_unlock + * on x86 sometime uses locked ops because of PPro + * errata 66, 92, so unless somebody can guarantee + * atomic_set() here would be safe on all archs (and + * not only on x86), it's safer to use atomic_add(). + */ + atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1, + &page_tail->_count); /* after clearing PageTail the gup refcount can be released */ smp_mb(); @@ -1232,10 +1249,7 @@ static void __split_huge_page_refcount(struct page *page) (1L << PG_uptodate))); page_tail->flags |= (1L << PG_dirty); - /* - * 1) clear PageTail before overwriting first_page - * 2) clear PageTail before clearing PageHead for VM_BUG_ON - */ + /* clear PageTail before overwriting first_page */ smp_wmb(); /* @@ -1252,7 +1266,6 @@ static void __split_huge_page_refcount(struct page *page) * status is achieved setting a reserved bit in the * pmd, not by clearing the present bit. */ - BUG_ON(page_mapcount(page_tail)); page_tail->_mapcount = page->_mapcount; BUG_ON(page_tail->mapping); @@ -1269,6 +1282,8 @@ static void __split_huge_page_refcount(struct page *page) lru_add_page_tail(zone, page, page_tail); } + atomic_sub(tail_count, &page->_count); + BUG_ON(atomic_read(&page->_count) <= 0); __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); diff --git a/mm/internal.h b/mm/internal.h index d071d38..2189af4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -37,6 +37,52 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } +static inline void __get_page_tail_foll(struct page *page, + bool get_page_head) +{ + /* + * If we're getting a tail page, the elevated page->_count is + * required only in the head page and we will elevate the head + * page->_count and tail page->_mapcount. + * + * We elevate page_tail->_mapcount for tail pages to force + * page_tail->_count to be zero at all times to avoid getting + * false positives from get_page_unless_zero() with + * speculative page access (like in + * page_cache_get_speculative()) on tail pages. + */ + VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON(page_mapcount(page) < 0); + if (get_page_head) + atomic_inc(&page->first_page->_count); + atomic_inc(&page->_mapcount); +} + +/* + * This is meant to be called as the FOLL_GET operation of + * follow_page() and it must be called while holding the proper PT + * lock while the pte (or pmd_trans_huge) is still mapping the page. + */ +static inline void get_page_foll(struct page *page) +{ + if (unlikely(PageTail(page))) + /* + * This is safe only because + * __split_huge_page_refcount() can't run under + * get_page_foll() because we hold the proper PT lock. + */ + __get_page_tail_foll(page, true); + else { + /* + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_count. + */ + VM_BUG_ON(atomic_read(&page->_count) <= 0); + atomic_inc(&page->_count); + } +} + extern unsigned long highest_memmap_pfn; /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d57555..7af1d5e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -201,8 +201,8 @@ struct mem_cgroup_eventfd_list { struct eventfd_ctx *eventfd; }; -static void mem_cgroup_threshold(struct mem_cgroup *mem); -static void mem_cgroup_oom_notify(struct mem_cgroup *mem); +static void mem_cgroup_threshold(struct mem_cgroup *memcg); +static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); /* * The memory controller data structure. The memory controller controls both @@ -362,29 +362,29 @@ enum charge_type { #define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 #define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) -static void mem_cgroup_get(struct mem_cgroup *mem); -static void mem_cgroup_put(struct mem_cgroup *mem); -static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); -static void drain_all_stock_async(struct mem_cgroup *mem); +static void mem_cgroup_get(struct mem_cgroup *memcg); +static void mem_cgroup_put(struct mem_cgroup *memcg); +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); +static void drain_all_stock_async(struct mem_cgroup *memcg); static struct mem_cgroup_per_zone * -mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) +mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid) { - return &mem->info.nodeinfo[nid]->zoneinfo[zid]; + return &memcg->info.nodeinfo[nid]->zoneinfo[zid]; } -struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) +struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg) { - return &mem->css; + return &memcg->css; } static struct mem_cgroup_per_zone * -page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page) +page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) { int nid = page_to_nid(page); int zid = page_zonenum(page); - return mem_cgroup_zoneinfo(mem, nid, zid); + return mem_cgroup_zoneinfo(memcg, nid, zid); } static struct mem_cgroup_tree_per_zone * @@ -403,7 +403,7 @@ soft_limit_tree_from_page(struct page *page) } static void -__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, +__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, struct mem_cgroup_per_zone *mz, struct mem_cgroup_tree_per_zone *mctz, unsigned long long new_usage_in_excess) @@ -437,7 +437,7 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, } static void -__mem_cgroup_remove_exceeded(struct mem_cgroup *mem, +__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, struct mem_cgroup_per_zone *mz, struct mem_cgroup_tree_per_zone *mctz) { @@ -448,17 +448,17 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, } static void -mem_cgroup_remove_exceeded(struct mem_cgroup *mem, +mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, struct mem_cgroup_per_zone *mz, struct mem_cgroup_tree_per_zone *mctz) { spin_lock(&mctz->lock); - __mem_cgroup_remove_exceeded(mem, mz, mctz); + __mem_cgroup_remove_exceeded(memcg, mz, mctz); spin_unlock(&mctz->lock); } -static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) { unsigned long long excess; struct mem_cgroup_per_zone *mz; @@ -471,9 +471,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. */ - for (; mem; mem = parent_mem_cgroup(mem)) { - mz = mem_cgroup_zoneinfo(mem, nid, zid); - excess = res_counter_soft_limit_excess(&mem->res); + for (; memcg; memcg = parent_mem_cgroup(memcg)) { + mz = mem_cgroup_zoneinfo(memcg, nid, zid); + excess = res_counter_soft_limit_excess(&memcg->res); /* * We have to update the tree if mz is on RB-tree or * mem is over its softlimit. @@ -482,18 +482,18 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) spin_lock(&mctz->lock); /* if on-tree, remove it */ if (mz->on_tree) - __mem_cgroup_remove_exceeded(mem, mz, mctz); + __mem_cgroup_remove_exceeded(memcg, mz, mctz); /* * Insert again. mz->usage_in_excess will be updated. * If excess is 0, no tree ops. */ - __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); + __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); spin_unlock(&mctz->lock); } } } -static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) +static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) { int node, zone; struct mem_cgroup_per_zone *mz; @@ -501,9 +501,9 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) for_each_node_state(node, N_POSSIBLE) { for (zone = 0; zone < MAX_NR_ZONES; zone++) { - mz = mem_cgroup_zoneinfo(mem, node, zone); + mz = mem_cgroup_zoneinfo(memcg, node, zone); mctz = soft_limit_tree_node_zone(node, zone); - mem_cgroup_remove_exceeded(mem, mz, mctz); + mem_cgroup_remove_exceeded(memcg, mz, mctz); } } } @@ -564,7 +564,7 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) * common workload, threashold and synchonization as vmstat[] should be * implemented. */ -static long mem_cgroup_read_stat(struct mem_cgroup *mem, +static long mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { long val = 0; @@ -572,81 +572,83 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem, get_online_cpus(); for_each_online_cpu(cpu) - val += per_cpu(mem->stat->count[idx], cpu); + val += per_cpu(memcg->stat->count[idx], cpu); #ifdef CONFIG_HOTPLUG_CPU - spin_lock(&mem->pcp_counter_lock); - val += mem->nocpu_base.count[idx]; - spin_unlock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); + val += memcg->nocpu_base.count[idx]; + spin_unlock(&memcg->pcp_counter_lock); #endif put_online_cpus(); return val; } -static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, +static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, bool charge) { int val = (charge) ? 1 : -1; - this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); + this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); } -void mem_cgroup_pgfault(struct mem_cgroup *mem, int val) +void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val) { - this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); + this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); } -void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val) +void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val) { - this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); + this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); } -static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, +static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) { unsigned long val = 0; int cpu; for_each_online_cpu(cpu) - val += per_cpu(mem->stat->events[idx], cpu); + val += per_cpu(memcg->stat->events[idx], cpu); #ifdef CONFIG_HOTPLUG_CPU - spin_lock(&mem->pcp_counter_lock); - val += mem->nocpu_base.events[idx]; - spin_unlock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); + val += memcg->nocpu_base.events[idx]; + spin_unlock(&memcg->pcp_counter_lock); #endif return val; } -static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, +static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, bool file, int nr_pages) { preempt_disable(); if (file) - __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages); + __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE], + nr_pages); else - __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); + __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], + nr_pages); /* pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) - __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); + __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); else { - __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); + __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); nr_pages = -nr_pages; /* for event */ } - __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); + __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); preempt_enable(); } unsigned long -mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid, +mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lru_mask) { struct mem_cgroup_per_zone *mz; enum lru_list l; unsigned long ret = 0; - mz = mem_cgroup_zoneinfo(mem, nid, zid); + mz = mem_cgroup_zoneinfo(memcg, nid, zid); for_each_lru(l) { if (BIT(l) & lru_mask) @@ -656,44 +658,45 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid, } static unsigned long -mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem, +mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) { u64 total = 0; int zid; for (zid = 0; zid < MAX_NR_ZONES; zid++) - total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask); + total += mem_cgroup_zone_nr_lru_pages(memcg, + nid, zid, lru_mask); return total; } -static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem, +static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, unsigned int lru_mask) { int nid; u64 total = 0; for_each_node_state(nid, N_HIGH_MEMORY) - total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask); + total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask); return total; } -static bool __memcg_event_check(struct mem_cgroup *mem, int target) +static bool __memcg_event_check(struct mem_cgroup *memcg, int target) { unsigned long val, next; - val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); - next = this_cpu_read(mem->stat->targets[target]); + val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); + next = __this_cpu_read(memcg->stat->targets[target]); /* from time_after() in jiffies.h */ return ((long)next - (long)val < 0); } -static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) +static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target) { unsigned long val, next; - val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); + val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); switch (target) { case MEM_CGROUP_TARGET_THRESH: @@ -709,34 +712,36 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) return; } - this_cpu_write(mem->stat->targets[target], next); + __this_cpu_write(memcg->stat->targets[target], next); } /* * Check events in order. * */ -static void memcg_check_events(struct mem_cgroup *mem, struct page *page) +static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { + preempt_disable(); /* threshold event is triggered in finer grain than soft limit */ - if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) { - mem_cgroup_threshold(mem); - __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); - if (unlikely(__memcg_event_check(mem, + if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) { + mem_cgroup_threshold(memcg); + __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH); + if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_SOFTLIMIT))) { - mem_cgroup_update_tree(mem, page); - __mem_cgroup_target_update(mem, + mem_cgroup_update_tree(memcg, page); + __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_SOFTLIMIT); } #if MAX_NUMNODES > 1 - if (unlikely(__memcg_event_check(mem, + if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_NUMAINFO))) { - atomic_inc(&mem->numainfo_events); - __mem_cgroup_target_update(mem, + atomic_inc(&memcg->numainfo_events); + __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_NUMAINFO); } #endif } + preempt_enable(); } static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) @@ -762,7 +767,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; if (!mm) return NULL; @@ -773,25 +778,25 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) */ rcu_read_lock(); do { - mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!mem)) + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!memcg)) break; - } while (!css_tryget(&mem->css)); + } while (!css_tryget(&memcg->css)); rcu_read_unlock(); - return mem; + return memcg; } /* The caller has to guarantee "mem" exists before calling this */ -static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) +static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg) { struct cgroup_subsys_state *css; int found; - if (!mem) /* ROOT cgroup has the smallest ID */ + if (!memcg) /* ROOT cgroup has the smallest ID */ return root_mem_cgroup; /*css_put/get against root is ignored*/ - if (!mem->use_hierarchy) { - if (css_tryget(&mem->css)) - return mem; + if (!memcg->use_hierarchy) { + if (css_tryget(&memcg->css)) + return memcg; return NULL; } rcu_read_lock(); @@ -799,13 +804,13 @@ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) * searching a memory cgroup which has the smallest ID under given * ROOT cgroup. (ID >= 1) */ - css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found); + css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found); if (css && css_tryget(css)) - mem = container_of(css, struct mem_cgroup, css); + memcg = container_of(css, struct mem_cgroup, css); else - mem = NULL; + memcg = NULL; rcu_read_unlock(); - return mem; + return memcg; } static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, @@ -859,29 +864,29 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, for_each_mem_cgroup_tree_cond(iter, NULL, true) -static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) +static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { - return (mem == root_mem_cgroup); + return (memcg == root_mem_cgroup); } void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { - struct mem_cgroup *mem; + struct mem_cgroup *memcg; if (!mm) return; rcu_read_lock(); - mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!mem)) + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!memcg)) goto out; switch (idx) { case PGMAJFAULT: - mem_cgroup_pgmajfault(mem, 1); + mem_cgroup_pgmajfault(memcg, 1); break; case PGFAULT: - mem_cgroup_pgfault(mem, 1); + mem_cgroup_pgfault(memcg, 1); break; default: BUG(); @@ -990,6 +995,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) return; pc = lookup_page_cgroup(page); VM_BUG_ON(PageCgroupAcctLRU(pc)); + /* + * putback: charge: + * SetPageLRU SetPageCgroupUsed + * smp_mb smp_mb + * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU + * + * Ensure that one of the two sides adds the page to the memcg + * LRU during a race. + */ + smp_mb(); if (!PageCgroupUsed(pc)) return; /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ @@ -1041,7 +1056,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page) unsigned long flags; struct zone *zone = page_zone(page); struct page_cgroup *pc = lookup_page_cgroup(page); - + /* + * putback: charge: + * SetPageLRU SetPageCgroupUsed + * smp_mb smp_mb + * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU + * + * Ensure that one of the two sides adds the page to the memcg + * LRU during a race. + */ + smp_mb(); /* taking care of that the page is added to LRU while we commit it */ if (likely(!PageLRU(page))) return; @@ -1063,21 +1087,21 @@ void mem_cgroup_move_lists(struct page *page, } /* - * Checks whether given mem is same or in the root_mem's + * Checks whether given mem is same or in the root_mem_cgroup's * hierarchy subtree */ -static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem, - struct mem_cgroup *mem) +static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg) { - if (root_mem != mem) { - return (root_mem->use_hierarchy && - css_is_ancestor(&mem->css, &root_mem->css)); + if (root_memcg != memcg) { + return (root_memcg->use_hierarchy && + css_is_ancestor(&memcg->css, &root_memcg->css)); } return true; } -int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) +int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) { int ret; struct mem_cgroup *curr = NULL; @@ -1091,25 +1115,29 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) if (!curr) return 0; /* - * We should check use_hierarchy of "mem" not "curr". Because checking + * We should check use_hierarchy of "memcg" not "curr". Because checking * use_hierarchy of "curr" here make this function true if hierarchy is - * enabled in "curr" and "curr" is a child of "mem" in *cgroup* - * hierarchy(even if use_hierarchy is disabled in "mem"). + * enabled in "curr" and "curr" is a child of "memcg" in *cgroup* + * hierarchy(even if use_hierarchy is disabled in "memcg"). */ - ret = mem_cgroup_same_or_subtree(mem, curr); + ret = mem_cgroup_same_or_subtree(memcg, curr); css_put(&curr->css); return ret; } -static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) { - unsigned long active; + unsigned long inactive_ratio; + int nid = zone_to_nid(zone); + int zid = zone_idx(zone); unsigned long inactive; + unsigned long active; unsigned long gb; - unsigned long inactive_ratio; - inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); - active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); + inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, + BIT(LRU_INACTIVE_ANON)); + active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, + BIT(LRU_ACTIVE_ANON)); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -1117,39 +1145,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_ else inactive_ratio = 1; - if (present_pages) { - present_pages[0] = inactive; - present_pages[1] = active; - } - - return inactive_ratio; + return inactive * inactive_ratio < active; } -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) -{ - unsigned long active; - unsigned long inactive; - unsigned long present_pages[2]; - unsigned long inactive_ratio; - - inactive_ratio = calc_inactive_ratio(memcg, present_pages); - - inactive = present_pages[0]; - active = present_pages[1]; - - if (inactive * inactive_ratio < active) - return 1; - - return 0; -} - -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) +int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) { unsigned long active; unsigned long inactive; + int zid = zone_idx(zone); + int nid = zone_to_nid(zone); - inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); - active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); + inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, + BIT(LRU_INACTIVE_FILE)); + active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, + BIT(LRU_ACTIVE_FILE)); return (active > inactive); } @@ -1254,13 +1263,13 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * Returns the maximum amount of memory @mem can be charged with, in * pages. */ -static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) +static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) { unsigned long long margin; - margin = res_counter_margin(&mem->res); + margin = res_counter_margin(&memcg->res); if (do_swap_account) - margin = min(margin, res_counter_margin(&mem->memsw)); + margin = min(margin, res_counter_margin(&memcg->memsw)); return margin >> PAGE_SHIFT; } @@ -1275,33 +1284,33 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg) return memcg->swappiness; } -static void mem_cgroup_start_move(struct mem_cgroup *mem) +static void mem_cgroup_start_move(struct mem_cgroup *memcg) { int cpu; get_online_cpus(); - spin_lock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); for_each_online_cpu(cpu) - per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; - mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; - spin_unlock(&mem->pcp_counter_lock); + per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; + memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; + spin_unlock(&memcg->pcp_counter_lock); put_online_cpus(); synchronize_rcu(); } -static void mem_cgroup_end_move(struct mem_cgroup *mem) +static void mem_cgroup_end_move(struct mem_cgroup *memcg) { int cpu; - if (!mem) + if (!memcg) return; get_online_cpus(); - spin_lock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); for_each_online_cpu(cpu) - per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; - mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; - spin_unlock(&mem->pcp_counter_lock); + per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; + memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; + spin_unlock(&memcg->pcp_counter_lock); put_online_cpus(); } /* @@ -1316,13 +1325,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem) * waiting at hith-memory prressure caused by "move". */ -static bool mem_cgroup_stealed(struct mem_cgroup *mem) +static bool mem_cgroup_stealed(struct mem_cgroup *memcg) { VM_BUG_ON(!rcu_read_lock_held()); - return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; + return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0; } -static bool mem_cgroup_under_move(struct mem_cgroup *mem) +static bool mem_cgroup_under_move(struct mem_cgroup *memcg) { struct mem_cgroup *from; struct mem_cgroup *to; @@ -1337,17 +1346,17 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem) if (!from) goto unlock; - ret = mem_cgroup_same_or_subtree(mem, from) - || mem_cgroup_same_or_subtree(mem, to); + ret = mem_cgroup_same_or_subtree(memcg, from) + || mem_cgroup_same_or_subtree(memcg, to); unlock: spin_unlock(&mc.lock); return ret; } -static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) +static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) { if (mc.moving_task && current != mc.moving_task) { - if (mem_cgroup_under_move(mem)) { + if (mem_cgroup_under_move(memcg)) { DEFINE_WAIT(wait); prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE); /* moving charge context might have finished. */ @@ -1431,12 +1440,12 @@ done: * This function returns the number of memcg under hierarchy tree. Returns * 1(self count) if no children. */ -static int mem_cgroup_count_children(struct mem_cgroup *mem) +static int mem_cgroup_count_children(struct mem_cgroup *memcg) { int num = 0; struct mem_cgroup *iter; - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) num++; return num; } @@ -1466,21 +1475,21 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) * that to reclaim free pages from. */ static struct mem_cgroup * -mem_cgroup_select_victim(struct mem_cgroup *root_mem) +mem_cgroup_select_victim(struct mem_cgroup *root_memcg) { struct mem_cgroup *ret = NULL; struct cgroup_subsys_state *css; int nextid, found; - if (!root_mem->use_hierarchy) { - css_get(&root_mem->css); - ret = root_mem; + if (!root_memcg->use_hierarchy) { + css_get(&root_memcg->css); + ret = root_memcg; } while (!ret) { rcu_read_lock(); - nextid = root_mem->last_scanned_child + 1; - css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css, + nextid = root_memcg->last_scanned_child + 1; + css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css, &found); if (css && css_tryget(css)) ret = container_of(css, struct mem_cgroup, css); @@ -1489,9 +1498,9 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) /* Updates scanning parameter */ if (!css) { /* this means start scan from ID:1 */ - root_mem->last_scanned_child = 0; + root_memcg->last_scanned_child = 0; } else - root_mem->last_scanned_child = found; + root_memcg->last_scanned_child = found; } return ret; @@ -1507,14 +1516,14 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) * reclaimable pages on a node. Returns true if there are any reclaimable * pages in the node. */ -static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, +static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg, int nid, bool noswap) { - if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE)) + if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE)) return true; if (noswap || !total_swap_pages) return false; - if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON)) + if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON)) return true; return false; @@ -1527,29 +1536,29 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, * nodes based on the zonelist. So update the list loosely once per 10 secs. * */ -static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) +static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg) { int nid; /* * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET * pagein/pageout changes since the last update. */ - if (!atomic_read(&mem->numainfo_events)) + if (!atomic_read(&memcg->numainfo_events)) return; - if (atomic_inc_return(&mem->numainfo_updating) > 1) + if (atomic_inc_return(&memcg->numainfo_updating) > 1) return; /* make a nodemask where this memcg uses memory from */ - mem->scan_nodes = node_states[N_HIGH_MEMORY]; + memcg->scan_nodes = node_states[N_HIGH_MEMORY]; for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { - if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) - node_clear(nid, mem->scan_nodes); + if (!test_mem_cgroup_node_reclaimable(memcg, nid, false)) + node_clear(nid, memcg->scan_nodes); } - atomic_set(&mem->numainfo_events, 0); - atomic_set(&mem->numainfo_updating, 0); + atomic_set(&memcg->numainfo_events, 0); + atomic_set(&memcg->numainfo_updating, 0); } /* @@ -1564,16 +1573,16 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) * * Now, we use round-robin. Better algorithm is welcomed. */ -int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) { int node; - mem_cgroup_may_update_nodemask(mem); - node = mem->last_scanned_node; + mem_cgroup_may_update_nodemask(memcg); + node = memcg->last_scanned_node; - node = next_node(node, mem->scan_nodes); + node = next_node(node, memcg->scan_nodes); if (node == MAX_NUMNODES) - node = first_node(mem->scan_nodes); + node = first_node(memcg->scan_nodes); /* * We call this when we hit limit, not when pages are added to LRU. * No LRU may hold pages because all pages are UNEVICTABLE or @@ -1583,7 +1592,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem) if (unlikely(node == MAX_NUMNODES)) node = numa_node_id(); - mem->last_scanned_node = node; + memcg->last_scanned_node = node; return node; } @@ -1593,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem) * unused nodes. But scan_nodes is lazily updated and may not cotain * enough new information. We need to do double check. */ -bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) +bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) { int nid; @@ -1601,12 +1610,12 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) * quick check...making use of scan_node. * We can skip unused nodes. */ - if (!nodes_empty(mem->scan_nodes)) { - for (nid = first_node(mem->scan_nodes); + if (!nodes_empty(memcg->scan_nodes)) { + for (nid = first_node(memcg->scan_nodes); nid < MAX_NUMNODES; - nid = next_node(nid, mem->scan_nodes)) { + nid = next_node(nid, memcg->scan_nodes)) { - if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) + if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) return true; } } @@ -1614,23 +1623,23 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) * Check rest of nodes. */ for_each_node_state(nid, N_HIGH_MEMORY) { - if (node_isset(nid, mem->scan_nodes)) + if (node_isset(nid, memcg->scan_nodes)) continue; - if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) + if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) return true; } return false; } #else -int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) { return 0; } -bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) +bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) { - return test_mem_cgroup_node_reclaimable(mem, 0, noswap); + return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); } #endif @@ -1639,14 +1648,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) * we reclaimed from, so that we don't end up penalizing one child extensively * based on its position in the children list. * - * root_mem is the original ancestor that we've been reclaim from. + * root_memcg is the original ancestor that we've been reclaim from. * - * We give up and return to the caller when we visit root_mem twice. + * We give up and return to the caller when we visit root_memcg twice. * (other groups can be removed while we're walking....) * * If shrink==true, for avoiding to free too much, this returns immedieately. */ -static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, +static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg, struct zone *zone, gfp_t gfp_mask, unsigned long reclaim_options, @@ -1661,15 +1670,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, unsigned long excess; unsigned long nr_scanned; - excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; + excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; /* If memsw_is_minimum==1, swap-out is of-no-use. */ - if (!check_soft && !shrink && root_mem->memsw_is_minimum) + if (!check_soft && !shrink && root_memcg->memsw_is_minimum) noswap = true; while (1) { - victim = mem_cgroup_select_victim(root_mem); - if (victim == root_mem) { + victim = mem_cgroup_select_victim(root_memcg); + if (victim == root_memcg) { loop++; /* * We are not draining per cpu cached charges during @@ -1678,7 +1687,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, * charges will not give any. */ if (!check_soft && loop >= 1) - drain_all_stock_async(root_mem); + drain_all_stock_async(root_memcg); if (loop >= 2) { /* * If we have not been able to reclaim @@ -1725,9 +1734,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, return ret; total += ret; if (check_soft) { - if (!res_counter_soft_limit_excess(&root_mem->res)) + if (!res_counter_soft_limit_excess(&root_memcg->res)) return total; - } else if (mem_cgroup_margin(root_mem)) + } else if (mem_cgroup_margin(root_memcg)) return total; } return total; @@ -1738,12 +1747,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, * If someone is running, return false. * Has to be called with memcg_oom_lock */ -static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) +static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) { struct mem_cgroup *iter, *failed = NULL; bool cond = true; - for_each_mem_cgroup_tree_cond(iter, mem, cond) { + for_each_mem_cgroup_tree_cond(iter, memcg, cond) { if (iter->oom_lock) { /* * this subtree of our hierarchy is already locked @@ -1763,7 +1772,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) * what we set up to the failing subtree */ cond = true; - for_each_mem_cgroup_tree_cond(iter, mem, cond) { + for_each_mem_cgroup_tree_cond(iter, memcg, cond) { if (iter == failed) { cond = false; continue; @@ -1776,24 +1785,24 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) /* * Has to be called with memcg_oom_lock */ -static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) +static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg) { struct mem_cgroup *iter; - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) iter->oom_lock = false; return 0; } -static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem) +static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg) { struct mem_cgroup *iter; - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) atomic_inc(&iter->under_oom); } -static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) +static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) { struct mem_cgroup *iter; @@ -1802,7 +1811,7 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) * mem_cgroup_oom_lock() may not be called. We have to use * atomic_add_unless() here. */ - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) atomic_add_unless(&iter->under_oom, -1, 0); } @@ -1817,85 +1826,85 @@ struct oom_wait_info { static int memcg_oom_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg, - *oom_wait_mem; + struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg, + *oom_wait_memcg; struct oom_wait_info *oom_wait_info; oom_wait_info = container_of(wait, struct oom_wait_info, wait); - oom_wait_mem = oom_wait_info->mem; + oom_wait_memcg = oom_wait_info->mem; /* * Both of oom_wait_info->mem and wake_mem are stable under us. * Then we can use css_is_ancestor without taking care of RCU. */ - if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem) - && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem)) + if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg) + && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg)) return 0; return autoremove_wake_function(wait, mode, sync, arg); } -static void memcg_wakeup_oom(struct mem_cgroup *mem) +static void memcg_wakeup_oom(struct mem_cgroup *memcg) { - /* for filtering, pass "mem" as argument. */ - __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem); + /* for filtering, pass "memcg" as argument. */ + __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg); } -static void memcg_oom_recover(struct mem_cgroup *mem) +static void memcg_oom_recover(struct mem_cgroup *memcg) { - if (mem && atomic_read(&mem->under_oom)) - memcg_wakeup_oom(mem); + if (memcg && atomic_read(&memcg->under_oom)) + memcg_wakeup_oom(memcg); } /* * try to call OOM killer. returns false if we should exit memory-reclaim loop. */ -bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) +bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask) { struct oom_wait_info owait; bool locked, need_to_kill; - owait.mem = mem; + owait.mem = memcg; owait.wait.flags = 0; owait.wait.func = memcg_oom_wake_function; owait.wait.private = current; INIT_LIST_HEAD(&owait.wait.task_list); need_to_kill = true; - mem_cgroup_mark_under_oom(mem); + mem_cgroup_mark_under_oom(memcg); - /* At first, try to OOM lock hierarchy under mem.*/ + /* At first, try to OOM lock hierarchy under memcg.*/ spin_lock(&memcg_oom_lock); - locked = mem_cgroup_oom_lock(mem); + locked = mem_cgroup_oom_lock(memcg); /* * Even if signal_pending(), we can't quit charge() loop without * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL * under OOM is always welcomed, use TASK_KILLABLE here. */ prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); - if (!locked || mem->oom_kill_disable) + if (!locked || memcg->oom_kill_disable) need_to_kill = false; if (locked) - mem_cgroup_oom_notify(mem); + mem_cgroup_oom_notify(memcg); spin_unlock(&memcg_oom_lock); if (need_to_kill) { finish_wait(&memcg_oom_waitq, &owait.wait); - mem_cgroup_out_of_memory(mem, mask); + mem_cgroup_out_of_memory(memcg, mask); } else { schedule(); finish_wait(&memcg_oom_waitq, &owait.wait); } spin_lock(&memcg_oom_lock); if (locked) - mem_cgroup_oom_unlock(mem); - memcg_wakeup_oom(mem); + mem_cgroup_oom_unlock(memcg); + memcg_wakeup_oom(memcg); spin_unlock(&memcg_oom_lock); - mem_cgroup_unmark_under_oom(mem); + mem_cgroup_unmark_under_oom(memcg); if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) return false; /* Give chance to dying process */ - schedule_timeout(1); + schedule_timeout_uninterruptible(1); return true; } @@ -1926,7 +1935,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_page_stat_item idx, int val) { - struct mem_cgroup *mem; + struct mem_cgroup *memcg; struct page_cgroup *pc = lookup_page_cgroup(page); bool need_unlock = false; unsigned long uninitialized_var(flags); @@ -1935,16 +1944,16 @@ void mem_cgroup_update_page_stat(struct page *page, return; rcu_read_lock(); - mem = pc->mem_cgroup; - if (unlikely(!mem || !PageCgroupUsed(pc))) + memcg = pc->mem_cgroup; + if (unlikely(!memcg || !PageCgroupUsed(pc))) goto out; /* pc->mem_cgroup is unstable ? */ - if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { + if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) { /* take a lock against to access pc->mem_cgroup */ move_lock_page_cgroup(pc, &flags); need_unlock = true; - mem = pc->mem_cgroup; - if (!mem || !PageCgroupUsed(pc)) + memcg = pc->mem_cgroup; + if (!memcg || !PageCgroupUsed(pc)) goto out; } @@ -1960,7 +1969,7 @@ void mem_cgroup_update_page_stat(struct page *page, BUG(); } - this_cpu_add(mem->stat->count[idx], val); + this_cpu_add(memcg->stat->count[idx], val); out: if (unlikely(need_unlock)) @@ -1991,13 +2000,13 @@ static DEFINE_MUTEX(percpu_charge_mutex); * cgroup which is not current target, returns false. This stock will be * refilled. */ -static bool consume_stock(struct mem_cgroup *mem) +static bool consume_stock(struct mem_cgroup *memcg) { struct memcg_stock_pcp *stock; bool ret = true; stock = &get_cpu_var(memcg_stock); - if (mem == stock->cached && stock->nr_pages) + if (memcg == stock->cached && stock->nr_pages) stock->nr_pages--; else /* need to call res_counter_charge */ ret = false; @@ -2038,24 +2047,24 @@ static void drain_local_stock(struct work_struct *dummy) * Cache charges(val) which is from res_counter, to local per_cpu area. * This will be consumed by consume_stock() function, later. */ -static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) +static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); - if (stock->cached != mem) { /* reset if necessary */ + if (stock->cached != memcg) { /* reset if necessary */ drain_stock(stock); - stock->cached = mem; + stock->cached = memcg; } stock->nr_pages += nr_pages; put_cpu_var(memcg_stock); } /* - * Drains all per-CPU charge caches for given root_mem resp. subtree + * Drains all per-CPU charge caches for given root_memcg resp. subtree * of the hierarchy under it. sync flag says whether we should block * until the work is done. */ -static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) +static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync) { int cpu, curcpu; @@ -2064,12 +2073,12 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) curcpu = get_cpu(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); - struct mem_cgroup *mem; + struct mem_cgroup *memcg; - mem = stock->cached; - if (!mem || !stock->nr_pages) + memcg = stock->cached; + if (!memcg || !stock->nr_pages) continue; - if (!mem_cgroup_same_or_subtree(root_mem, mem)) + if (!mem_cgroup_same_or_subtree(root_memcg, memcg)) continue; if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { if (cpu == curcpu) @@ -2098,23 +2107,23 @@ out: * expects some charges will be back to res_counter later but cannot wait for * it. */ -static void drain_all_stock_async(struct mem_cgroup *root_mem) +static void drain_all_stock_async(struct mem_cgroup *root_memcg) { /* * If someone calls draining, avoid adding more kworker runs. */ if (!mutex_trylock(&percpu_charge_mutex)) return; - drain_all_stock(root_mem, false); + drain_all_stock(root_memcg, false); mutex_unlock(&percpu_charge_mutex); } /* This is a synchronous drain interface. */ -static void drain_all_stock_sync(struct mem_cgroup *root_mem) +static void drain_all_stock_sync(struct mem_cgroup *root_memcg) { /* called when force_empty is called */ mutex_lock(&percpu_charge_mutex); - drain_all_stock(root_mem, true); + drain_all_stock(root_memcg, true); mutex_unlock(&percpu_charge_mutex); } @@ -2122,35 +2131,35 @@ static void drain_all_stock_sync(struct mem_cgroup *root_mem) * This function drains percpu counter value from DEAD cpu and * move it to local cpu. Note that this function can be preempted. */ -static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) +static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu) { int i; - spin_lock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { - long x = per_cpu(mem->stat->count[i], cpu); + long x = per_cpu(memcg->stat->count[i], cpu); - per_cpu(mem->stat->count[i], cpu) = 0; - mem->nocpu_base.count[i] += x; + per_cpu(memcg->stat->count[i], cpu) = 0; + memcg->nocpu_base.count[i] += x; } for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { - unsigned long x = per_cpu(mem->stat->events[i], cpu); + unsigned long x = per_cpu(memcg->stat->events[i], cpu); - per_cpu(mem->stat->events[i], cpu) = 0; - mem->nocpu_base.events[i] += x; + per_cpu(memcg->stat->events[i], cpu) = 0; + memcg->nocpu_base.events[i] += x; } /* need to clear ON_MOVE value, works as a kind of lock. */ - per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; - spin_unlock(&mem->pcp_counter_lock); + per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; + spin_unlock(&memcg->pcp_counter_lock); } -static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) +static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu) { int idx = MEM_CGROUP_ON_MOVE; - spin_lock(&mem->pcp_counter_lock); - per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; - spin_unlock(&mem->pcp_counter_lock); + spin_lock(&memcg->pcp_counter_lock); + per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx]; + spin_unlock(&memcg->pcp_counter_lock); } static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, @@ -2188,7 +2197,7 @@ enum { CHARGE_OOM_DIE, /* the current is killed because of OOM */ }; -static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, +static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned int nr_pages, bool oom_check) { unsigned long csize = nr_pages * PAGE_SIZE; @@ -2197,16 +2206,16 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, unsigned long flags = 0; int ret; - ret = res_counter_charge(&mem->res, csize, &fail_res); + ret = res_counter_charge(&memcg->res, csize, &fail_res); if (likely(!ret)) { if (!do_swap_account) return CHARGE_OK; - ret = res_counter_charge(&mem->memsw, csize, &fail_res); + ret = res_counter_charge(&memcg->memsw, csize, &fail_res); if (likely(!ret)) return CHARGE_OK; - res_counter_uncharge(&mem->res, csize); + res_counter_uncharge(&memcg->res, csize); mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); flags |= MEM_CGROUP_RECLAIM_NOSWAP; } else @@ -2264,12 +2273,12 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, static int __mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, unsigned int nr_pages, - struct mem_cgroup **memcg, + struct mem_cgroup **ptr, bool oom) { unsigned int batch = max(CHARGE_BATCH, nr_pages); int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; int ret; /* @@ -2287,17 +2296,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, * thread group leader migrates. It's possible that mm is not * set, if so charge the init_mm (happens for pagecache usage). */ - if (!*memcg && !mm) + if (!*ptr && !mm) goto bypass; again: - if (*memcg) { /* css should be a valid one */ - mem = *memcg; - VM_BUG_ON(css_is_removed(&mem->css)); - if (mem_cgroup_is_root(mem)) + if (*ptr) { /* css should be a valid one */ + memcg = *ptr; + VM_BUG_ON(css_is_removed(&memcg->css)); + if (mem_cgroup_is_root(memcg)) goto done; - if (nr_pages == 1 && consume_stock(mem)) + if (nr_pages == 1 && consume_stock(memcg)) goto done; - css_get(&mem->css); + css_get(&memcg->css); } else { struct task_struct *p; @@ -2305,7 +2314,7 @@ again: p = rcu_dereference(mm->owner); /* * Because we don't have task_lock(), "p" can exit. - * In that case, "mem" can point to root or p can be NULL with + * In that case, "memcg" can point to root or p can be NULL with * race with swapoff. Then, we have small risk of mis-accouning. * But such kind of mis-account by race always happens because * we don't have cgroup_mutex(). It's overkill and we allo that @@ -2313,12 +2322,12 @@ again: * (*) swapoff at el will charge against mm-struct not against * task-struct. So, mm->owner can be NULL. */ - mem = mem_cgroup_from_task(p); - if (!mem || mem_cgroup_is_root(mem)) { + memcg = mem_cgroup_from_task(p); + if (!memcg || mem_cgroup_is_root(memcg)) { rcu_read_unlock(); goto done; } - if (nr_pages == 1 && consume_stock(mem)) { + if (nr_pages == 1 && consume_stock(memcg)) { /* * It seems dagerous to access memcg without css_get(). * But considering how consume_stok works, it's not @@ -2331,7 +2340,7 @@ again: goto done; } /* after here, we may be blocked. we need to get refcnt */ - if (!css_tryget(&mem->css)) { + if (!css_tryget(&memcg->css)) { rcu_read_unlock(); goto again; } @@ -2343,7 +2352,7 @@ again: /* If killed, bypass charge */ if (fatal_signal_pending(current)) { - css_put(&mem->css); + css_put(&memcg->css); goto bypass; } @@ -2353,43 +2362,43 @@ again: nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; } - ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check); + ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check); switch (ret) { case CHARGE_OK: break; case CHARGE_RETRY: /* not in OOM situation but retry */ batch = nr_pages; - css_put(&mem->css); - mem = NULL; + css_put(&memcg->css); + memcg = NULL; goto again; case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ - css_put(&mem->css); + css_put(&memcg->css); goto nomem; case CHARGE_NOMEM: /* OOM routine works */ if (!oom) { - css_put(&mem->css); + css_put(&memcg->css); goto nomem; } /* If oom, we never return -ENOMEM */ nr_oom_retries--; break; case CHARGE_OOM_DIE: /* Killed by OOM Killer */ - css_put(&mem->css); + css_put(&memcg->css); goto bypass; } } while (ret != CHARGE_OK); if (batch > nr_pages) - refill_stock(mem, batch - nr_pages); - css_put(&mem->css); + refill_stock(memcg, batch - nr_pages); + css_put(&memcg->css); done: - *memcg = mem; + *ptr = memcg; return 0; nomem: - *memcg = NULL; + *ptr = NULL; return -ENOMEM; bypass: - *memcg = NULL; + *ptr = NULL; return 0; } @@ -2398,15 +2407,15 @@ bypass: * This function is for that and do uncharge, put css's refcnt. * gotten by try_charge(). */ -static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, +static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { - if (!mem_cgroup_is_root(mem)) { + if (!mem_cgroup_is_root(memcg)) { unsigned long bytes = nr_pages * PAGE_SIZE; - res_counter_uncharge(&mem->res, bytes); + res_counter_uncharge(&memcg->res, bytes); if (do_swap_account) - res_counter_uncharge(&mem->memsw, bytes); + res_counter_uncharge(&memcg->memsw, bytes); } } @@ -2431,7 +2440,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; unsigned short id; swp_entry_t ent; @@ -2441,23 +2450,23 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) pc = lookup_page_cgroup(page); lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { - mem = pc->mem_cgroup; - if (mem && !css_tryget(&mem->css)) - mem = NULL; + memcg = pc->mem_cgroup; + if (memcg && !css_tryget(&memcg->css)) + memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); id = lookup_swap_cgroup(ent); rcu_read_lock(); - mem = mem_cgroup_lookup(id); - if (mem && !css_tryget(&mem->css)) - mem = NULL; + memcg = mem_cgroup_lookup(id); + if (memcg && !css_tryget(&memcg->css)) + memcg = NULL; rcu_read_unlock(); } unlock_page_cgroup(pc); - return mem; + return memcg; } -static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, +static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, struct page *page, unsigned int nr_pages, struct page_cgroup *pc, @@ -2466,14 +2475,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - __mem_cgroup_cancel_charge(mem, nr_pages); + __mem_cgroup_cancel_charge(memcg, nr_pages); return; } /* * we don't need page_cgroup_lock about tail pages, becase they are not * accessed by any other context at this point. */ - pc->mem_cgroup = mem; + pc->mem_cgroup = memcg; /* * We access a page_cgroup asynchronously without lock_page_cgroup(). * Especially when a page_cgroup is taken from a page, pc->mem_cgroup @@ -2496,14 +2505,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, break; } - mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); + mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages); unlock_page_cgroup(pc); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. * if they exceeds softlimit. */ - memcg_check_events(mem, page); + memcg_check_events(memcg, page); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -2690,7 +2699,7 @@ out: static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, enum charge_type ctype) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; unsigned int nr_pages = 1; struct page_cgroup *pc; bool oom = true; @@ -2709,11 +2718,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, pc = lookup_page_cgroup(page); BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ - ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom); - if (ret || !mem) + ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); + if (ret || !memcg) return ret; - __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype); + __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype); return 0; } @@ -2742,7 +2751,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, enum charge_type ctype); static void -__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, +__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg, enum charge_type ctype) { struct page_cgroup *pc = lookup_page_cgroup(page); @@ -2752,7 +2761,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, * LRU. Take care of it. */ mem_cgroup_lru_del_before_commit(page); - __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); + __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); mem_cgroup_lru_add_after_commit(page); return; } @@ -2760,7 +2769,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; int ret; if (mem_cgroup_disabled()) @@ -2772,8 +2781,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, mm = &init_mm; if (page_is_file_cache(page)) { - ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true); - if (ret || !mem) + ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true); + if (ret || !memcg) return ret; /* @@ -2781,15 +2790,15 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, * put that would remove them from the LRU list, make * sure that they get relinked properly. */ - __mem_cgroup_commit_charge_lrucare(page, mem, + __mem_cgroup_commit_charge_lrucare(page, memcg, MEM_CGROUP_CHARGE_TYPE_CACHE); return ret; } /* shmem */ if (PageSwapCache(page)) { - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); + ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg); if (!ret) - __mem_cgroup_commit_charge_swapin(page, mem, + __mem_cgroup_commit_charge_swapin(page, memcg, MEM_CGROUP_CHARGE_TYPE_SHMEM); } else ret = mem_cgroup_charge_common(page, mm, gfp_mask, @@ -2808,7 +2817,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, gfp_t mask, struct mem_cgroup **ptr) { - struct mem_cgroup *mem; + struct mem_cgroup *memcg; int ret; *ptr = NULL; @@ -2826,12 +2835,12 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, */ if (!PageSwapCache(page)) goto charge_cur_mm; - mem = try_get_mem_cgroup_from_page(page); - if (!mem) + memcg = try_get_mem_cgroup_from_page(page); + if (!memcg) goto charge_cur_mm; - *ptr = mem; + *ptr = memcg; ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); - css_put(&mem->css); + css_put(&memcg->css); return ret; charge_cur_mm: if (unlikely(!mm)) @@ -2891,16 +2900,16 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) MEM_CGROUP_CHARGE_TYPE_MAPPED); } -void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) +void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) return; - if (!mem) + if (!memcg) return; - __mem_cgroup_cancel_charge(mem, 1); + __mem_cgroup_cancel_charge(memcg, 1); } -static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, +static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages, const enum charge_type ctype) { @@ -2918,7 +2927,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, * uncharges. Then, it's ok to ignore memcg's refcnt. */ if (!batch->memcg) - batch->memcg = mem; + batch->memcg = memcg; /* * do_batch > 0 when unmapping pages or inode invalidate/truncate. * In those cases, all pages freed continuously can be expected to be in @@ -2938,7 +2947,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, * merge a series of uncharges to an uncharge of res_counter. * If not, we uncharge res_counter ony by one. */ - if (batch->memcg != mem) + if (batch->memcg != memcg) goto direct_uncharge; /* remember freed charge and uncharge it later */ batch->nr_pages++; @@ -2946,11 +2955,11 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, batch->memsw_nr_pages++; return; direct_uncharge: - res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE); + res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); if (uncharge_memsw) - res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE); - if (unlikely(batch->memcg != mem)) - memcg_oom_recover(mem); + res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); + if (unlikely(batch->memcg != memcg)) + memcg_oom_recover(memcg); return; } @@ -2960,7 +2969,7 @@ direct_uncharge: static struct mem_cgroup * __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; unsigned int nr_pages = 1; struct page_cgroup *pc; @@ -2983,7 +2992,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) lock_page_cgroup(pc); - mem = pc->mem_cgroup; + memcg = pc->mem_cgroup; if (!PageCgroupUsed(pc)) goto unlock_out; @@ -3006,7 +3015,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } - mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages); + mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages); ClearPageCgroupUsed(pc); /* @@ -3018,18 +3027,18 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) unlock_page_cgroup(pc); /* - * even after unlock, we have mem->res.usage here and this memcg + * even after unlock, we have memcg->res.usage here and this memcg * will never be freed. */ - memcg_check_events(mem, page); + memcg_check_events(memcg, page); if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { - mem_cgroup_swap_statistics(mem, true); - mem_cgroup_get(mem); + mem_cgroup_swap_statistics(memcg, true); + mem_cgroup_get(memcg); } - if (!mem_cgroup_is_root(mem)) - mem_cgroup_do_uncharge(mem, nr_pages, ctype); + if (!mem_cgroup_is_root(memcg)) + mem_cgroup_do_uncharge(memcg, nr_pages, ctype); - return mem; + return memcg; unlock_out: unlock_page_cgroup(pc); @@ -3219,7 +3228,7 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, int mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) { - struct mem_cgroup *mem = NULL; + struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; enum charge_type ctype; int ret = 0; @@ -3233,8 +3242,8 @@ int mem_cgroup_prepare_migration(struct page *page, pc = lookup_page_cgroup(page); lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { - mem = pc->mem_cgroup; - css_get(&mem->css); + memcg = pc->mem_cgroup; + css_get(&memcg->css); /* * At migrating an anonymous page, its mapcount goes down * to 0 and uncharge() will be called. But, even if it's fully @@ -3272,12 +3281,12 @@ int mem_cgroup_prepare_migration(struct page *page, * If the page is not charged at this point, * we return here. */ - if (!mem) + if (!memcg) return 0; - *ptr = mem; + *ptr = memcg; ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); - css_put(&mem->css);/* drop extra refcnt */ + css_put(&memcg->css);/* drop extra refcnt */ if (ret || *ptr == NULL) { if (PageAnon(page)) { lock_page_cgroup(pc); @@ -3303,21 +3312,21 @@ int mem_cgroup_prepare_migration(struct page *page, ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; else ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; - __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); + __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); return ret; } /* remove redundant charge if migration failed*/ -void mem_cgroup_end_migration(struct mem_cgroup *mem, +void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok) { struct page *used, *unused; struct page_cgroup *pc; - if (!mem) + if (!memcg) return; /* blocks rmdir() */ - cgroup_exclude_rmdir(&mem->css); + cgroup_exclude_rmdir(&memcg->css); if (!migration_ok) { used = oldpage; unused = newpage; @@ -3353,7 +3362,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, * So, rmdir()->pre_destroy() can be called while we do this charge. * In that case, we need to call pre_destroy() again. check it here. */ - cgroup_release_and_wakeup_rmdir(&mem->css); + cgroup_release_and_wakeup_rmdir(&memcg->css); } #ifdef CONFIG_DEBUG_VM @@ -3432,7 +3441,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, /* * Rather than hide all in some function, I do this in * open coded manner. You see what this really does. - * We have to guarantee mem->res.limit < mem->memsw.limit. + * We have to guarantee memcg->res.limit < memcg->memsw.limit. */ mutex_lock(&set_limit_mutex); memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); @@ -3494,7 +3503,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, /* * Rather than hide all in some function, I do this in * open coded manner. You see what this really does. - * We have to guarantee mem->res.limit < mem->memsw.limit. + * We have to guarantee memcg->res.limit < memcg->memsw.limit. */ mutex_lock(&set_limit_mutex); memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); @@ -3632,7 +3641,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, * This routine traverse page_cgroup in given list and drop them all. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. */ -static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, +static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, int node, int zid, enum lru_list lru) { struct zone *zone; @@ -3643,7 +3652,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, int ret = 0; zone = &NODE_DATA(node)->node_zones[zid]; - mz = mem_cgroup_zoneinfo(mem, node, zid); + mz = mem_cgroup_zoneinfo(memcg, node, zid); list = &mz->lists[lru]; loop = MEM_CGROUP_ZSTAT(mz, lru); @@ -3670,7 +3679,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, page = lookup_cgroup_page(pc); - ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); + ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL); if (ret == -ENOMEM) break; @@ -3691,14 +3700,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, * make mem_cgroup's charge to be 0 if there is no task. * This enables deleting this mem_cgroup. */ -static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) +static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all) { int ret; int node, zid, shrink; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct cgroup *cgrp = mem->css.cgroup; + struct cgroup *cgrp = memcg->css.cgroup; - css_get(&mem->css); + css_get(&memcg->css); shrink = 0; /* should free all ? */ @@ -3714,14 +3723,14 @@ move_account: goto out; /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); - drain_all_stock_sync(mem); + drain_all_stock_sync(memcg); ret = 0; - mem_cgroup_start_move(mem); + mem_cgroup_start_move(memcg); for_each_node_state(node, N_HIGH_MEMORY) { for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { enum lru_list l; for_each_lru(l) { - ret = mem_cgroup_force_empty_list(mem, + ret = mem_cgroup_force_empty_list(memcg, node, zid, l); if (ret) break; @@ -3730,16 +3739,16 @@ move_account: if (ret) break; } - mem_cgroup_end_move(mem); - memcg_oom_recover(mem); + mem_cgroup_end_move(memcg); + memcg_oom_recover(memcg); /* it seems parent cgroup doesn't have enough mem */ if (ret == -ENOMEM) goto try_to_free; cond_resched(); /* "ret" should also be checked to ensure all lists are empty. */ - } while (mem->res.usage > 0 || ret); + } while (memcg->res.usage > 0 || ret); out: - css_put(&mem->css); + css_put(&memcg->css); return ret; try_to_free: @@ -3752,14 +3761,14 @@ try_to_free: lru_add_drain_all(); /* try to free all pages in this cgroup */ shrink = 1; - while (nr_retries && mem->res.usage > 0) { + while (nr_retries && memcg->res.usage > 0) { int progress; if (signal_pending(current)) { ret = -EINTR; goto out; } - progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, + progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, false); if (!progress) { nr_retries--; @@ -3788,12 +3797,12 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, u64 val) { int retval = 0; - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); struct cgroup *parent = cont->parent; - struct mem_cgroup *parent_mem = NULL; + struct mem_cgroup *parent_memcg = NULL; if (parent) - parent_mem = mem_cgroup_from_cont(parent); + parent_memcg = mem_cgroup_from_cont(parent); cgroup_lock(); /* @@ -3804,10 +3813,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, * For the root cgroup, parent_mem is NULL, we allow value to be * set if there are no children. */ - if ((!parent_mem || !parent_mem->use_hierarchy) && + if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { if (list_empty(&cont->children)) - mem->use_hierarchy = val; + memcg->use_hierarchy = val; else retval = -EBUSY; } else @@ -3818,14 +3827,14 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, } -static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, +static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; long val = 0; /* Per-cpu values can be negative, use a signed accumulator */ - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) val += mem_cgroup_read_stat(iter, idx); if (val < 0) /* race ? */ @@ -3833,29 +3842,29 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, return val; } -static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) +static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { u64 val; - if (!mem_cgroup_is_root(mem)) { + if (!mem_cgroup_is_root(memcg)) { if (!swap) - return res_counter_read_u64(&mem->res, RES_USAGE); + return res_counter_read_u64(&memcg->res, RES_USAGE); else - return res_counter_read_u64(&mem->memsw, RES_USAGE); + return res_counter_read_u64(&memcg->memsw, RES_USAGE); } - val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE); - val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS); + val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); + val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); if (swap) - val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT); + val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); return val << PAGE_SHIFT; } static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); u64 val; int type, name; @@ -3864,15 +3873,15 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) switch (type) { case _MEM: if (name == RES_USAGE) - val = mem_cgroup_usage(mem, false); + val = mem_cgroup_usage(memcg, false); else - val = res_counter_read_u64(&mem->res, name); + val = res_counter_read_u64(&memcg->res, name); break; case _MEMSWAP: if (name == RES_USAGE) - val = mem_cgroup_usage(mem, true); + val = mem_cgroup_usage(memcg, true); else - val = res_counter_read_u64(&mem->memsw, name); + val = res_counter_read_u64(&memcg->memsw, name); break; default: BUG(); @@ -3960,24 +3969,24 @@ out: static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { - struct mem_cgroup *mem; + struct mem_cgroup *memcg; int type, name; - mem = mem_cgroup_from_cont(cont); + memcg = mem_cgroup_from_cont(cont); type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); switch (name) { case RES_MAX_USAGE: if (type == _MEM) - res_counter_reset_max(&mem->res); + res_counter_reset_max(&memcg->res); else - res_counter_reset_max(&mem->memsw); + res_counter_reset_max(&memcg->memsw); break; case RES_FAILCNT: if (type == _MEM) - res_counter_reset_failcnt(&mem->res); + res_counter_reset_failcnt(&memcg->res); else - res_counter_reset_failcnt(&mem->memsw); + res_counter_reset_failcnt(&memcg->memsw); break; } @@ -3994,7 +4003,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp, static int mem_cgroup_move_charge_write(struct cgroup *cgrp, struct cftype *cft, u64 val) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); if (val >= (1 << NR_MOVE_TYPE)) return -EINVAL; @@ -4004,7 +4013,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, * inconsistent. */ cgroup_lock(); - mem->move_charge_at_immigrate = val; + memcg->move_charge_at_immigrate = val; cgroup_unlock(); return 0; @@ -4061,49 +4070,49 @@ struct { static void -mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) +mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) { s64 val; /* per cpu stat */ - val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); + val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE); s->stat[MCS_CACHE] += val * PAGE_SIZE; - val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); + val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS); s->stat[MCS_RSS] += val * PAGE_SIZE; - val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); + val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; - val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN); + val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN); s->stat[MCS_PGPGIN] += val; - val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT); + val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT); s->stat[MCS_PGPGOUT] += val; if (do_swap_account) { - val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); + val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); s->stat[MCS_SWAP] += val * PAGE_SIZE; } - val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT); + val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT); s->stat[MCS_PGFAULT] += val; - val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); + val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT); s->stat[MCS_PGMAJFAULT] += val; /* per zone stat */ - val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); + val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON)); + val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE)); + val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE)); + val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE)); + val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE)); s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; } static void -mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) +mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) { struct mem_cgroup *iter; - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) mem_cgroup_get_local_stat(iter, s); } @@ -4189,8 +4198,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, } #ifdef CONFIG_DEBUG_VM - cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); - { int nid, zid; struct mem_cgroup_per_zone *mz; @@ -4327,20 +4334,20 @@ static int compare_thresholds(const void *a, const void *b) return _a->threshold - _b->threshold; } -static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) +static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) { struct mem_cgroup_eventfd_list *ev; - list_for_each_entry(ev, &mem->oom_notify, list) + list_for_each_entry(ev, &memcg->oom_notify, list) eventfd_signal(ev->eventfd, 1); return 0; } -static void mem_cgroup_oom_notify(struct mem_cgroup *mem) +static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) { struct mem_cgroup *iter; - for_each_mem_cgroup_tree(iter, mem) + for_each_mem_cgroup_tree(iter, memcg) mem_cgroup_oom_notify_cb(iter); } @@ -4530,7 +4537,7 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp, static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, struct cftype *cft, struct eventfd_ctx *eventfd) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct mem_cgroup_eventfd_list *ev, *tmp; int type = MEMFILE_TYPE(cft->private); @@ -4538,7 +4545,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, spin_lock(&memcg_oom_lock); - list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) { + list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) { if (ev->eventfd == eventfd) { list_del(&ev->list); kfree(ev); @@ -4551,11 +4558,11 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, static int mem_cgroup_oom_control_read(struct cgroup *cgrp, struct cftype *cft, struct cgroup_map_cb *cb) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); - cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable); + cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); - if (atomic_read(&mem->under_oom)) + if (atomic_read(&memcg->under_oom)) cb->fill(cb, "under_oom", 1); else cb->fill(cb, "under_oom", 0); @@ -4565,7 +4572,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp, static int mem_cgroup_oom_control_write(struct cgroup *cgrp, struct cftype *cft, u64 val) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct mem_cgroup *parent; /* cannot set to root cgroup and only 0 and 1 are allowed */ @@ -4577,13 +4584,13 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, cgroup_lock(); /* oom-kill-disable is a flag for subhierarchy. */ if ((parent->use_hierarchy) || - (mem->use_hierarchy && !list_empty(&cgrp->children))) { + (memcg->use_hierarchy && !list_empty(&cgrp->children))) { cgroup_unlock(); return -EINVAL; } - mem->oom_kill_disable = val; + memcg->oom_kill_disable = val; if (!val) - memcg_oom_recover(mem); + memcg_oom_recover(memcg); cgroup_unlock(); return 0; } @@ -4719,7 +4726,7 @@ static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) } #endif -static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) +static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; struct mem_cgroup_per_zone *mz; @@ -4739,21 +4746,21 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) if (!pn) return 1; - mem->info.nodeinfo[node] = pn; for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; for_each_lru(l) INIT_LIST_HEAD(&mz->lists[l]); mz->usage_in_excess = 0; mz->on_tree = false; - mz->mem = mem; + mz->mem = memcg; } + memcg->info.nodeinfo[node] = pn; return 0; } -static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) +static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { - kfree(mem->info.nodeinfo[node]); + kfree(memcg->info.nodeinfo[node]); } static struct mem_cgroup *mem_cgroup_alloc(void) @@ -4795,51 +4802,51 @@ out_free: * Removal of cgroup itself succeeds regardless of refs from swap. */ -static void __mem_cgroup_free(struct mem_cgroup *mem) +static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - mem_cgroup_remove_from_trees(mem); - free_css_id(&mem_cgroup_subsys, &mem->css); + mem_cgroup_remove_from_trees(memcg); + free_css_id(&mem_cgroup_subsys, &memcg->css); for_each_node_state(node, N_POSSIBLE) - free_mem_cgroup_per_zone_info(mem, node); + free_mem_cgroup_per_zone_info(memcg, node); - free_percpu(mem->stat); + free_percpu(memcg->stat); if (sizeof(struct mem_cgroup) < PAGE_SIZE) - kfree(mem); + kfree(memcg); else - vfree(mem); + vfree(memcg); } -static void mem_cgroup_get(struct mem_cgroup *mem) +static void mem_cgroup_get(struct mem_cgroup *memcg) { - atomic_inc(&mem->refcnt); + atomic_inc(&memcg->refcnt); } -static void __mem_cgroup_put(struct mem_cgroup *mem, int count) +static void __mem_cgroup_put(struct mem_cgroup *memcg, int count) { - if (atomic_sub_and_test(count, &mem->refcnt)) { - struct mem_cgroup *parent = parent_mem_cgroup(mem); - __mem_cgroup_free(mem); + if (atomic_sub_and_test(count, &memcg->refcnt)) { + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + __mem_cgroup_free(memcg); if (parent) mem_cgroup_put(parent); } } -static void mem_cgroup_put(struct mem_cgroup *mem) +static void mem_cgroup_put(struct mem_cgroup *memcg) { - __mem_cgroup_put(mem, 1); + __mem_cgroup_put(memcg, 1); } /* * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. */ -static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { - if (!mem->res.parent) + if (!memcg->res.parent) return NULL; - return mem_cgroup_from_res_counter(mem->res.parent, res); + return mem_cgroup_from_res_counter(memcg->res.parent, res); } #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP @@ -4882,16 +4889,16 @@ static int mem_cgroup_soft_limit_tree_init(void) static struct cgroup_subsys_state * __ref mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { - struct mem_cgroup *mem, *parent; + struct mem_cgroup *memcg, *parent; long error = -ENOMEM; int node; - mem = mem_cgroup_alloc(); - if (!mem) + memcg = mem_cgroup_alloc(); + if (!memcg) return ERR_PTR(error); for_each_node_state(node, N_POSSIBLE) - if (alloc_mem_cgroup_per_zone_info(mem, node)) + if (alloc_mem_cgroup_per_zone_info(memcg, node)) goto free_out; /* root ? */ @@ -4899,7 +4906,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) int cpu; enable_swap_cgroup(); parent = NULL; - root_mem_cgroup = mem; + root_mem_cgroup = memcg; if (mem_cgroup_soft_limit_tree_init()) goto free_out; for_each_possible_cpu(cpu) { @@ -4910,13 +4917,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) hotcpu_notifier(memcg_cpu_hotplug_callback, 0); } else { parent = mem_cgroup_from_cont(cont->parent); - mem->use_hierarchy = parent->use_hierarchy; - mem->oom_kill_disable = parent->oom_kill_disable; + memcg->use_hierarchy = parent->use_hierarchy; + memcg->oom_kill_disable = parent->oom_kill_disable; } if (parent && parent->use_hierarchy) { - res_counter_init(&mem->res, &parent->res); - res_counter_init(&mem->memsw, &parent->memsw); + res_counter_init(&memcg->res, &parent->res); + res_counter_init(&memcg->memsw, &parent->memsw); /* * We increment refcnt of the parent to ensure that we can * safely access it on res_counter_charge/uncharge. @@ -4925,21 +4932,21 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) */ mem_cgroup_get(parent); } else { - res_counter_init(&mem->res, NULL); - res_counter_init(&mem->memsw, NULL); + res_counter_init(&memcg->res, NULL); + res_counter_init(&memcg->memsw, NULL); } - mem->last_scanned_child = 0; - mem->last_scanned_node = MAX_NUMNODES; - INIT_LIST_HEAD(&mem->oom_notify); + memcg->last_scanned_child = 0; + memcg->last_scanned_node = MAX_NUMNODES; + INIT_LIST_HEAD(&memcg->oom_notify); if (parent) - mem->swappiness = mem_cgroup_swappiness(parent); - atomic_set(&mem->refcnt, 1); - mem->move_charge_at_immigrate = 0; - mutex_init(&mem->thresholds_lock); - return &mem->css; + memcg->swappiness = mem_cgroup_swappiness(parent); + atomic_set(&memcg->refcnt, 1); + memcg->move_charge_at_immigrate = 0; + mutex_init(&memcg->thresholds_lock); + return &memcg->css; free_out: - __mem_cgroup_free(mem); + __mem_cgroup_free(memcg); root_mem_cgroup = NULL; return ERR_PTR(error); } @@ -4947,17 +4954,17 @@ free_out: static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - return mem_cgroup_force_empty(mem, false); + return mem_cgroup_force_empty(memcg, false); } static void mem_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - mem_cgroup_put(mem); + mem_cgroup_put(memcg); } static int mem_cgroup_populate(struct cgroup_subsys *ss, @@ -4980,9 +4987,9 @@ static int mem_cgroup_do_precharge(unsigned long count) { int ret = 0; int batch_count = PRECHARGE_COUNT_AT_ONCE; - struct mem_cgroup *mem = mc.to; + struct mem_cgroup *memcg = mc.to; - if (mem_cgroup_is_root(mem)) { + if (mem_cgroup_is_root(memcg)) { mc.precharge += count; /* we don't need css_get for root */ return ret; @@ -4991,16 +4998,16 @@ static int mem_cgroup_do_precharge(unsigned long count) if (count > 1) { struct res_counter *dummy; /* - * "mem" cannot be under rmdir() because we've already checked + * "memcg" cannot be under rmdir() because we've already checked * by cgroup_lock_live_cgroup() that it is not removed and we * are still under the same cgroup_mutex. So we can postpone * css_get(). */ - if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) + if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy)) goto one_by_one; - if (do_swap_account && res_counter_charge(&mem->memsw, + if (do_swap_account && res_counter_charge(&memcg->memsw, PAGE_SIZE * count, &dummy)) { - res_counter_uncharge(&mem->res, PAGE_SIZE * count); + res_counter_uncharge(&memcg->res, PAGE_SIZE * count); goto one_by_one; } mc.precharge += count; @@ -5017,8 +5024,9 @@ one_by_one: batch_count = PRECHARGE_COUNT_AT_ONCE; cond_resched(); } - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false); - if (ret || !mem) + ret = __mem_cgroup_try_charge(NULL, + GFP_KERNEL, 1, &memcg, false); + if (ret || !memcg) /* mem_cgroup_clear_mc() will do uncharge later */ return -ENOMEM; mc.precharge++; @@ -5292,13 +5300,13 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, struct task_struct *p) { int ret = 0; - struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); - if (mem->move_charge_at_immigrate) { + if (memcg->move_charge_at_immigrate) { struct mm_struct *mm; struct mem_cgroup *from = mem_cgroup_from_task(p); - VM_BUG_ON(from == mem); + VM_BUG_ON(from == memcg); mm = get_task_mm(p); if (!mm) @@ -5313,7 +5321,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, mem_cgroup_start_move(from); spin_lock(&mc.lock); mc.from = from; - mc.to = mem; + mc.to = memcg; spin_unlock(&mc.lock); /* We set mc.moving_task later */ diff --git a/mm/memory.c b/mm/memory.c index a56e3ba..b2b8731 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1503,7 +1503,7 @@ split_fallthrough: } if (flags & FOLL_GET) - get_page(page); + get_page_foll(page); if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 6bdc67d..2d123f9 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -133,10 +133,13 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc) static void *__meminit alloc_page_cgroup(size_t size, int nid) { void *addr = NULL; + gfp_t flags = GFP_KERNEL | __GFP_NOWARN; - addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN); - if (addr) + addr = alloc_pages_exact_nid(nid, size, flags); + if (addr) { + kmemleak_alloc(addr, size, 1, flags); return addr; + } if (node_state(nid, N_HIGH_MEMORY)) addr = vmalloc_node(size, nid); @@ -357,7 +360,7 @@ struct swap_cgroup_ctrl { spinlock_t lock; }; -struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; +static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; struct swap_cgroup { unsigned short id; @@ -78,39 +78,22 @@ static void put_compound_page(struct page *page) { if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ - struct page *page_head = page->first_page; - smp_rmb(); - /* - * If PageTail is still set after smp_rmb() we can be sure - * that the page->first_page we read wasn't a dangling pointer. - * See __split_huge_page_refcount() smp_wmb(). - */ - if (likely(PageTail(page) && get_page_unless_zero(page_head))) { + struct page *page_head = compound_trans_head(page); + + if (likely(page != page_head && + get_page_unless_zero(page_head))) { unsigned long flags; /* - * Verify that our page_head wasn't converted - * to a a regular page before we got a - * reference on it. + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. */ - if (unlikely(!PageHead(page_head))) { - /* PageHead is cleared after PageTail */ - smp_rmb(); - VM_BUG_ON(PageTail(page)); - goto out_put_head; - } - /* - * Only run compound_lock on a valid PageHead, - * after having it pinned with - * get_page_unless_zero() above. - */ - smp_mb(); - /* page_head wasn't a dangling pointer */ flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); VM_BUG_ON(PageHead(page_head)); - out_put_head: if (put_page_testzero(page_head)) __put_single_page(page_head); out_put_single: @@ -121,16 +104,17 @@ static void put_compound_page(struct page *page) VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by - * get_page_unless_zero now that - * split_huge_page_refcount is blocked on the - * compound_lock. + * get_page_unless_zero() now that + * __split_huge_page_refcount() is blocked on + * the compound_lock. */ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); - atomic_dec(&page->_count); + VM_BUG_ON(page_mapcount(page) <= 0); + atomic_dec(&page->_mapcount); VM_BUG_ON(atomic_read(&page_head->_count) <= 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { if (PageHead(page_head)) @@ -160,6 +144,45 @@ void put_page(struct page *page) } EXPORT_SYMBOL(put_page); +/* + * This function is exported but must not be called by anything other + * than get_page(). It implements the slow path of get_page(). + */ +bool __get_page_tail(struct page *page) +{ + /* + * This takes care of get_page() if run on a tail page + * returned by one of the get_user_pages/follow_page variants. + * get_user_pages/follow_page itself doesn't need the compound + * lock because it runs __get_page_tail_foll() under the + * proper PT lock that already serializes against + * split_huge_page(). + */ + unsigned long flags; + bool got = false; + struct page *page_head = compound_trans_head(page); + + if (likely(page != page_head && get_page_unless_zero(page_head))) { + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. + */ + flags = compound_lock_irqsave(page_head); + /* here __split_huge_page_refcount won't run anymore */ + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + got = true; + } + compound_unlock_irqrestore(page_head, flags); + if (unlikely(!got)) + put_page(page_head); + } + return got; +} +EXPORT_SYMBOL(__get_page_tail); + /** * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru diff --git a/mm/vmscan.c b/mm/vmscan.c index a90c603..132d1dd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) if (scanning_global_lru(sc)) low = inactive_anon_is_low_global(zone); else - low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup); + low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); return low; } #else @@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc) if (scanning_global_lru(sc)) low = inactive_file_is_low_global(zone); else - low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup); + low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone); return low; } |