diff options
Diffstat (limited to 'drivers')
76 files changed, 3490 insertions, 2676 deletions
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index cb648a4..edeb2d1 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -43,6 +43,7 @@ acpi-y += pci_root.o pci_link.o pci_irq.o acpi-y += acpi_lpss.o acpi_apd.o acpi-y += acpi_platform.o acpi-y += acpi_pnp.o +acpi-$(CONFIG_ARM_AMBA) += acpi_amba.o acpi-y += int340x_thermal.o acpi-y += power.o acpi-y += event.o diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c new file mode 100644 index 0000000..2a61b54 --- /dev/null +++ b/drivers/acpi/acpi_amba.c @@ -0,0 +1,122 @@ + +/* + * ACPI support for platform bus type. + * + * Copyright (C) 2015, Linaro Ltd + * Author: Graeme Gregory <graeme.gregory@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/acpi.h> +#include <linux/amba/bus.h> +#include <linux/clkdev.h> +#include <linux/clk-provider.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "internal.h" + +static const struct acpi_device_id amba_id_list[] = { + {"ARMH0061", 0}, /* PL061 GPIO Device */ + {"", 0}, +}; + +static void amba_register_dummy_clk(void) +{ + static struct clk *amba_dummy_clk; + + /* If clock already registered */ + if (amba_dummy_clk) + return; + + amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, + CLK_IS_ROOT, 0); + clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL); +} + +static int amba_handler_attach(struct acpi_device *adev, + const struct acpi_device_id *id) +{ + struct amba_device *dev; + struct resource_entry *rentry; + struct list_head resource_list; + bool address_found = false; + int irq_no = 0; + int ret; + + /* If the ACPI node already has a physical device attached, skip it. */ + if (adev->physical_node_count) + return 0; + + dev = amba_device_alloc(dev_name(&adev->dev), 0, 0); + if (!dev) { + dev_err(&adev->dev, "%s(): amba_device_alloc() failed\n", + __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); + if (ret < 0) + goto err_free; + + list_for_each_entry(rentry, &resource_list, node) { + switch (resource_type(rentry->res)) { + case IORESOURCE_MEM: + if (!address_found) { + dev->res = *rentry->res; + address_found = true; + } + break; + case IORESOURCE_IRQ: + if (irq_no < AMBA_NR_IRQS) + dev->irq[irq_no++] = rentry->res->start; + break; + default: + dev_warn(&adev->dev, "Invalid resource\n"); + break; + } + } + + acpi_dev_free_resource_list(&resource_list); + + /* + * If the ACPI node has a parent and that parent has a physical device + * attached to it, that physical device should be the parent of + * the amba device we are about to create. + */ + if (adev->parent) + dev->dev.parent = acpi_get_first_physical_node(adev->parent); + + ACPI_COMPANION_SET(&dev->dev, adev); + + ret = amba_device_add(dev, &iomem_resource); + if (ret) { + dev_err(&adev->dev, "%s(): amba_device_add() failed (%d)\n", + __func__, ret); + goto err_free; + } + + return 1; + +err_free: + amba_device_put(dev); + return ret; +} + +static struct acpi_scan_handler amba_handler = { + .ids = amba_id_list, + .attach = amba_handler_attach, +}; + +void __init acpi_amba_init(void) +{ + amba_register_dummy_clk(); + acpi_scan_add_handler(&amba_handler); +} diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index d507cf6..d0aad06 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -143,6 +143,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = { /* Generic apd devices */ #ifdef CONFIG_X86_AMD_PLATFORM_DEVICE { "AMD0010", APD_ADDR(cz_i2c_desc) }, + { "AMDI0010", APD_ADDR(cz_i2c_desc) }, { "AMD0020", APD_ADDR(cz_uart_desc) }, { "AMD0030", }, #endif diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index b6f7fa3..159f7f1 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -43,7 +43,6 @@ static const struct acpi_device_id forbidden_id_list[] = { struct platform_device *acpi_create_platform_device(struct acpi_device *adev) { struct platform_device *pdev = NULL; - struct acpi_device *acpi_parent; struct platform_device_info pdevinfo; struct resource_entry *rentry; struct list_head resource_list; @@ -82,22 +81,8 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) * attached to it, that physical device should be the parent of the * platform device we are about to create. */ - pdevinfo.parent = NULL; - acpi_parent = adev->parent; - if (acpi_parent) { - struct acpi_device_physical_node *entry; - struct list_head *list; - - mutex_lock(&acpi_parent->physical_node_lock); - list = &acpi_parent->physical_node_list; - if (!list_empty(list)) { - entry = list_first_entry(list, - struct acpi_device_physical_node, - node); - pdevinfo.parent = entry->dev; - } - mutex_unlock(&acpi_parent->physical_node_lock); - } + pdevinfo.parent = adev->parent ? + acpi_get_first_physical_node(adev->parent) : NULL; pdevinfo.name = dev_name(&adev->dev); pdevinfo.id = -1; pdevinfo.res = resources; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 6979186..b5e54f2 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -514,7 +514,24 @@ static struct acpi_scan_handler processor_handler = { }, }; +static int acpi_processor_container_attach(struct acpi_device *dev, + const struct acpi_device_id *id) +{ + return 1; +} + +static const struct acpi_device_id processor_container_ids[] = { + { ACPI_PROCESSOR_CONTAINER_HID, }, + { } +}; + +static struct acpi_scan_handler processor_container_handler = { + .ids = processor_container_ids, + .attach = acpi_processor_container_attach, +}; + void __init acpi_processor_init(void) { acpi_scan_add_handler_with_hotplug(&processor_handler, "processor"); + acpi_scan_add_handler(&processor_container_handler); } diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index a76f8be..4361bc9 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -218,13 +218,6 @@ struct acpi_video_device { struct thermal_cooling_device *cooling_dev; }; -static const char device_decode[][30] = { - "motherboard VGA device", - "PCI VGA device", - "AGP VGA device", - "UNKNOWN", -}; - static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data); static void acpi_video_device_rebind(struct acpi_video_bus *video); static void acpi_video_device_bind(struct acpi_video_bus *video, diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 55c8197..51b073b 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -165,7 +165,7 @@ ACPI_GLOBAL(u8, acpi_gbl_next_owner_id_offset); /* Initialization sequencing */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_reg_methods_enabled, FALSE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_namespace_initialized, FALSE); /* Misc */ diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index e4977fac..9562a10 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -85,7 +85,7 @@ union acpi_parse_object; #define ACPI_MTX_MEMORY 5 /* Debug memory tracking lists */ #define ACPI_MAX_MUTEX 5 -#define ACPI_NUM_MUTEX ACPI_MAX_MUTEX+1 +#define ACPI_NUM_MUTEX (ACPI_MAX_MUTEX+1) /* Lock structure for reader/writer interfaces */ @@ -103,11 +103,11 @@ struct acpi_rw_lock { #define ACPI_LOCK_HARDWARE 1 #define ACPI_MAX_LOCK 1 -#define ACPI_NUM_LOCK ACPI_MAX_LOCK+1 +#define ACPI_NUM_LOCK (ACPI_MAX_LOCK+1) /* This Thread ID means that the mutex is not in use (unlocked) */ -#define ACPI_MUTEX_NOT_ACQUIRED (acpi_thread_id) 0 +#define ACPI_MUTEX_NOT_ACQUIRED ((acpi_thread_id) 0) /* This Thread ID means an invalid thread ID */ diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 9684ed6..022d69c 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -88,7 +88,7 @@ */ acpi_status acpi_ns_initialize_objects(void); -acpi_status acpi_ns_initialize_devices(void); +acpi_status acpi_ns_initialize_devices(u32 flags); /* * nsload - Namespace loading diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 52f6bee..5faeab4 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -1125,7 +1125,7 @@ const union acpi_predefined_info acpi_gbl_resource_names[] = { PACKAGE_INFO(0, 0, 0, 0, 0, 0) /* Table terminator */ }; -static const union acpi_predefined_info acpi_gbl_scope_names[] = { +const union acpi_predefined_info acpi_gbl_scope_names[] = { {{"_GPE", 0, 0}}, {{"_PR_", 0, 0}}, {{"_SB_", 0, 0}}, diff --git a/drivers/acpi/acpica/dbcmds.c b/drivers/acpi/acpica/dbcmds.c index 7ec62c4..772178c 100644 --- a/drivers/acpi/acpica/dbcmds.c +++ b/drivers/acpi/acpica/dbcmds.c @@ -348,7 +348,7 @@ void acpi_db_display_table_info(char *table_arg) } else { /* If the pointer is null, the table has been unloaded */ - ACPI_INFO((AE_INFO, "%4.4s - Table has been unloaded", + ACPI_INFO(("%4.4s - Table has been unloaded", table_desc->signature.ascii)); } } diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c index 9fee88f..68f4e0f4 100644 --- a/drivers/acpi/acpica/dbconvert.c +++ b/drivers/acpi/acpica/dbconvert.c @@ -408,7 +408,7 @@ void acpi_db_dump_pld_buffer(union acpi_object *obj_desc) new_buffer = acpi_db_encode_pld_buffer(pld_info); if (!new_buffer) { - return; + goto exit; } /* The two bit-packed buffers should match */ @@ -479,6 +479,7 @@ void acpi_db_dump_pld_buffer(union acpi_object *obj_desc) pld_info->horizontal_offset); } - ACPI_FREE(pld_info); ACPI_FREE(new_buffer); +exit: + ACPI_FREE(pld_info); } diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index 6a72047..1982310 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -809,8 +809,7 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc, if (method_desc->method. info_flags & ACPI_METHOD_SERIALIZED_PENDING) { if (walk_state) { - ACPI_INFO((AE_INFO, - "Marking method %4.4s as Serialized " + ACPI_INFO(("Marking method %4.4s as Serialized " "because of AE_ALREADY_EXISTS error", walk_state->method_node->name. ascii)); diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index c303e9d..a91de2b 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -524,8 +524,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state, arg = arg->common.next; } - ACPI_INFO((AE_INFO, - "Actual Package length (%u) is larger than " + ACPI_INFO(("Actual Package length (%u) is larger than " "NumElements field (%u), truncated", i, element_count)); } else if (i < element_count) { diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index 9275e62..447fa1c 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -499,8 +499,7 @@ acpi_ev_initialize_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, } if (gpe_enabled_count) { - ACPI_INFO((AE_INFO, - "Enabled %u GPEs in block %02X to %02X", + ACPI_INFO(("Enabled %u GPEs in block %02X to %02X", gpe_enabled_count, (u32)gpe_block->block_base_number, (u32)(gpe_block->block_base_number + (gpe_block->gpe_count - 1)))); diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c index 9fdd8d0..7dc7547 100644 --- a/drivers/acpi/acpica/evgpeinit.c +++ b/drivers/acpi/acpica/evgpeinit.c @@ -281,7 +281,7 @@ void acpi_ev_update_gpes(acpi_owner_id table_owner_id) } if (walk_info.count) { - ACPI_INFO((AE_INFO, "Enabled %u new GPEs", walk_info.count)); + ACPI_INFO(("Enabled %u new GPEs", walk_info.count)); } (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 47092b4..63924d1 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -600,7 +600,7 @@ acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function) if (region_obj2->extra.method_REG == NULL || region_obj->region.handler == NULL || - !acpi_gbl_reg_methods_enabled) { + !acpi_gbl_namespace_initialized) { return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 011df21..f741613 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -252,7 +252,7 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state, status = acpi_get_table_by_index(table_index, &table); if (ACPI_SUCCESS(status)) { - ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:")); + ACPI_INFO(("Dynamic OEM Table Load:")); acpi_tb_print_table_header(0, table); } @@ -472,7 +472,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, /* Install the new table into the local data structures */ - ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:")); + ACPI_INFO(("Dynamic OEM Table Load:")); (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table), diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c index 28eb861..5aa21c4 100644 --- a/drivers/acpi/acpica/exoparg3.c +++ b/drivers/acpi/acpica/exoparg3.c @@ -123,8 +123,10 @@ acpi_status acpi_ex_opcode_3A_0T_0R(struct acpi_walk_state *walk_state) * op is intended for use by disassemblers in order to properly * disassemble control method invocations. The opcode or group of * opcodes should be surrounded by an "if (0)" clause to ensure that - * AML interpreters never see the opcode. + * AML interpreters never see the opcode. Thus, something is + * wrong if an external opcode ever gets here. */ + ACPI_ERROR((AE_INFO, "Executed External Op")); status = AE_OK; goto cleanup; diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c index 65d58be..5d59cfc 100644 --- a/drivers/acpi/acpica/nseval.c +++ b/drivers/acpi/acpica/nseval.c @@ -378,8 +378,7 @@ void acpi_ns_exec_module_code_list(void) acpi_ut_remove_reference(prev); } - ACPI_INFO((AE_INFO, - "Executed %u blocks of module-level executable AML code", + ACPI_INFO(("Executed %u blocks of module-level executable AML code", method_count)); ACPI_FREE(info); diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index bd75d46..d4aa8b6 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -46,6 +46,7 @@ #include "acnamesp.h" #include "acdispat.h" #include "acinterp.h" +#include "acevents.h" #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsinit") @@ -83,6 +84,8 @@ acpi_status acpi_ns_initialize_objects(void) ACPI_FUNCTION_TRACE(ns_initialize_objects); + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "[Init] Completing Initialization of ACPI Objects\n")); ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "**** Starting initialization of namespace objects ****\n")); ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, @@ -133,82 +136,108 @@ acpi_status acpi_ns_initialize_objects(void) * ******************************************************************************/ -acpi_status acpi_ns_initialize_devices(void) +acpi_status acpi_ns_initialize_devices(u32 flags) { - acpi_status status; + acpi_status status = AE_OK; struct acpi_device_walk_info info; ACPI_FUNCTION_TRACE(ns_initialize_devices); - /* Init counters */ + if (!(flags & ACPI_NO_DEVICE_INIT)) { + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "[Init] Initializing ACPI Devices\n")); - info.device_count = 0; - info.num_STA = 0; - info.num_INI = 0; + /* Init counters */ - ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, - "Initializing Device/Processor/Thermal objects " - "and executing _INI/_STA methods:\n")); + info.device_count = 0; + info.num_STA = 0; + info.num_INI = 0; - /* Tree analysis: find all subtrees that contain _INI methods */ + ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, + "Initializing Device/Processor/Thermal objects " + "and executing _INI/_STA methods:\n")); - status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, FALSE, - acpi_ns_find_ini_methods, NULL, &info, - NULL); - if (ACPI_FAILURE(status)) { - goto error_exit; - } + /* Tree analysis: find all subtrees that contain _INI methods */ + + status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, FALSE, + acpi_ns_find_ini_methods, NULL, + &info, NULL); + if (ACPI_FAILURE(status)) { + goto error_exit; + } + + /* Allocate the evaluation information block */ - /* Allocate the evaluation information block */ + info.evaluate_info = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); + if (!info.evaluate_info) { + status = AE_NO_MEMORY; + goto error_exit; + } - info.evaluate_info = - ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); - if (!info.evaluate_info) { - status = AE_NO_MEMORY; - goto error_exit; + /* + * Execute the "global" _INI method that may appear at the root. + * This support is provided for Windows compatibility (Vista+) and + * is not part of the ACPI specification. + */ + info.evaluate_info->prefix_node = acpi_gbl_root_node; + info.evaluate_info->relative_pathname = METHOD_NAME__INI; + info.evaluate_info->parameters = NULL; + info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE; + + status = acpi_ns_evaluate(info.evaluate_info); + if (ACPI_SUCCESS(status)) { + info.num_INI++; + } } /* - * Execute the "global" _INI method that may appear at the root. This - * support is provided for Windows compatibility (Vista+) and is not - * part of the ACPI specification. + * Run all _REG methods + * + * Note: Any objects accessed by the _REG methods will be automatically + * initialized, even if they contain executable AML (see the call to + * acpi_ns_initialize_objects below). */ - info.evaluate_info->prefix_node = acpi_gbl_root_node; - info.evaluate_info->relative_pathname = METHOD_NAME__INI; - info.evaluate_info->parameters = NULL; - info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE; + if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) { + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "[Init] Executing _REG OpRegion methods\n")); - status = acpi_ns_evaluate(info.evaluate_info); - if (ACPI_SUCCESS(status)) { - info.num_INI++; + status = acpi_ev_initialize_op_regions(); + if (ACPI_FAILURE(status)) { + goto error_exit; + } } - /* Walk namespace to execute all _INIs on present devices */ + if (!(flags & ACPI_NO_DEVICE_INIT)) { - status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, FALSE, - acpi_ns_init_one_device, NULL, &info, - NULL); + /* Walk namespace to execute all _INIs on present devices */ - /* - * Any _OSI requests should be completed by now. If the BIOS has - * requested any Windows OSI strings, we will always truncate - * I/O addresses to 16 bits -- for Windows compatibility. - */ - if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) { - acpi_gbl_truncate_io_addresses = TRUE; - } + status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, FALSE, + acpi_ns_init_one_device, NULL, + &info, NULL); - ACPI_FREE(info.evaluate_info); - if (ACPI_FAILURE(status)) { - goto error_exit; - } + /* + * Any _OSI requests should be completed by now. If the BIOS has + * requested any Windows OSI strings, we will always truncate + * I/O addresses to 16 bits -- for Windows compatibility. + */ + if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) { + acpi_gbl_truncate_io_addresses = TRUE; + } - ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, - " Executed %u _INI methods requiring %u _STA executions " - "(examined %u objects)\n", - info.num_INI, info.num_STA, info.device_count)); + ACPI_FREE(info.evaluate_info); + if (ACPI_FAILURE(status)) { + goto error_exit; + } + + ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, + " Executed %u _INI methods requiring %u _STA executions " + "(examined %u objects)\n", + info.num_INI, info.num_STA, + info.device_count)); + } return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index b661a1e..4dc6108 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -267,8 +267,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, if (!reload && acpi_gbl_disable_ssdt_table_install && ACPI_COMPARE_NAME(&new_table_desc.signature, ACPI_SIG_SSDT)) { - ACPI_INFO((AE_INFO, - "Ignoring installation of %4.4s at %8.8X%8.8X", + ACPI_INFO(("Ignoring installation of %4.4s at %8.8X%8.8X", new_table_desc.signature.ascii, ACPI_FORMAT_UINT64(address))); goto release_and_exit; @@ -432,7 +431,7 @@ finish_override: return; } - ACPI_INFO((AE_INFO, "%4.4s 0x%8.8X%8.8X" + ACPI_INFO(("%4.4s 0x%8.8X%8.8X" " %s table override, new table: 0x%8.8X%8.8X", old_table_desc->signature.ascii, ACPI_FORMAT_UINT64(old_table_desc->address), diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index fd4146d..26d61db 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -132,7 +132,7 @@ acpi_tb_print_table_header(acpi_physical_address address, /* FACS only has signature and length fields */ - ACPI_INFO((AE_INFO, "%-4.4s 0x%8.8X%8.8X %06X", + ACPI_INFO(("%-4.4s 0x%8.8X%8.8X %06X", header->signature, ACPI_FORMAT_UINT64(address), header->length)); } else if (ACPI_VALIDATE_RSDP_SIG(header->signature)) { @@ -144,7 +144,7 @@ acpi_tb_print_table_header(acpi_physical_address address, ACPI_OEM_ID_SIZE); acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); - ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", + ACPI_INFO(("RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", ACPI_FORMAT_UINT64(address), (ACPI_CAST_PTR(struct acpi_table_rsdp, header)-> revision > @@ -158,8 +158,7 @@ acpi_tb_print_table_header(acpi_physical_address address, acpi_tb_cleanup_table_header(&local_header, header); - ACPI_INFO((AE_INFO, - "%-4.4s 0x%8.8X%8.8X" + ACPI_INFO(("%-4.4s 0x%8.8X%8.8X" " %06X (v%.2d %-6.6s %-8.8s %08X %-4.4s %08X)", local_header.signature, ACPI_FORMAT_UINT64(address), local_header.length, local_header.revision, diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 3269bef..9240c76 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -174,9 +174,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index) ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL, new_table); - ACPI_INFO((AE_INFO, - "Forced DSDT copy: length 0x%05X copied locally, original unmapped", - new_table->length)); + ACPI_INFO(("Forced DSDT copy: length 0x%05X copied locally, original unmapped", new_table->length)); return (new_table); } diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index 278666e..3151968 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -47,6 +47,7 @@ #include "accommon.h" #include "acnamesp.h" #include "actables.h" +#include "acevents.h" #define _COMPONENT ACPI_TABLES ACPI_MODULE_NAME("tbxfload") @@ -68,6 +69,25 @@ acpi_status __init acpi_load_tables(void) ACPI_FUNCTION_TRACE(acpi_load_tables); + /* + * Install the default operation region handlers. These are the + * handlers that are defined by the ACPI specification to be + * "always accessible" -- namely, system_memory, system_IO, and + * PCI_Config. This also means that no _REG methods need to be + * run for these address spaces. We need to have these handlers + * installed before any AML code can be executed, especially any + * module-level code (11/2015). + * Note that we allow OSPMs to install their own region handlers + * between acpi_initialize_subsystem() and acpi_load_tables() to use + * their customized default region handlers. + */ + status = acpi_ev_install_region_handlers(); + if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { + ACPI_EXCEPTION((AE_INFO, status, + "During Region initialization")); + return_ACPI_STATUS(status); + } + /* Load the namespace from the tables */ status = acpi_tb_load_namespace(); @@ -83,6 +103,20 @@ acpi_status __init acpi_load_tables(void) "While loading namespace from ACPI tables")); } + if (!acpi_gbl_group_module_level_code) { + /* + * Initialize the objects that remain uninitialized. This + * runs the executable AML that may be part of the + * declaration of these objects: + * operation_regions, buffer_fields, Buffers, and Packages. + */ + status = acpi_ns_initialize_objects(); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + } + + acpi_gbl_namespace_initialized = TRUE; return_ACPI_STATUS(status); } @@ -206,9 +240,7 @@ acpi_status acpi_tb_load_namespace(void) } if (!tables_failed) { - ACPI_INFO((AE_INFO, - "%u ACPI AML tables successfully acquired and loaded\n", - tables_loaded)); + ACPI_INFO(("%u ACPI AML tables successfully acquired and loaded\n", tables_loaded)); } else { ACPI_ERROR((AE_INFO, "%u table load failures, %u successful", @@ -301,7 +333,7 @@ acpi_status acpi_load_table(struct acpi_table_header *table) /* Install the table and load it into the namespace */ - ACPI_INFO((AE_INFO, "Host-directed Dynamic ACPI Table Load:")); + ACPI_INFO(("Host-directed Dynamic ACPI Table Load:")); (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table), diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c index c9a720f..f8e9978 100644 --- a/drivers/acpi/acpica/utcache.c +++ b/drivers/acpi/acpica/utcache.c @@ -245,7 +245,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache) acpi_status status; void *object; - ACPI_FUNCTION_NAME(os_acquire_object); + ACPI_FUNCTION_TRACE(os_acquire_object); if (!cache) { return_PTR(NULL); diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c index c427a5c..d5c3adf 100644 --- a/drivers/acpi/acpica/utnonansi.c +++ b/drivers/acpi/acpica/utnonansi.c @@ -140,6 +140,67 @@ int acpi_ut_stricmp(char *string1, char *string2) return (c1 - c2); } +#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) +/******************************************************************************* + * + * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat + * + * PARAMETERS: Adds a "DestSize" parameter to each of the standard string + * functions. This is the size of the Destination buffer. + * + * RETURN: TRUE if the operation would overflow the destination buffer. + * + * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that + * the result of the operation will not overflow the output string + * buffer. + * + * NOTE: These functions are typically only helpful for processing + * user input and command lines. For most ACPICA code, the + * required buffer length is precisely calculated before buffer + * allocation, so the use of these functions is unnecessary. + * + ******************************************************************************/ + +u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) +{ + + if (strlen(source) >= dest_size) { + return (TRUE); + } + + strcpy(dest, source); + return (FALSE); +} + +u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) +{ + + if ((strlen(dest) + strlen(source)) >= dest_size) { + return (TRUE); + } + + strcat(dest, source); + return (FALSE); +} + +u8 +acpi_ut_safe_strncat(char *dest, + acpi_size dest_size, + char *source, acpi_size max_transfer_length) +{ + acpi_size actual_transfer_length; + + actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); + + if ((strlen(dest) + actual_transfer_length) >= dest_size) { + return (TRUE); + } + + strncat(dest, source, max_transfer_length); + return (FALSE); +} +#endif + /******************************************************************************* * * FUNCTION: acpi_ut_strtoul64 @@ -155,7 +216,15 @@ int acpi_ut_stricmp(char *string1, char *string2) * 32-bit or 64-bit conversion, depending on the current mode * of the interpreter. * - * NOTE: Does not support Octal strings, not needed. + * NOTES: acpi_gbl_integer_byte_width should be set to the proper width. + * For the core ACPICA code, this width depends on the DSDT + * version. For iASL, the default byte width is always 8. + * + * Does not support Octal strings, not needed at this time. + * + * There is an earlier version of the function after this one, + * below. It is slightly different than this one, and the two + * may eventually may need to be merged. (01/2016). * ******************************************************************************/ @@ -171,7 +240,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) u8 sign_of0x = 0; u8 term = 0; - ACPI_FUNCTION_TRACE_STR(ut_stroul64, string); + ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string); switch (base) { case ACPI_ANY_BASE: @@ -318,63 +387,162 @@ error_exit: } } -#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) +#ifdef _OBSOLETE_FUNCTIONS +/* TBD: use version in ACPICA main code base? */ +/* DONE: 01/2016 */ + /******************************************************************************* * - * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat + * FUNCTION: strtoul64 * - * PARAMETERS: Adds a "DestSize" parameter to each of the standard string - * functions. This is the size of the Destination buffer. + * PARAMETERS: string - Null terminated string + * terminater - Where a pointer to the terminating byte + * is returned + * base - Radix of the string * - * RETURN: TRUE if the operation would overflow the destination buffer. + * RETURN: Converted value * - * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that - * the result of the operation will not overflow the output string - * buffer. - * - * NOTE: These functions are typically only helpful for processing - * user input and command lines. For most ACPICA code, the - * required buffer length is precisely calculated before buffer - * allocation, so the use of these functions is unnecessary. + * DESCRIPTION: Convert a string into an unsigned value. * ******************************************************************************/ -u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) +acpi_status strtoul64(char *string, u32 base, u64 *ret_integer) { + u32 index; + u32 sign; + u64 return_value = 0; + acpi_status status = AE_OK; - if (strlen(source) >= dest_size) { - return (TRUE); + *ret_integer = 0; + + switch (base) { + case 0: + case 8: + case 10: + case 16: + + break; + + default: + /* + * The specified Base parameter is not in the domain of + * this function: + */ + return (AE_BAD_PARAMETER); } - strcpy(dest, source); - return (FALSE); -} + /* Skip over any white space in the buffer: */ -u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) -{ + while (isspace((int)*string) || *string == '\t') { + ++string; + } - if ((strlen(dest) + strlen(source)) >= dest_size) { - return (TRUE); + /* + * The buffer may contain an optional plus or minus sign. + * If it does, then skip over it but remember what is was: + */ + if (*string == '-') { + sign = ACPI_SIGN_NEGATIVE; + ++string; + } else if (*string == '+') { + ++string; + sign = ACPI_SIGN_POSITIVE; + } else { + sign = ACPI_SIGN_POSITIVE; } - strcat(dest, source); - return (FALSE); -} + /* + * If the input parameter Base is zero, then we need to + * determine if it is octal, decimal, or hexadecimal: + */ + if (base == 0) { + if (*string == '0') { + if (tolower((int)*(++string)) == 'x') { + base = 16; + ++string; + } else { + base = 8; + } + } else { + base = 10; + } + } -u8 -acpi_ut_safe_strncat(char *dest, - acpi_size dest_size, - char *source, acpi_size max_transfer_length) -{ - acpi_size actual_transfer_length; + /* + * For octal and hexadecimal bases, skip over the leading + * 0 or 0x, if they are present. + */ + if (base == 8 && *string == '0') { + string++; + } - actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); + if (base == 16 && *string == '0' && tolower((int)*(++string)) == 'x') { + string++; + } - if ((strlen(dest) + actual_transfer_length) >= dest_size) { - return (TRUE); + /* Main loop: convert the string to an unsigned long */ + + while (*string) { + if (isdigit((int)*string)) { + index = ((u8)*string) - '0'; + } else { + index = (u8)toupper((int)*string); + if (isupper((int)index)) { + index = index - 'A' + 10; + } else { + goto error_exit; + } + } + + if (index >= base) { + goto error_exit; + } + + /* Check to see if value is out of range: */ + + if (return_value > ((ACPI_UINT64_MAX - (u64)index) / (u64)base)) { + goto error_exit; + } else { + return_value *= base; + return_value += index; + } + + ++string; } - strncat(dest, source, max_transfer_length); - return (FALSE); + /* If a minus sign was present, then "the conversion is negated": */ + + if (sign == ACPI_SIGN_NEGATIVE) { + return_value = (ACPI_UINT32_MAX - return_value) + 1; + } + + *ret_integer = return_value; + return (status); + +error_exit: + switch (base) { + case 8: + + status = AE_BAD_OCTAL_CONSTANT; + break; + + case 10: + + status = AE_BAD_DECIMAL_CONSTANT; + break; + + case 16: + + status = AE_BAD_HEX_CONSTANT; + break; + + default: + + /* Base validated above */ + + break; + } + + return (status); } #endif diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index c7c2bb8..60c406a 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -712,7 +712,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module) /* Print summary */ if (!num_outstanding) { - ACPI_INFO((AE_INFO, "No outstanding allocations")); + ACPI_INFO(("No outstanding allocations")); } else { ACPI_ERROR((AE_INFO, "%u(0x%X) Outstanding allocations", num_outstanding, num_outstanding)); diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c index 6fe5959..d9f15cb 100644 --- a/drivers/acpi/acpica/utxferror.c +++ b/drivers/acpi/acpica/utxferror.c @@ -175,8 +175,7 @@ ACPI_EXPORT_SYMBOL(acpi_warning) * TBD: module_name and line_number args are not needed, should be removed. * ******************************************************************************/ -void ACPI_INTERNAL_VAR_XFACE -acpi_info(const char *module_name, u32 line_number, const char *format, ...) +void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...) { va_list arg_list; diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 721b87c..75b5f27 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -154,21 +154,6 @@ acpi_status __init acpi_enable_subsystem(u32 flags) */ acpi_gbl_early_initialization = FALSE; - /* - * Install the default operation region handlers. These are the - * handlers that are defined by the ACPI specification to be - * "always accessible" -- namely, system_memory, system_IO, and - * PCI_Config. This also means that no _REG methods need to be - * run for these address spaces. We need to have these handlers - * installed before any AML code can be executed, especially any - * module-level code (11/2015). - */ - status = acpi_ev_install_region_handlers(); - if (ACPI_FAILURE(status)) { - ACPI_EXCEPTION((AE_INFO, status, - "During Region initialization")); - return_ACPI_STATUS(status); - } #if (!ACPI_REDUCED_HARDWARE) /* Enable ACPI mode */ @@ -260,23 +245,6 @@ acpi_status __init acpi_initialize_objects(u32 flags) ACPI_FUNCTION_TRACE(acpi_initialize_objects); - /* - * Run all _REG methods - * - * Note: Any objects accessed by the _REG methods will be automatically - * initialized, even if they contain executable AML (see the call to - * acpi_ns_initialize_objects below). - */ - acpi_gbl_reg_methods_enabled = TRUE; - if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "[Init] Executing _REG OpRegion methods\n")); - - status = acpi_ev_initialize_op_regions(); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - } #ifdef ACPI_EXEC_APP /* * This call implements the "initialization file" option for acpi_exec. @@ -299,32 +267,27 @@ acpi_status __init acpi_initialize_objects(u32 flags) */ if (acpi_gbl_group_module_level_code) { acpi_ns_exec_module_code_list(); - } - - /* - * Initialize the objects that remain uninitialized. This runs the - * executable AML that may be part of the declaration of these objects: - * operation_regions, buffer_fields, Buffers, and Packages. - */ - if (!(flags & ACPI_NO_OBJECT_INIT)) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "[Init] Completing Initialization of ACPI Objects\n")); - status = acpi_ns_initialize_objects(); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); + /* + * Initialize the objects that remain uninitialized. This + * runs the executable AML that may be part of the + * declaration of these objects: + * operation_regions, buffer_fields, Buffers, and Packages. + */ + if (!(flags & ACPI_NO_OBJECT_INIT)) { + status = acpi_ns_initialize_objects(); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } } } /* - * Initialize all device objects in the namespace. This runs the device - * _STA and _INI methods. + * Initialize all device/region objects in the namespace. This runs + * the device _STA and _INI methods and region _REG methods. */ - if (!(flags & ACPI_NO_DEVICE_INIT)) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "[Init] Initializing ACPI Devices\n")); - - status = acpi_ns_initialize_devices(); + if (!(flags & (ACPI_NO_DEVICE_INIT | ACPI_NO_ADDRESS_SPACE_INIT))) { + status = acpi_ns_initialize_devices(flags); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index a2c8d7a..da370e1 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -536,7 +536,8 @@ int apei_resources_request(struct apei_resources *resources, goto err_unmap_ioport; } - return 0; + goto arch_res_fini; + err_unmap_ioport: list_for_each_entry(res, &resources->ioport, list) { if (res == res_bak) @@ -551,7 +552,8 @@ err_unmap_iomem: release_mem_region(res->start, res->end - res->start); } arch_res_fini: - apei_resources_fini(&arch_res); + if (arch_apei_filter_addr) + apei_resources_fini(&arch_res); nvs_res_fini: apei_resources_fini(&nvs_resources); return rc; diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6e6bc10..006c389 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1207,6 +1207,9 @@ static int __init erst_init(void) "Failed to allocate %lld bytes for persistent store error log.\n", erst_erange.size); + /* Cleanup ERST Resources */ + apei_resources_fini(&erst_resources); + return 0; err_release_erange: diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 3dd9c46..60746ef 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,7 +26,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/io.h> @@ -79,6 +79,11 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +/* + * This driver isn't really modular, however for the time being, + * continuing to use module_param is the easiest way to remain + * compatible with existing boot arg use cases. + */ bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -1148,18 +1153,4 @@ err_ioremap_exit: err: return rc; } - -static void __exit ghes_exit(void) -{ - platform_driver_unregister(&ghes_platform_driver); - ghes_estatus_pool_exit(); - ghes_ioremap_exit(); -} - -module_init(ghes_init); -module_exit(ghes_exit); - -MODULE_AUTHOR("Huang Ying"); -MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:GHES"); +device_initcall(ghes_init); diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c index a83e3c6..75f128e 100644 --- a/drivers/acpi/bgrt.c +++ b/drivers/acpi/bgrt.c @@ -1,4 +1,6 @@ /* + * BGRT boot graphic support + * Authors: Matthew Garrett, Josh Triplett <josh@joshtriplett.org> * Copyright 2012 Red Hat, Inc <mjg@redhat.com> * Copyright 2012 Intel Corporation * @@ -8,7 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/sysfs.h> @@ -103,9 +104,4 @@ out_kobject: kobject_put(bgrt_kobj); return ret; } - -module_init(bgrt_init); - -MODULE_AUTHOR("Matthew Garrett, Josh Triplett <josh@joshtriplett.org>"); -MODULE_DESCRIPTION("BGRT boot graphic support"); -MODULE_LICENSE("GPL"); +device_initcall(bgrt_init); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 891c42d..0e85678 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -479,24 +479,38 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device) Device Matching -------------------------------------------------------------------------- */ -static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, - const struct device *dev) +/** + * acpi_get_first_physical_node - Get first physical node of an ACPI device + * @adev: ACPI device in question + * + * Return: First physical node of ACPI device @adev + */ +struct device *acpi_get_first_physical_node(struct acpi_device *adev) { struct mutex *physical_node_lock = &adev->physical_node_lock; + struct device *phys_dev; mutex_lock(physical_node_lock); if (list_empty(&adev->physical_node_list)) { - adev = NULL; + phys_dev = NULL; } else { const struct acpi_device_physical_node *node; node = list_first_entry(&adev->physical_node_list, struct acpi_device_physical_node, node); - if (node->dev != dev) - adev = NULL; + + phys_dev = node->dev; } mutex_unlock(physical_node_lock); - return adev; + return phys_dev; +} + +static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, + const struct device *dev) +{ + const struct device *phys_dev = acpi_get_first_physical_node(adev); + + return phys_dev && phys_dev == dev ? adev : NULL; } /** diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 6730f96..8adac69 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -39,6 +39,7 @@ #include <linux/cpufreq.h> #include <linux/delay.h> +#include <linux/ktime.h> #include <acpi/cppc_acpi.h> /* @@ -63,58 +64,140 @@ static struct mbox_chan *pcc_channel; static void __iomem *pcc_comm_addr; static u64 comm_base_addr; static int pcc_subspace_idx = -1; -static u16 pcc_cmd_delay; static bool pcc_channel_acquired; +static ktime_t deadline; +static unsigned int pcc_mpar, pcc_mrtt; + +/* pcc mapped address + header size + offset within PCC subspace */ +#define GET_PCC_VADDR(offs) (pcc_comm_addr + 0x8 + (offs)) /* * Arbitrary Retries in case the remote processor is slow to respond - * to PCC commands. + * to PCC commands. Keeping it high enough to cover emulators where + * the processors run painfully slow. */ #define NUM_RETRIES 500 +static int check_pcc_chan(void) +{ + int ret = -EIO; + struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_comm_addr; + ktime_t next_deadline = ktime_add(ktime_get(), deadline); + + /* Retry in case the remote processor was too slow to catch up. */ + while (!ktime_after(ktime_get(), next_deadline)) { + /* + * Per spec, prior to boot the PCC space wil be initialized by + * platform and should have set the command completion bit when + * PCC can be used by OSPM + */ + if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) { + ret = 0; + break; + } + /* + * Reducing the bus traffic in case this loop takes longer than + * a few retries. + */ + udelay(3); + } + + return ret; +} + static int send_pcc_cmd(u16 cmd) { - int retries, result = -EIO; - struct acpi_pcct_hw_reduced *pcct_ss = pcc_channel->con_priv; + int ret = -EIO; struct acpi_pcct_shared_memory *generic_comm_base = (struct acpi_pcct_shared_memory *) pcc_comm_addr; - u32 cmd_latency = pcct_ss->latency; + static ktime_t last_cmd_cmpl_time, last_mpar_reset; + static int mpar_count; + unsigned int time_delta; - /* Min time OS should wait before sending next command. */ - udelay(pcc_cmd_delay); + /* + * For CMD_WRITE we know for a fact the caller should have checked + * the channel before writing to PCC space + */ + if (cmd == CMD_READ) { + ret = check_pcc_chan(); + if (ret) + return ret; + } + + /* + * Handle the Minimum Request Turnaround Time(MRTT) + * "The minimum amount of time that OSPM must wait after the completion + * of a command before issuing the next command, in microseconds" + */ + if (pcc_mrtt) { + time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time); + if (pcc_mrtt > time_delta) + udelay(pcc_mrtt - time_delta); + } + + /* + * Handle the non-zero Maximum Periodic Access Rate(MPAR) + * "The maximum number of periodic requests that the subspace channel can + * support, reported in commands per minute. 0 indicates no limitation." + * + * This parameter should be ideally zero or large enough so that it can + * handle maximum number of requests that all the cores in the system can + * collectively generate. If it is not, we will follow the spec and just + * not send the request to the platform after hitting the MPAR limit in + * any 60s window + */ + if (pcc_mpar) { + if (mpar_count == 0) { + time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset); + if (time_delta < 60 * MSEC_PER_SEC) { + pr_debug("PCC cmd not sent due to MPAR limit"); + return -EIO; + } + last_mpar_reset = ktime_get(); + mpar_count = pcc_mpar; + } + mpar_count--; + } /* Write to the shared comm region. */ - writew(cmd, &generic_comm_base->command); + writew_relaxed(cmd, &generic_comm_base->command); /* Flip CMD COMPLETE bit */ - writew(0, &generic_comm_base->status); + writew_relaxed(0, &generic_comm_base->status); /* Ring doorbell */ - result = mbox_send_message(pcc_channel, &cmd); - if (result < 0) { + ret = mbox_send_message(pcc_channel, &cmd); + if (ret < 0) { pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n", - cmd, result); - return result; + cmd, ret); + return ret; } - /* Wait for a nominal time to let platform process command. */ - udelay(cmd_latency); - - /* Retry in case the remote processor was too slow to catch up. */ - for (retries = NUM_RETRIES; retries > 0; retries--) { - if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) { - result = 0; - break; - } + /* + * For READs we need to ensure the cmd completed to ensure + * the ensuing read()s can proceed. For WRITEs we dont care + * because the actual write()s are done before coming here + * and the next READ or WRITE will check if the channel + * is busy/free at the entry of this call. + * + * If Minimum Request Turnaround Time is non-zero, we need + * to record the completion time of both READ and WRITE + * command for proper handling of MRTT, so we need to check + * for pcc_mrtt in addition to CMD_READ + */ + if (cmd == CMD_READ || pcc_mrtt) { + ret = check_pcc_chan(); + if (pcc_mrtt) + last_cmd_cmpl_time = ktime_get(); } - mbox_client_txdone(pcc_channel, result); - return result; + mbox_client_txdone(pcc_channel, ret); + return ret; } static void cppc_chan_tx_done(struct mbox_client *cl, void *msg, int ret) { - if (ret) + if (ret < 0) pr_debug("TX did not complete: CMD sent:%x, ret:%d\n", *(u16 *)msg, ret); else @@ -306,6 +389,7 @@ static int register_pcc_channel(int pcc_subspace_idx) { struct acpi_pcct_hw_reduced *cppc_ss; unsigned int len; + u64 usecs_lat; if (pcc_subspace_idx >= 0) { pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl, @@ -335,7 +419,16 @@ static int register_pcc_channel(int pcc_subspace_idx) */ comm_base_addr = cppc_ss->base_address; len = cppc_ss->length; - pcc_cmd_delay = cppc_ss->min_turnaround_time; + + /* + * cppc_ss->latency is just a Nominal value. In reality + * the remote processor could be much slower to reply. + * So add an arbitrary amount of wait on top of Nominal. + */ + usecs_lat = NUM_RETRIES * cppc_ss->latency; + deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC); + pcc_mrtt = cppc_ss->min_turnaround_time; + pcc_mpar = cppc_ss->max_access_rate; pcc_comm_addr = acpi_os_ioremap(comm_base_addr, len); if (!pcc_comm_addr) { @@ -546,29 +639,74 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr) } EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit); -static u64 get_phys_addr(struct cpc_reg *reg) -{ - /* PCC communication addr space begins at byte offset 0x8. */ - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) - return (u64)comm_base_addr + 0x8 + reg->address; - else - return reg->address; -} +/* + * Since cpc_read and cpc_write are called while holding pcc_lock, it should be + * as fast as possible. We have already mapped the PCC subspace during init, so + * we can directly write to it. + */ -static void cpc_read(struct cpc_reg *reg, u64 *val) +static int cpc_read(struct cpc_reg *reg, u64 *val) { - u64 addr = get_phys_addr(reg); + int ret_val = 0; - acpi_os_read_memory((acpi_physical_address)addr, - val, reg->bit_width); + *val = 0; + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + void __iomem *vaddr = GET_PCC_VADDR(reg->address); + + switch (reg->bit_width) { + case 8: + *val = readb_relaxed(vaddr); + break; + case 16: + *val = readw_relaxed(vaddr); + break; + case 32: + *val = readl_relaxed(vaddr); + break; + case 64: + *val = readq_relaxed(vaddr); + break; + default: + pr_debug("Error: Cannot read %u bit width from PCC\n", + reg->bit_width); + ret_val = -EFAULT; + } + } else + ret_val = acpi_os_read_memory((acpi_physical_address)reg->address, + val, reg->bit_width); + return ret_val; } -static void cpc_write(struct cpc_reg *reg, u64 val) +static int cpc_write(struct cpc_reg *reg, u64 val) { - u64 addr = get_phys_addr(reg); + int ret_val = 0; - acpi_os_write_memory((acpi_physical_address)addr, - val, reg->bit_width); + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + void __iomem *vaddr = GET_PCC_VADDR(reg->address); + + switch (reg->bit_width) { + case 8: + writeb_relaxed(val, vaddr); + break; + case 16: + writew_relaxed(val, vaddr); + break; + case 32: + writel_relaxed(val, vaddr); + break; + case 64: + writeq_relaxed(val, vaddr); + break; + default: + pr_debug("Error: Cannot write %u bit width to PCC\n", + reg->bit_width); + ret_val = -EFAULT; + break; + } + } else + ret_val = acpi_os_write_memory((acpi_physical_address)reg->address, + val, reg->bit_width); + return ret_val; } /** @@ -604,7 +742,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) (ref_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) || (nom_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) { /* Ring doorbell once to update PCC subspace */ - if (send_pcc_cmd(CMD_READ)) { + if (send_pcc_cmd(CMD_READ) < 0) { ret = -EIO; goto out_err; } @@ -662,7 +800,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) if ((delivered_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) || (reference_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) { /* Ring doorbell once to update PCC subspace */ - if (send_pcc_cmd(CMD_READ)) { + if (send_pcc_cmd(CMD_READ) < 0) { ret = -EIO; goto out_err; } @@ -713,6 +851,13 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) spin_lock(&pcc_lock); + /* If this is PCC reg, check if channel is free before writing */ + if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + ret = check_pcc_chan(); + if (ret) + goto busy_channel; + } + /* * Skip writing MIN/MAX until Linux knows how to come up with * useful values. @@ -722,10 +867,10 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) /* Is this a PCC reg ?*/ if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { /* Ring doorbell so Remote can get our perf request. */ - if (send_pcc_cmd(CMD_WRITE)) + if (send_pcc_cmd(CMD_WRITE) < 0) ret = -EIO; } - +busy_channel: spin_unlock(&pcc_lock); return ret; diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index bea8e42..6c7dd7a 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -73,6 +73,9 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, loff_t init_off = *off; int err = 0; + if (!write_support) + return -EINVAL; + if (*off >= EC_SPACE_SIZE) return 0; if (*off + count >= EC_SPACE_SIZE) { diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 6322db6..384cfc3 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -46,7 +46,7 @@ MODULE_DEVICE_TABLE(acpi, fan_device_ids); #ifdef CONFIG_PM_SLEEP static int acpi_fan_suspend(struct device *dev); static int acpi_fan_resume(struct device *dev); -static struct dev_pm_ops acpi_fan_pm = { +static const struct dev_pm_ops acpi_fan_pm = { .resume = acpi_fan_resume, .freeze = acpi_fan_suspend, .thaw = acpi_fan_resume, diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1e6833a..a37508e 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -20,6 +20,7 @@ #define PREFIX "ACPI: " +void acpi_initrd_initialize_tables(void); acpi_status acpi_os_initialize1(void); void init_acpi_device_notify(void); int acpi_scan_init(void); @@ -29,6 +30,11 @@ void acpi_processor_init(void); void acpi_platform_init(void); void acpi_pnp_init(void); void acpi_int340x_thermal_init(void); +#ifdef CONFIG_ARM_AMBA +void acpi_amba_init(void); +#else +static inline void acpi_amba_init(void) {} +#endif int acpi_sysfs_init(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); @@ -106,6 +112,7 @@ bool acpi_device_is_present(struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); +struct device *acpi_get_first_physical_node(struct acpi_device *adev); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 67da6fb..814d5f8 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -602,6 +602,14 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, return AE_OK; } +static void acpi_table_taint(struct acpi_table_header *table) +{ + pr_warn(PREFIX + "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", + table->signature, table->oem_table_id); + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); +} + #ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE #include <linux/earlycpio.h> #include <linux/memblock.h> @@ -636,6 +644,7 @@ static const char * const table_sigs[] = { #define ACPI_OVERRIDE_TABLES 64 static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES]; +static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES); #define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT) @@ -746,96 +755,125 @@ void __init acpi_initrd_override(void *data, size_t size) } } } -#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ -static void acpi_table_taint(struct acpi_table_header *table) +acpi_status +acpi_os_physical_table_override(struct acpi_table_header *existing_table, + acpi_physical_address *address, u32 *length) { - pr_warn(PREFIX - "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", - table->signature, table->oem_table_id); - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); -} + int table_offset = 0; + int table_index = 0; + struct acpi_table_header *table; + u32 table_length; + *length = 0; + *address = 0; + if (!acpi_tables_addr) + return AE_OK; -acpi_status -acpi_os_table_override(struct acpi_table_header * existing_table, - struct acpi_table_header ** new_table) -{ - if (!existing_table || !new_table) - return AE_BAD_PARAMETER; + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { + table = acpi_os_map_memory(acpi_tables_addr + table_offset, + ACPI_HEADER_SIZE); + if (table_offset + table->length > all_tables_size) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + WARN_ON(1); + return AE_OK; + } - *new_table = NULL; + table_length = table->length; -#ifdef CONFIG_ACPI_CUSTOM_DSDT - if (strncmp(existing_table->signature, "DSDT", 4) == 0) - *new_table = (struct acpi_table_header *)AmlCode; -#endif - if (*new_table != NULL) + /* Only override tables matched */ + if (test_bit(table_index, acpi_initrd_installed) || + memcmp(existing_table->signature, table->signature, 4) || + memcmp(table->oem_table_id, existing_table->oem_table_id, + ACPI_OEM_TABLE_ID_SIZE)) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + goto next_table; + } + + *length = table_length; + *address = acpi_tables_addr + table_offset; acpi_table_taint(existing_table); + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + set_bit(table_index, acpi_initrd_installed); + break; + +next_table: + table_offset += table_length; + table_index++; + } return AE_OK; } -acpi_status -acpi_os_physical_table_override(struct acpi_table_header *existing_table, - acpi_physical_address *address, - u32 *table_length) +void __init acpi_initrd_initialize_tables(void) { -#ifndef CONFIG_ACPI_INITRD_TABLE_OVERRIDE - *table_length = 0; - *address = 0; - return AE_OK; -#else int table_offset = 0; + int table_index = 0; + u32 table_length; struct acpi_table_header *table; - *table_length = 0; - *address = 0; - if (!acpi_tables_addr) - return AE_OK; - - do { - if (table_offset + ACPI_HEADER_SIZE > all_tables_size) { - WARN_ON(1); - return AE_OK; - } + return; + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { table = acpi_os_map_memory(acpi_tables_addr + table_offset, ACPI_HEADER_SIZE); - if (table_offset + table->length > all_tables_size) { acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); WARN_ON(1); - return AE_OK; + return; } - table_offset += table->length; + table_length = table->length; - if (memcmp(existing_table->signature, table->signature, 4)) { - acpi_os_unmap_memory(table, - ACPI_HEADER_SIZE); - continue; - } - - /* Only override tables with matching oem id */ - if (memcmp(table->oem_table_id, existing_table->oem_table_id, - ACPI_OEM_TABLE_ID_SIZE)) { - acpi_os_unmap_memory(table, - ACPI_HEADER_SIZE); - continue; + /* Skip RSDT/XSDT which should only be used for override */ + if (test_bit(table_index, acpi_initrd_installed) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + goto next_table; } - table_offset -= table->length; - *table_length = table->length; + acpi_table_taint(table); acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); - *address = acpi_tables_addr + table_offset; - break; - } while (table_offset + ACPI_HEADER_SIZE < all_tables_size); + acpi_install_table(acpi_tables_addr + table_offset, TRUE); + set_bit(table_index, acpi_initrd_installed); +next_table: + table_offset += table_length; + table_index++; + } +} +#else +acpi_status +acpi_os_physical_table_override(struct acpi_table_header *existing_table, + acpi_physical_address *address, + u32 *table_length) +{ + *table_length = 0; + *address = 0; + return AE_OK; +} + +void __init acpi_initrd_initialize_tables(void) +{ +} +#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ - if (*address != 0) +acpi_status +acpi_os_table_override(struct acpi_table_header *existing_table, + struct acpi_table_header **new_table) +{ + if (!existing_table || !new_table) + return AE_BAD_PARAMETER; + + *new_table = NULL; + +#ifdef CONFIG_ACPI_CUSTOM_DSDT + if (strncmp(existing_table->signature, "DSDT", 4) == 0) + *new_table = (struct acpi_table_header *)AmlCode; +#endif + if (*new_table != NULL) acpi_table_taint(existing_table); return AE_OK; -#endif } static irqreturn_t acpi_irq(int irq, void *dev_id) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index c8e169e..2c45dd3 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -33,6 +33,7 @@ #include <linux/pci.h> #include <linux/acpi.h> #include <linux/slab.h> +#include <linux/interrupt.h> #define PREFIX "ACPI: " @@ -387,6 +388,23 @@ static inline int acpi_isa_register_gsi(struct pci_dev *dev) } #endif +static inline bool acpi_pci_irq_valid(struct pci_dev *dev, u8 pin) +{ +#ifdef CONFIG_X86 + /* + * On x86 irq line 0xff means "unknown" or "no connection" + * (PCI 3.0, Section 6.2.4, footnote on page 223). + */ + if (dev->irq == 0xff) { + dev->irq = IRQ_NOTCONNECTED; + dev_warn(&dev->dev, "PCI INT %c: not connected\n", + pin_name(pin)); + return false; + } +#endif + return true; +} + int acpi_pci_irq_enable(struct pci_dev *dev) { struct acpi_prt_entry *entry; @@ -431,11 +449,14 @@ int acpi_pci_irq_enable(struct pci_dev *dev) } else gsi = -1; - /* - * No IRQ known to the ACPI subsystem - maybe the BIOS / - * driver reported one, then use it. Exit in any case. - */ if (gsi < 0) { + /* + * No IRQ known to the ACPI subsystem - maybe the BIOS / + * driver reported one, then use it. Exit in any case. + */ + if (!acpi_pci_irq_valid(dev, pin)) + return 0; + if (acpi_isa_register_gsi(dev)) dev_warn(&dev->dev, "PCI INT %c: no GSI\n", pin_name(pin)); diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c index 42df46a..fcd1852 100644 --- a/drivers/acpi/pmic/intel_pmic_crc.c +++ b/drivers/acpi/pmic/intel_pmic_crc.c @@ -13,7 +13,7 @@ * GNU General Public License for more details. */ -#include <linux/module.h> +#include <linux/init.h> #include <linux/acpi.h> #include <linux/mfd/intel_soc_pmic.h> #include <linux/regmap.h> @@ -205,7 +205,4 @@ static int __init intel_crc_pmic_opregion_driver_init(void) { return platform_driver_register(&intel_crc_pmic_opregion_driver); } -module_init(intel_crc_pmic_opregion_driver_init); - -MODULE_DESCRIPTION("CrystalCove ACPI operation region driver"); -MODULE_LICENSE("GPL"); +device_initcall(intel_crc_pmic_opregion_driver_init); diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 11154a3..d2fa8cb 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -314,7 +314,6 @@ static int __init acpi_processor_driver_init(void) if (result < 0) return result; - acpi_processor_syscore_init(); register_hotcpu_notifier(&acpi_cpu_notifier); acpi_thermal_cpufreq_init(); acpi_processor_ppc_init(); @@ -330,7 +329,6 @@ static void __exit acpi_processor_driver_exit(void) acpi_processor_ppc_exit(); acpi_thermal_cpufreq_exit(); unregister_hotcpu_notifier(&acpi_cpu_notifier); - acpi_processor_syscore_exit(); driver_unregister(&acpi_processor_driver); } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 9ca2b2f..444e374 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -23,6 +23,7 @@ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#define pr_fmt(fmt) "ACPI: " fmt #include <linux/module.h> #include <linux/acpi.h> @@ -30,7 +31,6 @@ #include <linux/sched.h> /* need_resched() */ #include <linux/tick.h> #include <linux/cpuidle.h> -#include <linux/syscore_ops.h> #include <acpi/processor.h> /* @@ -43,8 +43,6 @@ #include <asm/apic.h> #endif -#define PREFIX "ACPI: " - #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); @@ -81,9 +79,9 @@ static int set_max_cstate(const struct dmi_system_id *id) if (max_cstate > ACPI_PROCESSOR_MAX_POWER) return 0; - printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate." - " Override with \"processor.max_cstate=%d\"\n", id->ident, - (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1); + pr_notice("%s detected - limiting to C%ld max_cstate." + " Override with \"processor.max_cstate=%d\"\n", id->ident, + (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1); max_cstate = (long)id->driver_data; @@ -194,42 +192,6 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, #endif -#ifdef CONFIG_PM_SLEEP -static u32 saved_bm_rld; - -static int acpi_processor_suspend(void) -{ - acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); - return 0; -} - -static void acpi_processor_resume(void) -{ - u32 resumed_bm_rld = 0; - - acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); - if (resumed_bm_rld == saved_bm_rld) - return; - - acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); -} - -static struct syscore_ops acpi_processor_syscore_ops = { - .suspend = acpi_processor_suspend, - .resume = acpi_processor_resume, -}; - -void acpi_processor_syscore_init(void) -{ - register_syscore_ops(&acpi_processor_syscore_ops); -} - -void acpi_processor_syscore_exit(void) -{ - unregister_syscore_ops(&acpi_processor_syscore_ops); -} -#endif /* CONFIG_PM_SLEEP */ - #if defined(CONFIG_X86) static void tsc_check_state(int state) { @@ -351,7 +313,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) /* There must be at least 2 elements */ if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) { - printk(KERN_ERR PREFIX "not enough elements in _CST\n"); + pr_err("not enough elements in _CST\n"); ret = -EFAULT; goto end; } @@ -360,7 +322,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) /* Validate number of power states. */ if (count < 1 || count != cst->package.count - 1) { - printk(KERN_ERR PREFIX "count given by _CST is not valid\n"); + pr_err("count given by _CST is not valid\n"); ret = -EFAULT; goto end; } @@ -469,11 +431,9 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1) */ if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) { - printk(KERN_WARNING - "Limiting number of power states to max (%d)\n", - ACPI_PROCESSOR_MAX_POWER); - printk(KERN_WARNING - "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); + pr_warn("Limiting number of power states to max (%d)\n", + ACPI_PROCESSOR_MAX_POWER); + pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); break; } } @@ -1097,8 +1057,8 @@ int acpi_processor_power_init(struct acpi_processor *pr) retval = cpuidle_register_driver(&acpi_idle_driver); if (retval) return retval; - printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", - acpi_idle_driver.name); + pr_debug("%s registered with cpuidle\n", + acpi_idle_driver.name); } dev = kzalloc(sizeof(*dev), GFP_KERNEL); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 407a376..5f28cf7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1930,6 +1930,7 @@ int __init acpi_scan_init(void) acpi_memory_hotplug_init(); acpi_pnp_init(); acpi_int340x_thermal_init(); + acpi_amba_init(); acpi_scan_add_handler(&generic_device_handler); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 9cb9752..fbfcce3 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -19,6 +19,7 @@ #include <linux/reboot.h> #include <linux/acpi.h> #include <linux/module.h> +#include <linux/syscore_ops.h> #include <asm/io.h> #include <trace/events/power.h> @@ -677,6 +678,39 @@ static void acpi_sleep_suspend_setup(void) static inline void acpi_sleep_suspend_setup(void) {} #endif /* !CONFIG_SUSPEND */ +#ifdef CONFIG_PM_SLEEP +static u32 saved_bm_rld; + +static int acpi_save_bm_rld(void) +{ + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); + return 0; +} + +static void acpi_restore_bm_rld(void) +{ + u32 resumed_bm_rld = 0; + + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); + if (resumed_bm_rld == saved_bm_rld) + return; + + acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); +} + +static struct syscore_ops acpi_sleep_syscore_ops = { + .suspend = acpi_save_bm_rld, + .resume = acpi_restore_bm_rld, +}; + +void acpi_sleep_syscore_init(void) +{ + register_syscore_ops(&acpi_sleep_syscore_ops); +} +#else +static inline void acpi_sleep_syscore_init(void) {} +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_HIBERNATION static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; @@ -839,6 +873,7 @@ int __init acpi_sleep_init(void) sleep_states[ACPI_STATE_S0] = 1; + acpi_sleep_syscore_init(); acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 6c0f079..f49c024 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -32,6 +32,7 @@ #include <linux/errno.h> #include <linux/acpi.h> #include <linux/bootmem.h> +#include "internal.h" #define ACPI_MAX_TABLES 128 @@ -456,6 +457,7 @@ int __init acpi_table_init(void) status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0); if (ACPI_FAILURE(status)) return -EINVAL; + acpi_initrd_initialize_tables(); check_multiple_madt(); return 0; @@ -484,3 +486,13 @@ static int __init acpi_force_table_verification_setup(char *s) } early_param("acpi_force_table_verification", acpi_force_table_verification_setup); + +static int __init acpi_force_32bit_fadt_addr(char *s) +{ + pr_info("Forcing 32 Bit FADT addresses\n"); + acpi_gbl_use32_bit_fadt_addresses = TRUE; + + return 0; +} + +early_param("acpi_force_32bit_fadt_addr", acpi_force_32bit_fadt_addr); diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index f2f9873..f12a724 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -201,10 +201,6 @@ acpi_extract_package(union acpi_object *package, u8 **pointer = NULL; union acpi_object *element = &(package->package.elements[i]); - if (!element) { - return AE_BAD_DATA; - } - switch (element->type) { case ACPI_TYPE_INTEGER: diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 301b785..56705b5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -104,6 +104,7 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) { + unsigned int state_idx = genpd->state_idx; ktime_t time_start; s64 elapsed_ns; int ret; @@ -120,10 +121,10 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) return ret; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->power_on_latency_ns) + if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns) return ret; - genpd->power_on_latency_ns = elapsed_ns; + genpd->states[state_idx].power_on_latency_ns = elapsed_ns; genpd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "on", elapsed_ns); @@ -133,6 +134,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) { + unsigned int state_idx = genpd->state_idx; ktime_t time_start; s64 elapsed_ns; int ret; @@ -149,10 +151,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) return ret; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->power_off_latency_ns) + if (elapsed_ns <= genpd->states[state_idx].power_off_latency_ns) return ret; - genpd->power_off_latency_ns = elapsed_ns; + genpd->states[state_idx].power_off_latency_ns = elapsed_ns; genpd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "off", elapsed_ns); @@ -485,8 +487,13 @@ static int pm_genpd_runtime_resume(struct device *dev) if (timed && runtime_pm) time_start = ktime_get(); - genpd_start_dev(genpd, dev); - genpd_restore_dev(genpd, dev); + ret = genpd_start_dev(genpd, dev); + if (ret) + goto err_poweroff; + + ret = genpd_restore_dev(genpd, dev); + if (ret) + goto err_stop; /* Update resume latency value if the measured time exceeds it. */ if (timed && runtime_pm) { @@ -501,6 +508,17 @@ static int pm_genpd_runtime_resume(struct device *dev) } return 0; + +err_stop: + genpd_stop_dev(genpd, dev); +err_poweroff: + if (!dev->power.irq_safe) { + mutex_lock(&genpd->lock); + genpd_poweroff(genpd, 0); + mutex_unlock(&genpd->lock); + } + + return ret; } static bool pd_ignore_unused; @@ -585,6 +603,8 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd, || atomic_read(&genpd->sd_count) > 0) return; + /* Choose the deepest state when suspending */ + genpd->state_idx = genpd->state_count - 1; genpd_power_off(genpd, timed); genpd->status = GPD_STATE_POWER_OFF; @@ -1378,7 +1398,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, mutex_lock(&subdomain->lock); mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); - if (!list_empty(&subdomain->slave_links) || subdomain->device_count) { + if (!list_empty(&subdomain->master_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, subdomain->name); ret = -EBUSY; @@ -1508,6 +1528,20 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } + if (genpd->state_idx >= GENPD_MAX_NUM_STATES) { + pr_warn("Initial state index out of bounds.\n"); + genpd->state_idx = GENPD_MAX_NUM_STATES - 1; + } + + if (genpd->state_count > GENPD_MAX_NUM_STATES) { + pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES); + genpd->state_count = GENPD_MAX_NUM_STATES; + } + + /* Use only one "off" state if there were no states declared */ + if (genpd->state_count == 0) + genpd->state_count = 1; + mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); @@ -1668,6 +1702,9 @@ struct generic_pm_domain *of_genpd_get_from_provider( struct generic_pm_domain *genpd = ERR_PTR(-ENOENT); struct of_genpd_provider *provider; + if (!genpdspec) + return ERR_PTR(-EINVAL); + mutex_lock(&of_genpd_mutex); /* Check if we have such a provider in our array */ @@ -1864,6 +1901,7 @@ static int pm_genpd_summary_one(struct seq_file *s, struct pm_domain_data *pm_data; const char *kobj_path; struct gpd_link *link; + char state[16]; int ret; ret = mutex_lock_interruptible(&genpd->lock); @@ -1872,7 +1910,13 @@ static int pm_genpd_summary_one(struct seq_file *s, if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) goto exit; - seq_printf(s, "%-30s %-15s ", genpd->name, status_lookup[genpd->status]); + if (genpd->status == GPD_STATE_POWER_OFF) + snprintf(state, sizeof(state), "%s-%u", + status_lookup[genpd->status], genpd->state_idx); + else + snprintf(state, sizeof(state), "%s", + status_lookup[genpd->status]); + seq_printf(s, "%-30s %-15s ", genpd->name, state); /* * Modifications on the list require holding locks on both diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 1e937ac..00a5436 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -98,7 +98,8 @@ static bool default_stop_ok(struct device *dev) * * This routine must be executed under the PM domain's lock. */ -static bool default_power_down_ok(struct dev_pm_domain *pd) +static bool __default_power_down_ok(struct dev_pm_domain *pd, + unsigned int state) { struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; @@ -106,27 +107,9 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) s64 min_off_time_ns; s64 off_on_time_ns; - if (genpd->max_off_time_changed) { - struct gpd_link *link; - - /* - * We have to invalidate the cached results for the masters, so - * use the observation that default_power_down_ok() is not - * going to be called for any master until this instance - * returns. - */ - list_for_each_entry(link, &genpd->slave_links, slave_node) - link->master->max_off_time_changed = true; - - genpd->max_off_time_changed = false; - genpd->cached_power_down_ok = false; - genpd->max_off_time_ns = -1; - } else { - return genpd->cached_power_down_ok; - } + off_on_time_ns = genpd->states[state].power_off_latency_ns + + genpd->states[state].power_on_latency_ns; - off_on_time_ns = genpd->power_off_latency_ns + - genpd->power_on_latency_ns; min_off_time_ns = -1; /* @@ -186,8 +169,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) min_off_time_ns = constraint_ns; } - genpd->cached_power_down_ok = true; - /* * If the computed minimum device off time is negative, there are no * latency constraints, so the domain can spend arbitrary time in the @@ -201,10 +182,45 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) * time and the time needed to turn the domain on is the maximum * theoretical time this domain can spend in the "off" state. */ - genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; + genpd->max_off_time_ns = min_off_time_ns - + genpd->states[state].power_on_latency_ns; return true; } +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct gpd_link *link; + + if (!genpd->max_off_time_changed) + return genpd->cached_power_down_ok; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = true; + genpd->state_idx = genpd->state_count - 1; + + /* Find a state to power down to, starting from the deepest. */ + while (!__default_power_down_ok(pd, genpd->state_idx)) { + if (genpd->state_idx == 0) { + genpd->cached_power_down_ok = false; + break; + } + genpd->state_idx--; + } + + return genpd->cached_power_down_ok; +} + static bool always_on_power_down_ok(struct dev_pm_domain *domain) { return false; diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index cf351d3..433b600 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -13,50 +13,52 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/clk.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/of.h> #include <linux/export.h> +#include <linux/regulator/consumer.h> #include "opp.h" /* - * The root of the list of all devices. All device_opp structures branch off - * from here, with each device_opp containing the list of opp it supports in + * The root of the list of all opp-tables. All opp_table structures branch off + * from here, with each opp_table containing the list of opps it supports in * various states of availability. */ -static LIST_HEAD(dev_opp_list); +static LIST_HEAD(opp_tables); /* Lock to allow exclusive modification to the device and opp lists */ -DEFINE_MUTEX(dev_opp_list_lock); +DEFINE_MUTEX(opp_table_lock); #define opp_rcu_lockdep_assert() \ do { \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ - !lockdep_is_held(&dev_opp_list_lock), \ - "Missing rcu_read_lock() or " \ - "dev_opp_list_lock protection"); \ + !lockdep_is_held(&opp_table_lock), \ + "Missing rcu_read_lock() or " \ + "opp_table_lock protection"); \ } while (0) -static struct device_list_opp *_find_list_dev(const struct device *dev, - struct device_opp *dev_opp) +static struct opp_device *_find_opp_dev(const struct device *dev, + struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; - list_for_each_entry(list_dev, &dev_opp->dev_list, node) - if (list_dev->dev == dev) - return list_dev; + list_for_each_entry(opp_dev, &opp_table->dev_list, node) + if (opp_dev->dev == dev) + return opp_dev; return NULL; } -static struct device_opp *_managed_opp(const struct device_node *np) +static struct opp_table *_managed_opp(const struct device_node *np) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) { - if (dev_opp->np == np) { + list_for_each_entry_rcu(opp_table, &opp_tables, node) { + if (opp_table->np == np) { /* * Multiple devices can point to the same OPP table and * so will have same node-pointer, np. @@ -64,7 +66,7 @@ static struct device_opp *_managed_opp(const struct device_node *np) * But the OPPs will be considered as shared only if the * OPP table contains a "opp-shared" property. */ - return dev_opp->shared_opp ? dev_opp : NULL; + return opp_table->shared_opp ? opp_table : NULL; } } @@ -72,24 +74,24 @@ static struct device_opp *_managed_opp(const struct device_node *np) } /** - * _find_device_opp() - find device_opp struct using device pointer - * @dev: device pointer used to lookup device OPPs + * _find_opp_table() - find opp_table struct using device pointer + * @dev: device pointer used to lookup OPP table * - * Search list of device OPPs for one containing matching device. Does a RCU - * reader operation to grab the pointer needed. + * Search OPP table for one containing matching device. Does a RCU reader + * operation to grab the pointer needed. * - * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or + * Return: pointer to 'struct opp_table' if found, otherwise -ENODEV or * -EINVAL based on type of error. * * Locking: For readers, this function must be called under rcu_read_lock(). - * device_opp is a RCU protected pointer, which means that device_opp is valid + * opp_table is a RCU protected pointer, which means that opp_table is valid * as long as we are under RCU lock. * - * For Writers, this function must be called with dev_opp_list_lock held. + * For Writers, this function must be called with opp_table_lock held. */ -struct device_opp *_find_device_opp(struct device *dev) +struct opp_table *_find_opp_table(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; opp_rcu_lockdep_assert(); @@ -98,9 +100,9 @@ struct device_opp *_find_device_opp(struct device *dev) return ERR_PTR(-EINVAL); } - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) - if (_find_list_dev(dev, dev_opp)) - return dev_opp; + list_for_each_entry_rcu(opp_table, &opp_tables, node) + if (_find_opp_dev(dev, opp_table)) + return opp_table; return ERR_PTR(-ENODEV); } @@ -213,16 +215,16 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo); */ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; unsigned long clock_latency_ns; rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) clock_latency_ns = 0; else - clock_latency_ns = dev_opp->clock_latency_ns_max; + clock_latency_ns = opp_table->clock_latency_ns_max; rcu_read_unlock(); return clock_latency_ns; @@ -230,6 +232,82 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); /** + * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max voltage latency in nanoseconds. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + struct opp_table *opp_table; + struct dev_pm_opp *opp; + struct regulator *reg; + unsigned long latency_ns = 0; + unsigned long min_uV = ~0, max_uV = 0; + int ret; + + rcu_read_lock(); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + rcu_read_unlock(); + return 0; + } + + reg = opp_table->regulator; + if (IS_ERR(reg)) { + /* Regulator may not be required for device */ + if (reg) + dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, + PTR_ERR(reg)); + rcu_read_unlock(); + return 0; + } + + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { + if (!opp->available) + continue; + + if (opp->u_volt_min < min_uV) + min_uV = opp->u_volt_min; + if (opp->u_volt_max > max_uV) + max_uV = opp->u_volt_max; + } + + rcu_read_unlock(); + + /* + * The caller needs to ensure that opp_table (and hence the regulator) + * isn't freed, while we are executing this routine. + */ + ret = regulator_set_voltage_time(reg, min_uV, max_uV); + if (ret > 0) + latency_ns = ret * 1000; + + return latency_ns; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); + +/** + * dev_pm_opp_get_max_transition_latency() - Get max transition latency in + * nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max transition latency, in nanoseconds, to + * switch from one OPP to other. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return dev_pm_opp_get_max_volt_latency(dev) + + dev_pm_opp_get_max_clock_latency(dev); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency); + +/** * dev_pm_opp_get_suspend_opp() - Get suspend opp * @dev: device for which we do this operation * @@ -244,21 +322,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); */ struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; opp_rcu_lockdep_assert(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp) || !dev_opp->suspend_opp || - !dev_opp->suspend_opp->available) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table) || !opp_table->suspend_opp || + !opp_table->suspend_opp->available) return NULL; - return dev_opp->suspend_opp; + return opp_table->suspend_opp; } EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); /** - * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list + * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table * @dev: device for which we do this operation * * Return: This function returns the number of available opps if there are any, @@ -268,21 +346,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); */ int dev_pm_opp_get_opp_count(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp; int count = 0; rcu_read_lock(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - count = PTR_ERR(dev_opp); - dev_err(dev, "%s: device OPP not found (%d)\n", + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + count = PTR_ERR(opp_table); + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, count); goto out_unlock; } - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available) count++; } @@ -299,7 +377,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count); * @freq: frequency to search for * @available: true/false - match for available opp * - * Return: Searches for exact match in the opp list and returns pointer to the + * Return: Searches for exact match in the opp table and returns pointer to the * matching opp if found, else returns ERR_PTR in case of error and should * be handled using IS_ERR. Error return values can be: * EINVAL: for bad pointer @@ -323,19 +401,20 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int r = PTR_ERR(dev_opp); - dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); return ERR_PTR(r); } - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available == available && temp_opp->rate == freq) { opp = temp_opp; @@ -371,7 +450,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -381,11 +460,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EINVAL); } - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available && temp_opp->rate >= *freq) { opp = temp_opp; *freq = opp->rate; @@ -421,7 +500,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -431,11 +510,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-EINVAL); } - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { if (temp_opp->available) { /* go to the next node, before choosing prev */ if (temp_opp->rate > *freq) @@ -451,130 +530,343 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); -/* List-dev Helpers */ -static void _kfree_list_dev_rcu(struct rcu_head *head) +/* + * The caller needs to ensure that opp_table (and hence the clk) isn't freed, + * while clk returned here is used. + */ +static struct clk *_get_opp_clk(struct device *dev) +{ + struct opp_table *opp_table; + struct clk *clk; + + rcu_read_lock(); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + clk = ERR_CAST(opp_table); + goto unlock; + } + + clk = opp_table->clk; + if (IS_ERR(clk)) + dev_err(dev, "%s: No clock available for the device\n", + __func__); + +unlock: + rcu_read_unlock(); + return clk; +} + +static int _set_opp_voltage(struct device *dev, struct regulator *reg, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max) +{ + int ret; + + /* Regulator not available for device */ + if (IS_ERR(reg)) { + dev_dbg(dev, "%s: regulator not available: %ld\n", __func__, + PTR_ERR(reg)); + return 0; + } + + dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min, + u_volt, u_volt_max); + + ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt, + u_volt_max); + if (ret) + dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", + __func__, u_volt_min, u_volt, u_volt_max, ret); + + return ret; +} + +/** + * dev_pm_opp_set_rate() - Configure new OPP based on frequency + * @dev: device for which we do this operation + * @target_freq: frequency to achieve + * + * This configures the power-supplies and clock source to the levels specified + * by the OPP corresponding to the target_freq. + * + * Locking: This function takes rcu_read_lock(). + */ +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + struct opp_table *opp_table; + struct dev_pm_opp *old_opp, *opp; + struct regulator *reg; + struct clk *clk; + unsigned long freq, old_freq; + unsigned long u_volt, u_volt_min, u_volt_max; + unsigned long ou_volt, ou_volt_min, ou_volt_max; + int ret; + + if (unlikely(!target_freq)) { + dev_err(dev, "%s: Invalid target frequency %lu\n", __func__, + target_freq); + return -EINVAL; + } + + clk = _get_opp_clk(dev); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + freq = clk_round_rate(clk, target_freq); + if ((long)freq <= 0) + freq = target_freq; + + old_freq = clk_get_rate(clk); + + /* Return early if nothing to do */ + if (old_freq == freq) { + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", + __func__, freq); + return 0; + } + + rcu_read_lock(); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + rcu_read_unlock(); + return PTR_ERR(opp_table); + } + + old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); + if (!IS_ERR(old_opp)) { + ou_volt = old_opp->u_volt; + ou_volt_min = old_opp->u_volt_min; + ou_volt_max = old_opp->u_volt_max; + } else { + dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", + __func__, old_freq, PTR_ERR(old_opp)); + } + + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", + __func__, freq, ret); + rcu_read_unlock(); + return ret; + } + + u_volt = opp->u_volt; + u_volt_min = opp->u_volt_min; + u_volt_max = opp->u_volt_max; + + reg = opp_table->regulator; + + rcu_read_unlock(); + + /* Scaling up? Scale voltage before frequency */ + if (freq > old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_voltage; + } + + /* Change frequency */ + + dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", + __func__, old_freq, freq); + + ret = clk_set_rate(clk, freq); + if (ret) { + dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, + ret); + goto restore_voltage; + } + + /* Scaling down? Scale voltage after frequency */ + if (freq < old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_freq; + } + + return 0; + +restore_freq: + if (clk_set_rate(clk, old_freq)) + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", + __func__, old_freq); +restore_voltage: + /* This shouldn't harm even if the voltages weren't updated earlier */ + if (!IS_ERR(old_opp)) + _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); + +/* OPP-dev Helpers */ +static void _kfree_opp_dev_rcu(struct rcu_head *head) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; - list_dev = container_of(head, struct device_list_opp, rcu_head); - kfree_rcu(list_dev, rcu_head); + opp_dev = container_of(head, struct opp_device, rcu_head); + kfree_rcu(opp_dev, rcu_head); } -static void _remove_list_dev(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static void _remove_opp_dev(struct opp_device *opp_dev, + struct opp_table *opp_table) { - opp_debug_unregister(list_dev, dev_opp); - list_del(&list_dev->node); - call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head, - _kfree_list_dev_rcu); + opp_debug_unregister(opp_dev, opp_table); + list_del(&opp_dev->node); + call_srcu(&opp_table->srcu_head.srcu, &opp_dev->rcu_head, + _kfree_opp_dev_rcu); } -struct device_list_opp *_add_list_dev(const struct device *dev, - struct device_opp *dev_opp) +struct opp_device *_add_opp_dev(const struct device *dev, + struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; int ret; - list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL); - if (!list_dev) + opp_dev = kzalloc(sizeof(*opp_dev), GFP_KERNEL); + if (!opp_dev) return NULL; - /* Initialize list-dev */ - list_dev->dev = dev; - list_add_rcu(&list_dev->node, &dev_opp->dev_list); + /* Initialize opp-dev */ + opp_dev->dev = dev; + list_add_rcu(&opp_dev->node, &opp_table->dev_list); - /* Create debugfs entries for the dev_opp */ - ret = opp_debug_register(list_dev, dev_opp); + /* Create debugfs entries for the opp_table */ + ret = opp_debug_register(opp_dev, opp_table); if (ret) dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", __func__, ret); - return list_dev; + return opp_dev; } /** - * _add_device_opp() - Find device OPP table or allocate a new one + * _add_opp_table() - Find OPP table or allocate a new one * @dev: device for which we do this operation * * It tries to find an existing table first, if it couldn't find one, it * allocates a new OPP table and returns that. * - * Return: valid device_opp pointer if success, else NULL. + * Return: valid opp_table pointer if success, else NULL. */ -static struct device_opp *_add_device_opp(struct device *dev) +static struct opp_table *_add_opp_table(struct device *dev) { - struct device_opp *dev_opp; - struct device_list_opp *list_dev; + struct opp_table *opp_table; + struct opp_device *opp_dev; + struct device_node *np; + int ret; - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (!IS_ERR(dev_opp)) - return dev_opp; + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (!IS_ERR(opp_table)) + return opp_table; /* - * Allocate a new device OPP table. In the infrequent case where a new + * Allocate a new OPP table. In the infrequent case where a new * device is needed to be added, we pay this penalty. */ - dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL); - if (!dev_opp) + opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL); + if (!opp_table) return NULL; - INIT_LIST_HEAD(&dev_opp->dev_list); + INIT_LIST_HEAD(&opp_table->dev_list); - list_dev = _add_list_dev(dev, dev_opp); - if (!list_dev) { - kfree(dev_opp); + opp_dev = _add_opp_dev(dev, opp_table); + if (!opp_dev) { + kfree(opp_table); return NULL; } - srcu_init_notifier_head(&dev_opp->srcu_head); - INIT_LIST_HEAD(&dev_opp->opp_list); + /* + * Only required for backward compatibility with v1 bindings, but isn't + * harmful for other cases. And so we do it unconditionally. + */ + np = of_node_get(dev->of_node); + if (np) { + u32 val; + + if (!of_property_read_u32(np, "clock-latency", &val)) + opp_table->clock_latency_ns_max = val; + of_property_read_u32(np, "voltage-tolerance", + &opp_table->voltage_tolerance_v1); + of_node_put(np); + } + + /* Set regulator to a non-NULL error value */ + opp_table->regulator = ERR_PTR(-ENXIO); + + /* Find clk for the device */ + opp_table->clk = clk_get(dev, NULL); + if (IS_ERR(opp_table->clk)) { + ret = PTR_ERR(opp_table->clk); + if (ret != -EPROBE_DEFER) + dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, + ret); + } - /* Secure the device list modification */ - list_add_rcu(&dev_opp->node, &dev_opp_list); - return dev_opp; + srcu_init_notifier_head(&opp_table->srcu_head); + INIT_LIST_HEAD(&opp_table->opp_list); + + /* Secure the device table modification */ + list_add_rcu(&opp_table->node, &opp_tables); + return opp_table; } /** - * _kfree_device_rcu() - Free device_opp RCU handler + * _kfree_device_rcu() - Free opp_table RCU handler * @head: RCU head */ static void _kfree_device_rcu(struct rcu_head *head) { - struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); + struct opp_table *opp_table = container_of(head, struct opp_table, + rcu_head); - kfree_rcu(device_opp, rcu_head); + kfree_rcu(opp_table, rcu_head); } /** - * _remove_device_opp() - Removes a device OPP table - * @dev_opp: device OPP table to be removed. + * _remove_opp_table() - Removes a OPP table + * @opp_table: OPP table to be removed. * - * Removes/frees device OPP table it it doesn't contain any OPPs. + * Removes/frees OPP table if it doesn't contain any OPPs. */ -static void _remove_device_opp(struct device_opp *dev_opp) +static void _remove_opp_table(struct opp_table *opp_table) { - struct device_list_opp *list_dev; + struct opp_device *opp_dev; + + if (!list_empty(&opp_table->opp_list)) + return; - if (!list_empty(&dev_opp->opp_list)) + if (opp_table->supported_hw) return; - if (dev_opp->supported_hw) + if (opp_table->prop_name) return; - if (dev_opp->prop_name) + if (!IS_ERR(opp_table->regulator)) return; - list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, - node); + /* Release clk */ + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); - _remove_list_dev(list_dev, dev_opp); + opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, + node); + + _remove_opp_dev(opp_dev, opp_table); /* dev_list must be empty now */ - WARN_ON(!list_empty(&dev_opp->dev_list)); + WARN_ON(!list_empty(&opp_table->dev_list)); - list_del_rcu(&dev_opp->node); - call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head, + list_del_rcu(&opp_table->node); + call_srcu(&opp_table->srcu_head.srcu, &opp_table->rcu_head, _kfree_device_rcu); } @@ -591,17 +883,17 @@ static void _kfree_opp_rcu(struct rcu_head *head) /** * _opp_remove() - Remove an OPP from a table definition - * @dev_opp: points back to the device_opp struct this opp belongs to + * @opp_table: points back to the opp_table struct this opp belongs to * @opp: pointer to the OPP to remove * @notify: OPP_EVENT_REMOVE notification should be sent or not * - * This function removes an opp definition from the opp list. + * This function removes an opp definition from the opp table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * It is assumed that the caller holds required mutex for an RCU updater * strategy. */ -static void _opp_remove(struct device_opp *dev_opp, +static void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp, bool notify) { /* @@ -609,22 +901,23 @@ static void _opp_remove(struct device_opp *dev_opp, * frequency/voltage list. */ if (notify) - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_REMOVE, opp); opp_debug_remove_one(opp); list_del_rcu(&opp->node); - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); } /** - * dev_pm_opp_remove() - Remove an OPP from OPP list + * dev_pm_opp_remove() - Remove an OPP from OPP table * @dev: device for which we do this operation * @freq: OPP to remove with matching 'freq' * - * This function removes an opp from the opp list. + * This function removes an opp from the opp table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -633,17 +926,17 @@ static void _opp_remove(struct device_opp *dev_opp, void dev_pm_opp_remove(struct device *dev, unsigned long freq) { struct dev_pm_opp *opp; - struct device_opp *dev_opp; + struct opp_table *opp_table; bool found = false; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) goto unlock; - list_for_each_entry(opp, &dev_opp->opp_list, node) { + list_for_each_entry(opp, &opp_table->opp_list, node) { if (opp->rate == freq) { found = true; break; @@ -656,14 +949,14 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) goto unlock; } - _opp_remove(dev_opp, opp, true); + _opp_remove(opp_table, opp, true); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); static struct dev_pm_opp *_allocate_opp(struct device *dev, - struct device_opp **dev_opp) + struct opp_table **opp_table) { struct dev_pm_opp *opp; @@ -674,8 +967,8 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev, INIT_LIST_HEAD(&opp->node); - *dev_opp = _add_device_opp(dev); - if (!*dev_opp) { + *opp_table = _add_opp_table(dev); + if (!*opp_table) { kfree(opp); return NULL; } @@ -683,22 +976,38 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev, return opp; } +static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, + struct opp_table *opp_table) +{ + struct regulator *reg = opp_table->regulator; + + if (!IS_ERR(reg) && + !regulator_is_supported_voltage(reg, opp->u_volt_min, + opp->u_volt_max)) { + pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n", + __func__, opp->u_volt_min, opp->u_volt_max); + return false; + } + + return true; +} + static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, - struct device_opp *dev_opp) + struct opp_table *opp_table) { struct dev_pm_opp *opp; - struct list_head *head = &dev_opp->opp_list; + struct list_head *head = &opp_table->opp_list; int ret; /* * Insert new OPP in order of increasing frequency and discard if * already present. * - * Need to use &dev_opp->opp_list in the condition part of the 'for' + * Need to use &opp_table->opp_list in the condition part of the 'for' * loop, don't replace it with head otherwise it will become an infinite * loop. */ - list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { if (new_opp->rate > opp->rate) { head = &opp->node; continue; @@ -716,14 +1025,20 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, 0 : -EEXIST; } - new_opp->dev_opp = dev_opp; + new_opp->opp_table = opp_table; list_add_rcu(&new_opp->node, head); - ret = opp_debug_create_one(new_opp, dev_opp); + ret = opp_debug_create_one(new_opp, opp_table); if (ret) dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", __func__, ret); + if (!_opp_supported_by_regulators(new_opp, opp_table)) { + new_opp->available = false; + dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n", + __func__, new_opp->rate); + } + return 0; } @@ -734,14 +1049,14 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * @u_volt: Voltage in uVolts for this OPP * @dynamic: Dynamically added OPPs. * - * This function adds an opp definition to the opp list and returns status. + * This function adds an opp definition to the opp table and returns status. * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove. * * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table * and freed by dev_pm_opp_of_remove_table. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -757,14 +1072,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, bool dynamic) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp; + unsigned long tol; int ret; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - new_opp = _allocate_opp(dev, &dev_opp); + new_opp = _allocate_opp(dev, &opp_table); if (!new_opp) { ret = -ENOMEM; goto unlock; @@ -772,33 +1088,36 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, /* populate the opp table */ new_opp->rate = freq; + tol = u_volt * opp_table->voltage_tolerance_v1 / 100; new_opp->u_volt = u_volt; + new_opp->u_volt_min = u_volt - tol; + new_opp->u_volt_max = u_volt + tol; new_opp->available = true; new_opp->dynamic = dynamic; - ret = _opp_add(dev, new_opp, dev_opp); + ret = _opp_add(dev, new_opp, opp_table); if (ret) goto free_opp; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); /* * Notify the changes in the availability of the operable * frequency/voltage list. */ - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); return 0; free_opp: - _opp_remove(dev_opp, new_opp, false); + _opp_remove(opp_table, new_opp, false); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } /* TODO: Support multiple regulators */ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, - struct device_opp *dev_opp) + struct opp_table *opp_table) { u32 microvolt[3] = {0}; u32 val; @@ -807,9 +1126,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, char name[NAME_MAX]; /* Search for "opp-microvolt-<name>" */ - if (dev_opp->prop_name) { + if (opp_table->prop_name) { snprintf(name, sizeof(name), "opp-microvolt-%s", - dev_opp->prop_name); + opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } @@ -844,14 +1163,20 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, } opp->u_volt = microvolt[0]; - opp->u_volt_min = microvolt[1]; - opp->u_volt_max = microvolt[2]; + + if (count == 1) { + opp->u_volt_min = opp->u_volt; + opp->u_volt_max = opp->u_volt; + } else { + opp->u_volt_min = microvolt[1]; + opp->u_volt_max = microvolt[2]; + } /* Search for "opp-microamp-<name>" */ prop = NULL; - if (dev_opp->prop_name) { + if (opp_table->prop_name) { snprintf(name, sizeof(name), "opp-microamp-%s", - dev_opp->prop_name); + opp_table->prop_name); prop = of_find_property(opp->np, name, NULL); } @@ -878,7 +1203,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, * OPPs, which are available for those versions, based on its 'opp-supported-hw' * property. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -887,44 +1212,44 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count) { - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _add_device_opp(dev); - if (!dev_opp) { + opp_table = _add_opp_table(dev); + if (!opp_table) { ret = -ENOMEM; goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a version hierarchy associated with dev_opp? */ - if (dev_opp->supported_hw) { + /* Do we already have a version hierarchy associated with opp_table? */ + if (opp_table->supported_hw) { dev_err(dev, "%s: Already have supported hardware list\n", __func__); ret = -EBUSY; goto err; } - dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions), + opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions), GFP_KERNEL); - if (!dev_opp->supported_hw) { + if (!opp_table->supported_hw) { ret = -ENOMEM; goto err; } - dev_opp->supported_hw_count = count; - mutex_unlock(&dev_opp_list_lock); + opp_table->supported_hw_count = count; + mutex_unlock(&opp_table_lock); return 0; err: - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -932,13 +1257,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); /** * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw - * @dev: Device for which supported-hw has to be set. + * @dev: Device for which supported-hw has to be put. * * This is required only for the V2 bindings, and is called for a matching - * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure + * dev_pm_opp_set_supported_hw(). Until this is called, the opp_table structure * will not be freed. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -946,42 +1271,43 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); */ void dev_pm_opp_put_supported_hw(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - if (!dev_opp->supported_hw) { + if (!opp_table->supported_hw) { dev_err(dev, "%s: Doesn't have supported hardware list\n", __func__); goto unlock; } - kfree(dev_opp->supported_hw); - dev_opp->supported_hw = NULL; - dev_opp->supported_hw_count = 0; + kfree(opp_table->supported_hw); + opp_table->supported_hw = NULL; + opp_table->supported_hw_count = 0; - /* Try freeing device_opp if this was the last blocking resource */ - _remove_device_opp(dev_opp); + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); /** * dev_pm_opp_set_prop_name() - Set prop-extn name - * @dev: Device for which the regulator has to be set. + * @dev: Device for which the prop-name has to be set. * @name: name to postfix to properties. * * This is required only for the V2 bindings, and it enables a platform to @@ -989,7 +1315,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); * which the extension will apply are opp-microvolt and opp-microamp. OPP core * should postfix the property name with -<name> while looking for them. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -997,42 +1323,42 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); */ int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - dev_opp = _add_device_opp(dev); - if (!dev_opp) { + opp_table = _add_opp_table(dev); + if (!opp_table) { ret = -ENOMEM; goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - /* Do we already have a prop-name associated with dev_opp? */ - if (dev_opp->prop_name) { + /* Do we already have a prop-name associated with opp_table? */ + if (opp_table->prop_name) { dev_err(dev, "%s: Already have prop-name %s\n", __func__, - dev_opp->prop_name); + opp_table->prop_name); ret = -EBUSY; goto err; } - dev_opp->prop_name = kstrdup(name, GFP_KERNEL); - if (!dev_opp->prop_name) { + opp_table->prop_name = kstrdup(name, GFP_KERNEL); + if (!opp_table->prop_name) { ret = -ENOMEM; goto err; } - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return 0; err: - _remove_device_opp(dev_opp); + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1040,13 +1366,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); /** * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name - * @dev: Device for which the regulator has to be set. + * @dev: Device for which the prop-name has to be put. * * This is required only for the V2 bindings, and is called for a matching - * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure + * dev_pm_opp_set_prop_name(). Until this is called, the opp_table structure * will not be freed. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1054,45 +1380,154 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); */ void dev_pm_opp_put_prop_name(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' first */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); goto unlock; } - /* Make sure there are no concurrent readers while updating dev_opp */ - WARN_ON(!list_empty(&dev_opp->opp_list)); + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); - if (!dev_opp->prop_name) { + if (!opp_table->prop_name) { dev_err(dev, "%s: Doesn't have a prop-name\n", __func__); goto unlock; } - kfree(dev_opp->prop_name); - dev_opp->prop_name = NULL; + kfree(opp_table->prop_name); + opp_table->prop_name = NULL; - /* Try freeing device_opp if this was the last blocking resource */ - _remove_device_opp(dev_opp); + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); -static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, +/** + * dev_pm_opp_set_regulator() - Set regulator name for the device + * @dev: Device for which regulator name is being set. + * @name: Name of the regulator. + * + * In order to support OPP switching, OPP layer needs to know the name of the + * device's regulator, as the core would be required to switch voltages as well. + * + * This must be called before any OPPs are initialized for the device. + * + * Locking: The internal opp_table and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + struct opp_table *opp_table; + struct regulator *reg; + int ret; + + mutex_lock(&opp_table_lock); + + opp_table = _add_opp_table(dev); + if (!opp_table) { + ret = -ENOMEM; + goto unlock; + } + + /* This should be called before OPPs are initialized */ + if (WARN_ON(!list_empty(&opp_table->opp_list))) { + ret = -EBUSY; + goto err; + } + + /* Already have a regulator set */ + if (WARN_ON(!IS_ERR(opp_table->regulator))) { + ret = -EBUSY; + goto err; + } + /* Allocate the regulator */ + reg = regulator_get_optional(dev, name); + if (IS_ERR(reg)) { + ret = PTR_ERR(reg); + if (ret != -EPROBE_DEFER) + dev_err(dev, "%s: no regulator (%s) found: %d\n", + __func__, name, ret); + goto err; + } + + opp_table->regulator = reg; + + mutex_unlock(&opp_table_lock); + return 0; + +err: + _remove_opp_table(opp_table); +unlock: + mutex_unlock(&opp_table_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); + +/** + * dev_pm_opp_put_regulator() - Releases resources blocked for regulator + * @dev: Device for which regulator was set. + * + * Locking: The internal opp_table and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_regulator(struct device *dev) +{ + struct opp_table *opp_table; + + mutex_lock(&opp_table_lock); + + /* Check for existing table for 'dev' first */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "Failed to find opp_table: %ld\n", + PTR_ERR(opp_table)); + goto unlock; + } + + if (IS_ERR(opp_table->regulator)) { + dev_err(dev, "%s: Doesn't have regulator set\n", __func__); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating opp_table */ + WARN_ON(!list_empty(&opp_table->opp_list)); + + regulator_put(opp_table->regulator); + opp_table->regulator = ERR_PTR(-ENXIO); + + /* Try freeing opp_table if this was the last blocking resource */ + _remove_opp_table(opp_table); + +unlock: + mutex_unlock(&opp_table_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator); + +static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, struct device_node *np) { - unsigned int count = dev_opp->supported_hw_count; + unsigned int count = opp_table->supported_hw_count; u32 version; int ret; - if (!dev_opp->supported_hw) + if (!opp_table->supported_hw) return true; while (count--) { @@ -1105,7 +1540,7 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, } /* Both of these are bitwise masks of the versions */ - if (!(version & dev_opp->supported_hw[count])) + if (!(version & opp_table->supported_hw[count])) return false; } @@ -1117,11 +1552,11 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, * @dev: device for which we do this operation * @np: device node * - * This function adds an opp definition to the opp list and returns status. The + * This function adds an opp definition to the opp table and returns status. The * opp can be controlled using dev_pm_opp_enable/disable functions and may be * removed by dev_pm_opp_remove. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1137,16 +1572,16 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, */ static int _opp_add_static_v2(struct device *dev, struct device_node *np) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp; u64 rate; u32 val; int ret; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - new_opp = _allocate_opp(dev, &dev_opp); + new_opp = _allocate_opp(dev, &opp_table); if (!new_opp) { ret = -ENOMEM; goto unlock; @@ -1159,7 +1594,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) } /* Check if the OPP supports hardware's hierarchy of versions or not */ - if (!_opp_is_supported(dev, dev_opp, np)) { + if (!_opp_is_supported(dev, opp_table, np)) { dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); goto free_opp; } @@ -1179,30 +1614,30 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_parse_supplies(new_opp, dev, dev_opp); + ret = opp_parse_supplies(new_opp, dev, opp_table); if (ret) goto free_opp; - ret = _opp_add(dev, new_opp, dev_opp); + ret = _opp_add(dev, new_opp, opp_table); if (ret) goto free_opp; /* OPP to select on device suspend */ if (of_property_read_bool(np, "opp-suspend")) { - if (dev_opp->suspend_opp) { + if (opp_table->suspend_opp) { dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", - __func__, dev_opp->suspend_opp->rate, + __func__, opp_table->suspend_opp->rate, new_opp->rate); } else { new_opp->suspend = true; - dev_opp->suspend_opp = new_opp; + opp_table->suspend_opp = new_opp; } } - if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) - dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns; + if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max) + opp_table->clock_latency_ns_max = new_opp->clock_latency_ns; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n", __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt, @@ -1213,13 +1648,13 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) * Notify the changes in the availability of the operable * frequency/voltage list. */ - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); return 0; free_opp: - _opp_remove(dev_opp, new_opp, false); + _opp_remove(opp_table, new_opp, false); unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } @@ -1229,11 +1664,11 @@ unlock: * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP * - * This function adds an opp definition to the opp list and returns status. + * This function adds an opp definition to the opp table and returns status. * The opp is made available by default and it can be controlled using * dev_pm_opp_enable/disable functions. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1265,7 +1700,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); * copy operation, returns 0 if no modification was done OR modification was * successful. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks to * keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1274,7 +1709,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); static int _opp_set_availability(struct device *dev, unsigned long freq, bool availability_req) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); int r = 0; @@ -1283,18 +1718,18 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, if (!new_opp) return -ENOMEM; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - /* Find the device_opp */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - r = PTR_ERR(dev_opp); + /* Find the opp_table */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + r = PTR_ERR(opp_table); dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); goto unlock; } /* Do we have the frequency? */ - list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) { + list_for_each_entry(tmp_opp, &opp_table->opp_list, node) { if (tmp_opp->rate == freq) { opp = tmp_opp; break; @@ -1315,21 +1750,21 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, new_opp->available = availability_req; list_replace_rcu(&opp->node, &new_opp->node); - mutex_unlock(&dev_opp_list_lock); - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); + mutex_unlock(&opp_table_lock); + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); /* Notify the change of the OPP availability */ if (availability_req) - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE, - new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_ENABLE, new_opp); else - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE, - new_opp); + srcu_notifier_call_chain(&opp_table->srcu_head, + OPP_EVENT_DISABLE, new_opp); return 0; unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); kfree(new_opp); return r; } @@ -1343,7 +1778,7 @@ unlock: * corresponding error value. It is meant to be used for users an OPP available * after being temporarily made unavailable with dev_pm_opp_disable. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU and mutex locks to keep the * integrity of the internal data structures. Callers should ensure that * this function is *NOT* called under RCU protection or in contexts where @@ -1369,7 +1804,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable); * control by users to make this OPP not available until the circumstances are * right to make it available again (with a call to dev_pm_opp_enable). * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU and mutex locks to keep the * integrity of the internal data structures. Callers should ensure that * this function is *NOT* called under RCU protection or in contexts where @@ -1387,26 +1822,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable); /** * dev_pm_opp_get_notifier() - find notifier_head of the device with opp - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Return: pointer to notifier head if found, otherwise -ENODEV or * -EINVAL based on type of error casted as pointer. value must be checked * with IS_ERR to determine valid pointer or error result. * - * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU - * protected pointer. The reason for the same is that the opp pointer which is - * returned will remain valid for use with opp_get_{voltage, freq} only while + * Locking: This function must be called under rcu_read_lock(). opp_table is a + * RCU protected pointer. The reason for the same is that the opp pointer which + * is returned will remain valid for use with opp_get_{voltage, freq} only while * under the locked area. The pointer returned must be used prior to unlocking * with rcu_read_unlock() to maintain the integrity of the pointer. */ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev) { - struct device_opp *dev_opp = _find_device_opp(dev); + struct opp_table *opp_table = _find_opp_table(dev); - if (IS_ERR(dev_opp)) - return ERR_CAST(dev_opp); /* matching type */ + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); /* matching type */ - return &dev_opp->srcu_head; + return &opp_table->srcu_head; } EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); @@ -1414,11 +1849,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); /** * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT * entries - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Free OPPs created using static entries present in DT. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where @@ -1426,38 +1861,38 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); */ void dev_pm_opp_of_remove_table(struct device *dev) { - struct device_opp *dev_opp; + struct opp_table *opp_table; struct dev_pm_opp *opp, *tmp; - /* Hold our list modification lock here */ - mutex_lock(&dev_opp_list_lock); + /* Hold our table modification lock here */ + mutex_lock(&opp_table_lock); - /* Check for existing list for 'dev' */ - dev_opp = _find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int error = PTR_ERR(dev_opp); + /* Check for existing table for 'dev' */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int error = PTR_ERR(opp_table); if (error != -ENODEV) - WARN(1, "%s: dev_opp: %d\n", + WARN(1, "%s: opp_table: %d\n", IS_ERR_OR_NULL(dev) ? "Invalid device" : dev_name(dev), error); goto unlock; } - /* Find if dev_opp manages a single device */ - if (list_is_singular(&dev_opp->dev_list)) { + /* Find if opp_table manages a single device */ + if (list_is_singular(&opp_table->dev_list)) { /* Free static OPPs */ - list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) { + list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { if (!opp->dynamic) - _opp_remove(dev_opp, opp, true); + _opp_remove(opp_table, opp, true); } } else { - _remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp); + _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); } unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); @@ -1478,22 +1913,22 @@ struct device_node *_of_get_opp_desc_node(struct device *dev) static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) { struct device_node *np; - struct device_opp *dev_opp; + struct opp_table *opp_table; int ret = 0, count = 0; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _managed_opp(opp_np); - if (dev_opp) { + opp_table = _managed_opp(opp_np); + if (opp_table) { /* OPPs are already managed */ - if (!_add_list_dev(dev, dev_opp)) + if (!_add_opp_dev(dev, opp_table)) ret = -ENOMEM; - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); - /* We have opp-list node now, iterate over it and add OPPs */ + /* We have opp-table node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_np, np) { count++; @@ -1509,19 +1944,19 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (WARN_ON(!count)) return -ENOENT; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(dev); - if (WARN_ON(IS_ERR(dev_opp))) { - ret = PTR_ERR(dev_opp); - mutex_unlock(&dev_opp_list_lock); + opp_table = _find_opp_table(dev); + if (WARN_ON(IS_ERR(opp_table))) { + ret = PTR_ERR(opp_table); + mutex_unlock(&opp_table_lock); goto free_table; } - dev_opp->np = opp_np; - dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + opp_table->np = opp_np; + opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared"); - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return 0; @@ -1550,7 +1985,7 @@ static int _of_add_opp_table_v1(struct device *dev) */ nr = prop->length / sizeof(u32); if (nr % 2) { - dev_err(dev, "%s: Invalid OPP list\n", __func__); + dev_err(dev, "%s: Invalid OPP table\n", __func__); return -EINVAL; } @@ -1570,11 +2005,11 @@ static int _of_add_opp_table_v1(struct device *dev) /** * dev_pm_opp_of_add_table() - Initialize opp table from device tree - * @dev: device pointer used to lookup device OPPs. + * @dev: device pointer used to lookup OPP table. * * Register the initial OPP table with the OPP library for given device. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function indirectly uses RCU updater strategy with mutex locks * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 9f0c155..ba2bdbd 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -31,7 +31,7 @@ * @table: Cpufreq table returned back to caller * * Generate a cpufreq table for a provided device- this assumes that the - * opp list is already initialized and ready for usage. + * opp table is already initialized and ready for usage. * * This function allocates required memory for the cpufreq table. It is * expected that the caller does the required maintenance such as freeing @@ -44,7 +44,7 @@ * WARNING: It is important for the callers to ensure refreshing their copy of * the table if any of the mentioned functions have been invoked in the interim. * - * Locking: The internal device_opp and opp structures are RCU protected. + * Locking: The internal opp_table and opp structures are RCU protected. * Since we just use the regular accessor functions to access the internal data * structures, we use RCU read lock inside this function. As a result, users of * this function DONOT need to use explicit locks for invoking. @@ -122,15 +122,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); /* Required only for V1 bindings, as v2 can manage it from DT itself */ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - struct device_list_opp *list_dev; - struct device_opp *dev_opp; + struct opp_device *opp_dev; + struct opp_table *opp_table; struct device *dev; int cpu, ret = 0; - mutex_lock(&dev_opp_list_lock); + mutex_lock(&opp_table_lock); - dev_opp = _find_device_opp(cpu_dev); - if (IS_ERR(dev_opp)) { + opp_table = _find_opp_table(cpu_dev); + if (IS_ERR(opp_table)) { ret = -EINVAL; goto unlock; } @@ -146,15 +146,15 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) continue; } - list_dev = _add_list_dev(dev, dev_opp); - if (!list_dev) { - dev_err(dev, "%s: failed to add list-dev for cpu%d device\n", + opp_dev = _add_opp_dev(dev, opp_table); + if (!opp_dev) { + dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n", __func__, cpu); continue; } } unlock: - mutex_unlock(&dev_opp_list_lock); + mutex_unlock(&opp_table_lock); return ret; } diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c index ddfe477..ef1ae6b 100644 --- a/drivers/base/power/opp/debugfs.c +++ b/drivers/base/power/opp/debugfs.c @@ -34,9 +34,9 @@ void opp_debug_remove_one(struct dev_pm_opp *opp) debugfs_remove_recursive(opp->dentry); } -int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) +int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) { - struct dentry *pdentry = dev_opp->dentry; + struct dentry *pdentry = opp_table->dentry; struct dentry *d; char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ @@ -83,52 +83,52 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) return 0; } -static int device_opp_debug_create_dir(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static int opp_list_debug_create_dir(struct opp_device *opp_dev, + struct opp_table *opp_table) { - const struct device *dev = list_dev->dev; + const struct device *dev = opp_dev->dev; struct dentry *d; - opp_set_dev_name(dev, dev_opp->dentry_name); + opp_set_dev_name(dev, opp_table->dentry_name); /* Create device specific directory */ - d = debugfs_create_dir(dev_opp->dentry_name, rootdir); + d = debugfs_create_dir(opp_table->dentry_name, rootdir); if (!d) { dev_err(dev, "%s: Failed to create debugfs dir\n", __func__); return -ENOMEM; } - list_dev->dentry = d; - dev_opp->dentry = d; + opp_dev->dentry = d; + opp_table->dentry = d; return 0; } -static int device_opp_debug_create_link(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static int opp_list_debug_create_link(struct opp_device *opp_dev, + struct opp_table *opp_table) { - const struct device *dev = list_dev->dev; + const struct device *dev = opp_dev->dev; char name[NAME_MAX]; struct dentry *d; - opp_set_dev_name(list_dev->dev, name); + opp_set_dev_name(opp_dev->dev, name); /* Create device specific directory link */ - d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name); + d = debugfs_create_symlink(name, rootdir, opp_table->dentry_name); if (!d) { dev_err(dev, "%s: Failed to create link\n", __func__); return -ENOMEM; } - list_dev->dentry = d; + opp_dev->dentry = d; return 0; } /** * opp_debug_register - add a device opp node to the debugfs 'opp' directory - * @list_dev: list-dev pointer for device - * @dev_opp: the device-opp being added + * @opp_dev: opp-dev pointer for device + * @opp_table: the device-opp being added * * Dynamically adds device specific directory in debugfs 'opp' directory. If the * device-opp is shared with other devices, then links will be created for all @@ -136,73 +136,72 @@ static int device_opp_debug_create_link(struct device_list_opp *list_dev, * * Return: 0 on success, otherwise negative error. */ -int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table) { if (!rootdir) { pr_debug("%s: Uninitialized rootdir\n", __func__); return -EINVAL; } - if (dev_opp->dentry) - return device_opp_debug_create_link(list_dev, dev_opp); + if (opp_table->dentry) + return opp_list_debug_create_link(opp_dev, opp_table); - return device_opp_debug_create_dir(list_dev, dev_opp); + return opp_list_debug_create_dir(opp_dev, opp_table); } -static void opp_migrate_dentry(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static void opp_migrate_dentry(struct opp_device *opp_dev, + struct opp_table *opp_table) { - struct device_list_opp *new_dev; + struct opp_device *new_dev; const struct device *dev; struct dentry *dentry; - /* Look for next list-dev */ - list_for_each_entry(new_dev, &dev_opp->dev_list, node) - if (new_dev != list_dev) + /* Look for next opp-dev */ + list_for_each_entry(new_dev, &opp_table->dev_list, node) + if (new_dev != opp_dev) break; /* new_dev is guaranteed to be valid here */ dev = new_dev->dev; debugfs_remove_recursive(new_dev->dentry); - opp_set_dev_name(dev, dev_opp->dentry_name); + opp_set_dev_name(dev, opp_table->dentry_name); - dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir, - dev_opp->dentry_name); + dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, + opp_table->dentry_name); if (!dentry) { dev_err(dev, "%s: Failed to rename link from: %s to %s\n", - __func__, dev_name(list_dev->dev), dev_name(dev)); + __func__, dev_name(opp_dev->dev), dev_name(dev)); return; } new_dev->dentry = dentry; - dev_opp->dentry = dentry; + opp_table->dentry = dentry; } /** * opp_debug_unregister - remove a device opp node from debugfs opp directory - * @list_dev: list-dev pointer for device - * @dev_opp: the device-opp being removed + * @opp_dev: opp-dev pointer for device + * @opp_table: the device-opp being removed * * Dynamically removes device specific directory from debugfs 'opp' directory. */ -void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +void opp_debug_unregister(struct opp_device *opp_dev, + struct opp_table *opp_table) { - if (list_dev->dentry == dev_opp->dentry) { + if (opp_dev->dentry == opp_table->dentry) { /* Move the real dentry object under another device */ - if (!list_is_singular(&dev_opp->dev_list)) { - opp_migrate_dentry(list_dev, dev_opp); + if (!list_is_singular(&opp_table->dev_list)) { + opp_migrate_dentry(opp_dev, opp_table); goto out; } - dev_opp->dentry = NULL; + opp_table->dentry = NULL; } - debugfs_remove_recursive(list_dev->dentry); + debugfs_remove_recursive(opp_dev->dentry); out: - list_dev->dentry = NULL; + opp_dev->dentry = NULL; } static int __init opp_debug_init(void) diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 690638e..f67f806 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -22,13 +22,16 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> +struct clk; +struct regulator; + /* Lock to allow exclusive modification to the device and opp lists */ -extern struct mutex dev_opp_list_lock; +extern struct mutex opp_table_lock; /* * Internal data structure organization with the OPP layer library is as * follows: - * dev_opp_list (root) + * opp_tables (root) * |- device 1 (represents voltage domain 1) * | |- opp 1 (availability, freq, voltage) * | |- opp 2 .. @@ -37,18 +40,18 @@ extern struct mutex dev_opp_list_lock; * |- device 2 (represents the next voltage domain) * ... * `- device m (represents mth voltage domain) - * device 1, 2.. are represented by dev_opp structure while each opp + * device 1, 2.. are represented by opp_table structure while each opp * is represented by the opp structure. */ /** * struct dev_pm_opp - Generic OPP description structure - * @node: opp list node. The nodes are maintained throughout the lifetime + * @node: opp table node. The nodes are maintained throughout the lifetime * of boot. It is expected only an optimal set of OPPs are * added to the library by the SoC framework. - * RCU usage: opp list is traversed with RCU locks. node + * RCU usage: opp table is traversed with RCU locks. node * modification is possible realtime, hence the modifications - * are protected by the dev_opp_list_lock for integrity. + * are protected by the opp_table_lock for integrity. * IMPORTANT: the opp nodes should be maintained in increasing * order. * @available: true/false - marks if this OPP as available or not @@ -62,7 +65,7 @@ extern struct mutex dev_opp_list_lock; * @u_amp: Maximum current drawn by the device in microamperes * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's * frequency from any other OPP's frequency. - * @dev_opp: points back to the device_opp struct this opp belongs to + * @opp_table: points back to the opp_table struct this opp belongs to * @rcu_head: RCU callback head used for deferred freeing * @np: OPP's device node. * @dentry: debugfs dentry pointer (per opp) @@ -84,7 +87,7 @@ struct dev_pm_opp { unsigned long u_amp; unsigned long clock_latency_ns; - struct device_opp *dev_opp; + struct opp_table *opp_table; struct rcu_head rcu_head; struct device_node *np; @@ -95,16 +98,16 @@ struct dev_pm_opp { }; /** - * struct device_list_opp - devices managed by 'struct device_opp' + * struct opp_device - devices managed by 'struct opp_table' * @node: list node * @dev: device to which the struct object belongs * @rcu_head: RCU callback head used for deferred freeing * @dentry: debugfs dentry pointer (per device) * - * This is an internal data structure maintaining the list of devices that are - * managed by 'struct device_opp'. + * This is an internal data structure maintaining the devices that are managed + * by 'struct opp_table'. */ -struct device_list_opp { +struct opp_device { struct list_head node; const struct device *dev; struct rcu_head rcu_head; @@ -115,16 +118,16 @@ struct device_list_opp { }; /** - * struct device_opp - Device opp structure - * @node: list node - contains the devices with OPPs that + * struct opp_table - Device opp structure + * @node: table node - contains the devices with OPPs that * have been registered. Nodes once added are not modified in this - * list. - * RCU usage: nodes are not modified in the list of device_opp, - * however addition is possible and is secured by dev_opp_list_lock + * table. + * RCU usage: nodes are not modified in the table of opp_table, + * however addition is possible and is secured by opp_table_lock * @srcu_head: notifier head to notify the OPP availability changes. * @rcu_head: RCU callback head used for deferred freeing * @dev_list: list of devices that share these OPPs - * @opp_list: list of opps + * @opp_list: table of opps * @np: struct device_node pointer for opp's DT node. * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. @@ -132,9 +135,13 @@ struct device_list_opp { * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. * @prop_name: A name to postfix to many DT properties, while parsing them. + * @clk: Device's clock handle + * @regulator: Supply regulator * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * + * @voltage_tolerance_v1: In percentage, for v1 bindings only. + * * This is an internal data structure maintaining the link to opps attached to * a device. This structure is not meant to be shared to users as it is * meant for book keeping and private to OPP library. @@ -143,7 +150,7 @@ struct device_list_opp { * need to wait for the grace period of both of them before freeing any * resources. And so we have used kfree_rcu() from within call_srcu() handlers. */ -struct device_opp { +struct opp_table { struct list_head node; struct srcu_notifier_head srcu_head; @@ -153,12 +160,18 @@ struct device_opp { struct device_node *np; unsigned long clock_latency_ns_max; + + /* For backward compatibility with v1 bindings */ + unsigned int voltage_tolerance_v1; + bool shared_opp; struct dev_pm_opp *suspend_opp; unsigned int *supported_hw; unsigned int supported_hw_count; const char *prop_name; + struct clk *clk; + struct regulator *regulator; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; @@ -167,30 +180,27 @@ struct device_opp { }; /* Routines internal to opp core */ -struct device_opp *_find_device_opp(struct device *dev); -struct device_list_opp *_add_list_dev(const struct device *dev, - struct device_opp *dev_opp); +struct opp_table *_find_opp_table(struct device *dev); +struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); struct device_node *_of_get_opp_desc_node(struct device *dev); #ifdef CONFIG_DEBUG_FS void opp_debug_remove_one(struct dev_pm_opp *opp); -int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp); -int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp); -void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp); +int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table); +int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table); +void opp_debug_unregister(struct opp_device *opp_dev, struct opp_table *opp_table); #else static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {} static inline int opp_debug_create_one(struct dev_pm_opp *opp, - struct device_opp *dev_opp) + struct opp_table *opp_table) { return 0; } -static inline int opp_debug_register(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static inline int opp_debug_register(struct opp_device *opp_dev, + struct opp_table *opp_table) { return 0; } -static inline void opp_debug_unregister(struct device_list_opp *list_dev, - struct device_opp *dev_opp) +static inline void opp_debug_unregister(struct opp_device *opp_dev, + struct opp_table *opp_table) { } #endif /* DEBUG_FS */ diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a311cfa..a697579 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -166,14 +166,14 @@ void generate_pm_trace(const void *tracedata, unsigned int user) } EXPORT_SYMBOL(generate_pm_trace); -extern char __tracedata_start, __tracedata_end; +extern char __tracedata_start[], __tracedata_end[]; static int show_file_hash(unsigned int value) { int match; char *tracedata; match = 0; - for (tracedata = &__tracedata_start ; tracedata < &__tracedata_end ; + for (tracedata = __tracedata_start ; tracedata < __tracedata_end ; tracedata += 2 + sizeof(unsigned long)) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); diff --git a/drivers/base/property.c b/drivers/base/property.c index a163f2c5..76628a7 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -218,7 +218,8 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname) bool ret; ret = __fwnode_property_present(fwnode, propname); - if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) + if (ret == false && !IS_ERR_OR_NULL(fwnode) && + !IS_ERR_OR_NULL(fwnode->secondary)) ret = __fwnode_property_present(fwnode->secondary, propname); return ret; } @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(device_property_match_string); int _ret_; \ _ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, \ _val_, _nval_); \ - if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary)) \ + if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) && \ + !IS_ERR_OR_NULL(_fwnode_->secondary)) \ _ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_, \ _proptype_, _val_, _nval_); \ _ret_; \ @@ -593,7 +595,8 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, int ret; ret = __fwnode_property_read_string_array(fwnode, propname, val, nval); - if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) + if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && + !IS_ERR_OR_NULL(fwnode->secondary)) ret = __fwnode_property_read_string_array(fwnode->secondary, propname, val, nval); return ret; @@ -621,7 +624,8 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, int ret; ret = __fwnode_property_read_string(fwnode, propname, val); - if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) + if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && + !IS_ERR_OR_NULL(fwnode->secondary)) ret = __fwnode_property_read_string(fwnode->secondary, propname, val); return ret; @@ -820,11 +824,16 @@ void device_remove_property_set(struct device *dev) * the pset. If there is no real firmware node (ACPI/DT) primary * will hold the pset. */ - if (!is_pset_node(fwnode)) - fwnode = fwnode->secondary; - if (!IS_ERR(fwnode) && is_pset_node(fwnode)) + if (is_pset_node(fwnode)) { + set_primary_fwnode(dev, NULL); pset_free_set(to_pset_node(fwnode)); - set_secondary_fwnode(dev, NULL); + } else { + fwnode = fwnode->secondary; + if (!IS_ERR(fwnode) && is_pset_node(fwnode)) { + set_secondary_fwnode(dev, NULL); + pset_free_set(to_pset_node(fwnode)); + } + } } EXPORT_SYMBOL_GPL(device_remove_property_set); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index f935110..a7f4585 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -19,6 +19,7 @@ config CPU_FREQ if CPU_FREQ config CPU_FREQ_GOV_COMMON + select IRQ_WORK bool config CPU_FREQ_BOOST_SW diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 51eef87..59a7b38 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -70,6 +70,8 @@ struct acpi_cpufreq_data { unsigned int cpu_feature; unsigned int acpi_perf_cpu; cpumask_var_t freqdomain_cpus; + void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); + u32 (*cpu_freq_read)(struct acpi_pct_register *reg); }; /* acpi_perf_data is a pointer to percpu data. */ @@ -243,125 +245,119 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) } } -struct msr_addr { - u32 reg; -}; +u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) +{ + u32 val, dummy; -struct io_addr { - u16 port; - u8 bit_width; -}; + rdmsr(MSR_IA32_PERF_CTL, val, dummy); + return val; +} + +void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) +{ + u32 lo, hi; + + rdmsr(MSR_IA32_PERF_CTL, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE); + wrmsr(MSR_IA32_PERF_CTL, lo, hi); +} + +u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) +{ + u32 val, dummy; + + rdmsr(MSR_AMD_PERF_CTL, val, dummy); + return val; +} + +void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val) +{ + wrmsr(MSR_AMD_PERF_CTL, val, 0); +} + +u32 cpu_freq_read_io(struct acpi_pct_register *reg) +{ + u32 val; + + acpi_os_read_port(reg->address, &val, reg->bit_width); + return val; +} + +void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val) +{ + acpi_os_write_port(reg->address, val, reg->bit_width); +} struct drv_cmd { - unsigned int type; - const struct cpumask *mask; - union { - struct msr_addr msr; - struct io_addr io; - } addr; + struct acpi_pct_register *reg; u32 val; + union { + void (*write)(struct acpi_pct_register *reg, u32 val); + u32 (*read)(struct acpi_pct_register *reg); + } func; }; /* Called via smp_call_function_single(), on the target CPU */ static void do_drv_read(void *_cmd) { struct drv_cmd *cmd = _cmd; - u32 h; - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - case SYSTEM_AMD_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, - &cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } + cmd->val = cmd->func.read(cmd->reg); } -/* Called via smp_call_function_many(), on the target CPUs */ -static void do_drv_write(void *_cmd) +static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask) { - struct drv_cmd *cmd = _cmd; - u32 lo, hi; + struct acpi_processor_performance *perf = to_perf_data(data); + struct drv_cmd cmd = { + .reg = &perf->control_register, + .func.read = data->cpu_freq_read, + }; + int err; - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, lo, hi); - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); - wrmsr(cmd->addr.msr.reg, lo, hi); - break; - case SYSTEM_AMD_MSR_CAPABLE: - wrmsr(cmd->addr.msr.reg, cmd->val, 0); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, - cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } + err = smp_call_function_any(mask, do_drv_read, &cmd, 1); + WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ + return cmd.val; } -static void drv_read(struct drv_cmd *cmd) +/* Called via smp_call_function_many(), on the target CPUs */ +static void do_drv_write(void *_cmd) { - int err; - cmd->val = 0; + struct drv_cmd *cmd = _cmd; - err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); - WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ + cmd->func.write(cmd->reg, cmd->val); } -static void drv_write(struct drv_cmd *cmd) +static void drv_write(struct acpi_cpufreq_data *data, + const struct cpumask *mask, u32 val) { + struct acpi_processor_performance *perf = to_perf_data(data); + struct drv_cmd cmd = { + .reg = &perf->control_register, + .val = val, + .func.write = data->cpu_freq_write, + }; int this_cpu; this_cpu = get_cpu(); - if (cpumask_test_cpu(this_cpu, cmd->mask)) - do_drv_write(cmd); - smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); + if (cpumask_test_cpu(this_cpu, mask)) + do_drv_write(&cmd); + + smp_call_function_many(mask, do_drv_write, &cmd, 1); put_cpu(); } -static u32 -get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) +static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { - struct acpi_processor_performance *perf; - struct drv_cmd cmd; + u32 val; if (unlikely(cpumask_empty(mask))) return 0; - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - break; - case SYSTEM_AMD_MSR_CAPABLE: - cmd.type = SYSTEM_AMD_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = to_perf_data(data); - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; - } - - cmd.mask = mask; - drv_read(&cmd); + val = drv_read(data, mask); - pr_debug("get_cur_val = %u\n", cmd.val); + pr_debug("get_cur_val = %u\n", val); - return cmd.val; + return val; } static unsigned int get_cur_freq_on_cpu(unsigned int cpu) @@ -416,7 +412,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; - struct drv_cmd cmd; + const struct cpumask *mask; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; @@ -434,42 +430,21 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } else { pr_debug("Already at target state (P%d)\n", next_perf_state); - goto out; + return 0; } } - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_AMD_MSR_CAPABLE: - cmd.type = SYSTEM_AMD_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - result = -ENODEV; - goto out; - } - - /* cpufreq holds the hotplug lock, so we are safe from here on */ - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = policy->cpus; - else - cmd.mask = cpumask_of(policy->cpu); + /* + * The core won't allow CPUs to go away until the governor has been + * stopped, so we can rely on the stability of policy->cpus. + */ + mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ? + cpumask_of(policy->cpu) : policy->cpus; - drv_write(&cmd); + drv_write(data, mask, perf->states[next_perf_state].control); if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, data->freq_table[index].frequency, + if (!check_freqs(mask, data->freq_table[index].frequency, data)) { pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); @@ -480,7 +455,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (!result) perf->state = next_perf_state; -out: return result; } @@ -740,15 +714,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } pr_debug("SYSTEM IO addr space\n"); data->cpu_feature = SYSTEM_IO_CAPABLE; + data->cpu_freq_read = cpu_freq_read_io; + data->cpu_freq_write = cpu_freq_write_io; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); if (check_est_cpu(cpu)) { data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + data->cpu_freq_read = cpu_freq_read_intel; + data->cpu_freq_write = cpu_freq_write_intel; break; } if (check_amd_hwpstate_cpu(cpu)) { data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; + data->cpu_freq_read = cpu_freq_read_amd; + data->cpu_freq_write = cpu_freq_write_amd; break; } result = -ENODEV; diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f6b79ab..404360c 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -21,7 +21,7 @@ #include <asm/msr.h> #include <asm/cpufeature.h> -#include "cpufreq_governor.h" +#include "cpufreq_ondemand.h" #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081 @@ -45,10 +45,10 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, long d_actual, d_reference; struct msr actual, reference; struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu); - struct dbs_data *od_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_cpu_dbs_info_s *od_info = - od_data->cdata->get_cpu_dbs_info_s(policy->cpu); + struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); if (!od_info->freq_table) return freq_next; diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 0ca74d0..f951f91 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -31,9 +31,8 @@ struct private_data { struct device *cpu_dev; - struct regulator *cpu_reg; struct thermal_cooling_device *cdev; - unsigned int voltage_tolerance; /* in percentage */ + const char *reg_name; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -44,175 +43,128 @@ static struct freq_attr *cpufreq_dt_attr[] = { static int set_target(struct cpufreq_policy *policy, unsigned int index) { - struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = policy->freq_table; - struct clk *cpu_clk = policy->clk; struct private_data *priv = policy->driver_data; - struct device *cpu_dev = priv->cpu_dev; - struct regulator *cpu_reg = priv->cpu_reg; - unsigned long volt = 0, tol = 0; - int volt_old = 0; - unsigned int old_freq, new_freq; - long freq_Hz, freq_exact; - int ret; - - freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); - if (freq_Hz <= 0) - freq_Hz = freq_table[index].frequency * 1000; - freq_exact = freq_Hz; - new_freq = freq_Hz / 1000; - old_freq = clk_get_rate(cpu_clk) / 1000; + return dev_pm_opp_set_rate(priv->cpu_dev, + policy->freq_table[index].frequency * 1000); +} - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq; +/* + * An earlier version of opp-v1 bindings used to name the regulator + * "cpu0-supply", we still need to handle that for backwards compatibility. + */ +static const char *find_supply_name(struct device *dev) +{ + struct device_node *np; + struct property *pp; + int cpu = dev->id; + const char *name = NULL; - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz); - if (IS_ERR(opp)) { - rcu_read_unlock(); - dev_err(cpu_dev, "failed to find OPP for %ld\n", - freq_Hz); - return PTR_ERR(opp); - } - volt = dev_pm_opp_get_voltage(opp); - opp_freq = dev_pm_opp_get_freq(opp); - rcu_read_unlock(); - tol = volt * priv->voltage_tolerance / 100; - volt_old = regulator_get_voltage(cpu_reg); - dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n", - opp_freq / 1000, volt); - } + np = of_node_get(dev->of_node); - dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", - old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, - new_freq / 1000, volt ? volt / 1000 : -1); + /* This must be valid for sure */ + if (WARN_ON(!np)) + return NULL; - /* scaling up? scale voltage before frequency */ - if (!IS_ERR(cpu_reg) && new_freq > old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage up: %d\n", - ret); - return ret; + /* Try "cpu0" for older DTs */ + if (!cpu) { + pp = of_find_property(np, "cpu0-supply", NULL); + if (pp) { + name = "cpu0"; + goto node_put; } } - ret = clk_set_rate(cpu_clk, freq_exact); - if (ret) { - dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - if (!IS_ERR(cpu_reg) && volt_old > 0) - regulator_set_voltage_tol(cpu_reg, volt_old, tol); - return ret; + pp = of_find_property(np, "cpu-supply", NULL); + if (pp) { + name = "cpu"; + goto node_put; } - /* scaling down? scale voltage after frequency */ - if (!IS_ERR(cpu_reg) && new_freq < old_freq) { - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); - if (ret) { - dev_err(cpu_dev, "failed to scale voltage down: %d\n", - ret); - clk_set_rate(cpu_clk, old_freq * 1000); - } - } - - return ret; + dev_dbg(dev, "no regulator for cpu%d\n", cpu); +node_put: + of_node_put(np); + return name; } -static int allocate_resources(int cpu, struct device **cdev, - struct regulator **creg, struct clk **cclk) +static int resources_available(void) { struct device *cpu_dev; struct regulator *cpu_reg; struct clk *cpu_clk; int ret = 0; - char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg; + const char *name; - cpu_dev = get_cpu_device(cpu); + cpu_dev = get_cpu_device(0); if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); + pr_err("failed to get cpu0 device\n"); return -ENODEV; } - /* Try "cpu0" for older DTs */ - if (!cpu) - reg = reg_cpu0; - else - reg = reg_cpu; - -try_again: - cpu_reg = regulator_get_optional(cpu_dev, reg); - ret = PTR_ERR_OR_ZERO(cpu_reg); + cpu_clk = clk_get(cpu_dev, NULL); + ret = PTR_ERR_OR_ZERO(cpu_clk); if (ret) { /* - * If cpu's regulator supply node is present, but regulator is - * not yet registered, we should try defering probe. + * If cpu's clk node is present, but clock is not yet + * registered, we should try defering probe. */ - if (ret == -EPROBE_DEFER) { - dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", - cpu); - return ret; - } - - /* Try with "cpu-supply" */ - if (reg == reg_cpu0) { - reg = reg_cpu; - goto try_again; - } + if (ret == -EPROBE_DEFER) + dev_dbg(cpu_dev, "clock not ready, retry\n"); + else + dev_err(cpu_dev, "failed to get clock: %d\n", ret); - dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); + return ret; } - cpu_clk = clk_get(cpu_dev, NULL); - ret = PTR_ERR_OR_ZERO(cpu_clk); - if (ret) { - /* put regulator */ - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); + clk_put(cpu_clk); + name = find_supply_name(cpu_dev); + /* Platform doesn't require regulator */ + if (!name) + return 0; + + cpu_reg = regulator_get_optional(cpu_dev, name); + ret = PTR_ERR_OR_ZERO(cpu_reg); + if (ret) { /* - * If cpu's clk node is present, but clock is not yet - * registered, we should try defering probe. + * If cpu's regulator supply node is present, but regulator is + * not yet registered, we should try defering probe. */ if (ret == -EPROBE_DEFER) - dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); + dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n"); else - dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, - ret); - } else { - *cdev = cpu_dev; - *creg = cpu_reg; - *cclk = cpu_clk; + dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret); + + return ret; } - return ret; + regulator_put(cpu_reg); + return 0; } static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; - struct device_node *np; struct private_data *priv; struct device *cpu_dev; - struct regulator *cpu_reg; struct clk *cpu_clk; struct dev_pm_opp *suspend_opp; - unsigned long min_uV = ~0, max_uV = 0; unsigned int transition_latency; - bool need_update = false; + bool opp_v1 = false; + const char *name; int ret; - ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); - if (ret) { - pr_err("%s: Failed to allocate resources: %d\n", __func__, ret); - return ret; + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; } - np = of_node_get(cpu_dev->of_node); - if (!np) { - dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu); - ret = -ENOENT; - goto out_put_reg_clk; + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + ret = PTR_ERR(cpu_clk); + dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); + return ret; } /* Get OPP-sharing information from "operating-points-v2" bindings */ @@ -223,9 +175,23 @@ static int cpufreq_init(struct cpufreq_policy *policy) * finding shared-OPPs for backward compatibility. */ if (ret == -ENOENT) - need_update = true; + opp_v1 = true; else - goto out_node_put; + goto out_put_clk; + } + + /* + * OPP layer will be taking care of regulators now, but it needs to know + * the name of the regulator first. + */ + name = find_supply_name(cpu_dev); + if (name) { + ret = dev_pm_opp_set_regulator(cpu_dev, name); + if (ret) { + dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", + policy->cpu, ret); + goto out_put_clk; + } } /* @@ -246,12 +212,12 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ ret = dev_pm_opp_get_opp_count(cpu_dev); if (ret <= 0) { - pr_debug("OPP table is not ready, deferring probe\n"); + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); ret = -EPROBE_DEFER; goto out_free_opp; } - if (need_update) { + if (opp_v1) { struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); if (!pd || !pd->independent_clocks) @@ -265,10 +231,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); - - of_property_read_u32(np, "clock-latency", &transition_latency); - } else { - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); } priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -277,62 +239,16 @@ static int cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } - of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); - - if (!transition_latency) - transition_latency = CPUFREQ_ETERNAL; - - if (!IS_ERR(cpu_reg)) { - unsigned long opp_freq = 0; - - /* - * Disable any OPPs where the connected regulator isn't able to - * provide the specified voltage and record minimum and maximum - * voltage levels. - */ - while (1) { - struct dev_pm_opp *opp; - unsigned long opp_uV, tol_uV; - - rcu_read_lock(); - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq); - if (IS_ERR(opp)) { - rcu_read_unlock(); - break; - } - opp_uV = dev_pm_opp_get_voltage(opp); - rcu_read_unlock(); - - tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, - opp_uV - tol_uV, - opp_uV + tol_uV)) { - if (opp_uV < min_uV) - min_uV = opp_uV; - if (opp_uV > max_uV) - max_uV = opp_uV; - } else { - dev_pm_opp_disable(cpu_dev, opp_freq); - } - - opp_freq++; - } - - ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); - if (ret > 0) - transition_latency += ret * 1000; - } + priv->reg_name = name; ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { - pr_err("failed to init cpufreq table: %d\n", ret); + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); goto out_free_priv; } priv->cpu_dev = cpu_dev; - priv->cpu_reg = cpu_reg; policy->driver_data = priv; - policy->clk = cpu_clk; rcu_read_lock(); @@ -357,9 +273,11 @@ static int cpufreq_init(struct cpufreq_policy *policy) cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } - policy->cpuinfo.transition_latency = transition_latency; + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) + transition_latency = CPUFREQ_ETERNAL; - of_node_put(np); + policy->cpuinfo.transition_latency = transition_latency; return 0; @@ -369,12 +287,10 @@ out_free_priv: kfree(priv); out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); -out_node_put: - of_node_put(np); -out_put_reg_clk: + if (name) + dev_pm_opp_put_regulator(cpu_dev); +out_put_clk: clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); return ret; } @@ -386,9 +302,10 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->reg_name) + dev_pm_opp_put_regulator(priv->cpu_dev); + clk_put(policy->clk); - if (!IS_ERR(priv->cpu_reg)) - regulator_put(priv->cpu_reg); kfree(priv); return 0; @@ -441,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = { static int dt_cpufreq_probe(struct platform_device *pdev) { - struct device *cpu_dev; - struct regulator *cpu_reg; - struct clk *cpu_clk; int ret; /* @@ -453,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev) * * FIXME: Is checking this only for CPU0 sufficient ? */ - ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk); + ret = resources_available(); if (ret) return ret; - clk_put(cpu_clk); - if (!IS_ERR(cpu_reg)) - regulator_put(cpu_reg); - dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); ret = cpufreq_register_driver(&dt_cpufreq_driver); if (ret) - dev_err(cpu_dev, "failed register driver: %d\n", ret); + dev_err(&pdev->dev, "failed register driver: %d\n", ret); return ret; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e979ec7..4c78258 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -38,48 +38,10 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy) return cpumask_empty(policy->cpus); } -static bool suitable_policy(struct cpufreq_policy *policy, bool active) -{ - return active == !policy_is_inactive(policy); -} - -/* Finds Next Acive/Inactive policy */ -static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy, - bool active) -{ - do { - /* No more policies in the list */ - if (list_is_last(&policy->policy_list, &cpufreq_policy_list)) - return NULL; - - policy = list_next_entry(policy, policy_list); - } while (!suitable_policy(policy, active)); - - return policy; -} - -static struct cpufreq_policy *first_policy(bool active) -{ - struct cpufreq_policy *policy; - - /* No policies in the list */ - if (list_empty(&cpufreq_policy_list)) - return NULL; - - policy = list_first_entry(&cpufreq_policy_list, typeof(*policy), - policy_list); - - if (!suitable_policy(policy, active)) - policy = next_policy(policy, active); - - return policy; -} - /* Macros to iterate over CPU policies */ -#define for_each_suitable_policy(__policy, __active) \ - for (__policy = first_policy(__active); \ - __policy; \ - __policy = next_policy(__policy, __active)) +#define for_each_suitable_policy(__policy, __active) \ + list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ + if ((__active) == !policy_is_inactive(__policy)) #define for_each_active_policy(__policy) \ for_each_suitable_policy(__policy, true) @@ -102,7 +64,6 @@ static LIST_HEAD(cpufreq_governor_list); static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); -DEFINE_MUTEX(cpufreq_governor_lock); /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; @@ -113,10 +74,8 @@ static inline bool has_target(void) } /* internal prototypes */ -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event); +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); -static void handle_update(struct work_struct *work); /** * Two notifier lists: the "policy" list is involved in the @@ -818,12 +777,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ssize_t ret; down_read(&policy->rwsem); - - if (fattr->show) - ret = fattr->show(policy, buf); - else - ret = -EIO; - + ret = fattr->show(policy, buf); up_read(&policy->rwsem); return ret; @@ -838,18 +792,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, get_online_cpus(); - if (!cpu_online(policy->cpu)) - goto unlock; - - down_write(&policy->rwsem); - - if (fattr->store) + if (cpu_online(policy->cpu)) { + down_write(&policy->rwsem); ret = fattr->store(policy, buf, count); - else - ret = -EIO; + up_write(&policy->rwsem); + } - up_write(&policy->rwsem); -unlock: put_online_cpus(); return ret; @@ -959,6 +907,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return cpufreq_add_dev_symlink(policy); } +__weak struct cpufreq_governor *cpufreq_default_governor(void) +{ + return NULL; +} + static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; @@ -968,11 +921,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) /* Update governor of new_policy to the governor used before hotplug */ gov = find_governor(policy->last_governor); - if (gov) + if (gov) { pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); - else - gov = CPUFREQ_DEFAULT_GOVERNOR; + } else { + gov = cpufreq_default_governor(); + if (!gov) + return -ENODATA; + } new_policy.governor = gov; @@ -996,36 +952,45 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (cpumask_test_cpu(cpu, policy->cpus)) return 0; + down_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { pr_err("%s: Failed to stop governor\n", __func__); - return ret; + goto unlock; } } - down_write(&policy->rwsem); cpumask_set_cpu(cpu, policy->cpus); - up_write(&policy->rwsem); if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - if (ret) { + if (ret) pr_err("%s: Failed to start governor\n", __func__); - return ret; - } } - return 0; +unlock: + up_write(&policy->rwsem); + return ret; +} + +static void handle_update(struct work_struct *work) +{ + struct cpufreq_policy *policy = + container_of(work, struct cpufreq_policy, update); + unsigned int cpu = policy->cpu; + pr_debug("handle_update for cpu %u called\n", cpu); + cpufreq_update_policy(cpu); } static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; + int ret; if (WARN_ON(!dev)) return NULL; @@ -1043,7 +1008,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) goto err_free_rcpumask; - kobject_init(&policy->kobj, &ktype_cpufreq); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + cpufreq_global_kobject, "policy%u", cpu); + if (ret) { + pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + goto err_free_real_cpus; + } + INIT_LIST_HEAD(&policy->policy_list); init_rwsem(&policy->rwsem); spin_lock_init(&policy->transition_lock); @@ -1054,6 +1025,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) policy->cpu = cpu; return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1158,16 +1131,6 @@ static int cpufreq_online(unsigned int cpu) cpumask_copy(policy->related_cpus, policy->cpus); /* Remember CPUs present at the policy creation time. */ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); - - /* Name and add the kobject */ - ret = kobject_add(&policy->kobj, cpufreq_global_kobject, - "policy%u", - cpumask_first(policy->related_cpus)); - if (ret) { - pr_err("%s: failed to add policy->kobj: %d\n", __func__, - ret); - goto out_exit_policy; - } } /* @@ -1309,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return ret; } -static void cpufreq_offline_prepare(unsigned int cpu) +static void cpufreq_offline(unsigned int cpu) { struct cpufreq_policy *policy; + int ret; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1321,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu) return; } + down_write(&policy->rwsem); if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } - down_write(&policy->rwsem); cpumask_clear_cpu(cpu, policy->cpus); if (policy_is_inactive(policy)) { @@ -1340,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu) /* Nominate new CPU */ policy->cpu = cpumask_any(policy->cpus); } - up_write(&policy->rwsem); /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - } else if (cpufreq_driver->stop_cpu) { - cpufreq_driver->stop_cpu(policy); - } -} -static void cpufreq_offline_finish(unsigned int cpu) -{ - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - - if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); - return; + goto unlock; } - /* Only proceed for inactive policies */ - if (!policy_is_inactive(policy)) - return; + if (cpufreq_driver->stop_cpu) + cpufreq_driver->stop_cpu(policy); /* If cpu is last user of policy, free policy */ if (has_target()) { - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1386,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu) cpufreq_driver->exit(policy); policy->freq_table = NULL; } + +unlock: + up_write(&policy->rwsem); } /** @@ -1401,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - if (cpu_online(cpu)) { - cpufreq_offline_prepare(cpu); - cpufreq_offline_finish(cpu); - } + if (cpu_online(cpu)) + cpufreq_offline(cpu); cpumask_clear_cpu(cpu, policy->real_cpus); remove_cpu_dev_symlink(policy, cpu); @@ -1413,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) cpufreq_policy_free(policy, true); } -static void handle_update(struct work_struct *work) -{ - struct cpufreq_policy *policy = - container_of(work, struct cpufreq_policy, update); - unsigned int cpu = policy->cpu; - pr_debug("handle_update for cpu %u called\n", cpu); - cpufreq_update_policy(cpu); -} - /** * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're * in deep trouble. @@ -1584,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend); void cpufreq_suspend(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1594,7 +1539,11 @@ void cpufreq_suspend(void) pr_debug("%s: Suspending Governors\n", __func__); for_each_active_policy(policy) { - if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) + down_write(&policy->rwsem); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); + up_write(&policy->rwsem); + + if (ret) pr_err("%s: Failed to stop governor for policy: %p\n", __func__, policy); else if (cpufreq_driver->suspend @@ -1616,6 +1565,7 @@ suspend: void cpufreq_resume(void) { struct cpufreq_policy *policy; + int ret; if (!cpufreq_driver) return; @@ -1628,13 +1578,20 @@ void cpufreq_resume(void) pr_debug("%s: Resuming Governors\n", __func__); for_each_active_policy(policy) { - if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) + if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { pr_err("%s: Failed to resume driver: %p\n", __func__, policy); - else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) - || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) - pr_err("%s: Failed to start governor for policy: %p\n", - __func__, policy); + } else { + down_write(&policy->rwsem); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); + + if (ret) + pr_err("%s: Failed to start governor for policy: %p\n", + __func__, policy); + } } /* @@ -1846,7 +1803,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - int retval = -EINVAL; + struct cpufreq_frequency_table *freq_table; + int index, retval; if (cpufreq_disabled()) return -ENODEV; @@ -1873,34 +1831,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, policy->restore_freq = policy->cur; if (cpufreq_driver->target) - retval = cpufreq_driver->target(policy, target_freq, relation); - else if (cpufreq_driver->target_index) { - struct cpufreq_frequency_table *freq_table; - int index; - - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (unlikely(!freq_table)) { - pr_err("%s: Unable to find freq_table\n", __func__); - goto out; - } + return cpufreq_driver->target(policy, target_freq, relation); - retval = cpufreq_frequency_table_target(policy, freq_table, - target_freq, relation, &index); - if (unlikely(retval)) { - pr_err("%s: Unable to find matching freq\n", __func__); - goto out; - } + if (!cpufreq_driver->target_index) + return -EINVAL; - if (freq_table[index].frequency == policy->cur) { - retval = 0; - goto out; - } + freq_table = cpufreq_frequency_get_table(policy->cpu); + if (unlikely(!freq_table)) { + pr_err("%s: Unable to find freq_table\n", __func__); + return -EINVAL; + } - retval = __target_index(policy, freq_table, index); + retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, + relation, &index); + if (unlikely(retval)) { + pr_err("%s: Unable to find matching freq\n", __func__); + return retval; } -out: - return retval; + if (freq_table[index].frequency == policy->cur) + return 0; + + return __target_index(policy, freq_table, index); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); @@ -1920,20 +1872,14 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); -static int __cpufreq_governor(struct cpufreq_policy *policy, - unsigned int event) +__weak struct cpufreq_governor *cpufreq_fallback_governor(void) { - int ret; + return NULL; +} - /* Only must be defined when default governor is known to have latency - restrictions, like e.g. conservative or ondemand. - That this is the case is already ensured in Kconfig - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE - struct cpufreq_governor *gov = &cpufreq_gov_performance; -#else - struct cpufreq_governor *gov = NULL; -#endif +static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) +{ + int ret; /* Don't start any governor operations if we are entering suspend */ if (cpufreq_suspended) @@ -1948,12 +1894,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (policy->governor->max_transition_latency && policy->cpuinfo.transition_latency > policy->governor->max_transition_latency) { - if (!gov) - return -EINVAL; - else { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); + + if (gov) { pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; + } else { + return -EINVAL; } } @@ -1963,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - mutex_lock(&cpufreq_governor_lock); - if ((policy->governor_enabled && event == CPUFREQ_GOV_START) - || (!policy->governor_enabled - && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - mutex_unlock(&cpufreq_governor_lock); - return -EBUSY; - } - - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = false; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = true; - - mutex_unlock(&cpufreq_governor_lock); - ret = policy->governor->governor(policy, event); if (!ret) { @@ -1985,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->governor->initialized++; else if (event == CPUFREQ_GOV_POLICY_EXIT) policy->governor->initialized--; - } else { - /* Restore original values */ - mutex_lock(&cpufreq_governor_lock); - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = true; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = false; - mutex_unlock(&cpufreq_governor_lock); } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || @@ -2147,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) { /* This can happen due to race with other operations */ pr_debug("%s: Failed to Stop Governor: %s (%d)\n", @@ -2155,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - up_write(&policy->rwsem); - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); - + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2168,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); if (!ret) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) goto out; - up_write(&policy->rwsem); - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); + cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); } /* new governor failed, so re-start old one */ pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) policy->governor = NULL; else - __cpufreq_governor(policy, CPUFREQ_GOV_START); + cpufreq_governor(policy, CPUFREQ_GOV_START); } return ret; out: pr_debug("governor: change or update limits\n"); - return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); } /** @@ -2260,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - cpufreq_offline_prepare(cpu); - break; - - case CPU_POST_DEAD: - cpufreq_offline_finish(cpu); + cpufreq_offline(cpu); break; case CPU_DOWN_FAILED: @@ -2297,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state) __func__); break; } + + down_write(&policy->rwsem); policy->user_policy.max = policy->max; - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + up_write(&policy->rwsem); } } @@ -2384,7 +2303,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); * submitted by the CPU Frequency driver. * * Registers a CPU Frequency driver to this core code. This code - * returns zero on success, -EBUSY when another driver got here first + * returns zero on success, -EEXIST when another driver got here first * (and isn't unregistered in the meantime). * */ diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 606ad74ab..bf4913f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -14,6 +14,22 @@ #include <linux/slab.h> #include "cpufreq_governor.h" +struct cs_policy_dbs_info { + struct policy_dbs_info policy_dbs; + unsigned int down_skip; + unsigned int requested_freq; +}; + +static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); +} + +struct cs_dbs_tuners { + unsigned int down_threshold; + unsigned int freq_step; +}; + /* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) @@ -21,21 +37,6 @@ #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) -static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); - -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { - .name = "conservative", - .governor = cs_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -57,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, * Any frequency increase takes it to the maximum frequency. Frequency reduction * happens at minimum steps of 5% (default) of maximum frequency */ -static void cs_check_cpu(int cpu, unsigned int load) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) { - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); /* * break out if we 'cannot' reduce the speed as the user might * want freq_step to be zero */ if (cs_tuners->freq_step == 0) - return; + goto out; /* Check for frequency increase */ - if (load > cs_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ if (dbs_info->requested_freq == policy->max) - return; + goto out; dbs_info->requested_freq += get_freq_target(cs_tuners, policy); @@ -86,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_H); - return; + goto out; } /* if sampling_down_factor is active break out early */ - if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) - return; + if (++dbs_info->down_skip < dbs_data->sampling_down_factor) + goto out; dbs_info->down_skip = 0; /* Check for frequency decrease */ @@ -101,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load) * if we cannot reduce the frequency anymore, break out early */ if (policy->cur == policy->min) - return; + goto out; freq_target = get_freq_target(cs_tuners, policy); if (dbs_info->requested_freq > freq_target) @@ -111,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load) __cpufreq_driver_target(policy, dbs_info->requested_freq, CPUFREQ_RELATION_L); - return; } -} - -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) -{ - struct dbs_data *dbs_data = policy->governor_data; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - - if (modify_all) - dbs_check_cpu(dbs_data, policy->cpu); - return delay_for_sampling_rate(cs_tuners->sampling_rate); + out: + return dbs_data->sampling_rate; } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cs_cpu_dbs_info_s *dbs_info = - &per_cpu(cs_cpu_dbs_info, freq->cpu); - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != &cpufreq_gov_conservative) - return 0; - - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} + void *data); static struct notifier_block cs_cpufreq_notifier_block = { .notifier_call = dbs_cpufreq_notifier, }; /************************** sysfs interface ************************/ -static struct common_dbs_data cs_dbs_cdata; +static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -170,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - cs_tuners->sampling_down_factor = input; - return count; -} - -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - - if (ret != 1) - return -EINVAL; - - cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + dbs_data->sampling_down_factor = input; return count; } @@ -200,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) return -EINVAL; - cs_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } @@ -214,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, /* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= cs_tuners->up_threshold) + input >= dbs_data->up_threshold) return -EINVAL; cs_tuners->down_threshold = input; @@ -224,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - unsigned int input, j; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -235,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + if (input == dbs_data->ignore_nice_load) /* nothing to do */ return count; - cs_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct cs_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(cs_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, 0); - if (cs_tuners->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - } + gov_update_cpu_data(dbs_data); + return count; } @@ -275,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, return count; } -show_store_one(cs, sampling_rate); -show_store_one(cs, sampling_down_factor); -show_store_one(cs, up_threshold); -show_store_one(cs, down_threshold); -show_store_one(cs, ignore_nice_load); -show_store_one(cs, freq_step); -declare_show_sampling_rate_min(cs); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(down_threshold); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(freq_step); -gov_sys_pol_attr_ro(sampling_rate_min); - -static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &up_threshold_gov_sys.attr, - &down_threshold_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &freq_step_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(up_threshold); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one(cs, down_threshold); +gov_show_one(cs, freq_step); + +gov_attr_rw(sampling_rate); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(up_threshold); +gov_attr_rw(ignore_nice_load); +gov_attr_ro(min_sampling_rate); +gov_attr_rw(down_threshold); +gov_attr_rw(freq_step); + +static struct attribute *cs_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, NULL }; -static struct attribute_group cs_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "conservative", -}; +/************************** sysfs end ************************/ -static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &up_threshold_gov_pol.attr, - &down_threshold_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &freq_step_gov_pol.attr, - NULL -}; +static struct policy_dbs_info *cs_alloc(void) +{ + struct cs_policy_dbs_info *dbs_info; -static struct attribute_group cs_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "conservative", -}; + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? &dbs_info->policy_dbs : NULL; +} -/************************** sysfs end ************************/ +static void cs_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} static int cs_init(struct dbs_data *dbs_data, bool notify) { @@ -335,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) return -ENOMEM; } - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; tuners->freq_step = DEF_FREQUENCY_STEP; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; dbs_data->tuners = tuners; dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * @@ -361,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } -define_get_cpu_dbs_routines(cs_cpu_dbs_info); +static void cs_start(struct cpufreq_policy *policy) +{ + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + dbs_info->down_skip = 0; + dbs_info->requested_freq = policy->cur; +} -static struct common_dbs_data cs_dbs_cdata = { - .governor = GOV_CONSERVATIVE, - .attr_group_gov_sys = &cs_attr_group_gov_sys, - .attr_group_gov_pol = &cs_attr_group_gov_pol, - .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, +static struct dbs_governor cs_dbs_gov = { + .gov = { + .name = "conservative", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, + .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, - .gov_check_cpu = cs_check_cpu, + .alloc = cs_alloc, + .free = cs_free, .init = cs_init, .exit = cs_exit, - .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), + .start = cs_start, }; -static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) + +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) { - return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); + struct cpufreq_freqs *freq = data; + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); + struct cs_policy_dbs_info *dbs_info; + + if (!policy) + return 0; + + /* policy isn't governed by conservative governor */ + if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) + return 0; + + dbs_info = to_dbs_info(policy->governor_data); + /* + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of frequency available to us otherwise we do not change it + */ + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; + + return 0; } static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_conservative); + return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_conservative); + cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); } MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); @@ -399,6 +368,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return CPU_FREQ_GOV_CONSERVATIVE; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e0d1110..1c25ef4 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,95 +18,193 @@ #include <linux/export.h> #include <linux/kernel_stat.h> +#include <linux/sched.h> #include <linux/slab.h> #include "cpufreq_governor.h" -static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) -{ - if (have_governor_per_policy()) - return dbs_data->cdata->attr_group_gov_pol; - else - return dbs_data->cdata->attr_group_gov_sys; -} +static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); + +static DEFINE_MUTEX(gov_dbs_data_mutex); -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) +/* Common sysfs tunables */ +/** + * store_sampling_rate - update sampling rate effective immediately if needed. + * + * If new rate is smaller than the old, simply updating + * dbs.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. + * + * This must be called with dbs_data->mutex held, otherwise traversing + * policy_dbs_list isn't safe. + */ +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy = cdbs->shared->policy; - unsigned int sampling_rate; - unsigned int max_load = 0; - unsigned int ignore_nice; - unsigned int j; + struct policy_dbs_info *policy_dbs; + unsigned int rate; + int ret; + ret = sscanf(buf, "%u", &rate); + if (ret != 1) + return -EINVAL; - if (dbs_data->cdata->governor == GOV_ONDEMAND) { - struct od_cpu_dbs_info_s *od_dbs_info = - dbs_data->cdata->get_cpu_dbs_info_s(cpu); + dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate); + /* + * We are operating under dbs_data->mutex and so the list and its + * entries can't be freed concurrently. + */ + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + mutex_lock(&policy_dbs->timer_mutex); /* - * Sometimes, the ondemand governor uses an additional - * multiplier to give long delays. So apply this multiplier to - * the 'sampling_rate', so as to keep the wake-up-from-idle - * detection logic a bit conservative. + * On 32-bit architectures this may race with the + * sample_delay_ns read in dbs_update_util_handler(), but that + * really doesn't matter. If the read returns a value that's + * too big, the sample will be skipped, but the next invocation + * of dbs_update_util_handler() (when the update has been + * completed) will take a sample. + * + * If this runs in parallel with dbs_work_handler(), we may end + * up overwriting the sample_delay_ns value that it has just + * written, but it will be corrected next time a sample is + * taken, so it shouldn't be significant. */ - sampling_rate = od_tuners->sampling_rate; - sampling_rate *= od_dbs_info->rate_mult; + gov_update_sample_delay(policy_dbs, 0); + mutex_unlock(&policy_dbs->timer_mutex); + } - ignore_nice = od_tuners->ignore_nice_load; - } else { - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; + return count; +} +EXPORT_SYMBOL_GPL(store_sampling_rate); + +/** + * gov_update_cpu_data - Update CPU load data. + * @dbs_data: Top-level governor data pointer. + * + * Update CPU load data for all CPUs in the domain governed by @dbs_data + * (that may be a single policy or a bunch of them if governor tunables are + * system-wide). + * + * Call under the @dbs_data mutex. + */ +void gov_update_cpu_data(struct dbs_data *dbs_data) +{ + struct policy_dbs_info *policy_dbs; + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + unsigned int j; + + for_each_cpu(j, policy_dbs->policy->cpus) { + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); + + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, + dbs_data->io_is_busy); + if (dbs_data->ignore_nice_load) + j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } } +} +EXPORT_SYMBOL_GPL(gov_update_cpu_data); + +static inline struct dbs_data *to_dbs_data(struct kobject *kobj) +{ + return container_of(kobj, struct dbs_data, kobj); +} + +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + + return gattr->show(dbs_data, buf); +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct dbs_data *dbs_data = to_dbs_data(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret = -EBUSY; + + mutex_lock(&dbs_data->mutex); + + if (dbs_data->usage_count) + ret = gattr->store(dbs_data, buf, count); + + mutex_unlock(&dbs_data->mutex); + + return ret; +} + +/* + * Sysfs Ops for accessing governor attributes. + * + * All show/store invocations for governor specific sysfs attributes, will first + * call the below show/store callbacks and the attribute specific callback will + * be called from within it. + */ +static const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; + +unsigned int dbs_update(struct cpufreq_policy *policy) +{ + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; + unsigned int ignore_nice = dbs_data->ignore_nice_load; + unsigned int max_load = 0; + unsigned int sampling_rate, io_busy, j; + + /* + * Sometimes governors may use an additional multiplier to increase + * sample delays temporarily. Apply that multiplier to sampling_rate + * so as to keep the wake-up-from-idle detection logic a bit + * conservative. + */ + sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; + /* + * For the purpose of ondemand, waiting for disk IO is an indication + * that you're performance critical, and not that the system is actually + * idle, so do not add the iowait time to the CPU idle time then. + */ + io_busy = dbs_data->io_is_busy; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs; + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; - int io_busy = 0; - - j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); - /* - * For the purpose of ondemand, waiting for disk IO is - * an indication that you're performance critical, and - * not that the system is actually idle. So do not add - * the iowait time to the cpu idle time. - */ - if (dbs_data->cdata->governor == GOV_ONDEMAND) - io_busy = od_tuners->io_is_busy; cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); - wall_time = (unsigned int) - (cur_wall_time - j_cdbs->prev_cpu_wall); + wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; j_cdbs->prev_cpu_wall = cur_wall_time; - if (cur_idle_time < j_cdbs->prev_cpu_idle) - cur_idle_time = j_cdbs->prev_cpu_idle; - - idle_time = (unsigned int) - (cur_idle_time - j_cdbs->prev_cpu_idle); - j_cdbs->prev_cpu_idle = cur_idle_time; + if (cur_idle_time <= j_cdbs->prev_cpu_idle) { + idle_time = 0; + } else { + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + j_cdbs->prev_cpu_idle = cur_idle_time; + } if (ignore_nice) { - u64 cur_nice; - unsigned long cur_nice_jiffies; - - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - - cdbs->prev_cpu_nice; - /* - * Assumption: nice time between sampling periods will - * be less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); + u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - cdbs->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - idle_time += jiffies_to_usecs(cur_nice_jiffies); + idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); + j_cdbs->prev_cpu_nice = cur_nice; } if (unlikely(!wall_time || wall_time < idle_time)) @@ -128,10 +226,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) * dropped down. So we perform the copy only once, upon the * first wake-up from idle.) * - * Detecting this situation is easy: the governor's deferrable - * timer would not have fired during CPU-idle periods. Hence - * an unusually large 'wall_time' (as compared to the sampling - * rate) indicates this scenario. + * Detecting this situation is easy: the governor's utilization + * update handler would not have run during CPU-idle periods. + * Hence, an unusually large 'wall_time' (as compared to the + * sampling rate) indicates this scenario. * * prev_load can be zero in two cases and we must recalculate it * for both cases: @@ -156,222 +254,224 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) if (load > max_load) max_load = load; } - - dbs_data->cdata->gov_check_cpu(cpu, max_load); + return max_load; } -EXPORT_SYMBOL_GPL(dbs_check_cpu); +EXPORT_SYMBOL_GPL(dbs_update); -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) +static void gov_set_update_util(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; + struct cpufreq_policy *policy = policy_dbs->policy; int cpu; + gov_update_sample_delay(policy_dbs, delay_us); + policy_dbs->last_sample_time = 0; + for_each_cpu(cpu, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - cdbs->timer.expires = jiffies + delay; - add_timer_on(&cdbs->timer, cpu); + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); + + cpufreq_set_update_util_data(cpu, &cdbs->update_util); } } -EXPORT_SYMBOL_GPL(gov_add_timers); -static inline void gov_cancel_timers(struct cpufreq_policy *policy) +static inline void gov_clear_update_util(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - struct cpu_dbs_info *cdbs; int i; - for_each_cpu(i, policy->cpus) { - cdbs = dbs_data->cdata->get_cpu_cdbs(i); - del_timer_sync(&cdbs->timer); - } -} + for_each_cpu(i, policy->cpus) + cpufreq_set_update_util_data(i, NULL); -void gov_cancel_work(struct cpu_common_dbs_info *shared) -{ - /* Tell dbs_timer_handler() to skip queuing up work items. */ - atomic_inc(&shared->skip_work); - /* - * If dbs_timer_handler() is already running, it may not notice the - * incremented skip_work, so wait for it to complete to prevent its work - * item from being queued up after the cancel_work_sync() below. - */ - gov_cancel_timers(shared->policy); - /* - * In case dbs_timer_handler() managed to run and spawn a work item - * before the timers have been canceled, wait for that work item to - * complete and then cancel all of the timers set up by it. If - * dbs_timer_handler() runs again at that point, it will see the - * positive value of skip_work and won't spawn any more work items. - */ - cancel_work_sync(&shared->work); - gov_cancel_timers(shared->policy); - atomic_set(&shared->skip_work, 0); + synchronize_sched(); } -EXPORT_SYMBOL_GPL(gov_cancel_work); -/* Will return if we need to evaluate cpu load again or not */ -static bool need_load_eval(struct cpu_common_dbs_info *shared, - unsigned int sampling_rate) +static void gov_cancel_work(struct cpufreq_policy *policy) { - if (policy_is_shared(shared->policy)) { - ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); - - /* Do nothing if we recently have sampled */ - if (delta_us < (s64)(sampling_rate / 2)) - return false; - else - shared->time_stamp = time_now; - } + struct policy_dbs_info *policy_dbs = policy->governor_data; - return true; + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } static void dbs_work_handler(struct work_struct *work) { - struct cpu_common_dbs_info *shared = container_of(work, struct - cpu_common_dbs_info, work); + struct policy_dbs_info *policy_dbs; struct cpufreq_policy *policy; - struct dbs_data *dbs_data; - unsigned int sampling_rate, delay; - bool eval_load; - - policy = shared->policy; - dbs_data = policy->governor_data; + struct dbs_governor *gov; - /* Kill all timers */ - gov_cancel_timers(policy); + policy_dbs = container_of(work, struct policy_dbs_info, work); + policy = policy_dbs->policy; + gov = dbs_governor_of(policy); - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - - sampling_rate = cs_tuners->sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - sampling_rate = od_tuners->sampling_rate; - } - - eval_load = need_load_eval(shared, sampling_rate); + /* + * Make sure cpufreq_governor_limits() isn't evaluating load or the + * ondemand governor isn't updating the sampling rate in parallel. + */ + mutex_lock(&policy_dbs->timer_mutex); + gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); + mutex_unlock(&policy_dbs->timer_mutex); + /* Allow the utilization update handler to queue up more work. */ + atomic_set(&policy_dbs->work_count, 0); /* - * Make sure cpufreq_governor_limits() isn't evaluating load in - * parallel. + * If the update below is reordered with respect to the sample delay + * modification, the utilization update handler may end up using a stale + * sample delay value. */ - mutex_lock(&shared->timer_mutex); - delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); - mutex_unlock(&shared->timer_mutex); + smp_wmb(); + policy_dbs->work_in_progress = false; +} - atomic_dec(&shared->skip_work); +static void dbs_irq_work(struct irq_work *irq_work) +{ + struct policy_dbs_info *policy_dbs; - gov_add_timers(policy, delay); + policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); + schedule_work(&policy_dbs->work); } -static void dbs_timer_handler(unsigned long data) +static void dbs_update_util_handler(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) { - struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; - struct cpu_common_dbs_info *shared = cdbs->shared; + struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; + u64 delta_ns, lst; /* - * Timer handler may not be allowed to queue the work at the moment, - * because: - * - Another timer handler has done that - * - We are stopping the governor - * - Or we are updating the sampling rate of the ondemand governor + * The work may not be allowed to be queued up right now. + * Possible reasons: + * - Work has already been queued up or is in progress. + * - It is too early (too little time from the previous sample). */ - if (atomic_inc_return(&shared->skip_work) > 1) - atomic_dec(&shared->skip_work); - else - queue_work(system_wq, &shared->work); -} + if (policy_dbs->work_in_progress) + return; -static void set_sampling_rate(struct dbs_data *dbs_data, - unsigned int sampling_rate) -{ - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - cs_tuners->sampling_rate = sampling_rate; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - od_tuners->sampling_rate = sampling_rate; + /* + * If the reads below are reordered before the check above, the value + * of sample_delay_ns used in the computation may be stale. + */ + smp_rmb(); + lst = READ_ONCE(policy_dbs->last_sample_time); + delta_ns = time - lst; + if ((s64)delta_ns < policy_dbs->sample_delay_ns) + return; + + /* + * If the policy is not shared, the irq_work may be queued up right away + * at this point. Otherwise, we need to ensure that only one of the + * CPUs sharing the policy will do that. + */ + if (policy_dbs->is_shared) { + if (!atomic_add_unless(&policy_dbs->work_count, 1, 1)) + return; + + /* + * If another CPU updated last_sample_time in the meantime, we + * shouldn't be here, so clear the work counter and bail out. + */ + if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) { + atomic_set(&policy_dbs->work_count, 0); + return; + } } + + policy_dbs->last_sample_time = time; + policy_dbs->work_in_progress = true; + irq_work_queue(&policy_dbs->irq_work); } -static int alloc_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) +static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, + struct dbs_governor *gov) { - struct cpu_common_dbs_info *shared; + struct policy_dbs_info *policy_dbs; int j; - /* Allocate memory for the common information for policy->cpus */ - shared = kzalloc(sizeof(*shared), GFP_KERNEL); - if (!shared) - return -ENOMEM; + /* Allocate memory for per-policy governor data. */ + policy_dbs = gov->alloc(); + if (!policy_dbs) + return NULL; - /* Set shared for all CPUs, online+offline */ - for_each_cpu(j, policy->related_cpus) - cdata->get_cpu_cdbs(j)->shared = shared; + policy_dbs->policy = policy; + mutex_init(&policy_dbs->timer_mutex); + atomic_set(&policy_dbs->work_count, 0); + init_irq_work(&policy_dbs->irq_work, dbs_irq_work); + INIT_WORK(&policy_dbs->work, dbs_work_handler); - mutex_init(&shared->timer_mutex); - atomic_set(&shared->skip_work, 0); - INIT_WORK(&shared->work, dbs_work_handler); - return 0; + /* Set policy_dbs for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); + + j_cdbs->policy_dbs = policy_dbs; + j_cdbs->update_util.func = dbs_update_util_handler; + } + return policy_dbs; } -static void free_common_dbs_info(struct cpufreq_policy *policy, - struct common_dbs_data *cdata) +static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, + struct dbs_governor *gov) { - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; int j; - mutex_destroy(&shared->timer_mutex); + mutex_destroy(&policy_dbs->timer_mutex); - for_each_cpu(j, policy->cpus) - cdata->get_cpu_cdbs(j)->shared = NULL; + for_each_cpu(j, policy_dbs->policy->related_cpus) { + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); - kfree(shared); + j_cdbs->policy_dbs = NULL; + j_cdbs->update_util.func = NULL; + } + gov->free(policy_dbs); } -static int cpufreq_governor_init(struct cpufreq_policy *policy, - struct dbs_data *dbs_data, - struct common_dbs_data *cdata) +static int cpufreq_governor_init(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); + struct dbs_data *dbs_data; + struct policy_dbs_info *policy_dbs; unsigned int latency; - int ret; + int ret = 0; /* State should be equivalent to EXIT */ if (policy->governor_data) return -EBUSY; - if (dbs_data) { - if (WARN_ON(have_governor_per_policy())) - return -EINVAL; + policy_dbs = alloc_policy_dbs_info(policy, gov); + if (!policy_dbs) + return -ENOMEM; - ret = alloc_common_dbs_info(policy, cdata); - if (ret) - return ret; + /* Protect gov->gdbs_data against concurrent updates. */ + mutex_lock(&gov_dbs_data_mutex); + dbs_data = gov->gdbs_data; + if (dbs_data) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_policy_dbs_info; + } + policy_dbs->dbs_data = dbs_data; + policy->governor_data = policy_dbs; + + mutex_lock(&dbs_data->mutex); dbs_data->usage_count++; - policy->governor_data = dbs_data; - return 0; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + mutex_unlock(&dbs_data->mutex); + goto out; } dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); - if (!dbs_data) - return -ENOMEM; - - ret = alloc_common_dbs_info(policy, cdata); - if (ret) - goto free_dbs_data; + if (!dbs_data) { + ret = -ENOMEM; + goto free_policy_dbs_info; + } - dbs_data->cdata = cdata; - dbs_data->usage_count = 1; + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); + mutex_init(&dbs_data->mutex); - ret = cdata->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_common_dbs_info; + goto free_policy_dbs_info; /* policy latency is in ns. Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -381,216 +481,156 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, /* Bring kernel and HW constraints together */ dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, MIN_LATENCY_MULTIPLIER * latency); - set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, - latency * LATENCY_MULTIPLIER)); + dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, + LATENCY_MULTIPLIER * latency); if (!have_governor_per_policy()) - cdata->gdbs_data = dbs_data; + gov->gdbs_data = dbs_data; - policy->governor_data = dbs_data; + policy->governor_data = policy_dbs; - ret = sysfs_create_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); - if (ret) - goto reset_gdbs_data; + policy_dbs->dbs_data = dbs_data; + dbs_data->usage_count = 1; + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); - return 0; + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, + get_governor_parent_kobj(policy), + "%s", gov->gov.name); + if (!ret) + goto out; + + /* Failure, so roll back. */ + pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); -reset_gdbs_data: policy->governor_data = NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; - cdata->exit(dbs_data, !policy->governor->initialized); -free_common_dbs_info: - free_common_dbs_info(policy, cdata); -free_dbs_data: + gov->gdbs_data = NULL; + gov->exit(dbs_data, !policy->governor->initialized); kfree(dbs_data); + +free_policy_dbs_info: + free_policy_dbs_info(policy_dbs, gov); + +out: + mutex_unlock(&gov_dbs_data_mutex); return ret; } -static int cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy) { - struct common_dbs_data *cdata = dbs_data->cdata; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct dbs_governor *gov = dbs_governor_of(policy); + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; + int count; - /* State should be equivalent to INIT */ - if (!cdbs->shared || cdbs->shared->policy) - return -EBUSY; + /* Protect gov->gdbs_data against concurrent updates. */ + mutex_lock(&gov_dbs_data_mutex); + + mutex_lock(&dbs_data->mutex); + list_del(&policy_dbs->list); + count = --dbs_data->usage_count; + mutex_unlock(&dbs_data->mutex); - if (!--dbs_data->usage_count) { - sysfs_remove_group(get_governor_parent_kobj(policy), - get_sysfs_attr(dbs_data)); + if (!count) { + kobject_put(&dbs_data->kobj); policy->governor_data = NULL; if (!have_governor_per_policy()) - cdata->gdbs_data = NULL; + gov->gdbs_data = NULL; - cdata->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data, policy->governor->initialized == 1); + mutex_destroy(&dbs_data->mutex); kfree(dbs_data); } else { policy->governor_data = NULL; } - free_common_dbs_info(policy, cdata); + free_policy_dbs_info(policy_dbs, gov); + + mutex_unlock(&gov_dbs_data_mutex); return 0; } -static int cpufreq_governor_start(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_start(struct cpufreq_policy *policy) { - struct common_dbs_data *cdata = dbs_data->cdata; - unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; - int io_busy = 0; + struct dbs_governor *gov = dbs_governor_of(policy); + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; + unsigned int sampling_rate, ignore_nice, j; + unsigned int io_busy; if (!policy->cur) return -EINVAL; - /* State should be equivalent to INIT */ - if (!shared || shared->policy) - return -EBUSY; + policy_dbs->is_shared = policy_is_shared(policy); + policy_dbs->rate_mult = 1; - if (cdata->governor == GOV_CONSERVATIVE) { - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - - sampling_rate = cs_tuners->sampling_rate; - ignore_nice = cs_tuners->ignore_nice_load; - } else { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - - sampling_rate = od_tuners->sampling_rate; - ignore_nice = od_tuners->ignore_nice_load; - io_busy = od_tuners->io_is_busy; - } - - shared->policy = policy; - shared->time_stamp = ktime_get(); + sampling_rate = dbs_data->sampling_rate; + ignore_nice = dbs_data->ignore_nice_load; + io_busy = dbs_data->io_is_busy; for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); unsigned int prev_load; - j_cdbs->prev_cpu_idle = - get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); - prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - - j_cdbs->prev_cpu_idle); - j_cdbs->prev_load = 100 * prev_load / - (unsigned int)j_cdbs->prev_cpu_wall; + prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; + j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - - __setup_timer(&j_cdbs->timer, dbs_timer_handler, - (unsigned long)j_cdbs, - TIMER_DEFERRABLE | TIMER_IRQSAFE); } - if (cdata->governor == GOV_CONSERVATIVE) { - struct cs_cpu_dbs_info_s *cs_dbs_info = - cdata->get_cpu_dbs_info_s(cpu); - - cs_dbs_info->down_skip = 0; - cs_dbs_info->requested_freq = policy->cur; - } else { - struct od_ops *od_ops = cdata->gov_ops; - struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); - - od_dbs_info->rate_mult = 1; - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; - od_ops->powersave_bias_init_cpu(cpu); - } + gov->start(policy); - gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); + gov_set_update_util(policy_dbs, sampling_rate); return 0; } -static int cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); - struct cpu_common_dbs_info *shared = cdbs->shared; - - /* State should be equivalent to START */ - if (!shared || !shared->policy) - return -EBUSY; - - gov_cancel_work(shared); - shared->policy = NULL; - + gov_cancel_work(policy); return 0; } -static int cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy) { - struct common_dbs_data *cdata = dbs_data->cdata; - unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct policy_dbs_info *policy_dbs = policy->governor_data; - /* State should be equivalent to START */ - if (!cdbs->shared || !cdbs->shared->policy) - return -EBUSY; + mutex_lock(&policy_dbs->timer_mutex); + + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + + gov_update_sample_delay(policy_dbs, 0); - mutex_lock(&cdbs->shared->timer_mutex); - if (policy->max < cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > cdbs->shared->policy->cur) - __cpufreq_driver_target(cdbs->shared->policy, policy->min, - CPUFREQ_RELATION_L); - dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cdbs->shared->timer_mutex); + mutex_unlock(&policy_dbs->timer_mutex); return 0; } -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event) +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - struct dbs_data *dbs_data; - int ret; - - /* Lock governor to block concurrent initialization of governor */ - mutex_lock(&cdata->mutex); - - if (have_governor_per_policy()) - dbs_data = policy->governor_data; - else - dbs_data = cdata->gdbs_data; - - if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { - ret = -EINVAL; - goto unlock; - } - - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - ret = cpufreq_governor_init(policy, dbs_data, cdata); - break; - case CPUFREQ_GOV_POLICY_EXIT: - ret = cpufreq_governor_exit(policy, dbs_data); - break; - case CPUFREQ_GOV_START: - ret = cpufreq_governor_start(policy, dbs_data); - break; - case CPUFREQ_GOV_STOP: - ret = cpufreq_governor_stop(policy, dbs_data); - break; - case CPUFREQ_GOV_LIMITS: - ret = cpufreq_governor_limits(policy, dbs_data); - break; - default: - ret = -EINVAL; + if (event == CPUFREQ_GOV_POLICY_INIT) { + return cpufreq_governor_init(policy); + } else if (policy->governor_data) { + switch (event) { + case CPUFREQ_GOV_POLICY_EXIT: + return cpufreq_governor_exit(policy); + case CPUFREQ_GOV_START: + return cpufreq_governor_start(policy); + case CPUFREQ_GOV_STOP: + return cpufreq_governor_stop(policy); + case CPUFREQ_GOV_LIMITS: + return cpufreq_governor_limits(policy); + } } - -unlock: - mutex_unlock(&cdata->mutex); - - return ret; + return -EINVAL; } EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 91e767a..61ff82f 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -18,6 +18,7 @@ #define _CPUFREQ_GOVERNOR_H #include <linux/atomic.h> +#include <linux/irq_work.h> #include <linux/cpufreq.h> #include <linux/kernel_stat.h> #include <linux/module.h> @@ -41,96 +42,68 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* - * Macro for creating governors sysfs routines - * - * - gov_sys: One governor instance per whole system - * - gov_pol: One governor instance per policy + * Abbreviations: + * dbs: used as a shortform for demand based switching It helps to keep variable + * names smaller, simpler + * cdbs: common dbs + * od_*: On-demand governor + * cs_*: Conservative governor */ -/* Create attributes */ -#define gov_sys_attr_ro(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) - -#define gov_sys_attr_rw(_name) \ -static struct global_attr _name##_gov_sys = \ -__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) - -#define gov_pol_attr_ro(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) - -#define gov_pol_attr_rw(_name) \ -static struct freq_attr _name##_gov_pol = \ -__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) +/* Governor demand based switching data (per-policy or global). */ +struct dbs_data { + int usage_count; + void *tuners; + unsigned int min_sampling_rate; + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int io_is_busy; -#define gov_sys_pol_attr_rw(_name) \ - gov_sys_attr_rw(_name); \ - gov_pol_attr_rw(_name) + struct kobject kobj; + struct list_head policy_dbs_list; + /* + * Protect concurrent updates to governor tunables from sysfs, + * policy_dbs_list and usage_count. + */ + struct mutex mutex; +}; -#define gov_sys_pol_attr_ro(_name) \ - gov_sys_attr_ro(_name); \ - gov_pol_attr_ro(_name) +/* Governor's specific attributes */ +struct dbs_data; +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct dbs_data *dbs_data, char *buf); + ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, + size_t count); +}; -/* Create show/store routines */ -#define show_one(_gov, file_name) \ -static ssize_t show_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ +#define gov_show_one(_gov, file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ { \ - struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ - return sprintf(buf, "%u\n", tuners->file_name); \ -} \ - \ -static ssize_t show_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct dbs_data *dbs_data = policy->governor_data; \ struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } -#define store_one(_gov, file_name) \ -static ssize_t store_##file_name##_gov_sys \ -(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ - return store_##file_name(dbs_data, buf, count); \ -} \ - \ -static ssize_t store_##file_name##_gov_pol \ -(struct cpufreq_policy *policy, const char *buf, size_t count) \ +#define gov_show_one_common(file_name) \ +static ssize_t show_##file_name \ +(struct dbs_data *dbs_data, char *buf) \ { \ - struct dbs_data *dbs_data = policy->governor_data; \ - return store_##file_name(dbs_data, buf, count); \ + return sprintf(buf, "%u\n", dbs_data->file_name); \ } -#define show_store_one(_gov, file_name) \ -show_one(_gov, file_name); \ -store_one(_gov, file_name) +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) -/* create helper routines */ -#define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu).cdbs; \ -} \ - \ -static void *get_cpu_dbs_info_s(int cpu) \ -{ \ - return &per_cpu(_dbs_info, cpu); \ -} - -/* - * Abbreviations: - * dbs: used as a shortform for demand based switching It helps to keep variable - * names smaller, simpler - * cdbs: common dbs - * od_*: On-demand governor - * cs_*: Conservative governor - */ +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) /* Common to all CPUs of a policy */ -struct cpu_common_dbs_info { +struct policy_dbs_info { struct cpufreq_policy *policy; /* * Per policy mutex that serializes load evaluation from limit-change @@ -138,11 +111,27 @@ struct cpu_common_dbs_info { */ struct mutex timer_mutex; - ktime_t time_stamp; - atomic_t skip_work; + u64 last_sample_time; + s64 sample_delay_ns; + atomic_t work_count; + struct irq_work irq_work; struct work_struct work; + /* dbs_data may be shared between multiple policy objects */ + struct dbs_data *dbs_data; + struct list_head list; + /* Multiplier for increasing sample delay temporarily. */ + unsigned int rate_mult; + /* Status indicators */ + bool is_shared; /* This object is used by multiple CPUs */ + bool work_in_progress; /* Work is being queued up or in progress */ }; +static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) +{ + policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC; +} + /* Per cpu structures */ struct cpu_dbs_info { u64 prev_cpu_idle; @@ -155,54 +144,14 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct timer_list timer; - struct cpu_common_dbs_info *shared; -}; - -struct od_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; - struct cpufreq_frequency_table *freq_table; - unsigned int freq_lo; - unsigned int freq_lo_jiffies; - unsigned int freq_hi_jiffies; - unsigned int rate_mult; - unsigned int sample_type:1; -}; - -struct cs_cpu_dbs_info_s { - struct cpu_dbs_info cdbs; - unsigned int down_skip; - unsigned int requested_freq; -}; - -/* Per policy Governors sysfs tunables */ -struct od_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; - unsigned int powersave_bias; - unsigned int io_is_busy; -}; - -struct cs_dbs_tuners { - unsigned int ignore_nice_load; - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; - unsigned int down_threshold; - unsigned int freq_step; + struct update_util_data update_util; + struct policy_dbs_info *policy_dbs; }; /* Common Governor data across policies */ -struct dbs_data; -struct common_dbs_data { - /* Common across governors */ - #define GOV_ONDEMAND 0 - #define GOV_CONSERVATIVE 1 - int governor; - struct attribute_group *attr_group_gov_sys; /* one governor - system */ - struct attribute_group *attr_group_gov_pol; /* one governor - policy */ +struct dbs_governor { + struct cpufreq_governor gov; + struct kobj_type kobj_type; /* * Common data for platforms that don't set @@ -210,74 +159,32 @@ struct common_dbs_data { */ struct dbs_data *gdbs_data; - struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); - void *(*get_cpu_dbs_info_s)(int cpu); - unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, - bool modify_all); - void (*gov_check_cpu)(int cpu, unsigned int load); + unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); + struct policy_dbs_info *(*alloc)(void); + void (*free)(struct policy_dbs_info *policy_dbs); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); - - /* Governor specific ops, see below */ - void *gov_ops; - - /* - * Protects governor's data (struct dbs_data and struct common_dbs_data) - */ - struct mutex mutex; + void (*start)(struct cpufreq_policy *policy); }; -/* Governor Per policy data */ -struct dbs_data { - struct common_dbs_data *cdata; - unsigned int min_sampling_rate; - int usage_count; - void *tuners; -}; +static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) +{ + return container_of(policy->governor, struct dbs_governor, gov); +} -/* Governor specific ops, will be passed to dbs_data->gov_ops */ +/* Governor specific operations */ struct od_ops { - void (*powersave_bias_init_cpu)(int cpu); unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation); - void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); }; -static inline int delay_for_sampling_rate(unsigned int sampling_rate) -{ - int delay = usecs_to_jiffies(sampling_rate); - - /* We want all CPUs to do sampling nearly on same jiffy */ - if (num_online_cpus() > 1) - delay -= jiffies % delay; - - return delay; -} - -#define declare_show_sampling_rate_min(_gov) \ -static ssize_t show_sampling_rate_min_gov_sys \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} \ - \ -static ssize_t show_sampling_rate_min_gov_pol \ -(struct cpufreq_policy *policy, char *buf) \ -{ \ - struct dbs_data *dbs_data = policy->governor_data; \ - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ -} - -extern struct mutex cpufreq_governor_lock; - -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); -void gov_cancel_work(struct cpu_common_dbs_info *shared); -void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, - struct common_dbs_data *cdata, unsigned int event); +unsigned int dbs_update(struct cpufreq_policy *policy); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); +ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count); +void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index eae5107..acd8027 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -16,7 +16,8 @@ #include <linux/percpu-defs.h> #include <linux/slab.h> #include <linux/tick.h> -#include "cpufreq_governor.h" + +#include "cpufreq_ondemand.h" /* On-demand governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) @@ -27,24 +28,10 @@ #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); - static struct od_ops od_ops; -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static struct cpufreq_governor cpufreq_gov_ondemand; -#endif - static unsigned int default_powersave_bias; -static void ondemand_powersave_bias_init_cpu(int cpu) -{ - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); - dbs_info->freq_lo = 0; -} - /* * Not all CPUs want IO time to be accounted as busy; this depends on how * efficient idling at a higher frequency/voltage is. @@ -70,8 +57,8 @@ static int should_io_be_busy(void) /* * Find right freq to be set now with powersave_bias on. - * Returns the freq_hi to be used right now and will set freq_hi_jiffies, - * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + * Returns the freq_hi to be used right now and will set freq_hi_delay_us, + * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. */ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_next, unsigned int relation) @@ -79,15 +66,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; unsigned int index = 0; - unsigned int jiffies_total, jiffies_hi, jiffies_lo; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - policy->cpu); - struct dbs_data *dbs_data = policy->governor_data; + unsigned int delay_hi_us; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (!dbs_info->freq_table) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_next; } @@ -110,31 +97,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { dbs_info->freq_lo = 0; - dbs_info->freq_lo_jiffies = 0; + dbs_info->freq_lo_delay_us = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); - jiffies_hi = (freq_avg - freq_lo) * jiffies_total; - jiffies_hi += ((freq_hi - freq_lo) / 2); - jiffies_hi /= (freq_hi - freq_lo); - jiffies_lo = jiffies_total - jiffies_hi; + delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; + delay_hi_us += (freq_hi - freq_lo) / 2; + delay_hi_us /= freq_hi - freq_lo; + dbs_info->freq_hi_delay_us = delay_hi_us; dbs_info->freq_lo = freq_lo; - dbs_info->freq_lo_jiffies = jiffies_lo; - dbs_info->freq_hi_jiffies = jiffies_hi; + dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; return freq_hi; } -static void ondemand_powersave_bias_init(void) +static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { - int i; - for_each_online_cpu(i) { - ondemand_powersave_bias_init_cpu(i); - } + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); + dbs_info->freq_lo = 0; } static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) { - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; if (od_tuners->powersave_bias) @@ -152,21 +138,21 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. */ -static void od_check_cpu(int cpu, unsigned int load) +static void od_update(struct cpufreq_policy *policy) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); + struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int load = dbs_update(policy); dbs_info->freq_lo = 0; /* Check for frequency increase */ - if (load > od_tuners->up_threshold) { + if (load > dbs_data->up_threshold) { /* If switching to max speed, apply sampling_down_factor */ if (policy->cur < policy->max) - dbs_info->rate_mult = - od_tuners->sampling_down_factor; + policy_dbs->rate_mult = dbs_data->sampling_down_factor; dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ @@ -177,177 +163,70 @@ static void od_check_cpu(int cpu, unsigned int load) freq_next = min_f + load * (max_f - min_f) / 100; /* No longer fully busy, reset rate_mult */ - dbs_info->rate_mult = 1; + policy_dbs->rate_mult = 1; - if (!od_tuners->powersave_bias) { - __cpufreq_driver_target(policy, freq_next, - CPUFREQ_RELATION_C); - return; - } + if (od_tuners->powersave_bias) + freq_next = od_ops.powersave_bias_target(policy, + freq_next, + CPUFREQ_RELATION_L); - freq_next = od_ops.powersave_bias_target(policy, freq_next, - CPUFREQ_RELATION_L); __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); } } -static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) +static unsigned int od_dbs_timer(struct cpufreq_policy *policy) { - struct dbs_data *dbs_data = policy->governor_data; - unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - cpu); - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - int delay = 0, sample_type = dbs_info->sample_type; - - if (!modify_all) - goto max_delay; + struct policy_dbs_info *policy_dbs = policy->governor_data; + struct dbs_data *dbs_data = policy_dbs->dbs_data; + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); + int sample_type = dbs_info->sample_type; /* Common NORMAL_SAMPLE setup */ dbs_info->sample_type = OD_NORMAL_SAMPLE; - if (sample_type == OD_SUB_SAMPLE) { - delay = dbs_info->freq_lo_jiffies; + /* + * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore + * it then. + */ + if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); - } else { - dbs_check_cpu(dbs_data, cpu); - if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = OD_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; - } + return dbs_info->freq_lo_delay_us; } -max_delay: - if (!delay) - delay = delay_for_sampling_rate(od_tuners->sampling_rate - * dbs_info->rate_mult); - - return delay; -} - -/************************** sysfs interface ************************/ -static struct common_dbs_data od_dbs_cdata; - -/** - * update_sampling_rate - update sampling rate effective immediately if needed. - * @new_rate: new sampling rate - * - * If new rate is smaller than the old, simply updating - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the - * original sampling_rate was 1 second and the requested new sampling rate is 10 - * ms because the user needs immediate reaction from ondemand governor, but not - * sure if higher frequency will be required or not, then, the governor may - * change the sampling rate too late; up to 1 second later. Thus, if we are - * reducing the sampling rate, we need to make the new value effective - * immediately. - */ -static void update_sampling_rate(struct dbs_data *dbs_data, - unsigned int new_rate) -{ - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - struct cpumask cpumask; - int cpu; - - od_tuners->sampling_rate = new_rate = max(new_rate, - dbs_data->min_sampling_rate); - - /* - * Lock governor so that governor start/stop can't execute in parallel. - */ - mutex_lock(&od_dbs_cdata.mutex); - - cpumask_copy(&cpumask, cpu_online_mask); - - for_each_cpu(cpu, &cpumask) { - struct cpufreq_policy *policy; - struct od_cpu_dbs_info_s *dbs_info; - struct cpu_dbs_info *cdbs; - struct cpu_common_dbs_info *shared; - unsigned long next_sampling, appointed_at; - - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cdbs = &dbs_info->cdbs; - shared = cdbs->shared; - - /* - * A valid shared and shared->policy means governor hasn't - * stopped or exited yet. - */ - if (!shared || !shared->policy) - continue; - - policy = shared->policy; - - /* clear all CPUs of this policy */ - cpumask_andnot(&cpumask, &cpumask, policy->cpus); + od_update(policy); - /* - * Update sampling rate for CPUs whose policy is governed by - * dbs_data. In case of governor_per_policy, only a single - * policy will be governed by dbs_data, otherwise there can be - * multiple policies that are governed by the same dbs_data. - */ - if (dbs_data != policy->governor_data) - continue; - - /* - * Checking this for any CPU should be fine, timers for all of - * them are scheduled together. - */ - next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.timer.expires; - - if (time_before(next_sampling, appointed_at)) { - gov_cancel_work(shared); - gov_add_timers(policy, usecs_to_jiffies(new_rate)); - - } + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = OD_SUB_SAMPLE; + return dbs_info->freq_hi_delay_us; } - mutex_unlock(&od_dbs_cdata.mutex); + return dbs_data->sampling_rate * policy_dbs->rate_mult; } -static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, - size_t count) -{ - unsigned int input; - int ret; - ret = sscanf(buf, "%u", &input); - if (ret != 1) - return -EINVAL; - - update_sampling_rate(dbs_data, input); - return count; -} +/************************** sysfs interface ************************/ +static struct dbs_governor od_dbs_gov; static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; - unsigned int j; ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - od_tuners->io_is_busy = !!input; + dbs_data->io_is_busy = !!input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - } + gov_update_cpu_data(dbs_data); + return count; } static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -357,40 +236,43 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, return -EINVAL; } - od_tuners->up_threshold = input; + dbs_data->up_threshold = input; return count; } static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; - unsigned int input, j; + struct policy_dbs_info *policy_dbs; + unsigned int input; int ret; ret = sscanf(buf, "%u", &input); if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - od_tuners->sampling_down_factor = input; + + dbs_data->sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, - j); - dbs_info->rate_mult = 1; + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + /* + * Doing this without locking might lead to using different + * rate_mult values in od_update() and od_dbs_timer(). + */ + mutex_lock(&policy_dbs->timer_mutex); + policy_dbs->rate_mult = 1; + mutex_unlock(&policy_dbs->timer_mutex); } + return count; } static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, const char *buf, size_t count) { - struct od_dbs_tuners *od_tuners = dbs_data->tuners; unsigned int input; int ret; - unsigned int j; - ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -398,22 +280,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, if (input > 1) input = 1; - if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + if (input == dbs_data->ignore_nice_load) { /* nothing to do */ return count; } - od_tuners->ignore_nice_load = input; + dbs_data->ignore_nice_load = input; /* we need to re-evaluate prev_cpu_idle */ - for_each_online_cpu(j) { - struct od_cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); - if (od_tuners->ignore_nice_load) - dbs_info->cdbs.prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + gov_update_cpu_data(dbs_data); - } return count; } @@ -421,6 +295,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, size_t count) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct policy_dbs_info *policy_dbs; unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -432,59 +307,54 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, input = 1000; od_tuners->powersave_bias = input; - ondemand_powersave_bias_init(); + + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) + ondemand_powersave_bias_init(policy_dbs->policy); + return count; } -show_store_one(od, sampling_rate); -show_store_one(od, io_is_busy); -show_store_one(od, up_threshold); -show_store_one(od, sampling_down_factor); -show_store_one(od, ignore_nice_load); -show_store_one(od, powersave_bias); -declare_show_sampling_rate_min(od); - -gov_sys_pol_attr_rw(sampling_rate); -gov_sys_pol_attr_rw(io_is_busy); -gov_sys_pol_attr_rw(up_threshold); -gov_sys_pol_attr_rw(sampling_down_factor); -gov_sys_pol_attr_rw(ignore_nice_load); -gov_sys_pol_attr_rw(powersave_bias); -gov_sys_pol_attr_ro(sampling_rate_min); - -static struct attribute *dbs_attributes_gov_sys[] = { - &sampling_rate_min_gov_sys.attr, - &sampling_rate_gov_sys.attr, - &up_threshold_gov_sys.attr, - &sampling_down_factor_gov_sys.attr, - &ignore_nice_load_gov_sys.attr, - &powersave_bias_gov_sys.attr, - &io_is_busy_gov_sys.attr, +gov_show_one_common(sampling_rate); +gov_show_one_common(up_threshold); +gov_show_one_common(sampling_down_factor); +gov_show_one_common(ignore_nice_load); +gov_show_one_common(min_sampling_rate); +gov_show_one_common(io_is_busy); +gov_show_one(od, powersave_bias); + +gov_attr_rw(sampling_rate); +gov_attr_rw(io_is_busy); +gov_attr_rw(up_threshold); +gov_attr_rw(sampling_down_factor); +gov_attr_rw(ignore_nice_load); +gov_attr_rw(powersave_bias); +gov_attr_ro(min_sampling_rate); + +static struct attribute *od_attributes[] = { + &min_sampling_rate.attr, + &sampling_rate.attr, + &up_threshold.attr, + &sampling_down_factor.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + &io_is_busy.attr, NULL }; -static struct attribute_group od_attr_group_gov_sys = { - .attrs = dbs_attributes_gov_sys, - .name = "ondemand", -}; +/************************** sysfs end ************************/ -static struct attribute *dbs_attributes_gov_pol[] = { - &sampling_rate_min_gov_pol.attr, - &sampling_rate_gov_pol.attr, - &up_threshold_gov_pol.attr, - &sampling_down_factor_gov_pol.attr, - &ignore_nice_load_gov_pol.attr, - &powersave_bias_gov_pol.attr, - &io_is_busy_gov_pol.attr, - NULL -}; +static struct policy_dbs_info *od_alloc(void) +{ + struct od_policy_dbs_info *dbs_info; -static struct attribute_group od_attr_group_gov_pol = { - .attrs = dbs_attributes_gov_pol, - .name = "ondemand", -}; + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); + return dbs_info ? &dbs_info->policy_dbs : NULL; +} -/************************** sysfs end ************************/ +static void od_free(struct policy_dbs_info *policy_dbs) +{ + kfree(to_dbs_info(policy_dbs)); +} static int od_init(struct dbs_data *dbs_data, bool notify) { @@ -503,7 +373,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ - tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred @@ -511,17 +381,17 @@ static int od_init(struct dbs_data *dbs_data, bool notify) */ dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; /* For correct statistics, we need 10 ticks for each measure */ dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; - tuners->ignore_nice_load = 0; + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + dbs_data->ignore_nice_load = 0; tuners->powersave_bias = default_powersave_bias; - tuners->io_is_busy = should_io_be_busy(); + dbs_data->io_is_busy = should_io_be_busy(); dbs_data->tuners = tuners; return 0; @@ -532,33 +402,38 @@ static void od_exit(struct dbs_data *dbs_data, bool notify) kfree(dbs_data->tuners); } -define_get_cpu_dbs_routines(od_cpu_dbs_info); +static void od_start(struct cpufreq_policy *policy) +{ + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + dbs_info->sample_type = OD_NORMAL_SAMPLE; + ondemand_powersave_bias_init(policy); +} static struct od_ops od_ops = { - .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, .powersave_bias_target = generic_powersave_bias_target, - .freq_increase = dbs_freq_increase, }; -static struct common_dbs_data od_dbs_cdata = { - .governor = GOV_ONDEMAND, - .attr_group_gov_sys = &od_attr_group_gov_sys, - .attr_group_gov_pol = &od_attr_group_gov_pol, - .get_cpu_cdbs = get_cpu_cdbs, - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, +static struct dbs_governor od_dbs_gov = { + .gov = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + }, + .kobj_type = { .default_attrs = od_attributes }, .gov_dbs_timer = od_dbs_timer, - .gov_check_cpu = od_check_cpu, - .gov_ops = &od_ops, + .alloc = od_alloc, + .free = od_free, .init = od_init, .exit = od_exit, - .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), + .start = od_start, }; +#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) + static void od_set_powersave_bias(unsigned int powersave_bias) { - struct cpufreq_policy *policy; - struct dbs_data *dbs_data; - struct od_dbs_tuners *od_tuners; unsigned int cpu; cpumask_t done; @@ -567,22 +442,25 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { - struct cpu_common_dbs_info *shared; + struct cpufreq_policy *policy; + struct policy_dbs_info *policy_dbs; + struct dbs_data *dbs_data; + struct od_dbs_tuners *od_tuners; if (cpumask_test_cpu(cpu, &done)) continue; - shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; - if (!shared) + policy = cpufreq_cpu_get_raw(cpu); + if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) continue; - policy = shared->policy; - cpumask_or(&done, &done, policy->cpus); - - if (policy->governor != &cpufreq_gov_ondemand) + policy_dbs = policy->governor_data; + if (!policy_dbs) continue; - dbs_data = policy->governor_data; + cpumask_or(&done, &done, policy->cpus); + + dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; } @@ -605,30 +483,14 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); -} - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { - return cpufreq_register_governor(&cpufreq_gov_ondemand); + return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); } static void __exit cpufreq_gov_dbs_exit(void) { - cpufreq_unregister_governor(&cpufreq_gov_ondemand); + cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); } MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); @@ -638,6 +500,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return CPU_FREQ_GOV_ONDEMAND; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h new file mode 100644 index 0000000..f0121db --- /dev/null +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -0,0 +1,30 @@ +/* + * Header file for CPUFreq ondemand governor and related code. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cpufreq_governor.h" + +struct od_policy_dbs_info { + struct policy_dbs_info policy_dbs; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_delay_us; + unsigned int freq_hi_delay_us; + unsigned int sample_type:1; +}; + +static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) +{ + return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); +} + +struct od_dbs_tuners { + unsigned int powersave_bias; +}; diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index cf117de..af9f4b9 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE -static -#endif -struct cpufreq_governor cpufreq_gov_performance = { +static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, @@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) cpufreq_unregister_governor(&cpufreq_gov_performance); } +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif +#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif + MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e3b874c..b8b4002 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE -static -#endif -struct cpufreq_governor cpufreq_gov_powersave = { +static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, @@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_powersave; +} + fs_initcall(cpufreq_gov_powersave_init); #else module_init(cpufreq_gov_powersave_init); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 4dbf1db..4d16f45 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, return rc; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -static -#endif -struct cpufreq_governor cpufreq_gov_userspace = { +static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, .store_setspeed = cpufreq_set, @@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_userspace; +} + fs_initcall(cpufreq_gov_userspace_init); #else module_init(cpufreq_gov_userspace_init); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3a4b39a..cb560749 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -71,7 +71,7 @@ struct sample { u64 mperf; u64 tsc; int freq; - ktime_t time; + u64 time; }; struct pstate_data { @@ -103,13 +103,13 @@ struct _pid { struct cpudata { int cpu; - struct timer_list timer; + struct update_util_data update_util; struct pstate_data pstate; struct vid_data vid; struct _pid pid; - ktime_t last_sample_time; + u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; @@ -120,6 +120,7 @@ struct cpudata { static struct cpudata **all_cpu_data; struct pstate_adjust_policy { int sample_rate_ms; + s64 sample_rate_ns; int deadband; int setpoint; int p_gain_pct; @@ -197,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { - pid->setpoint = setpoint; - pid->deadband = deadband; + pid->setpoint = int_tofp(setpoint); + pid->deadband = int_tofp(deadband); pid->integral = int_tofp(integral); pid->last_err = int_tofp(setpoint) - int_tofp(busy); } @@ -224,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) int32_t pterm, dterm, fp_error; int32_t integral_limit; - fp_error = int_tofp(pid->setpoint) - busy; + fp_error = pid->setpoint - busy; - if (abs(fp_error) <= int_tofp(pid->deadband)) + if (abs(fp_error) <= pid->deadband) return 0; pterm = mul_fp(pid->p_gain, fp_error); @@ -286,7 +287,7 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } -static void intel_pstate_hwp_set(void) +static void intel_pstate_hwp_set(const struct cpumask *cpumask) { int min, hw_min, max, hw_max, cpu, range, adj_range; u64 value, cap; @@ -296,9 +297,7 @@ static void intel_pstate_hwp_set(void) hw_max = HWP_HIGHEST_PERF(cap); range = hw_max - hw_min; - get_online_cpus(); - - for_each_online_cpu(cpu) { + for_each_cpu(cpu, cpumask) { rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); adj_range = limits->min_perf_pct * range / 100; min = hw_min + adj_range; @@ -317,7 +316,12 @@ static void intel_pstate_hwp_set(void) value |= HWP_MAX_PERF(max); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } +} +static void intel_pstate_hwp_set_online_cpus(void) +{ + get_online_cpus(); + intel_pstate_hwp_set(cpu_online_mask); put_online_cpus(); } @@ -439,7 +443,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -465,7 +469,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -490,7 +494,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -531,6 +535,9 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { + /* First disable HWP notification interrupt as we don't process them */ + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -712,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); + wrmsrl(MSR_IA32_PERF_CTL, val); } static int knl_get_turbo_pstate(void) @@ -824,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + max_perf_adj = fp_toint(max_perf * limits->max_perf); *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + min_perf = fp_toint(max_perf * limits->min_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } @@ -874,16 +881,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) core_pct = int_tofp(sample->aperf) * int_tofp(100); core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); - sample->freq = fp_toint( - mul_fp(int_tofp( - cpu->pstate.max_pstate_physical * - cpu->pstate.scaling / 100), - core_pct)); - sample->core_pct_busy = (int32_t)core_pct; } -static inline void intel_pstate_sample(struct cpudata *cpu) +static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) { u64 aperf, mperf; unsigned long flags; @@ -893,14 +894,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu) rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); tsc = rdtsc(); - if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { + if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { local_irq_restore(flags); - return; + return false; } local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; - cpu->sample.time = ktime_get(); + cpu->sample.time = time; cpu->sample.aperf = aperf; cpu->sample.mperf = mperf; cpu->sample.tsc = tsc; @@ -908,27 +909,16 @@ static inline void intel_pstate_sample(struct cpudata *cpu) cpu->sample.mperf -= cpu->prev_mperf; cpu->sample.tsc -= cpu->prev_tsc; - intel_pstate_calc_busy(cpu); - cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; + return true; } -static inline void intel_hwp_set_sample_time(struct cpudata *cpu) -{ - int delay; - - delay = msecs_to_jiffies(50); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - -static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +static inline int32_t get_avg_frequency(struct cpudata *cpu) { - int delay; - - delay = msecs_to_jiffies(pid_params.sample_rate_ms); - mod_timer_pinned(&cpu->timer, jiffies + delay); + return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * + cpu->pstate.scaling, cpu->sample.mperf); } static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) @@ -954,7 +944,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) mperf = cpu->sample.mperf + delta_iowait_mperf; cpu->prev_cummulative_iowait = cummulative_iowait; - /* * The load can be estimated as the ratio of the mperf counter * running at a constant frequency during active periods @@ -970,8 +959,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - s64 duration_us; - u32 sample_time; + u64 duration_ns; + + intel_pstate_calc_busy(cpu); /* * core_busy is the ratio of actual performance to max @@ -990,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* - * Since we have a deferred timer, it will not fire unless - * we are in C0. So, determine if the actual elapsed time - * is significantly greater (3x) than our sample interval. If it - * is, then we were idle for a long enough period of time - * to adjust our busyness. + * Since our utilization update callback will not run unless we are + * in C0, check if the actual elapsed time is significantly greater (3x) + * than our sample interval. If it is, then we were idle for a long + * enough period of time to adjust our busyness. */ - sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); - if (duration_us > sample_time * 3) { - sample_ratio = div_fp(int_tofp(sample_time), - int_tofp(duration_us)); + duration_ns = cpu->sample.time - cpu->last_sample_time; + if ((s64)duration_ns > pid_params.sample_rate_ns * 3 + && cpu->last_sample_time > 0) { + sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), + int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); } @@ -1028,26 +1016,21 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - sample->freq); + get_avg_frequency(cpu)); } -static void intel_hwp_timer_func(unsigned long __data) +static void intel_pstate_update_util(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) { - struct cpudata *cpu = (struct cpudata *) __data; + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; - intel_pstate_sample(cpu); - intel_hwp_set_sample_time(cpu); -} + if ((s64)delta_ns >= pid_params.sample_rate_ns) { + bool sample_taken = intel_pstate_sample(cpu, time); -static void intel_pstate_timer_func(unsigned long __data) -{ - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); - - intel_pstate_adjust_busy_pstate(cpu); - - intel_pstate_set_sample_time(cpu); + if (sample_taken && !hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } } #define ICPU(model, policy) \ @@ -1095,24 +1078,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->cpu = cpunum; - if (hwp_active) + if (hwp_active) { intel_pstate_hwp_enable(cpu); + pid_params.sample_rate_ms = 50; + pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } intel_pstate_get_cpu_pstates(cpu); - init_timer_deferrable(&cpu->timer); - cpu->timer.data = (unsigned long)cpu; - cpu->timer.expires = jiffies + HZ/100; - - if (!hwp_active) - cpu->timer.function = intel_pstate_timer_func; - else - cpu->timer.function = intel_hwp_timer_func; - intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu); + intel_pstate_sample(cpu, 0); - add_timer_on(&cpu->timer, cpunum); + cpu->update_util.func = intel_pstate_update_util; + cpufreq_set_update_util_data(cpunum, &cpu->update_util); pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); @@ -1128,7 +1106,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) if (!cpu) return 0; sample = &cpu->sample; - return sample->freq; + return get_avg_frequency(cpu); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) @@ -1141,7 +1119,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) pr_debug("intel_pstate: set performance\n"); limits = &performance_limits; if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set(policy->cpus); return 0; } @@ -1173,7 +1151,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set(policy->cpus); return 0; } @@ -1196,7 +1174,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); - del_timer_sync(&all_cpu_data[cpu_num]->timer); + cpufreq_set_update_util_data(cpu_num, NULL); + synchronize_sched(); + if (hwp_active) return; @@ -1260,6 +1240,7 @@ static int intel_pstate_msrs_not_valid(void) static void copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; pid_params.p_gain_pct = policy->p_gain_pct; pid_params.i_gain_pct = policy->i_gain_pct; pid_params.d_gain_pct = policy->d_gain_pct; @@ -1397,6 +1378,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } static inline bool intel_pstate_has_acpi_ppc(void) { return false; } #endif /* CONFIG_ACPI */ +static const struct x86_cpu_id hwp_support_ids[] __initconst = { + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, + {} +}; + static int __init intel_pstate_init(void) { int cpu, rc = 0; @@ -1406,17 +1392,16 @@ static int __init intel_pstate_init(void) if (no_load) return -ENODEV; + if (x86_match_cpu(hwp_support_ids) && !no_hwp) { + copy_cpu_funcs(&core_params.funcs); + hwp_active++; + goto hwp_cpu_matched; + } + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; - /* - * The Intel pstate driver will be ignored if the platform - * firmware has its own power management modes. - */ - if (intel_pstate_platform_pwr_mgmt_exists()) - return -ENODEV; - cpu_def = (struct cpu_defaults *)id->driver_data; copy_pid_params(&cpu_def->pid_policy); @@ -1425,17 +1410,20 @@ static int __init intel_pstate_init(void) if (intel_pstate_msrs_not_valid()) return -ENODEV; +hwp_cpu_matched: + /* + * The Intel pstate driver will be ignored if the platform + * firmware has its own power management modes. + */ + if (intel_pstate_platform_pwr_mgmt_exists()) + return -ENODEV; + pr_info("Intel P-state driver initializing.\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) { - pr_info("intel_pstate: HWP enabled\n"); - hwp_active++; - } - if (!hwp_active && hwp_only) goto out; @@ -1446,12 +1434,16 @@ static int __init intel_pstate_init(void) intel_pstate_debug_expose_params(); intel_pstate_sysfs_expose_params(); + if (hwp_active) + pr_info("intel_pstate: HWP enabled\n"); + return rc; out: get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); + cpufreq_set_update_util_data(cpu, NULL); + synchronize_sched(); kfree(all_cpu_data[cpu]); } } diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 547890f..50bf120 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -28,6 +28,8 @@ #include <linux/of.h> #include <linux/reboot.h> #include <linux/slab.h> +#include <linux/cpu.h> +#include <trace/events/power.h> #include <asm/cputhreads.h> #include <asm/firmware.h> @@ -42,13 +44,24 @@ static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; +static unsigned int *core_to_chip_map; + +static const char * const throttle_reason[] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" +}; static struct chip { unsigned int id; bool throttled; + bool restore; + u8 throttle_reason; cpumask_t mask; struct work_struct throttle; - bool restore; } *chips; static int nr_chips; @@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void) static void powernv_cpufreq_throttle_check(void *data) { unsigned int cpu = smp_processor_id(); + unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)]; unsigned long pmsr; int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); for (i = 0; i < nr_chips; i++) - if (chips[i].id == cpu_to_chip_id(cpu)) + if (chips[i].id == chip_id) break; /* Check for Pmax Capping */ @@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data) goto next; chips[i].throttled = true; if (pmsr_pmax < powernv_pstate_info.nominal) - pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - powernv_pstate_info.nominal); - else - pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - powernv_pstate_info.max); + pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.nominal); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } else if (chips[i].throttled) { chips[i].throttled = false; - pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, - chips[i].id, pmsr_pmax); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } /* Check if Psafe_mode_active is set in PMSR. */ @@ -356,7 +370,7 @@ next: if (throttled) { pr_info("PMSR = %16lx\n", pmsr); - pr_crit("CPU Frequency could be throttled\n"); + pr_warn("CPU Frequency could be throttled\n"); } } @@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); unsigned int cpu; - cpumask_var_t mask; + cpumask_t mask; - smp_call_function_any(&chip->mask, + get_online_cpus(); + cpumask_and(&mask, &chip->mask, cpu_online_mask); + smp_call_function_any(&mask, powernv_cpufreq_throttle_check, NULL, 0); if (!chip->restore) - return; + goto out; chip->restore = false; - cpumask_copy(mask, &chip->mask); - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int index, tcpu; + for_each_cpu(cpu, &mask) { + int index; struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); @@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work) policy.cur, CPUFREQ_RELATION_C, &index); powernv_cpufreq_target_index(&policy, index); - for_each_cpu(tcpu, policy.cpus) - cpumask_clear_cpu(tcpu, mask); + cpumask_andnot(&mask, &mask, policy.cpus); } +out: + put_online_cpus(); } -static char throttle_reason[][30] = { - "No throttling", - "Power Cap", - "Processor Over Temperature", - "Power Supply Failure", - "Over Current", - "OCC Reset" - }; - static int powernv_cpufreq_occ_msg(struct notifier_block *nb, unsigned long msg_type, void *_msg) { @@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, */ if (!throttled) { throttled = true; - pr_crit("CPU frequency is throttled for duration\n"); + pr_warn("CPU frequency is throttled for duration\n"); } break; @@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, return 0; } - if (omsg.throttle_status && + for (i = 0; i < nr_chips; i++) + if (chips[i].id == omsg.chip) + break; + + if (omsg.throttle_status >= 0 && omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) - pr_info("OCC: Chip %u Pmax reduced due to %s\n", - (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else if (!omsg.throttle_status) - pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else - return 0; + chips[i].throttle_reason = omsg.throttle_status; - for (i = 0; i < nr_chips; i++) - if (chips[i].id == omsg.chip) { - if (!omsg.throttle_status) - chips[i].restore = true; - schedule_work(&chips[i].throttle); - } + if (!omsg.throttle_status) + chips[i].restore = true; + + schedule_work(&chips[i].throttle); } return 0; } @@ -556,29 +558,54 @@ static int init_chip_info(void) unsigned int chip[256]; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t cpu_mask; + int ret = -ENOMEM; + + core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int), + GFP_KERNEL); + if (!core_to_chip_map) + goto out; - for_each_possible_cpu(cpu) { + cpumask_copy(&cpu_mask, cpu_possible_mask); + for_each_cpu(cpu, &cpu_mask) { unsigned int id = cpu_to_chip_id(cpu); if (prev_chip_id != id) { prev_chip_id = id; chip[nr_chips++] = id; } + core_to_chip_map[cpu_core_index_of_thread(cpu)] = id; + cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); } - chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) - return -ENOMEM; + goto free_chip_map; for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - chips[i].throttled = false; cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); - chips[i].restore = false; } return 0; +free_chip_map: + kfree(core_to_chip_map); +out: + return ret; +} + +static inline void clean_chip_info(void) +{ + kfree(chips); + kfree(core_to_chip_map); +} + +static inline void unregister_all_notifiers(void) +{ + opal_message_notifier_unregister(OPAL_MSG_OCC, + &powernv_cpufreq_opal_nb); + unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); } static int __init powernv_cpufreq_init(void) @@ -591,28 +618,35 @@ static int __init powernv_cpufreq_init(void) /* Discover pstates from device tree and init */ rc = init_powernv_pstates(); - if (rc) { - pr_info("powernv-cpufreq disabled. System does not support PState control\n"); - return rc; - } + if (rc) + goto out; /* Populate chip info */ rc = init_chip_info(); if (rc) - return rc; + goto out; register_reboot_notifier(&powernv_cpufreq_reboot_nb); opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); - return cpufreq_register_driver(&powernv_cpufreq_driver); + + rc = cpufreq_register_driver(&powernv_cpufreq_driver); + if (!rc) + return 0; + + pr_info("Failed to register the cpufreq driver (%d)\n", rc); + unregister_all_notifiers(); + clean_chip_info(); +out: + pr_info("Platform driver disabled. System does not support PState control\n"); + return rc; } module_init(powernv_cpufreq_init); static void __exit powernv_cpufreq_exit(void) { - unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); - opal_message_notifier_unregister(OPAL_MSG_OCC, - &powernv_cpufreq_opal_nb); cpufreq_unregister_driver(&powernv_cpufreq_driver); + unregister_all_notifiers(); + clean_chip_info(); } module_exit(powernv_cpufreq_exit); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 0742b32..27fc733 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -199,8 +199,8 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); static void get_typical_interval(struct menu_device *data) { int i, divisor; - unsigned int max, thresh; - uint64_t avg, stddev; + unsigned int max, thresh, avg; + uint64_t sum, variance; thresh = UINT_MAX; /* Discard outliers above this value */ @@ -208,52 +208,51 @@ again: /* First calculate the average of past intervals */ max = 0; - avg = 0; + sum = 0; divisor = 0; for (i = 0; i < INTERVALS; i++) { unsigned int value = data->intervals[i]; if (value <= thresh) { - avg += value; + sum += value; divisor++; if (value > max) max = value; } } if (divisor == INTERVALS) - avg >>= INTERVAL_SHIFT; + avg = sum >> INTERVAL_SHIFT; else - do_div(avg, divisor); + avg = div_u64(sum, divisor); - /* Then try to determine standard deviation */ - stddev = 0; + /* Then try to determine variance */ + variance = 0; for (i = 0; i < INTERVALS; i++) { unsigned int value = data->intervals[i]; if (value <= thresh) { - int64_t diff = value - avg; - stddev += diff * diff; + int64_t diff = (int64_t)value - avg; + variance += diff * diff; } } if (divisor == INTERVALS) - stddev >>= INTERVAL_SHIFT; + variance >>= INTERVAL_SHIFT; else - do_div(stddev, divisor); + do_div(variance, divisor); /* - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. - * - * int_sqrt() formal parameter type is unsigned long. When the - * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) - * the resulting squared standard deviation exceeds the input domain - * of int_sqrt on platforms where unsigned long is 32 bits in size. - * In such case reject the candidate average. + * The typical interval is obtained when standard deviation is + * small (stddev <= 20 us, variance <= 400 us^2) or standard + * deviation is small compared to the average interval (avg > + * 6*stddev, avg^2 > 36*variance). The average is smaller than + * UINT_MAX aka U32_MAX, so computing its square does not + * overflow a u64. We simply reject this candidate average if + * the standard deviation is greater than 715 s (which is + * rather unlikely). * * Use this result only if there is no timer to wake us up sooner. */ - if (likely(stddev <= ULONG_MAX)) { - stddev = int_sqrt(stddev); - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { + if (likely(variance <= U64_MAX/36)) { + if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3)) + || variance <= 400) { if (data->next_timer_us > avg) data->predicted_us = avg; return; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 438f1b4..d656657 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -123,6 +123,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = { { "80860F41", 0 }, { "808622C1", 0 }, { "AMD0010", ACCESS_INTR_MASK }, + { "AMDI0010", ACCESS_INTR_MASK }, { "AMDI0510", 0 }, { "APMC0D0F", 0 }, { } diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 8f779a1..0ddf638 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -63,6 +63,7 @@ #include <linux/platform_device.h> #include <linux/mailbox_controller.h> #include <linux/mailbox_client.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include "mailbox.h" @@ -70,6 +71,9 @@ static struct mbox_chan *pcc_mbox_channels; +/* Array of cached virtual address for doorbell registers */ +static void __iomem **pcc_doorbell_vaddr; + static struct mbox_controller pcc_mbox_ctrl = {}; /** * get_pcc_channel - Given a PCC subspace idx, get @@ -160,6 +164,66 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); +/* + * PCC can be used with perf critical drivers such as CPPC + * So it makes sense to locally cache the virtual address and + * use it to read/write to PCC registers such as doorbell register + * + * The below read_register and write_registers are used to read and + * write from perf critical registers such as PCC doorbell register + */ +static int read_register(void __iomem *vaddr, u64 *val, unsigned int bit_width) +{ + int ret_val = 0; + + switch (bit_width) { + case 8: + *val = readb(vaddr); + break; + case 16: + *val = readw(vaddr); + break; + case 32: + *val = readl(vaddr); + break; + case 64: + *val = readq(vaddr); + break; + default: + pr_debug("Error: Cannot read register of %u bit width", + bit_width); + ret_val = -EFAULT; + break; + } + return ret_val; +} + +static int write_register(void __iomem *vaddr, u64 val, unsigned int bit_width) +{ + int ret_val = 0; + + switch (bit_width) { + case 8: + writeb(val, vaddr); + break; + case 16: + writew(val, vaddr); + break; + case 32: + writel(val, vaddr); + break; + case 64: + writeq(val, vaddr); + break; + default: + pr_debug("Error: Cannot write register of %u bit width", + bit_width); + ret_val = -EFAULT; + break; + } + return ret_val; +} + /** * pcc_send_data - Called from Mailbox Controller code. Used * here only to ring the channel doorbell. The PCC client @@ -175,21 +239,39 @@ EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); static int pcc_send_data(struct mbox_chan *chan, void *data) { struct acpi_pcct_hw_reduced *pcct_ss = chan->con_priv; - struct acpi_generic_address doorbell; + struct acpi_generic_address *doorbell; u64 doorbell_preserve; u64 doorbell_val; u64 doorbell_write; + u32 id = chan - pcc_mbox_channels; + int ret = 0; + + if (id >= pcc_mbox_ctrl.num_chans) { + pr_debug("pcc_send_data: Invalid mbox_chan passed\n"); + return -ENOENT; + } - doorbell = pcct_ss->doorbell_register; + doorbell = &pcct_ss->doorbell_register; doorbell_preserve = pcct_ss->preserve_mask; doorbell_write = pcct_ss->write_mask; /* Sync notification from OS to Platform. */ - acpi_read(&doorbell_val, &doorbell); - acpi_write((doorbell_val & doorbell_preserve) | doorbell_write, - &doorbell); - - return 0; + if (pcc_doorbell_vaddr[id]) { + ret = read_register(pcc_doorbell_vaddr[id], &doorbell_val, + doorbell->bit_width); + if (ret) + return ret; + ret = write_register(pcc_doorbell_vaddr[id], + (doorbell_val & doorbell_preserve) | doorbell_write, + doorbell->bit_width); + } else { + ret = acpi_read(&doorbell_val, doorbell); + if (ret) + return ret; + ret = acpi_write((doorbell_val & doorbell_preserve) | doorbell_write, + doorbell); + } + return ret; } static const struct mbox_chan_ops pcc_chan_ops = { @@ -265,14 +347,29 @@ static int __init acpi_pcc_probe(void) return -ENOMEM; } + pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL); + if (!pcc_doorbell_vaddr) { + kfree(pcc_mbox_channels); + return -ENOMEM; + } + /* Point to the first PCC subspace entry */ pcct_entry = (struct acpi_subtable_header *) ( (unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct)); for (i = 0; i < count; i++) { + struct acpi_generic_address *db_reg; + struct acpi_pcct_hw_reduced *pcct_ss; pcc_mbox_channels[i].con_priv = pcct_entry; pcct_entry = (struct acpi_subtable_header *) ((unsigned long) pcct_entry + pcct_entry->length); + + /* If doorbell is in system memory cache the virt address */ + pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry; + db_reg = &pcct_ss->doorbell_register; + if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address, + db_reg->bit_width/8); } pcc_mbox_ctrl.num_chans = count; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 0579649..4b717c6 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -252,6 +252,10 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: break; + case ACPI_RESOURCE_TYPE_SERIAL_BUS: + /* serial bus connections (I2C/SPI/UART) are not pnp */ + break; + default: dev_warn(&dev->dev, "unknown resource type %d in _CRS\n", res->type); diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 6c592dc..cdfd01f0 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -133,6 +133,12 @@ struct rapl_domain_data { unsigned long timestamp; }; +struct msrl_action { + u32 msr_no; + u64 clear_mask; + u64 set_mask; + int err; +}; #define DOMAIN_STATE_INACTIVE BIT(0) #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) @@ -149,6 +155,7 @@ struct rapl_power_limit { static const char pl1_name[] = "long_term"; static const char pl2_name[] = "short_term"; +struct rapl_package; struct rapl_domain { const char *name; enum rapl_domain_type id; @@ -159,7 +166,7 @@ struct rapl_domain { u64 attr_map; /* track capabilities */ unsigned int state; unsigned int domain_energy_unit; - int package_id; + struct rapl_package *rp; }; #define power_zone_to_rapl_domain(_zone) \ container_of(_zone, struct rapl_domain, power_zone) @@ -184,6 +191,7 @@ struct rapl_package { * notify interrupt enable status. */ struct list_head plist; + int lead_cpu; /* one active cpu per package for access */ }; struct rapl_defaults { @@ -231,10 +239,10 @@ static int rapl_read_data_raw(struct rapl_domain *rd, static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, unsigned long long value); -static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, +static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, u64 value, int to_raw); -static void package_power_limit_irq_save(int package_id); +static void package_power_limit_irq_save(struct rapl_package *rp); static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ @@ -260,20 +268,6 @@ static struct rapl_package *find_package_by_id(int id) return NULL; } -/* caller to ensure CPU hotplug lock is held */ -static int find_active_cpu_on_package(int package_id) -{ - int i; - - for_each_online_cpu(i) { - if (topology_physical_package_id(i) == package_id) - return i; - } - /* all CPUs on this package are offline */ - - return -ENODEV; -} - /* caller must hold cpu hotplug lock */ static void rapl_cleanup_data(void) { @@ -312,25 +306,19 @@ static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) { struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); - *energy = rapl_unit_xlate(rd, 0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); + *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); return 0; } static int release_zone(struct powercap_zone *power_zone) { struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); - struct rapl_package *rp; + struct rapl_package *rp = rd->rp; /* package zone is the last zone of a package, we can free * memory here since all children has been unregistered. */ if (rd->id == RAPL_DOMAIN_PACKAGE) { - rp = find_package_by_id(rd->package_id); - if (!rp) { - dev_warn(&power_zone->dev, "no package id %s\n", - rd->name); - return -ENODEV; - } kfree(rd); rp->domains = NULL; } @@ -432,11 +420,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int id, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); - rp = find_package_by_id(rd->package_id); - if (!rp) { - ret = -ENODEV; - goto set_exit; - } + rp = rd->rp; if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { dev_warn(&power_zone->dev, "%s locked by BIOS, monitoring only\n", @@ -456,7 +440,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int id, ret = -EINVAL; } if (!ret) - package_power_limit_irq_save(rd->package_id); + package_power_limit_irq_save(rp); set_exit: put_online_cpus(); return ret; @@ -655,24 +639,19 @@ static void rapl_init_domains(struct rapl_package *rp) break; } if (mask) { - rd->package_id = rp->id; + rd->rp = rp; rd++; } } } -static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, - enum unit_type type, u64 value, - int to_raw) +static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, + u64 value, int to_raw) { u64 units = 1; - struct rapl_package *rp; + struct rapl_package *rp = rd->rp; u64 scale = 1; - rp = find_package_by_id(package); - if (!rp) - return value; - switch (type) { case POWER_UNIT: units = rp->power_unit; @@ -769,10 +748,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, msr = rd->msrs[rp->id]; if (!msr) return -EINVAL; - /* use physical package id to look up active cpus */ - cpu = find_active_cpu_on_package(rd->package_id); - if (cpu < 0) - return cpu; + + cpu = rd->rp->lead_cpu; /* special-case package domain, which uses a different bit*/ if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) { @@ -793,42 +770,66 @@ static int rapl_read_data_raw(struct rapl_domain *rd, final = value & rp->mask; final = final >> rp->shift; if (xlate) - *data = rapl_unit_xlate(rd, rd->package_id, rp->unit, final, 0); + *data = rapl_unit_xlate(rd, rp->unit, final, 0); else *data = final; return 0; } + +static int msrl_update_safe(u32 msr_no, u64 clear_mask, u64 set_mask) +{ + int err; + u64 val; + + err = rdmsrl_safe(msr_no, &val); + if (err) + goto out; + + val &= ~clear_mask; + val |= set_mask; + + err = wrmsrl_safe(msr_no, val); + +out: + return err; +} + +static void msrl_update_func(void *info) +{ + struct msrl_action *ma = info; + + ma->err = msrl_update_safe(ma->msr_no, ma->clear_mask, ma->set_mask); +} + /* Similar use of primitive info in the read counterpart */ static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, unsigned long long value) { - u64 msr_val; - u32 msr; struct rapl_primitive_info *rp = &rpi[prim]; int cpu; + u64 bits; + struct msrl_action ma; + int ret; - cpu = find_active_cpu_on_package(rd->package_id); - if (cpu < 0) - return cpu; - msr = rd->msrs[rp->id]; - if (rdmsrl_safe_on_cpu(cpu, msr, &msr_val)) { - dev_dbg(&rd->power_zone.dev, - "failed to read msr 0x%x on cpu %d\n", msr, cpu); - return -EIO; - } - value = rapl_unit_xlate(rd, rd->package_id, rp->unit, value, 1); - msr_val &= ~rp->mask; - msr_val |= value << rp->shift; - if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) { - dev_dbg(&rd->power_zone.dev, - "failed to write msr 0x%x on cpu %d\n", msr, cpu); - return -EIO; - } + cpu = rd->rp->lead_cpu; + bits = rapl_unit_xlate(rd, rp->unit, value, 1); + bits |= bits << rp->shift; + memset(&ma, 0, sizeof(ma)); - return 0; + ma.msr_no = rd->msrs[rp->id]; + ma.clear_mask = rp->mask; + ma.set_mask = bits; + + ret = smp_call_function_single(cpu, msrl_update_func, &ma, 1); + if (ret) + WARN_ON_ONCE(ret); + else + ret = ma.err; + + return ret; } /* @@ -893,6 +894,21 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) return 0; } +static void power_limit_irq_save_cpu(void *info) +{ + u32 l, h = 0; + struct rapl_package *rp = (struct rapl_package *)info; + + /* save the state of PLN irq mask bit before disabling it */ + rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); + if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { + rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; + rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; + } + l &= ~PACKAGE_THERM_INT_PLN_ENABLE; + wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +} + /* REVISIT: * When package power limit is set artificially low by RAPL, LVT @@ -904,61 +920,40 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) * to do by adding an atomic notifier. */ -static void package_power_limit_irq_save(int package_id) +static void package_power_limit_irq_save(struct rapl_package *rp) { - u32 l, h = 0; - int cpu; - struct rapl_package *rp; - - rp = find_package_by_id(package_id); - if (!rp) - return; - if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) return; - cpu = find_active_cpu_on_package(package_id); - if (cpu < 0) - return; - /* save the state of PLN irq mask bit before disabling it */ - rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); - if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { - rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; - rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; - } - l &= ~PACKAGE_THERM_INT_PLN_ENABLE; - wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); } -/* restore per package power limit interrupt enable state */ -static void package_power_limit_irq_restore(int package_id) +static void power_limit_irq_restore_cpu(void *info) { - u32 l, h; - int cpu; - struct rapl_package *rp; + u32 l, h = 0; + struct rapl_package *rp = (struct rapl_package *)info; - rp = find_package_by_id(package_id); - if (!rp) - return; + rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); - if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) - return; + if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) + l |= PACKAGE_THERM_INT_PLN_ENABLE; + else + l &= ~PACKAGE_THERM_INT_PLN_ENABLE; + + wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); +} - cpu = find_active_cpu_on_package(package_id); - if (cpu < 0) +/* restore per package power limit interrupt enable state */ +static void package_power_limit_irq_restore(struct rapl_package *rp) +{ + if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) return; /* irq enable state not saved, nothing to restore */ if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) return; - rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); - - if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) - l |= PACKAGE_THERM_INT_PLN_ENABLE; - else - l &= ~PACKAGE_THERM_INT_PLN_ENABLE; - wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + smp_call_function_single(rp->lead_cpu, power_limit_irq_restore_cpu, rp, 1); } static void set_floor_freq_default(struct rapl_domain *rd, bool mode) @@ -1141,7 +1136,7 @@ static int rapl_unregister_powercap(void) * hotplug lock held */ list_for_each_entry(rp, &rapl_packages, plist) { - package_power_limit_irq_restore(rp->id); + package_power_limit_irq_restore(rp); for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { @@ -1392,7 +1387,8 @@ static int rapl_detect_topology(void) /* add the new package to the list */ new_package->id = phy_package_id; new_package->nr_cpus = 1; - + /* use the first active cpu of the package to access */ + new_package->lead_cpu = i; /* check if the package contains valid domains */ if (rapl_detect_domains(new_package, i) || rapl_defaults->check_unit(new_package, i)) { @@ -1448,6 +1444,8 @@ static int rapl_add_package(int cpu) /* add the new package to the list */ rp->id = phy_package_id; rp->nr_cpus = 1; + rp->lead_cpu = cpu; + /* check if the package contains valid domains */ if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) { @@ -1480,6 +1478,7 @@ static int rapl_cpu_callback(struct notifier_block *nfb, unsigned long cpu = (unsigned long)hcpu; int phy_package_id; struct rapl_package *rp; + int lead_cpu; phy_package_id = topology_physical_package_id(cpu); switch (action) { @@ -1500,6 +1499,15 @@ static int rapl_cpu_callback(struct notifier_block *nfb, break; if (--rp->nr_cpus == 0) rapl_remove_package(rp); + else if (cpu == rp->lead_cpu) { + /* choose another active cpu in the package */ + lead_cpu = cpumask_any_but(topology_core_cpumask(cpu), cpu); + if (lead_cpu < nr_cpu_ids) + rp->lead_cpu = lead_cpu; + else /* should never go here */ + pr_err("no active cpu available for package %d\n", + phy_package_id); + } } return NOTIFY_OK; |