summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/Makefile1
-rw-r--r--drivers/acpi/acpi_amba.c122
-rw-r--r--drivers/acpi/acpi_apd.c1
-rw-r--r--drivers/acpi/acpi_platform.c19
-rw-r--r--drivers/acpi/acpi_processor.c17
-rw-r--r--drivers/acpi/acpi_video.c7
-rw-r--r--drivers/acpi/acpica/acglobal.h2
-rw-r--r--drivers/acpi/acpica/aclocal.h6
-rw-r--r--drivers/acpi/acpica/acnamesp.h2
-rw-r--r--drivers/acpi/acpica/acpredef.h2
-rw-r--r--drivers/acpi/acpica/dbcmds.c2
-rw-r--r--drivers/acpi/acpica/dbconvert.c5
-rw-r--r--drivers/acpi/acpica/dsmethod.c3
-rw-r--r--drivers/acpi/acpica/dsobject.c3
-rw-r--r--drivers/acpi/acpica/evgpeblk.c3
-rw-r--r--drivers/acpi/acpica/evgpeinit.c2
-rw-r--r--drivers/acpi/acpica/evregion.c2
-rw-r--r--drivers/acpi/acpica/exconfig.c4
-rw-r--r--drivers/acpi/acpica/exoparg3.c4
-rw-r--r--drivers/acpi/acpica/nseval.c3
-rw-r--r--drivers/acpi/acpica/nsinit.c137
-rw-r--r--drivers/acpi/acpica/tbinstal.c5
-rw-r--r--drivers/acpi/acpica/tbprint.c7
-rw-r--r--drivers/acpi/acpica/tbutils.c4
-rw-r--r--drivers/acpi/acpica/tbxfload.c40
-rw-r--r--drivers/acpi/acpica/utcache.c2
-rw-r--r--drivers/acpi/acpica/utnonansi.c246
-rw-r--r--drivers/acpi/acpica/uttrack.c2
-rw-r--r--drivers/acpi/acpica/utxferror.c3
-rw-r--r--drivers/acpi/acpica/utxfinit.c67
-rw-r--r--drivers/acpi/apei/apei-base.c6
-rw-r--r--drivers/acpi/apei/erst.c3
-rw-r--r--drivers/acpi/apei/ghes.c23
-rw-r--r--drivers/acpi/bgrt.c10
-rw-r--r--drivers/acpi/bus.c26
-rw-r--r--drivers/acpi/cppc_acpi.c237
-rw-r--r--drivers/acpi/ec_sys.c3
-rw-r--r--drivers/acpi/fan.c2
-rw-r--r--drivers/acpi/internal.h7
-rw-r--r--drivers/acpi/osl.c158
-rw-r--r--drivers/acpi/pci_irq.c29
-rw-r--r--drivers/acpi/pmic/intel_pmic_crc.c7
-rw-r--r--drivers/acpi/processor_driver.c2
-rw-r--r--drivers/acpi/processor_idle.c62
-rw-r--r--drivers/acpi/scan.c1
-rw-r--r--drivers/acpi/sleep.c35
-rw-r--r--drivers/acpi/tables.c12
-rw-r--r--drivers/acpi/utils.c4
-rw-r--r--drivers/base/power/domain.c60
-rw-r--r--drivers/base/power/domain_governor.c64
-rw-r--r--drivers/base/power/opp/core.c1079
-rw-r--r--drivers/base/power/opp/cpu.c22
-rw-r--r--drivers/base/power/opp/debugfs.c85
-rw-r--r--drivers/base/power/opp/opp.h74
-rw-r--r--drivers/base/power/trace.c4
-rw-r--r--drivers/base/property.c25
-rw-r--r--drivers/cpufreq/Kconfig1
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c212
-rw-r--r--drivers/cpufreq/amd_freq_sensitivity.c8
-rw-r--r--drivers/cpufreq/cpufreq-dt.c300
-rw-r--r--drivers/cpufreq/cpufreq.c333
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c282
-rw-r--r--drivers/cpufreq/cpufreq_governor.c766
-rw-r--r--drivers/cpufreq/cpufreq_governor.h261
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c445
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.h30
-rw-r--r--drivers/cpufreq/cpufreq_performance.c18
-rw-r--r--drivers/cpufreq/cpufreq_powersave.c10
-rw-r--r--drivers/cpufreq/cpufreq_userspace.c10
-rw-r--r--drivers/cpufreq/intel_pstate.c192
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c152
-rw-r--r--drivers/cpuidle/governors/menu.c47
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c1
-rw-r--r--drivers/mailbox/pcc.c111
-rw-r--r--drivers/pnp/pnpacpi/rsparser.c4
-rw-r--r--drivers/powercap/intel_rapl.c220
76 files changed, 3490 insertions, 2676 deletions
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index cb648a4..edeb2d1 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -43,6 +43,7 @@ acpi-y += pci_root.o pci_link.o pci_irq.o
acpi-y += acpi_lpss.o acpi_apd.o
acpi-y += acpi_platform.o
acpi-y += acpi_pnp.o
+acpi-$(CONFIG_ARM_AMBA) += acpi_amba.o
acpi-y += int340x_thermal.o
acpi-y += power.o
acpi-y += event.o
diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
new file mode 100644
index 0000000..2a61b54
--- /dev/null
+++ b/drivers/acpi/acpi_amba.c
@@ -0,0 +1,122 @@
+
+/*
+ * ACPI support for platform bus type.
+ *
+ * Copyright (C) 2015, Linaro Ltd
+ * Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/amba/bus.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+static const struct acpi_device_id amba_id_list[] = {
+ {"ARMH0061", 0}, /* PL061 GPIO Device */
+ {"", 0},
+};
+
+static void amba_register_dummy_clk(void)
+{
+ static struct clk *amba_dummy_clk;
+
+ /* If clock already registered */
+ if (amba_dummy_clk)
+ return;
+
+ amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL,
+ CLK_IS_ROOT, 0);
+ clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
+}
+
+static int amba_handler_attach(struct acpi_device *adev,
+ const struct acpi_device_id *id)
+{
+ struct amba_device *dev;
+ struct resource_entry *rentry;
+ struct list_head resource_list;
+ bool address_found = false;
+ int irq_no = 0;
+ int ret;
+
+ /* If the ACPI node already has a physical device attached, skip it. */
+ if (adev->physical_node_count)
+ return 0;
+
+ dev = amba_device_alloc(dev_name(&adev->dev), 0, 0);
+ if (!dev) {
+ dev_err(&adev->dev, "%s(): amba_device_alloc() failed\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&resource_list);
+ ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+ if (ret < 0)
+ goto err_free;
+
+ list_for_each_entry(rentry, &resource_list, node) {
+ switch (resource_type(rentry->res)) {
+ case IORESOURCE_MEM:
+ if (!address_found) {
+ dev->res = *rentry->res;
+ address_found = true;
+ }
+ break;
+ case IORESOURCE_IRQ:
+ if (irq_no < AMBA_NR_IRQS)
+ dev->irq[irq_no++] = rentry->res->start;
+ break;
+ default:
+ dev_warn(&adev->dev, "Invalid resource\n");
+ break;
+ }
+ }
+
+ acpi_dev_free_resource_list(&resource_list);
+
+ /*
+ * If the ACPI node has a parent and that parent has a physical device
+ * attached to it, that physical device should be the parent of
+ * the amba device we are about to create.
+ */
+ if (adev->parent)
+ dev->dev.parent = acpi_get_first_physical_node(adev->parent);
+
+ ACPI_COMPANION_SET(&dev->dev, adev);
+
+ ret = amba_device_add(dev, &iomem_resource);
+ if (ret) {
+ dev_err(&adev->dev, "%s(): amba_device_add() failed (%d)\n",
+ __func__, ret);
+ goto err_free;
+ }
+
+ return 1;
+
+err_free:
+ amba_device_put(dev);
+ return ret;
+}
+
+static struct acpi_scan_handler amba_handler = {
+ .ids = amba_id_list,
+ .attach = amba_handler_attach,
+};
+
+void __init acpi_amba_init(void)
+{
+ amba_register_dummy_clk();
+ acpi_scan_add_handler(&amba_handler);
+}
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index d507cf6..d0aad06 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -143,6 +143,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
/* Generic apd devices */
#ifdef CONFIG_X86_AMD_PLATFORM_DEVICE
{ "AMD0010", APD_ADDR(cz_i2c_desc) },
+ { "AMDI0010", APD_ADDR(cz_i2c_desc) },
{ "AMD0020", APD_ADDR(cz_uart_desc) },
{ "AMD0030", },
#endif
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b6f7fa3..159f7f1 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -43,7 +43,6 @@ static const struct acpi_device_id forbidden_id_list[] = {
struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
{
struct platform_device *pdev = NULL;
- struct acpi_device *acpi_parent;
struct platform_device_info pdevinfo;
struct resource_entry *rentry;
struct list_head resource_list;
@@ -82,22 +81,8 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
* attached to it, that physical device should be the parent of the
* platform device we are about to create.
*/
- pdevinfo.parent = NULL;
- acpi_parent = adev->parent;
- if (acpi_parent) {
- struct acpi_device_physical_node *entry;
- struct list_head *list;
-
- mutex_lock(&acpi_parent->physical_node_lock);
- list = &acpi_parent->physical_node_list;
- if (!list_empty(list)) {
- entry = list_first_entry(list,
- struct acpi_device_physical_node,
- node);
- pdevinfo.parent = entry->dev;
- }
- mutex_unlock(&acpi_parent->physical_node_lock);
- }
+ pdevinfo.parent = adev->parent ?
+ acpi_get_first_physical_node(adev->parent) : NULL;
pdevinfo.name = dev_name(&adev->dev);
pdevinfo.id = -1;
pdevinfo.res = resources;
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 6979186..b5e54f2 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -514,7 +514,24 @@ static struct acpi_scan_handler processor_handler = {
},
};
+static int acpi_processor_container_attach(struct acpi_device *dev,
+ const struct acpi_device_id *id)
+{
+ return 1;
+}
+
+static const struct acpi_device_id processor_container_ids[] = {
+ { ACPI_PROCESSOR_CONTAINER_HID, },
+ { }
+};
+
+static struct acpi_scan_handler processor_container_handler = {
+ .ids = processor_container_ids,
+ .attach = acpi_processor_container_attach,
+};
+
void __init acpi_processor_init(void)
{
acpi_scan_add_handler_with_hotplug(&processor_handler, "processor");
+ acpi_scan_add_handler(&processor_container_handler);
}
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index a76f8be..4361bc9 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -218,13 +218,6 @@ struct acpi_video_device {
struct thermal_cooling_device *cooling_dev;
};
-static const char device_decode[][30] = {
- "motherboard VGA device",
- "PCI VGA device",
- "AGP VGA device",
- "UNKNOWN",
-};
-
static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data);
static void acpi_video_device_rebind(struct acpi_video_bus *video);
static void acpi_video_device_bind(struct acpi_video_bus *video,
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 55c8197..51b073b 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -165,7 +165,7 @@ ACPI_GLOBAL(u8, acpi_gbl_next_owner_id_offset);
/* Initialization sequencing */
-ACPI_INIT_GLOBAL(u8, acpi_gbl_reg_methods_enabled, FALSE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_namespace_initialized, FALSE);
/* Misc */
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index e4977fac..9562a10 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -85,7 +85,7 @@ union acpi_parse_object;
#define ACPI_MTX_MEMORY 5 /* Debug memory tracking lists */
#define ACPI_MAX_MUTEX 5
-#define ACPI_NUM_MUTEX ACPI_MAX_MUTEX+1
+#define ACPI_NUM_MUTEX (ACPI_MAX_MUTEX+1)
/* Lock structure for reader/writer interfaces */
@@ -103,11 +103,11 @@ struct acpi_rw_lock {
#define ACPI_LOCK_HARDWARE 1
#define ACPI_MAX_LOCK 1
-#define ACPI_NUM_LOCK ACPI_MAX_LOCK+1
+#define ACPI_NUM_LOCK (ACPI_MAX_LOCK+1)
/* This Thread ID means that the mutex is not in use (unlocked) */
-#define ACPI_MUTEX_NOT_ACQUIRED (acpi_thread_id) 0
+#define ACPI_MUTEX_NOT_ACQUIRED ((acpi_thread_id) 0)
/* This Thread ID means an invalid thread ID */
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 9684ed6..022d69c 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -88,7 +88,7 @@
*/
acpi_status acpi_ns_initialize_objects(void);
-acpi_status acpi_ns_initialize_devices(void);
+acpi_status acpi_ns_initialize_devices(u32 flags);
/*
* nsload - Namespace loading
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 52f6bee..5faeab4 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -1125,7 +1125,7 @@ const union acpi_predefined_info acpi_gbl_resource_names[] = {
PACKAGE_INFO(0, 0, 0, 0, 0, 0) /* Table terminator */
};
-static const union acpi_predefined_info acpi_gbl_scope_names[] = {
+const union acpi_predefined_info acpi_gbl_scope_names[] = {
{{"_GPE", 0, 0}},
{{"_PR_", 0, 0}},
{{"_SB_", 0, 0}},
diff --git a/drivers/acpi/acpica/dbcmds.c b/drivers/acpi/acpica/dbcmds.c
index 7ec62c4..772178c 100644
--- a/drivers/acpi/acpica/dbcmds.c
+++ b/drivers/acpi/acpica/dbcmds.c
@@ -348,7 +348,7 @@ void acpi_db_display_table_info(char *table_arg)
} else {
/* If the pointer is null, the table has been unloaded */
- ACPI_INFO((AE_INFO, "%4.4s - Table has been unloaded",
+ ACPI_INFO(("%4.4s - Table has been unloaded",
table_desc->signature.ascii));
}
}
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 9fee88f..68f4e0f4 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -408,7 +408,7 @@ void acpi_db_dump_pld_buffer(union acpi_object *obj_desc)
new_buffer = acpi_db_encode_pld_buffer(pld_info);
if (!new_buffer) {
- return;
+ goto exit;
}
/* The two bit-packed buffers should match */
@@ -479,6 +479,7 @@ void acpi_db_dump_pld_buffer(union acpi_object *obj_desc)
pld_info->horizontal_offset);
}
- ACPI_FREE(pld_info);
ACPI_FREE(new_buffer);
+exit:
+ ACPI_FREE(pld_info);
}
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 6a72047..1982310 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -809,8 +809,7 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
if (method_desc->method.
info_flags & ACPI_METHOD_SERIALIZED_PENDING) {
if (walk_state) {
- ACPI_INFO((AE_INFO,
- "Marking method %4.4s as Serialized "
+ ACPI_INFO(("Marking method %4.4s as Serialized "
"because of AE_ALREADY_EXISTS error",
walk_state->method_node->name.
ascii));
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index c303e9d..a91de2b 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -524,8 +524,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
arg = arg->common.next;
}
- ACPI_INFO((AE_INFO,
- "Actual Package length (%u) is larger than "
+ ACPI_INFO(("Actual Package length (%u) is larger than "
"NumElements field (%u), truncated",
i, element_count));
} else if (i < element_count) {
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 9275e62..447fa1c 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -499,8 +499,7 @@ acpi_ev_initialize_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
}
if (gpe_enabled_count) {
- ACPI_INFO((AE_INFO,
- "Enabled %u GPEs in block %02X to %02X",
+ ACPI_INFO(("Enabled %u GPEs in block %02X to %02X",
gpe_enabled_count, (u32)gpe_block->block_base_number,
(u32)(gpe_block->block_base_number +
(gpe_block->gpe_count - 1))));
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 9fdd8d0..7dc7547 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -281,7 +281,7 @@ void acpi_ev_update_gpes(acpi_owner_id table_owner_id)
}
if (walk_info.count) {
- ACPI_INFO((AE_INFO, "Enabled %u new GPEs", walk_info.count));
+ ACPI_INFO(("Enabled %u new GPEs", walk_info.count));
}
(void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 47092b4..63924d1 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -600,7 +600,7 @@ acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function)
if (region_obj2->extra.method_REG == NULL ||
region_obj->region.handler == NULL ||
- !acpi_gbl_reg_methods_enabled) {
+ !acpi_gbl_namespace_initialized) {
return_ACPI_STATUS(AE_OK);
}
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 011df21..f741613 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -252,7 +252,7 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state,
status = acpi_get_table_by_index(table_index, &table);
if (ACPI_SUCCESS(status)) {
- ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:"));
+ ACPI_INFO(("Dynamic OEM Table Load:"));
acpi_tb_print_table_header(0, table);
}
@@ -472,7 +472,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
/* Install the new table into the local data structures */
- ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:"));
+ ACPI_INFO(("Dynamic OEM Table Load:"));
(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index 28eb861..5aa21c4 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -123,8 +123,10 @@ acpi_status acpi_ex_opcode_3A_0T_0R(struct acpi_walk_state *walk_state)
* op is intended for use by disassemblers in order to properly
* disassemble control method invocations. The opcode or group of
* opcodes should be surrounded by an "if (0)" clause to ensure that
- * AML interpreters never see the opcode.
+ * AML interpreters never see the opcode. Thus, something is
+ * wrong if an external opcode ever gets here.
*/
+ ACPI_ERROR((AE_INFO, "Executed External Op"));
status = AE_OK;
goto cleanup;
diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c
index 65d58be..5d59cfc 100644
--- a/drivers/acpi/acpica/nseval.c
+++ b/drivers/acpi/acpica/nseval.c
@@ -378,8 +378,7 @@ void acpi_ns_exec_module_code_list(void)
acpi_ut_remove_reference(prev);
}
- ACPI_INFO((AE_INFO,
- "Executed %u blocks of module-level executable AML code",
+ ACPI_INFO(("Executed %u blocks of module-level executable AML code",
method_count));
ACPI_FREE(info);
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index bd75d46..d4aa8b6 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -46,6 +46,7 @@
#include "acnamesp.h"
#include "acdispat.h"
#include "acinterp.h"
+#include "acevents.h"
#define _COMPONENT ACPI_NAMESPACE
ACPI_MODULE_NAME("nsinit")
@@ -83,6 +84,8 @@ acpi_status acpi_ns_initialize_objects(void)
ACPI_FUNCTION_TRACE(ns_initialize_objects);
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "[Init] Completing Initialization of ACPI Objects\n"));
ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
"**** Starting initialization of namespace objects ****\n"));
ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT,
@@ -133,82 +136,108 @@ acpi_status acpi_ns_initialize_objects(void)
*
******************************************************************************/
-acpi_status acpi_ns_initialize_devices(void)
+acpi_status acpi_ns_initialize_devices(u32 flags)
{
- acpi_status status;
+ acpi_status status = AE_OK;
struct acpi_device_walk_info info;
ACPI_FUNCTION_TRACE(ns_initialize_devices);
- /* Init counters */
+ if (!(flags & ACPI_NO_DEVICE_INIT)) {
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "[Init] Initializing ACPI Devices\n"));
- info.device_count = 0;
- info.num_STA = 0;
- info.num_INI = 0;
+ /* Init counters */
- ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT,
- "Initializing Device/Processor/Thermal objects "
- "and executing _INI/_STA methods:\n"));
+ info.device_count = 0;
+ info.num_STA = 0;
+ info.num_INI = 0;
- /* Tree analysis: find all subtrees that contain _INI methods */
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT,
+ "Initializing Device/Processor/Thermal objects "
+ "and executing _INI/_STA methods:\n"));
- status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, FALSE,
- acpi_ns_find_ini_methods, NULL, &info,
- NULL);
- if (ACPI_FAILURE(status)) {
- goto error_exit;
- }
+ /* Tree analysis: find all subtrees that contain _INI methods */
+
+ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, FALSE,
+ acpi_ns_find_ini_methods, NULL,
+ &info, NULL);
+ if (ACPI_FAILURE(status)) {
+ goto error_exit;
+ }
+
+ /* Allocate the evaluation information block */
- /* Allocate the evaluation information block */
+ info.evaluate_info =
+ ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
+ if (!info.evaluate_info) {
+ status = AE_NO_MEMORY;
+ goto error_exit;
+ }
- info.evaluate_info =
- ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
- if (!info.evaluate_info) {
- status = AE_NO_MEMORY;
- goto error_exit;
+ /*
+ * Execute the "global" _INI method that may appear at the root.
+ * This support is provided for Windows compatibility (Vista+) and
+ * is not part of the ACPI specification.
+ */
+ info.evaluate_info->prefix_node = acpi_gbl_root_node;
+ info.evaluate_info->relative_pathname = METHOD_NAME__INI;
+ info.evaluate_info->parameters = NULL;
+ info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE;
+
+ status = acpi_ns_evaluate(info.evaluate_info);
+ if (ACPI_SUCCESS(status)) {
+ info.num_INI++;
+ }
}
/*
- * Execute the "global" _INI method that may appear at the root. This
- * support is provided for Windows compatibility (Vista+) and is not
- * part of the ACPI specification.
+ * Run all _REG methods
+ *
+ * Note: Any objects accessed by the _REG methods will be automatically
+ * initialized, even if they contain executable AML (see the call to
+ * acpi_ns_initialize_objects below).
*/
- info.evaluate_info->prefix_node = acpi_gbl_root_node;
- info.evaluate_info->relative_pathname = METHOD_NAME__INI;
- info.evaluate_info->parameters = NULL;
- info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE;
+ if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) {
+ ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+ "[Init] Executing _REG OpRegion methods\n"));
- status = acpi_ns_evaluate(info.evaluate_info);
- if (ACPI_SUCCESS(status)) {
- info.num_INI++;
+ status = acpi_ev_initialize_op_regions();
+ if (ACPI_FAILURE(status)) {
+ goto error_exit;
+ }
}
- /* Walk namespace to execute all _INIs on present devices */
+ if (!(flags & ACPI_NO_DEVICE_INIT)) {
- status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, FALSE,
- acpi_ns_init_one_device, NULL, &info,
- NULL);
+ /* Walk namespace to execute all _INIs on present devices */
- /*
- * Any _OSI requests should be completed by now. If the BIOS has
- * requested any Windows OSI strings, we will always truncate
- * I/O addresses to 16 bits -- for Windows compatibility.
- */
- if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) {
- acpi_gbl_truncate_io_addresses = TRUE;
- }
+ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, FALSE,
+ acpi_ns_init_one_device, NULL,
+ &info, NULL);
- ACPI_FREE(info.evaluate_info);
- if (ACPI_FAILURE(status)) {
- goto error_exit;
- }
+ /*
+ * Any _OSI requests should be completed by now. If the BIOS has
+ * requested any Windows OSI strings, we will always truncate
+ * I/O addresses to 16 bits -- for Windows compatibility.
+ */
+ if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) {
+ acpi_gbl_truncate_io_addresses = TRUE;
+ }
- ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT,
- " Executed %u _INI methods requiring %u _STA executions "
- "(examined %u objects)\n",
- info.num_INI, info.num_STA, info.device_count));
+ ACPI_FREE(info.evaluate_info);
+ if (ACPI_FAILURE(status)) {
+ goto error_exit;
+ }
+
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT,
+ " Executed %u _INI methods requiring %u _STA executions "
+ "(examined %u objects)\n",
+ info.num_INI, info.num_STA,
+ info.device_count));
+ }
return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index b661a1e..4dc6108 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -267,8 +267,7 @@ acpi_tb_install_standard_table(acpi_physical_address address,
if (!reload &&
acpi_gbl_disable_ssdt_table_install &&
ACPI_COMPARE_NAME(&new_table_desc.signature, ACPI_SIG_SSDT)) {
- ACPI_INFO((AE_INFO,
- "Ignoring installation of %4.4s at %8.8X%8.8X",
+ ACPI_INFO(("Ignoring installation of %4.4s at %8.8X%8.8X",
new_table_desc.signature.ascii,
ACPI_FORMAT_UINT64(address)));
goto release_and_exit;
@@ -432,7 +431,7 @@ finish_override:
return;
}
- ACPI_INFO((AE_INFO, "%4.4s 0x%8.8X%8.8X"
+ ACPI_INFO(("%4.4s 0x%8.8X%8.8X"
" %s table override, new table: 0x%8.8X%8.8X",
old_table_desc->signature.ascii,
ACPI_FORMAT_UINT64(old_table_desc->address),
diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c
index fd4146d..26d61db 100644
--- a/drivers/acpi/acpica/tbprint.c
+++ b/drivers/acpi/acpica/tbprint.c
@@ -132,7 +132,7 @@ acpi_tb_print_table_header(acpi_physical_address address,
/* FACS only has signature and length fields */
- ACPI_INFO((AE_INFO, "%-4.4s 0x%8.8X%8.8X %06X",
+ ACPI_INFO(("%-4.4s 0x%8.8X%8.8X %06X",
header->signature, ACPI_FORMAT_UINT64(address),
header->length));
} else if (ACPI_VALIDATE_RSDP_SIG(header->signature)) {
@@ -144,7 +144,7 @@ acpi_tb_print_table_header(acpi_physical_address address,
ACPI_OEM_ID_SIZE);
acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE);
- ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)",
+ ACPI_INFO(("RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)",
ACPI_FORMAT_UINT64(address),
(ACPI_CAST_PTR(struct acpi_table_rsdp, header)->
revision >
@@ -158,8 +158,7 @@ acpi_tb_print_table_header(acpi_physical_address address,
acpi_tb_cleanup_table_header(&local_header, header);
- ACPI_INFO((AE_INFO,
- "%-4.4s 0x%8.8X%8.8X"
+ ACPI_INFO(("%-4.4s 0x%8.8X%8.8X"
" %06X (v%.2d %-6.6s %-8.8s %08X %-4.4s %08X)",
local_header.signature, ACPI_FORMAT_UINT64(address),
local_header.length, local_header.revision,
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 3269bef..9240c76 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -174,9 +174,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index)
ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL,
new_table);
- ACPI_INFO((AE_INFO,
- "Forced DSDT copy: length 0x%05X copied locally, original unmapped",
- new_table->length));
+ ACPI_INFO(("Forced DSDT copy: length 0x%05X copied locally, original unmapped", new_table->length));
return (new_table);
}
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 278666e..3151968 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -47,6 +47,7 @@
#include "accommon.h"
#include "acnamesp.h"
#include "actables.h"
+#include "acevents.h"
#define _COMPONENT ACPI_TABLES
ACPI_MODULE_NAME("tbxfload")
@@ -68,6 +69,25 @@ acpi_status __init acpi_load_tables(void)
ACPI_FUNCTION_TRACE(acpi_load_tables);
+ /*
+ * Install the default operation region handlers. These are the
+ * handlers that are defined by the ACPI specification to be
+ * "always accessible" -- namely, system_memory, system_IO, and
+ * PCI_Config. This also means that no _REG methods need to be
+ * run for these address spaces. We need to have these handlers
+ * installed before any AML code can be executed, especially any
+ * module-level code (11/2015).
+ * Note that we allow OSPMs to install their own region handlers
+ * between acpi_initialize_subsystem() and acpi_load_tables() to use
+ * their customized default region handlers.
+ */
+ status = acpi_ev_install_region_handlers();
+ if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
+ ACPI_EXCEPTION((AE_INFO, status,
+ "During Region initialization"));
+ return_ACPI_STATUS(status);
+ }
+
/* Load the namespace from the tables */
status = acpi_tb_load_namespace();
@@ -83,6 +103,20 @@ acpi_status __init acpi_load_tables(void)
"While loading namespace from ACPI tables"));
}
+ if (!acpi_gbl_group_module_level_code) {
+ /*
+ * Initialize the objects that remain uninitialized. This
+ * runs the executable AML that may be part of the
+ * declaration of these objects:
+ * operation_regions, buffer_fields, Buffers, and Packages.
+ */
+ status = acpi_ns_initialize_objects();
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+ }
+
+ acpi_gbl_namespace_initialized = TRUE;
return_ACPI_STATUS(status);
}
@@ -206,9 +240,7 @@ acpi_status acpi_tb_load_namespace(void)
}
if (!tables_failed) {
- ACPI_INFO((AE_INFO,
- "%u ACPI AML tables successfully acquired and loaded\n",
- tables_loaded));
+ ACPI_INFO(("%u ACPI AML tables successfully acquired and loaded\n", tables_loaded));
} else {
ACPI_ERROR((AE_INFO,
"%u table load failures, %u successful",
@@ -301,7 +333,7 @@ acpi_status acpi_load_table(struct acpi_table_header *table)
/* Install the table and load it into the namespace */
- ACPI_INFO((AE_INFO, "Host-directed Dynamic ACPI Table Load:"));
+ ACPI_INFO(("Host-directed Dynamic ACPI Table Load:"));
(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index c9a720f..f8e9978 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -245,7 +245,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache)
acpi_status status;
void *object;
- ACPI_FUNCTION_NAME(os_acquire_object);
+ ACPI_FUNCTION_TRACE(os_acquire_object);
if (!cache) {
return_PTR(NULL);
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index c427a5c..d5c3adf 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -140,6 +140,67 @@ int acpi_ut_stricmp(char *string1, char *string2)
return (c1 - c2);
}
+#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION)
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat
+ *
+ * PARAMETERS: Adds a "DestSize" parameter to each of the standard string
+ * functions. This is the size of the Destination buffer.
+ *
+ * RETURN: TRUE if the operation would overflow the destination buffer.
+ *
+ * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that
+ * the result of the operation will not overflow the output string
+ * buffer.
+ *
+ * NOTE: These functions are typically only helpful for processing
+ * user input and command lines. For most ACPICA code, the
+ * required buffer length is precisely calculated before buffer
+ * allocation, so the use of these functions is unnecessary.
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source)
+{
+
+ if (strlen(source) >= dest_size) {
+ return (TRUE);
+ }
+
+ strcpy(dest, source);
+ return (FALSE);
+}
+
+u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source)
+{
+
+ if ((strlen(dest) + strlen(source)) >= dest_size) {
+ return (TRUE);
+ }
+
+ strcat(dest, source);
+ return (FALSE);
+}
+
+u8
+acpi_ut_safe_strncat(char *dest,
+ acpi_size dest_size,
+ char *source, acpi_size max_transfer_length)
+{
+ acpi_size actual_transfer_length;
+
+ actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source));
+
+ if ((strlen(dest) + actual_transfer_length) >= dest_size) {
+ return (TRUE);
+ }
+
+ strncat(dest, source, max_transfer_length);
+ return (FALSE);
+}
+#endif
+
/*******************************************************************************
*
* FUNCTION: acpi_ut_strtoul64
@@ -155,7 +216,15 @@ int acpi_ut_stricmp(char *string1, char *string2)
* 32-bit or 64-bit conversion, depending on the current mode
* of the interpreter.
*
- * NOTE: Does not support Octal strings, not needed.
+ * NOTES: acpi_gbl_integer_byte_width should be set to the proper width.
+ * For the core ACPICA code, this width depends on the DSDT
+ * version. For iASL, the default byte width is always 8.
+ *
+ * Does not support Octal strings, not needed at this time.
+ *
+ * There is an earlier version of the function after this one,
+ * below. It is slightly different than this one, and the two
+ * may eventually may need to be merged. (01/2016).
*
******************************************************************************/
@@ -171,7 +240,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
u8 sign_of0x = 0;
u8 term = 0;
- ACPI_FUNCTION_TRACE_STR(ut_stroul64, string);
+ ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string);
switch (base) {
case ACPI_ANY_BASE:
@@ -318,63 +387,162 @@ error_exit:
}
}
-#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION)
+#ifdef _OBSOLETE_FUNCTIONS
+/* TBD: use version in ACPICA main code base? */
+/* DONE: 01/2016 */
+
/*******************************************************************************
*
- * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat
+ * FUNCTION: strtoul64
*
- * PARAMETERS: Adds a "DestSize" parameter to each of the standard string
- * functions. This is the size of the Destination buffer.
+ * PARAMETERS: string - Null terminated string
+ * terminater - Where a pointer to the terminating byte
+ * is returned
+ * base - Radix of the string
*
- * RETURN: TRUE if the operation would overflow the destination buffer.
+ * RETURN: Converted value
*
- * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that
- * the result of the operation will not overflow the output string
- * buffer.
- *
- * NOTE: These functions are typically only helpful for processing
- * user input and command lines. For most ACPICA code, the
- * required buffer length is precisely calculated before buffer
- * allocation, so the use of these functions is unnecessary.
+ * DESCRIPTION: Convert a string into an unsigned value.
*
******************************************************************************/
-u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source)
+acpi_status strtoul64(char *string, u32 base, u64 *ret_integer)
{
+ u32 index;
+ u32 sign;
+ u64 return_value = 0;
+ acpi_status status = AE_OK;
- if (strlen(source) >= dest_size) {
- return (TRUE);
+ *ret_integer = 0;
+
+ switch (base) {
+ case 0:
+ case 8:
+ case 10:
+ case 16:
+
+ break;
+
+ default:
+ /*
+ * The specified Base parameter is not in the domain of
+ * this function:
+ */
+ return (AE_BAD_PARAMETER);
}
- strcpy(dest, source);
- return (FALSE);
-}
+ /* Skip over any white space in the buffer: */
-u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source)
-{
+ while (isspace((int)*string) || *string == '\t') {
+ ++string;
+ }
- if ((strlen(dest) + strlen(source)) >= dest_size) {
- return (TRUE);
+ /*
+ * The buffer may contain an optional plus or minus sign.
+ * If it does, then skip over it but remember what is was:
+ */
+ if (*string == '-') {
+ sign = ACPI_SIGN_NEGATIVE;
+ ++string;
+ } else if (*string == '+') {
+ ++string;
+ sign = ACPI_SIGN_POSITIVE;
+ } else {
+ sign = ACPI_SIGN_POSITIVE;
}
- strcat(dest, source);
- return (FALSE);
-}
+ /*
+ * If the input parameter Base is zero, then we need to
+ * determine if it is octal, decimal, or hexadecimal:
+ */
+ if (base == 0) {
+ if (*string == '0') {
+ if (tolower((int)*(++string)) == 'x') {
+ base = 16;
+ ++string;
+ } else {
+ base = 8;
+ }
+ } else {
+ base = 10;
+ }
+ }
-u8
-acpi_ut_safe_strncat(char *dest,
- acpi_size dest_size,
- char *source, acpi_size max_transfer_length)
-{
- acpi_size actual_transfer_length;
+ /*
+ * For octal and hexadecimal bases, skip over the leading
+ * 0 or 0x, if they are present.
+ */
+ if (base == 8 && *string == '0') {
+ string++;
+ }
- actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source));
+ if (base == 16 && *string == '0' && tolower((int)*(++string)) == 'x') {
+ string++;
+ }
- if ((strlen(dest) + actual_transfer_length) >= dest_size) {
- return (TRUE);
+ /* Main loop: convert the string to an unsigned long */
+
+ while (*string) {
+ if (isdigit((int)*string)) {
+ index = ((u8)*string) - '0';
+ } else {
+ index = (u8)toupper((int)*string);
+ if (isupper((int)index)) {
+ index = index - 'A' + 10;
+ } else {
+ goto error_exit;
+ }
+ }
+
+ if (index >= base) {
+ goto error_exit;
+ }
+
+ /* Check to see if value is out of range: */
+
+ if (return_value > ((ACPI_UINT64_MAX - (u64)index) / (u64)base)) {
+ goto error_exit;
+ } else {
+ return_value *= base;
+ return_value += index;
+ }
+
+ ++string;
}
- strncat(dest, source, max_transfer_length);
- return (FALSE);
+ /* If a minus sign was present, then "the conversion is negated": */
+
+ if (sign == ACPI_SIGN_NEGATIVE) {
+ return_value = (ACPI_UINT32_MAX - return_value) + 1;
+ }
+
+ *ret_integer = return_value;
+ return (status);
+
+error_exit:
+ switch (base) {
+ case 8:
+
+ status = AE_BAD_OCTAL_CONSTANT;
+ break;
+
+ case 10:
+
+ status = AE_BAD_DECIMAL_CONSTANT;
+ break;
+
+ case 16:
+
+ status = AE_BAD_HEX_CONSTANT;
+ break;
+
+ default:
+
+ /* Base validated above */
+
+ break;
+ }
+
+ return (status);
}
#endif
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index c7c2bb8..60c406a 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -712,7 +712,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module)
/* Print summary */
if (!num_outstanding) {
- ACPI_INFO((AE_INFO, "No outstanding allocations"));
+ ACPI_INFO(("No outstanding allocations"));
} else {
ACPI_ERROR((AE_INFO, "%u(0x%X) Outstanding allocations",
num_outstanding, num_outstanding));
diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c
index 6fe5959..d9f15cb 100644
--- a/drivers/acpi/acpica/utxferror.c
+++ b/drivers/acpi/acpica/utxferror.c
@@ -175,8 +175,7 @@ ACPI_EXPORT_SYMBOL(acpi_warning)
* TBD: module_name and line_number args are not needed, should be removed.
*
******************************************************************************/
-void ACPI_INTERNAL_VAR_XFACE
-acpi_info(const char *module_name, u32 line_number, const char *format, ...)
+void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...)
{
va_list arg_list;
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index 721b87c..75b5f27 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -154,21 +154,6 @@ acpi_status __init acpi_enable_subsystem(u32 flags)
*/
acpi_gbl_early_initialization = FALSE;
- /*
- * Install the default operation region handlers. These are the
- * handlers that are defined by the ACPI specification to be
- * "always accessible" -- namely, system_memory, system_IO, and
- * PCI_Config. This also means that no _REG methods need to be
- * run for these address spaces. We need to have these handlers
- * installed before any AML code can be executed, especially any
- * module-level code (11/2015).
- */
- status = acpi_ev_install_region_handlers();
- if (ACPI_FAILURE(status)) {
- ACPI_EXCEPTION((AE_INFO, status,
- "During Region initialization"));
- return_ACPI_STATUS(status);
- }
#if (!ACPI_REDUCED_HARDWARE)
/* Enable ACPI mode */
@@ -260,23 +245,6 @@ acpi_status __init acpi_initialize_objects(u32 flags)
ACPI_FUNCTION_TRACE(acpi_initialize_objects);
- /*
- * Run all _REG methods
- *
- * Note: Any objects accessed by the _REG methods will be automatically
- * initialized, even if they contain executable AML (see the call to
- * acpi_ns_initialize_objects below).
- */
- acpi_gbl_reg_methods_enabled = TRUE;
- if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) {
- ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
- "[Init] Executing _REG OpRegion methods\n"));
-
- status = acpi_ev_initialize_op_regions();
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
- }
#ifdef ACPI_EXEC_APP
/*
* This call implements the "initialization file" option for acpi_exec.
@@ -299,32 +267,27 @@ acpi_status __init acpi_initialize_objects(u32 flags)
*/
if (acpi_gbl_group_module_level_code) {
acpi_ns_exec_module_code_list();
- }
-
- /*
- * Initialize the objects that remain uninitialized. This runs the
- * executable AML that may be part of the declaration of these objects:
- * operation_regions, buffer_fields, Buffers, and Packages.
- */
- if (!(flags & ACPI_NO_OBJECT_INIT)) {
- ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
- "[Init] Completing Initialization of ACPI Objects\n"));
- status = acpi_ns_initialize_objects();
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
+ /*
+ * Initialize the objects that remain uninitialized. This
+ * runs the executable AML that may be part of the
+ * declaration of these objects:
+ * operation_regions, buffer_fields, Buffers, and Packages.
+ */
+ if (!(flags & ACPI_NO_OBJECT_INIT)) {
+ status = acpi_ns_initialize_objects();
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
}
}
/*
- * Initialize all device objects in the namespace. This runs the device
- * _STA and _INI methods.
+ * Initialize all device/region objects in the namespace. This runs
+ * the device _STA and _INI methods and region _REG methods.
*/
- if (!(flags & ACPI_NO_DEVICE_INIT)) {
- ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
- "[Init] Initializing ACPI Devices\n"));
-
- status = acpi_ns_initialize_devices();
+ if (!(flags & (ACPI_NO_DEVICE_INIT | ACPI_NO_ADDRESS_SPACE_INIT))) {
+ status = acpi_ns_initialize_devices(flags);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index a2c8d7a..da370e1 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -536,7 +536,8 @@ int apei_resources_request(struct apei_resources *resources,
goto err_unmap_ioport;
}
- return 0;
+ goto arch_res_fini;
+
err_unmap_ioport:
list_for_each_entry(res, &resources->ioport, list) {
if (res == res_bak)
@@ -551,7 +552,8 @@ err_unmap_iomem:
release_mem_region(res->start, res->end - res->start);
}
arch_res_fini:
- apei_resources_fini(&arch_res);
+ if (arch_apei_filter_addr)
+ apei_resources_fini(&arch_res);
nvs_res_fini:
apei_resources_fini(&nvs_resources);
return rc;
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6e6bc10..006c389 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1207,6 +1207,9 @@ static int __init erst_init(void)
"Failed to allocate %lld bytes for persistent store error log.\n",
erst_erange.size);
+ /* Cleanup ERST Resources */
+ apei_resources_fini(&erst_resources);
+
return 0;
err_release_erange:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3dd9c46..60746ef 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,7 +26,7 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -79,6 +79,11 @@
((struct acpi_hest_generic_status *) \
((struct ghes_estatus_node *)(estatus_node) + 1))
+/*
+ * This driver isn't really modular, however for the time being,
+ * continuing to use module_param is the easiest way to remain
+ * compatible with existing boot arg use cases.
+ */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
@@ -1148,18 +1153,4 @@ err_ioremap_exit:
err:
return rc;
}
-
-static void __exit ghes_exit(void)
-{
- platform_driver_unregister(&ghes_platform_driver);
- ghes_estatus_pool_exit();
- ghes_ioremap_exit();
-}
-
-module_init(ghes_init);
-module_exit(ghes_exit);
-
-MODULE_AUTHOR("Huang Ying");
-MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:GHES");
+device_initcall(ghes_init);
diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index a83e3c6..75f128e 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -1,4 +1,6 @@
/*
+ * BGRT boot graphic support
+ * Authors: Matthew Garrett, Josh Triplett <josh@joshtriplett.org>
* Copyright 2012 Red Hat, Inc <mjg@redhat.com>
* Copyright 2012 Intel Corporation
*
@@ -8,7 +10,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/sysfs.h>
@@ -103,9 +104,4 @@ out_kobject:
kobject_put(bgrt_kobj);
return ret;
}
-
-module_init(bgrt_init);
-
-MODULE_AUTHOR("Matthew Garrett, Josh Triplett <josh@joshtriplett.org>");
-MODULE_DESCRIPTION("BGRT boot graphic support");
-MODULE_LICENSE("GPL");
+device_initcall(bgrt_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 891c42d..0e85678 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -479,24 +479,38 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device)
Device Matching
-------------------------------------------------------------------------- */
-static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev,
- const struct device *dev)
+/**
+ * acpi_get_first_physical_node - Get first physical node of an ACPI device
+ * @adev: ACPI device in question
+ *
+ * Return: First physical node of ACPI device @adev
+ */
+struct device *acpi_get_first_physical_node(struct acpi_device *adev)
{
struct mutex *physical_node_lock = &adev->physical_node_lock;
+ struct device *phys_dev;
mutex_lock(physical_node_lock);
if (list_empty(&adev->physical_node_list)) {
- adev = NULL;
+ phys_dev = NULL;
} else {
const struct acpi_device_physical_node *node;
node = list_first_entry(&adev->physical_node_list,
struct acpi_device_physical_node, node);
- if (node->dev != dev)
- adev = NULL;
+
+ phys_dev = node->dev;
}
mutex_unlock(physical_node_lock);
- return adev;
+ return phys_dev;
+}
+
+static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev,
+ const struct device *dev)
+{
+ const struct device *phys_dev = acpi_get_first_physical_node(adev);
+
+ return phys_dev && phys_dev == dev ? adev : NULL;
}
/**
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 6730f96..8adac69 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -39,6 +39,7 @@
#include <linux/cpufreq.h>
#include <linux/delay.h>
+#include <linux/ktime.h>
#include <acpi/cppc_acpi.h>
/*
@@ -63,58 +64,140 @@ static struct mbox_chan *pcc_channel;
static void __iomem *pcc_comm_addr;
static u64 comm_base_addr;
static int pcc_subspace_idx = -1;
-static u16 pcc_cmd_delay;
static bool pcc_channel_acquired;
+static ktime_t deadline;
+static unsigned int pcc_mpar, pcc_mrtt;
+
+/* pcc mapped address + header size + offset within PCC subspace */
+#define GET_PCC_VADDR(offs) (pcc_comm_addr + 0x8 + (offs))
/*
* Arbitrary Retries in case the remote processor is slow to respond
- * to PCC commands.
+ * to PCC commands. Keeping it high enough to cover emulators where
+ * the processors run painfully slow.
*/
#define NUM_RETRIES 500
+static int check_pcc_chan(void)
+{
+ int ret = -EIO;
+ struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_comm_addr;
+ ktime_t next_deadline = ktime_add(ktime_get(), deadline);
+
+ /* Retry in case the remote processor was too slow to catch up. */
+ while (!ktime_after(ktime_get(), next_deadline)) {
+ /*
+ * Per spec, prior to boot the PCC space wil be initialized by
+ * platform and should have set the command completion bit when
+ * PCC can be used by OSPM
+ */
+ if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) {
+ ret = 0;
+ break;
+ }
+ /*
+ * Reducing the bus traffic in case this loop takes longer than
+ * a few retries.
+ */
+ udelay(3);
+ }
+
+ return ret;
+}
+
static int send_pcc_cmd(u16 cmd)
{
- int retries, result = -EIO;
- struct acpi_pcct_hw_reduced *pcct_ss = pcc_channel->con_priv;
+ int ret = -EIO;
struct acpi_pcct_shared_memory *generic_comm_base =
(struct acpi_pcct_shared_memory *) pcc_comm_addr;
- u32 cmd_latency = pcct_ss->latency;
+ static ktime_t last_cmd_cmpl_time, last_mpar_reset;
+ static int mpar_count;
+ unsigned int time_delta;
- /* Min time OS should wait before sending next command. */
- udelay(pcc_cmd_delay);
+ /*
+ * For CMD_WRITE we know for a fact the caller should have checked
+ * the channel before writing to PCC space
+ */
+ if (cmd == CMD_READ) {
+ ret = check_pcc_chan();
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Handle the Minimum Request Turnaround Time(MRTT)
+ * "The minimum amount of time that OSPM must wait after the completion
+ * of a command before issuing the next command, in microseconds"
+ */
+ if (pcc_mrtt) {
+ time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
+ if (pcc_mrtt > time_delta)
+ udelay(pcc_mrtt - time_delta);
+ }
+
+ /*
+ * Handle the non-zero Maximum Periodic Access Rate(MPAR)
+ * "The maximum number of periodic requests that the subspace channel can
+ * support, reported in commands per minute. 0 indicates no limitation."
+ *
+ * This parameter should be ideally zero or large enough so that it can
+ * handle maximum number of requests that all the cores in the system can
+ * collectively generate. If it is not, we will follow the spec and just
+ * not send the request to the platform after hitting the MPAR limit in
+ * any 60s window
+ */
+ if (pcc_mpar) {
+ if (mpar_count == 0) {
+ time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
+ if (time_delta < 60 * MSEC_PER_SEC) {
+ pr_debug("PCC cmd not sent due to MPAR limit");
+ return -EIO;
+ }
+ last_mpar_reset = ktime_get();
+ mpar_count = pcc_mpar;
+ }
+ mpar_count--;
+ }
/* Write to the shared comm region. */
- writew(cmd, &generic_comm_base->command);
+ writew_relaxed(cmd, &generic_comm_base->command);
/* Flip CMD COMPLETE bit */
- writew(0, &generic_comm_base->status);
+ writew_relaxed(0, &generic_comm_base->status);
/* Ring doorbell */
- result = mbox_send_message(pcc_channel, &cmd);
- if (result < 0) {
+ ret = mbox_send_message(pcc_channel, &cmd);
+ if (ret < 0) {
pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n",
- cmd, result);
- return result;
+ cmd, ret);
+ return ret;
}
- /* Wait for a nominal time to let platform process command. */
- udelay(cmd_latency);
-
- /* Retry in case the remote processor was too slow to catch up. */
- for (retries = NUM_RETRIES; retries > 0; retries--) {
- if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) {
- result = 0;
- break;
- }
+ /*
+ * For READs we need to ensure the cmd completed to ensure
+ * the ensuing read()s can proceed. For WRITEs we dont care
+ * because the actual write()s are done before coming here
+ * and the next READ or WRITE will check if the channel
+ * is busy/free at the entry of this call.
+ *
+ * If Minimum Request Turnaround Time is non-zero, we need
+ * to record the completion time of both READ and WRITE
+ * command for proper handling of MRTT, so we need to check
+ * for pcc_mrtt in addition to CMD_READ
+ */
+ if (cmd == CMD_READ || pcc_mrtt) {
+ ret = check_pcc_chan();
+ if (pcc_mrtt)
+ last_cmd_cmpl_time = ktime_get();
}
- mbox_client_txdone(pcc_channel, result);
- return result;
+ mbox_client_txdone(pcc_channel, ret);
+ return ret;
}
static void cppc_chan_tx_done(struct mbox_client *cl, void *msg, int ret)
{
- if (ret)
+ if (ret < 0)
pr_debug("TX did not complete: CMD sent:%x, ret:%d\n",
*(u16 *)msg, ret);
else
@@ -306,6 +389,7 @@ static int register_pcc_channel(int pcc_subspace_idx)
{
struct acpi_pcct_hw_reduced *cppc_ss;
unsigned int len;
+ u64 usecs_lat;
if (pcc_subspace_idx >= 0) {
pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl,
@@ -335,7 +419,16 @@ static int register_pcc_channel(int pcc_subspace_idx)
*/
comm_base_addr = cppc_ss->base_address;
len = cppc_ss->length;
- pcc_cmd_delay = cppc_ss->min_turnaround_time;
+
+ /*
+ * cppc_ss->latency is just a Nominal value. In reality
+ * the remote processor could be much slower to reply.
+ * So add an arbitrary amount of wait on top of Nominal.
+ */
+ usecs_lat = NUM_RETRIES * cppc_ss->latency;
+ deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
+ pcc_mrtt = cppc_ss->min_turnaround_time;
+ pcc_mpar = cppc_ss->max_access_rate;
pcc_comm_addr = acpi_os_ioremap(comm_base_addr, len);
if (!pcc_comm_addr) {
@@ -546,29 +639,74 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr)
}
EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit);
-static u64 get_phys_addr(struct cpc_reg *reg)
-{
- /* PCC communication addr space begins at byte offset 0x8. */
- if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
- return (u64)comm_base_addr + 0x8 + reg->address;
- else
- return reg->address;
-}
+/*
+ * Since cpc_read and cpc_write are called while holding pcc_lock, it should be
+ * as fast as possible. We have already mapped the PCC subspace during init, so
+ * we can directly write to it.
+ */
-static void cpc_read(struct cpc_reg *reg, u64 *val)
+static int cpc_read(struct cpc_reg *reg, u64 *val)
{
- u64 addr = get_phys_addr(reg);
+ int ret_val = 0;
- acpi_os_read_memory((acpi_physical_address)addr,
- val, reg->bit_width);
+ *val = 0;
+ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+
+ switch (reg->bit_width) {
+ case 8:
+ *val = readb_relaxed(vaddr);
+ break;
+ case 16:
+ *val = readw_relaxed(vaddr);
+ break;
+ case 32:
+ *val = readl_relaxed(vaddr);
+ break;
+ case 64:
+ *val = readq_relaxed(vaddr);
+ break;
+ default:
+ pr_debug("Error: Cannot read %u bit width from PCC\n",
+ reg->bit_width);
+ ret_val = -EFAULT;
+ }
+ } else
+ ret_val = acpi_os_read_memory((acpi_physical_address)reg->address,
+ val, reg->bit_width);
+ return ret_val;
}
-static void cpc_write(struct cpc_reg *reg, u64 val)
+static int cpc_write(struct cpc_reg *reg, u64 val)
{
- u64 addr = get_phys_addr(reg);
+ int ret_val = 0;
- acpi_os_write_memory((acpi_physical_address)addr,
- val, reg->bit_width);
+ if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+
+ switch (reg->bit_width) {
+ case 8:
+ writeb_relaxed(val, vaddr);
+ break;
+ case 16:
+ writew_relaxed(val, vaddr);
+ break;
+ case 32:
+ writel_relaxed(val, vaddr);
+ break;
+ case 64:
+ writeq_relaxed(val, vaddr);
+ break;
+ default:
+ pr_debug("Error: Cannot write %u bit width to PCC\n",
+ reg->bit_width);
+ ret_val = -EFAULT;
+ break;
+ }
+ } else
+ ret_val = acpi_os_write_memory((acpi_physical_address)reg->address,
+ val, reg->bit_width);
+ return ret_val;
}
/**
@@ -604,7 +742,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
(ref_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
(nom_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
/* Ring doorbell once to update PCC subspace */
- if (send_pcc_cmd(CMD_READ)) {
+ if (send_pcc_cmd(CMD_READ) < 0) {
ret = -EIO;
goto out_err;
}
@@ -662,7 +800,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
if ((delivered_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
(reference_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
/* Ring doorbell once to update PCC subspace */
- if (send_pcc_cmd(CMD_READ)) {
+ if (send_pcc_cmd(CMD_READ) < 0) {
ret = -EIO;
goto out_err;
}
@@ -713,6 +851,13 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
spin_lock(&pcc_lock);
+ /* If this is PCC reg, check if channel is free before writing */
+ if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ ret = check_pcc_chan();
+ if (ret)
+ goto busy_channel;
+ }
+
/*
* Skip writing MIN/MAX until Linux knows how to come up with
* useful values.
@@ -722,10 +867,10 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
/* Is this a PCC reg ?*/
if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
/* Ring doorbell so Remote can get our perf request. */
- if (send_pcc_cmd(CMD_WRITE))
+ if (send_pcc_cmd(CMD_WRITE) < 0)
ret = -EIO;
}
-
+busy_channel:
spin_unlock(&pcc_lock);
return ret;
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index bea8e42..6c7dd7a 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -73,6 +73,9 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
loff_t init_off = *off;
int err = 0;
+ if (!write_support)
+ return -EINVAL;
+
if (*off >= EC_SPACE_SIZE)
return 0;
if (*off + count >= EC_SPACE_SIZE) {
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 6322db6..384cfc3 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -46,7 +46,7 @@ MODULE_DEVICE_TABLE(acpi, fan_device_ids);
#ifdef CONFIG_PM_SLEEP
static int acpi_fan_suspend(struct device *dev);
static int acpi_fan_resume(struct device *dev);
-static struct dev_pm_ops acpi_fan_pm = {
+static const struct dev_pm_ops acpi_fan_pm = {
.resume = acpi_fan_resume,
.freeze = acpi_fan_suspend,
.thaw = acpi_fan_resume,
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 1e6833a..a37508e 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -20,6 +20,7 @@
#define PREFIX "ACPI: "
+void acpi_initrd_initialize_tables(void);
acpi_status acpi_os_initialize1(void);
void init_acpi_device_notify(void);
int acpi_scan_init(void);
@@ -29,6 +30,11 @@ void acpi_processor_init(void);
void acpi_platform_init(void);
void acpi_pnp_init(void);
void acpi_int340x_thermal_init(void);
+#ifdef CONFIG_ARM_AMBA
+void acpi_amba_init(void);
+#else
+static inline void acpi_amba_init(void) {}
+#endif
int acpi_sysfs_init(void);
void acpi_container_init(void);
void acpi_memory_hotplug_init(void);
@@ -106,6 +112,7 @@ bool acpi_device_is_present(struct acpi_device *adev);
bool acpi_device_is_battery(struct acpi_device *adev);
bool acpi_device_is_first_physical_node(struct acpi_device *adev,
const struct device *dev);
+struct device *acpi_get_first_physical_node(struct acpi_device *adev);
/* --------------------------------------------------------------------------
Device Matching and Notification
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 67da6fb..814d5f8 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -602,6 +602,14 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
return AE_OK;
}
+static void acpi_table_taint(struct acpi_table_header *table)
+{
+ pr_warn(PREFIX
+ "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
+ table->signature, table->oem_table_id);
+ add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
+}
+
#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
#include <linux/earlycpio.h>
#include <linux/memblock.h>
@@ -636,6 +644,7 @@ static const char * const table_sigs[] = {
#define ACPI_OVERRIDE_TABLES 64
static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES];
+static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES);
#define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT)
@@ -746,96 +755,125 @@ void __init acpi_initrd_override(void *data, size_t size)
}
}
}
-#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
-static void acpi_table_taint(struct acpi_table_header *table)
+acpi_status
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *address, u32 *length)
{
- pr_warn(PREFIX
- "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
- table->signature, table->oem_table_id);
- add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
-}
+ int table_offset = 0;
+ int table_index = 0;
+ struct acpi_table_header *table;
+ u32 table_length;
+ *length = 0;
+ *address = 0;
+ if (!acpi_tables_addr)
+ return AE_OK;
-acpi_status
-acpi_os_table_override(struct acpi_table_header * existing_table,
- struct acpi_table_header ** new_table)
-{
- if (!existing_table || !new_table)
- return AE_BAD_PARAMETER;
+ while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
+ table = acpi_os_map_memory(acpi_tables_addr + table_offset,
+ ACPI_HEADER_SIZE);
+ if (table_offset + table->length > all_tables_size) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ WARN_ON(1);
+ return AE_OK;
+ }
- *new_table = NULL;
+ table_length = table->length;
-#ifdef CONFIG_ACPI_CUSTOM_DSDT
- if (strncmp(existing_table->signature, "DSDT", 4) == 0)
- *new_table = (struct acpi_table_header *)AmlCode;
-#endif
- if (*new_table != NULL)
+ /* Only override tables matched */
+ if (test_bit(table_index, acpi_initrd_installed) ||
+ memcmp(existing_table->signature, table->signature, 4) ||
+ memcmp(table->oem_table_id, existing_table->oem_table_id,
+ ACPI_OEM_TABLE_ID_SIZE)) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
+ }
+
+ *length = table_length;
+ *address = acpi_tables_addr + table_offset;
acpi_table_taint(existing_table);
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ set_bit(table_index, acpi_initrd_installed);
+ break;
+
+next_table:
+ table_offset += table_length;
+ table_index++;
+ }
return AE_OK;
}
-acpi_status
-acpi_os_physical_table_override(struct acpi_table_header *existing_table,
- acpi_physical_address *address,
- u32 *table_length)
+void __init acpi_initrd_initialize_tables(void)
{
-#ifndef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
- *table_length = 0;
- *address = 0;
- return AE_OK;
-#else
int table_offset = 0;
+ int table_index = 0;
+ u32 table_length;
struct acpi_table_header *table;
- *table_length = 0;
- *address = 0;
-
if (!acpi_tables_addr)
- return AE_OK;
-
- do {
- if (table_offset + ACPI_HEADER_SIZE > all_tables_size) {
- WARN_ON(1);
- return AE_OK;
- }
+ return;
+ while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
table = acpi_os_map_memory(acpi_tables_addr + table_offset,
ACPI_HEADER_SIZE);
-
if (table_offset + table->length > all_tables_size) {
acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
WARN_ON(1);
- return AE_OK;
+ return;
}
- table_offset += table->length;
+ table_length = table->length;
- if (memcmp(existing_table->signature, table->signature, 4)) {
- acpi_os_unmap_memory(table,
- ACPI_HEADER_SIZE);
- continue;
- }
-
- /* Only override tables with matching oem id */
- if (memcmp(table->oem_table_id, existing_table->oem_table_id,
- ACPI_OEM_TABLE_ID_SIZE)) {
- acpi_os_unmap_memory(table,
- ACPI_HEADER_SIZE);
- continue;
+ /* Skip RSDT/XSDT which should only be used for override */
+ if (test_bit(table_index, acpi_initrd_installed) ||
+ ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) ||
+ ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
}
- table_offset -= table->length;
- *table_length = table->length;
+ acpi_table_taint(table);
acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- *address = acpi_tables_addr + table_offset;
- break;
- } while (table_offset + ACPI_HEADER_SIZE < all_tables_size);
+ acpi_install_table(acpi_tables_addr + table_offset, TRUE);
+ set_bit(table_index, acpi_initrd_installed);
+next_table:
+ table_offset += table_length;
+ table_index++;
+ }
+}
+#else
+acpi_status
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *address,
+ u32 *table_length)
+{
+ *table_length = 0;
+ *address = 0;
+ return AE_OK;
+}
+
+void __init acpi_initrd_initialize_tables(void)
+{
+}
+#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
- if (*address != 0)
+acpi_status
+acpi_os_table_override(struct acpi_table_header *existing_table,
+ struct acpi_table_header **new_table)
+{
+ if (!existing_table || !new_table)
+ return AE_BAD_PARAMETER;
+
+ *new_table = NULL;
+
+#ifdef CONFIG_ACPI_CUSTOM_DSDT
+ if (strncmp(existing_table->signature, "DSDT", 4) == 0)
+ *new_table = (struct acpi_table_header *)AmlCode;
+#endif
+ if (*new_table != NULL)
acpi_table_taint(existing_table);
return AE_OK;
-#endif
}
static irqreturn_t acpi_irq(int irq, void *dev_id)
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index c8e169e..2c45dd3 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -33,6 +33,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/slab.h>
+#include <linux/interrupt.h>
#define PREFIX "ACPI: "
@@ -387,6 +388,23 @@ static inline int acpi_isa_register_gsi(struct pci_dev *dev)
}
#endif
+static inline bool acpi_pci_irq_valid(struct pci_dev *dev, u8 pin)
+{
+#ifdef CONFIG_X86
+ /*
+ * On x86 irq line 0xff means "unknown" or "no connection"
+ * (PCI 3.0, Section 6.2.4, footnote on page 223).
+ */
+ if (dev->irq == 0xff) {
+ dev->irq = IRQ_NOTCONNECTED;
+ dev_warn(&dev->dev, "PCI INT %c: not connected\n",
+ pin_name(pin));
+ return false;
+ }
+#endif
+ return true;
+}
+
int acpi_pci_irq_enable(struct pci_dev *dev)
{
struct acpi_prt_entry *entry;
@@ -431,11 +449,14 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
} else
gsi = -1;
- /*
- * No IRQ known to the ACPI subsystem - maybe the BIOS /
- * driver reported one, then use it. Exit in any case.
- */
if (gsi < 0) {
+ /*
+ * No IRQ known to the ACPI subsystem - maybe the BIOS /
+ * driver reported one, then use it. Exit in any case.
+ */
+ if (!acpi_pci_irq_valid(dev, pin))
+ return 0;
+
if (acpi_isa_register_gsi(dev))
dev_warn(&dev->dev, "PCI INT %c: no GSI\n",
pin_name(pin));
diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c
index 42df46a..fcd1852 100644
--- a/drivers/acpi/pmic/intel_pmic_crc.c
+++ b/drivers/acpi/pmic/intel_pmic_crc.c
@@ -13,7 +13,7 @@
* GNU General Public License for more details.
*/
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/mfd/intel_soc_pmic.h>
#include <linux/regmap.h>
@@ -205,7 +205,4 @@ static int __init intel_crc_pmic_opregion_driver_init(void)
{
return platform_driver_register(&intel_crc_pmic_opregion_driver);
}
-module_init(intel_crc_pmic_opregion_driver_init);
-
-MODULE_DESCRIPTION("CrystalCove ACPI operation region driver");
-MODULE_LICENSE("GPL");
+device_initcall(intel_crc_pmic_opregion_driver_init);
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 11154a3..d2fa8cb 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -314,7 +314,6 @@ static int __init acpi_processor_driver_init(void)
if (result < 0)
return result;
- acpi_processor_syscore_init();
register_hotcpu_notifier(&acpi_cpu_notifier);
acpi_thermal_cpufreq_init();
acpi_processor_ppc_init();
@@ -330,7 +329,6 @@ static void __exit acpi_processor_driver_exit(void)
acpi_processor_ppc_exit();
acpi_thermal_cpufreq_exit();
unregister_hotcpu_notifier(&acpi_cpu_notifier);
- acpi_processor_syscore_exit();
driver_unregister(&acpi_processor_driver);
}
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 9ca2b2f..444e374 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -23,6 +23,7 @@
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+#define pr_fmt(fmt) "ACPI: " fmt
#include <linux/module.h>
#include <linux/acpi.h>
@@ -30,7 +31,6 @@
#include <linux/sched.h> /* need_resched() */
#include <linux/tick.h>
#include <linux/cpuidle.h>
-#include <linux/syscore_ops.h>
#include <acpi/processor.h>
/*
@@ -43,8 +43,6 @@
#include <asm/apic.h>
#endif
-#define PREFIX "ACPI: "
-
#define ACPI_PROCESSOR_CLASS "processor"
#define _COMPONENT ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
@@ -81,9 +79,9 @@ static int set_max_cstate(const struct dmi_system_id *id)
if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
return 0;
- printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
- " Override with \"processor.max_cstate=%d\"\n", id->ident,
- (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
+ pr_notice("%s detected - limiting to C%ld max_cstate."
+ " Override with \"processor.max_cstate=%d\"\n", id->ident,
+ (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
max_cstate = (long)id->driver_data;
@@ -194,42 +192,6 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr,
#endif
-#ifdef CONFIG_PM_SLEEP
-static u32 saved_bm_rld;
-
-static int acpi_processor_suspend(void)
-{
- acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
- return 0;
-}
-
-static void acpi_processor_resume(void)
-{
- u32 resumed_bm_rld = 0;
-
- acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);
- if (resumed_bm_rld == saved_bm_rld)
- return;
-
- acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
-}
-
-static struct syscore_ops acpi_processor_syscore_ops = {
- .suspend = acpi_processor_suspend,
- .resume = acpi_processor_resume,
-};
-
-void acpi_processor_syscore_init(void)
-{
- register_syscore_ops(&acpi_processor_syscore_ops);
-}
-
-void acpi_processor_syscore_exit(void)
-{
- unregister_syscore_ops(&acpi_processor_syscore_ops);
-}
-#endif /* CONFIG_PM_SLEEP */
-
#if defined(CONFIG_X86)
static void tsc_check_state(int state)
{
@@ -351,7 +313,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
/* There must be at least 2 elements */
if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
- printk(KERN_ERR PREFIX "not enough elements in _CST\n");
+ pr_err("not enough elements in _CST\n");
ret = -EFAULT;
goto end;
}
@@ -360,7 +322,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
/* Validate number of power states. */
if (count < 1 || count != cst->package.count - 1) {
- printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
+ pr_err("count given by _CST is not valid\n");
ret = -EFAULT;
goto end;
}
@@ -469,11 +431,9 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
* (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
*/
if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
- printk(KERN_WARNING
- "Limiting number of power states to max (%d)\n",
- ACPI_PROCESSOR_MAX_POWER);
- printk(KERN_WARNING
- "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
+ pr_warn("Limiting number of power states to max (%d)\n",
+ ACPI_PROCESSOR_MAX_POWER);
+ pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
break;
}
}
@@ -1097,8 +1057,8 @@ int acpi_processor_power_init(struct acpi_processor *pr)
retval = cpuidle_register_driver(&acpi_idle_driver);
if (retval)
return retval;
- printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n",
- acpi_idle_driver.name);
+ pr_debug("%s registered with cpuidle\n",
+ acpi_idle_driver.name);
}
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 407a376..5f28cf7 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1930,6 +1930,7 @@ int __init acpi_scan_init(void)
acpi_memory_hotplug_init();
acpi_pnp_init();
acpi_int340x_thermal_init();
+ acpi_amba_init();
acpi_scan_add_handler(&generic_device_handler);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 9cb9752..fbfcce3 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -19,6 +19,7 @@
#include <linux/reboot.h>
#include <linux/acpi.h>
#include <linux/module.h>
+#include <linux/syscore_ops.h>
#include <asm/io.h>
#include <trace/events/power.h>
@@ -677,6 +678,39 @@ static void acpi_sleep_suspend_setup(void)
static inline void acpi_sleep_suspend_setup(void) {}
#endif /* !CONFIG_SUSPEND */
+#ifdef CONFIG_PM_SLEEP
+static u32 saved_bm_rld;
+
+static int acpi_save_bm_rld(void)
+{
+ acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
+ return 0;
+}
+
+static void acpi_restore_bm_rld(void)
+{
+ u32 resumed_bm_rld = 0;
+
+ acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);
+ if (resumed_bm_rld == saved_bm_rld)
+ return;
+
+ acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
+}
+
+static struct syscore_ops acpi_sleep_syscore_ops = {
+ .suspend = acpi_save_bm_rld,
+ .resume = acpi_restore_bm_rld,
+};
+
+void acpi_sleep_syscore_init(void)
+{
+ register_syscore_ops(&acpi_sleep_syscore_ops);
+}
+#else
+static inline void acpi_sleep_syscore_init(void) {}
+#endif /* CONFIG_PM_SLEEP */
+
#ifdef CONFIG_HIBERNATION
static unsigned long s4_hardware_signature;
static struct acpi_table_facs *facs;
@@ -839,6 +873,7 @@ int __init acpi_sleep_init(void)
sleep_states[ACPI_STATE_S0] = 1;
+ acpi_sleep_syscore_init();
acpi_sleep_suspend_setup();
acpi_sleep_hibernate_setup();
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 6c0f079..f49c024 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -32,6 +32,7 @@
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include "internal.h"
#define ACPI_MAX_TABLES 128
@@ -456,6 +457,7 @@ int __init acpi_table_init(void)
status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
if (ACPI_FAILURE(status))
return -EINVAL;
+ acpi_initrd_initialize_tables();
check_multiple_madt();
return 0;
@@ -484,3 +486,13 @@ static int __init acpi_force_table_verification_setup(char *s)
}
early_param("acpi_force_table_verification", acpi_force_table_verification_setup);
+
+static int __init acpi_force_32bit_fadt_addr(char *s)
+{
+ pr_info("Forcing 32 Bit FADT addresses\n");
+ acpi_gbl_use32_bit_fadt_addresses = TRUE;
+
+ return 0;
+}
+
+early_param("acpi_force_32bit_fadt_addr", acpi_force_32bit_fadt_addr);
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index f2f9873..f12a724 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -201,10 +201,6 @@ acpi_extract_package(union acpi_object *package,
u8 **pointer = NULL;
union acpi_object *element = &(package->package.elements[i]);
- if (!element) {
- return AE_BAD_DATA;
- }
-
switch (element->type) {
case ACPI_TYPE_INTEGER:
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 301b785..56705b5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -104,6 +104,7 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd)
static int genpd_power_on(struct generic_pm_domain *genpd, bool timed)
{
+ unsigned int state_idx = genpd->state_idx;
ktime_t time_start;
s64 elapsed_ns;
int ret;
@@ -120,10 +121,10 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed)
return ret;
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
- if (elapsed_ns <= genpd->power_on_latency_ns)
+ if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns)
return ret;
- genpd->power_on_latency_ns = elapsed_ns;
+ genpd->states[state_idx].power_on_latency_ns = elapsed_ns;
genpd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "on", elapsed_ns);
@@ -133,6 +134,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed)
static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
{
+ unsigned int state_idx = genpd->state_idx;
ktime_t time_start;
s64 elapsed_ns;
int ret;
@@ -149,10 +151,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
return ret;
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
- if (elapsed_ns <= genpd->power_off_latency_ns)
+ if (elapsed_ns <= genpd->states[state_idx].power_off_latency_ns)
return ret;
- genpd->power_off_latency_ns = elapsed_ns;
+ genpd->states[state_idx].power_off_latency_ns = elapsed_ns;
genpd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "off", elapsed_ns);
@@ -485,8 +487,13 @@ static int pm_genpd_runtime_resume(struct device *dev)
if (timed && runtime_pm)
time_start = ktime_get();
- genpd_start_dev(genpd, dev);
- genpd_restore_dev(genpd, dev);
+ ret = genpd_start_dev(genpd, dev);
+ if (ret)
+ goto err_poweroff;
+
+ ret = genpd_restore_dev(genpd, dev);
+ if (ret)
+ goto err_stop;
/* Update resume latency value if the measured time exceeds it. */
if (timed && runtime_pm) {
@@ -501,6 +508,17 @@ static int pm_genpd_runtime_resume(struct device *dev)
}
return 0;
+
+err_stop:
+ genpd_stop_dev(genpd, dev);
+err_poweroff:
+ if (!dev->power.irq_safe) {
+ mutex_lock(&genpd->lock);
+ genpd_poweroff(genpd, 0);
+ mutex_unlock(&genpd->lock);
+ }
+
+ return ret;
}
static bool pd_ignore_unused;
@@ -585,6 +603,8 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd,
|| atomic_read(&genpd->sd_count) > 0)
return;
+ /* Choose the deepest state when suspending */
+ genpd->state_idx = genpd->state_count - 1;
genpd_power_off(genpd, timed);
genpd->status = GPD_STATE_POWER_OFF;
@@ -1378,7 +1398,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
mutex_lock(&subdomain->lock);
mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
- if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
+ if (!list_empty(&subdomain->master_links) || subdomain->device_count) {
pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
subdomain->name);
ret = -EBUSY;
@@ -1508,6 +1528,20 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
genpd->dev_ops.start = pm_clk_resume;
}
+ if (genpd->state_idx >= GENPD_MAX_NUM_STATES) {
+ pr_warn("Initial state index out of bounds.\n");
+ genpd->state_idx = GENPD_MAX_NUM_STATES - 1;
+ }
+
+ if (genpd->state_count > GENPD_MAX_NUM_STATES) {
+ pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES);
+ genpd->state_count = GENPD_MAX_NUM_STATES;
+ }
+
+ /* Use only one "off" state if there were no states declared */
+ if (genpd->state_count == 0)
+ genpd->state_count = 1;
+
mutex_lock(&gpd_list_lock);
list_add(&genpd->gpd_list_node, &gpd_list);
mutex_unlock(&gpd_list_lock);
@@ -1668,6 +1702,9 @@ struct generic_pm_domain *of_genpd_get_from_provider(
struct generic_pm_domain *genpd = ERR_PTR(-ENOENT);
struct of_genpd_provider *provider;
+ if (!genpdspec)
+ return ERR_PTR(-EINVAL);
+
mutex_lock(&of_genpd_mutex);
/* Check if we have such a provider in our array */
@@ -1864,6 +1901,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
struct pm_domain_data *pm_data;
const char *kobj_path;
struct gpd_link *link;
+ char state[16];
int ret;
ret = mutex_lock_interruptible(&genpd->lock);
@@ -1872,7 +1910,13 @@ static int pm_genpd_summary_one(struct seq_file *s,
if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup)))
goto exit;
- seq_printf(s, "%-30s %-15s ", genpd->name, status_lookup[genpd->status]);
+ if (genpd->status == GPD_STATE_POWER_OFF)
+ snprintf(state, sizeof(state), "%s-%u",
+ status_lookup[genpd->status], genpd->state_idx);
+ else
+ snprintf(state, sizeof(state), "%s",
+ status_lookup[genpd->status]);
+ seq_printf(s, "%-30s %-15s ", genpd->name, state);
/*
* Modifications on the list require holding locks on both
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 1e937ac..00a5436 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -98,7 +98,8 @@ static bool default_stop_ok(struct device *dev)
*
* This routine must be executed under the PM domain's lock.
*/
-static bool default_power_down_ok(struct dev_pm_domain *pd)
+static bool __default_power_down_ok(struct dev_pm_domain *pd,
+ unsigned int state)
{
struct generic_pm_domain *genpd = pd_to_genpd(pd);
struct gpd_link *link;
@@ -106,27 +107,9 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
s64 min_off_time_ns;
s64 off_on_time_ns;
- if (genpd->max_off_time_changed) {
- struct gpd_link *link;
-
- /*
- * We have to invalidate the cached results for the masters, so
- * use the observation that default_power_down_ok() is not
- * going to be called for any master until this instance
- * returns.
- */
- list_for_each_entry(link, &genpd->slave_links, slave_node)
- link->master->max_off_time_changed = true;
-
- genpd->max_off_time_changed = false;
- genpd->cached_power_down_ok = false;
- genpd->max_off_time_ns = -1;
- } else {
- return genpd->cached_power_down_ok;
- }
+ off_on_time_ns = genpd->states[state].power_off_latency_ns +
+ genpd->states[state].power_on_latency_ns;
- off_on_time_ns = genpd->power_off_latency_ns +
- genpd->power_on_latency_ns;
min_off_time_ns = -1;
/*
@@ -186,8 +169,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
min_off_time_ns = constraint_ns;
}
- genpd->cached_power_down_ok = true;
-
/*
* If the computed minimum device off time is negative, there are no
* latency constraints, so the domain can spend arbitrary time in the
@@ -201,10 +182,45 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
* time and the time needed to turn the domain on is the maximum
* theoretical time this domain can spend in the "off" state.
*/
- genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns;
+ genpd->max_off_time_ns = min_off_time_ns -
+ genpd->states[state].power_on_latency_ns;
return true;
}
+static bool default_power_down_ok(struct dev_pm_domain *pd)
+{
+ struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ struct gpd_link *link;
+
+ if (!genpd->max_off_time_changed)
+ return genpd->cached_power_down_ok;
+
+ /*
+ * We have to invalidate the cached results for the masters, so
+ * use the observation that default_power_down_ok() is not
+ * going to be called for any master until this instance
+ * returns.
+ */
+ list_for_each_entry(link, &genpd->slave_links, slave_node)
+ link->master->max_off_time_changed = true;
+
+ genpd->max_off_time_ns = -1;
+ genpd->max_off_time_changed = false;
+ genpd->cached_power_down_ok = true;
+ genpd->state_idx = genpd->state_count - 1;
+
+ /* Find a state to power down to, starting from the deepest. */
+ while (!__default_power_down_ok(pd, genpd->state_idx)) {
+ if (genpd->state_idx == 0) {
+ genpd->cached_power_down_ok = false;
+ break;
+ }
+ genpd->state_idx--;
+ }
+
+ return genpd->cached_power_down_ok;
+}
+
static bool always_on_power_down_ok(struct dev_pm_domain *domain)
{
return false;
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index cf351d3..433b600 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -13,50 +13,52 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/clk.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/export.h>
+#include <linux/regulator/consumer.h>
#include "opp.h"
/*
- * The root of the list of all devices. All device_opp structures branch off
- * from here, with each device_opp containing the list of opp it supports in
+ * The root of the list of all opp-tables. All opp_table structures branch off
+ * from here, with each opp_table containing the list of opps it supports in
* various states of availability.
*/
-static LIST_HEAD(dev_opp_list);
+static LIST_HEAD(opp_tables);
/* Lock to allow exclusive modification to the device and opp lists */
-DEFINE_MUTEX(dev_opp_list_lock);
+DEFINE_MUTEX(opp_table_lock);
#define opp_rcu_lockdep_assert() \
do { \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
- !lockdep_is_held(&dev_opp_list_lock), \
- "Missing rcu_read_lock() or " \
- "dev_opp_list_lock protection"); \
+ !lockdep_is_held(&opp_table_lock), \
+ "Missing rcu_read_lock() or " \
+ "opp_table_lock protection"); \
} while (0)
-static struct device_list_opp *_find_list_dev(const struct device *dev,
- struct device_opp *dev_opp)
+static struct opp_device *_find_opp_dev(const struct device *dev,
+ struct opp_table *opp_table)
{
- struct device_list_opp *list_dev;
+ struct opp_device *opp_dev;
- list_for_each_entry(list_dev, &dev_opp->dev_list, node)
- if (list_dev->dev == dev)
- return list_dev;
+ list_for_each_entry(opp_dev, &opp_table->dev_list, node)
+ if (opp_dev->dev == dev)
+ return opp_dev;
return NULL;
}
-static struct device_opp *_managed_opp(const struct device_node *np)
+static struct opp_table *_managed_opp(const struct device_node *np)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
- list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) {
- if (dev_opp->np == np) {
+ list_for_each_entry_rcu(opp_table, &opp_tables, node) {
+ if (opp_table->np == np) {
/*
* Multiple devices can point to the same OPP table and
* so will have same node-pointer, np.
@@ -64,7 +66,7 @@ static struct device_opp *_managed_opp(const struct device_node *np)
* But the OPPs will be considered as shared only if the
* OPP table contains a "opp-shared" property.
*/
- return dev_opp->shared_opp ? dev_opp : NULL;
+ return opp_table->shared_opp ? opp_table : NULL;
}
}
@@ -72,24 +74,24 @@ static struct device_opp *_managed_opp(const struct device_node *np)
}
/**
- * _find_device_opp() - find device_opp struct using device pointer
- * @dev: device pointer used to lookup device OPPs
+ * _find_opp_table() - find opp_table struct using device pointer
+ * @dev: device pointer used to lookup OPP table
*
- * Search list of device OPPs for one containing matching device. Does a RCU
- * reader operation to grab the pointer needed.
+ * Search OPP table for one containing matching device. Does a RCU reader
+ * operation to grab the pointer needed.
*
- * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or
+ * Return: pointer to 'struct opp_table' if found, otherwise -ENODEV or
* -EINVAL based on type of error.
*
* Locking: For readers, this function must be called under rcu_read_lock().
- * device_opp is a RCU protected pointer, which means that device_opp is valid
+ * opp_table is a RCU protected pointer, which means that opp_table is valid
* as long as we are under RCU lock.
*
- * For Writers, this function must be called with dev_opp_list_lock held.
+ * For Writers, this function must be called with opp_table_lock held.
*/
-struct device_opp *_find_device_opp(struct device *dev)
+struct opp_table *_find_opp_table(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
opp_rcu_lockdep_assert();
@@ -98,9 +100,9 @@ struct device_opp *_find_device_opp(struct device *dev)
return ERR_PTR(-EINVAL);
}
- list_for_each_entry_rcu(dev_opp, &dev_opp_list, node)
- if (_find_list_dev(dev, dev_opp))
- return dev_opp;
+ list_for_each_entry_rcu(opp_table, &opp_tables, node)
+ if (_find_opp_dev(dev, opp_table))
+ return opp_table;
return ERR_PTR(-ENODEV);
}
@@ -213,16 +215,16 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo);
*/
unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
unsigned long clock_latency_ns;
rcu_read_lock();
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp))
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table))
clock_latency_ns = 0;
else
- clock_latency_ns = dev_opp->clock_latency_ns_max;
+ clock_latency_ns = opp_table->clock_latency_ns_max;
rcu_read_unlock();
return clock_latency_ns;
@@ -230,6 +232,82 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
/**
+ * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max voltage latency in nanoseconds.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
+{
+ struct opp_table *opp_table;
+ struct dev_pm_opp *opp;
+ struct regulator *reg;
+ unsigned long latency_ns = 0;
+ unsigned long min_uV = ~0, max_uV = 0;
+ int ret;
+
+ rcu_read_lock();
+
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ reg = opp_table->regulator;
+ if (IS_ERR(reg)) {
+ /* Regulator may not be required for device */
+ if (reg)
+ dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__,
+ PTR_ERR(reg));
+ rcu_read_unlock();
+ return 0;
+ }
+
+ list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
+ if (!opp->available)
+ continue;
+
+ if (opp->u_volt_min < min_uV)
+ min_uV = opp->u_volt_min;
+ if (opp->u_volt_max > max_uV)
+ max_uV = opp->u_volt_max;
+ }
+
+ rcu_read_unlock();
+
+ /*
+ * The caller needs to ensure that opp_table (and hence the regulator)
+ * isn't freed, while we are executing this routine.
+ */
+ ret = regulator_set_voltage_time(reg, min_uV, max_uV);
+ if (ret > 0)
+ latency_ns = ret * 1000;
+
+ return latency_ns;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency);
+
+/**
+ * dev_pm_opp_get_max_transition_latency() - Get max transition latency in
+ * nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max transition latency, in nanoseconds, to
+ * switch from one OPP to other.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev)
+{
+ return dev_pm_opp_get_max_volt_latency(dev) +
+ dev_pm_opp_get_max_clock_latency(dev);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
+
+/**
* dev_pm_opp_get_suspend_opp() - Get suspend opp
* @dev: device for which we do this operation
*
@@ -244,21 +322,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
*/
struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
opp_rcu_lockdep_assert();
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp) || !dev_opp->suspend_opp ||
- !dev_opp->suspend_opp->available)
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table) || !opp_table->suspend_opp ||
+ !opp_table->suspend_opp->available)
return NULL;
- return dev_opp->suspend_opp;
+ return opp_table->suspend_opp;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
/**
- * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
+ * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table
* @dev: device for which we do this operation
*
* Return: This function returns the number of available opps if there are any,
@@ -268,21 +346,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
*/
int dev_pm_opp_get_opp_count(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *temp_opp;
int count = 0;
rcu_read_lock();
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- count = PTR_ERR(dev_opp);
- dev_err(dev, "%s: device OPP not found (%d)\n",
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ count = PTR_ERR(opp_table);
+ dev_err(dev, "%s: OPP table not found (%d)\n",
__func__, count);
goto out_unlock;
}
- list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+ list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
if (temp_opp->available)
count++;
}
@@ -299,7 +377,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
* @freq: frequency to search for
* @available: true/false - match for available opp
*
- * Return: Searches for exact match in the opp list and returns pointer to the
+ * Return: Searches for exact match in the opp table and returns pointer to the
* matching opp if found, else returns ERR_PTR in case of error and should
* be handled using IS_ERR. Error return values can be:
* EINVAL: for bad pointer
@@ -323,19 +401,20 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
unsigned long freq,
bool available)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
opp_rcu_lockdep_assert();
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- int r = PTR_ERR(dev_opp);
- dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ int r = PTR_ERR(opp_table);
+
+ dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r);
return ERR_PTR(r);
}
- list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+ list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
if (temp_opp->available == available &&
temp_opp->rate == freq) {
opp = temp_opp;
@@ -371,7 +450,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
unsigned long *freq)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
opp_rcu_lockdep_assert();
@@ -381,11 +460,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
return ERR_PTR(-EINVAL);
}
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp))
- return ERR_CAST(dev_opp);
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table))
+ return ERR_CAST(opp_table);
- list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+ list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
if (temp_opp->available && temp_opp->rate >= *freq) {
opp = temp_opp;
*freq = opp->rate;
@@ -421,7 +500,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
unsigned long *freq)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
opp_rcu_lockdep_assert();
@@ -431,11 +510,11 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
return ERR_PTR(-EINVAL);
}
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp))
- return ERR_CAST(dev_opp);
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table))
+ return ERR_CAST(opp_table);
- list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+ list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) {
if (temp_opp->available) {
/* go to the next node, before choosing prev */
if (temp_opp->rate > *freq)
@@ -451,130 +530,343 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
-/* List-dev Helpers */
-static void _kfree_list_dev_rcu(struct rcu_head *head)
+/*
+ * The caller needs to ensure that opp_table (and hence the clk) isn't freed,
+ * while clk returned here is used.
+ */
+static struct clk *_get_opp_clk(struct device *dev)
+{
+ struct opp_table *opp_table;
+ struct clk *clk;
+
+ rcu_read_lock();
+
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+ clk = ERR_CAST(opp_table);
+ goto unlock;
+ }
+
+ clk = opp_table->clk;
+ if (IS_ERR(clk))
+ dev_err(dev, "%s: No clock available for the device\n",
+ __func__);
+
+unlock:
+ rcu_read_unlock();
+ return clk;
+}
+
+static int _set_opp_voltage(struct device *dev, struct regulator *reg,
+ unsigned long u_volt, unsigned long u_volt_min,
+ unsigned long u_volt_max)
+{
+ int ret;
+
+ /* Regulator not available for device */
+ if (IS_ERR(reg)) {
+ dev_dbg(dev, "%s: regulator not available: %ld\n", __func__,
+ PTR_ERR(reg));
+ return 0;
+ }
+
+ dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min,
+ u_volt, u_volt_max);
+
+ ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt,
+ u_volt_max);
+ if (ret)
+ dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n",
+ __func__, u_volt_min, u_volt, u_volt_max, ret);
+
+ return ret;
+}
+
+/**
+ * dev_pm_opp_set_rate() - Configure new OPP based on frequency
+ * @dev: device for which we do this operation
+ * @target_freq: frequency to achieve
+ *
+ * This configures the power-supplies and clock source to the levels specified
+ * by the OPP corresponding to the target_freq.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
+{
+ struct opp_table *opp_table;
+ struct dev_pm_opp *old_opp, *opp;
+ struct regulator *reg;
+ struct clk *clk;
+ unsigned long freq, old_freq;
+ unsigned long u_volt, u_volt_min, u_volt_max;
+ unsigned long ou_volt, ou_volt_min, ou_volt_max;
+ int ret;
+
+ if (unlikely(!target_freq)) {
+ dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
+ target_freq);
+ return -EINVAL;
+ }
+
+ clk = _get_opp_clk(dev);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ freq = clk_round_rate(clk, target_freq);
+ if ((long)freq <= 0)
+ freq = target_freq;
+
+ old_freq = clk_get_rate(clk);
+
+ /* Return early if nothing to do */
+ if (old_freq == freq) {
+ dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+ __func__, freq);
+ return 0;
+ }
+
+ rcu_read_lock();
+
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+ rcu_read_unlock();
+ return PTR_ERR(opp_table);
+ }
+
+ old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq);
+ if (!IS_ERR(old_opp)) {
+ ou_volt = old_opp->u_volt;
+ ou_volt_min = old_opp->u_volt_min;
+ ou_volt_max = old_opp->u_volt_max;
+ } else {
+ dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n",
+ __func__, old_freq, PTR_ERR(old_opp));
+ }
+
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n",
+ __func__, freq, ret);
+ rcu_read_unlock();
+ return ret;
+ }
+
+ u_volt = opp->u_volt;
+ u_volt_min = opp->u_volt_min;
+ u_volt_max = opp->u_volt_max;
+
+ reg = opp_table->regulator;
+
+ rcu_read_unlock();
+
+ /* Scaling up? Scale voltage before frequency */
+ if (freq > old_freq) {
+ ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+ u_volt_max);
+ if (ret)
+ goto restore_voltage;
+ }
+
+ /* Change frequency */
+
+ dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
+ __func__, old_freq, freq);
+
+ ret = clk_set_rate(clk, freq);
+ if (ret) {
+ dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+ ret);
+ goto restore_voltage;
+ }
+
+ /* Scaling down? Scale voltage after frequency */
+ if (freq < old_freq) {
+ ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+ u_volt_max);
+ if (ret)
+ goto restore_freq;
+ }
+
+ return 0;
+
+restore_freq:
+ if (clk_set_rate(clk, old_freq))
+ dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+ __func__, old_freq);
+restore_voltage:
+ /* This shouldn't harm even if the voltages weren't updated earlier */
+ if (!IS_ERR(old_opp))
+ _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
+
+/* OPP-dev Helpers */
+static void _kfree_opp_dev_rcu(struct rcu_head *head)
{
- struct device_list_opp *list_dev;
+ struct opp_device *opp_dev;
- list_dev = container_of(head, struct device_list_opp, rcu_head);
- kfree_rcu(list_dev, rcu_head);
+ opp_dev = container_of(head, struct opp_device, rcu_head);
+ kfree_rcu(opp_dev, rcu_head);
}
-static void _remove_list_dev(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static void _remove_opp_dev(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{
- opp_debug_unregister(list_dev, dev_opp);
- list_del(&list_dev->node);
- call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
- _kfree_list_dev_rcu);
+ opp_debug_unregister(opp_dev, opp_table);
+ list_del(&opp_dev->node);
+ call_srcu(&opp_table->srcu_head.srcu, &opp_dev->rcu_head,
+ _kfree_opp_dev_rcu);
}
-struct device_list_opp *_add_list_dev(const struct device *dev,
- struct device_opp *dev_opp)
+struct opp_device *_add_opp_dev(const struct device *dev,
+ struct opp_table *opp_table)
{
- struct device_list_opp *list_dev;
+ struct opp_device *opp_dev;
int ret;
- list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
- if (!list_dev)
+ opp_dev = kzalloc(sizeof(*opp_dev), GFP_KERNEL);
+ if (!opp_dev)
return NULL;
- /* Initialize list-dev */
- list_dev->dev = dev;
- list_add_rcu(&list_dev->node, &dev_opp->dev_list);
+ /* Initialize opp-dev */
+ opp_dev->dev = dev;
+ list_add_rcu(&opp_dev->node, &opp_table->dev_list);
- /* Create debugfs entries for the dev_opp */
- ret = opp_debug_register(list_dev, dev_opp);
+ /* Create debugfs entries for the opp_table */
+ ret = opp_debug_register(opp_dev, opp_table);
if (ret)
dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
__func__, ret);
- return list_dev;
+ return opp_dev;
}
/**
- * _add_device_opp() - Find device OPP table or allocate a new one
+ * _add_opp_table() - Find OPP table or allocate a new one
* @dev: device for which we do this operation
*
* It tries to find an existing table first, if it couldn't find one, it
* allocates a new OPP table and returns that.
*
- * Return: valid device_opp pointer if success, else NULL.
+ * Return: valid opp_table pointer if success, else NULL.
*/
-static struct device_opp *_add_device_opp(struct device *dev)
+static struct opp_table *_add_opp_table(struct device *dev)
{
- struct device_opp *dev_opp;
- struct device_list_opp *list_dev;
+ struct opp_table *opp_table;
+ struct opp_device *opp_dev;
+ struct device_node *np;
+ int ret;
- /* Check for existing list for 'dev' first */
- dev_opp = _find_device_opp(dev);
- if (!IS_ERR(dev_opp))
- return dev_opp;
+ /* Check for existing table for 'dev' first */
+ opp_table = _find_opp_table(dev);
+ if (!IS_ERR(opp_table))
+ return opp_table;
/*
- * Allocate a new device OPP table. In the infrequent case where a new
+ * Allocate a new OPP table. In the infrequent case where a new
* device is needed to be added, we pay this penalty.
*/
- dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
- if (!dev_opp)
+ opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL);
+ if (!opp_table)
return NULL;
- INIT_LIST_HEAD(&dev_opp->dev_list);
+ INIT_LIST_HEAD(&opp_table->dev_list);
- list_dev = _add_list_dev(dev, dev_opp);
- if (!list_dev) {
- kfree(dev_opp);
+ opp_dev = _add_opp_dev(dev, opp_table);
+ if (!opp_dev) {
+ kfree(opp_table);
return NULL;
}
- srcu_init_notifier_head(&dev_opp->srcu_head);
- INIT_LIST_HEAD(&dev_opp->opp_list);
+ /*
+ * Only required for backward compatibility with v1 bindings, but isn't
+ * harmful for other cases. And so we do it unconditionally.
+ */
+ np = of_node_get(dev->of_node);
+ if (np) {
+ u32 val;
+
+ if (!of_property_read_u32(np, "clock-latency", &val))
+ opp_table->clock_latency_ns_max = val;
+ of_property_read_u32(np, "voltage-tolerance",
+ &opp_table->voltage_tolerance_v1);
+ of_node_put(np);
+ }
+
+ /* Set regulator to a non-NULL error value */
+ opp_table->regulator = ERR_PTR(-ENXIO);
+
+ /* Find clk for the device */
+ opp_table->clk = clk_get(dev, NULL);
+ if (IS_ERR(opp_table->clk)) {
+ ret = PTR_ERR(opp_table->clk);
+ if (ret != -EPROBE_DEFER)
+ dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__,
+ ret);
+ }
- /* Secure the device list modification */
- list_add_rcu(&dev_opp->node, &dev_opp_list);
- return dev_opp;
+ srcu_init_notifier_head(&opp_table->srcu_head);
+ INIT_LIST_HEAD(&opp_table->opp_list);
+
+ /* Secure the device table modification */
+ list_add_rcu(&opp_table->node, &opp_tables);
+ return opp_table;
}
/**
- * _kfree_device_rcu() - Free device_opp RCU handler
+ * _kfree_device_rcu() - Free opp_table RCU handler
* @head: RCU head
*/
static void _kfree_device_rcu(struct rcu_head *head)
{
- struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
+ struct opp_table *opp_table = container_of(head, struct opp_table,
+ rcu_head);
- kfree_rcu(device_opp, rcu_head);
+ kfree_rcu(opp_table, rcu_head);
}
/**
- * _remove_device_opp() - Removes a device OPP table
- * @dev_opp: device OPP table to be removed.
+ * _remove_opp_table() - Removes a OPP table
+ * @opp_table: OPP table to be removed.
*
- * Removes/frees device OPP table it it doesn't contain any OPPs.
+ * Removes/frees OPP table if it doesn't contain any OPPs.
*/
-static void _remove_device_opp(struct device_opp *dev_opp)
+static void _remove_opp_table(struct opp_table *opp_table)
{
- struct device_list_opp *list_dev;
+ struct opp_device *opp_dev;
+
+ if (!list_empty(&opp_table->opp_list))
+ return;
- if (!list_empty(&dev_opp->opp_list))
+ if (opp_table->supported_hw)
return;
- if (dev_opp->supported_hw)
+ if (opp_table->prop_name)
return;
- if (dev_opp->prop_name)
+ if (!IS_ERR(opp_table->regulator))
return;
- list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
- node);
+ /* Release clk */
+ if (!IS_ERR(opp_table->clk))
+ clk_put(opp_table->clk);
- _remove_list_dev(list_dev, dev_opp);
+ opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device,
+ node);
+
+ _remove_opp_dev(opp_dev, opp_table);
/* dev_list must be empty now */
- WARN_ON(!list_empty(&dev_opp->dev_list));
+ WARN_ON(!list_empty(&opp_table->dev_list));
- list_del_rcu(&dev_opp->node);
- call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
+ list_del_rcu(&opp_table->node);
+ call_srcu(&opp_table->srcu_head.srcu, &opp_table->rcu_head,
_kfree_device_rcu);
}
@@ -591,17 +883,17 @@ static void _kfree_opp_rcu(struct rcu_head *head)
/**
* _opp_remove() - Remove an OPP from a table definition
- * @dev_opp: points back to the device_opp struct this opp belongs to
+ * @opp_table: points back to the opp_table struct this opp belongs to
* @opp: pointer to the OPP to remove
* @notify: OPP_EVENT_REMOVE notification should be sent or not
*
- * This function removes an opp definition from the opp list.
+ * This function removes an opp definition from the opp table.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* It is assumed that the caller holds required mutex for an RCU updater
* strategy.
*/
-static void _opp_remove(struct device_opp *dev_opp,
+static void _opp_remove(struct opp_table *opp_table,
struct dev_pm_opp *opp, bool notify)
{
/*
@@ -609,22 +901,23 @@ static void _opp_remove(struct device_opp *dev_opp,
* frequency/voltage list.
*/
if (notify)
- srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+ srcu_notifier_call_chain(&opp_table->srcu_head,
+ OPP_EVENT_REMOVE, opp);
opp_debug_remove_one(opp);
list_del_rcu(&opp->node);
- call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+ call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
- _remove_device_opp(dev_opp);
+ _remove_opp_table(opp_table);
}
/**
- * dev_pm_opp_remove() - Remove an OPP from OPP list
+ * dev_pm_opp_remove() - Remove an OPP from OPP table
* @dev: device for which we do this operation
* @freq: OPP to remove with matching 'freq'
*
- * This function removes an opp from the opp list.
+ * This function removes an opp from the opp table.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -633,17 +926,17 @@ static void _opp_remove(struct device_opp *dev_opp,
void dev_pm_opp_remove(struct device *dev, unsigned long freq)
{
struct dev_pm_opp *opp;
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
bool found = false;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp))
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table))
goto unlock;
- list_for_each_entry(opp, &dev_opp->opp_list, node) {
+ list_for_each_entry(opp, &opp_table->opp_list, node) {
if (opp->rate == freq) {
found = true;
break;
@@ -656,14 +949,14 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
goto unlock;
}
- _opp_remove(dev_opp, opp, true);
+ _opp_remove(opp_table, opp, true);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
static struct dev_pm_opp *_allocate_opp(struct device *dev,
- struct device_opp **dev_opp)
+ struct opp_table **opp_table)
{
struct dev_pm_opp *opp;
@@ -674,8 +967,8 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev,
INIT_LIST_HEAD(&opp->node);
- *dev_opp = _add_device_opp(dev);
- if (!*dev_opp) {
+ *opp_table = _add_opp_table(dev);
+ if (!*opp_table) {
kfree(opp);
return NULL;
}
@@ -683,22 +976,38 @@ static struct dev_pm_opp *_allocate_opp(struct device *dev,
return opp;
}
+static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
+ struct opp_table *opp_table)
+{
+ struct regulator *reg = opp_table->regulator;
+
+ if (!IS_ERR(reg) &&
+ !regulator_is_supported_voltage(reg, opp->u_volt_min,
+ opp->u_volt_max)) {
+ pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
+ __func__, opp->u_volt_min, opp->u_volt_max);
+ return false;
+ }
+
+ return true;
+}
+
static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
- struct device_opp *dev_opp)
+ struct opp_table *opp_table)
{
struct dev_pm_opp *opp;
- struct list_head *head = &dev_opp->opp_list;
+ struct list_head *head = &opp_table->opp_list;
int ret;
/*
* Insert new OPP in order of increasing frequency and discard if
* already present.
*
- * Need to use &dev_opp->opp_list in the condition part of the 'for'
+ * Need to use &opp_table->opp_list in the condition part of the 'for'
* loop, don't replace it with head otherwise it will become an infinite
* loop.
*/
- list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+ list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
if (new_opp->rate > opp->rate) {
head = &opp->node;
continue;
@@ -716,14 +1025,20 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
0 : -EEXIST;
}
- new_opp->dev_opp = dev_opp;
+ new_opp->opp_table = opp_table;
list_add_rcu(&new_opp->node, head);
- ret = opp_debug_create_one(new_opp, dev_opp);
+ ret = opp_debug_create_one(new_opp, opp_table);
if (ret)
dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
__func__, ret);
+ if (!_opp_supported_by_regulators(new_opp, opp_table)) {
+ new_opp->available = false;
+ dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n",
+ __func__, new_opp->rate);
+ }
+
return 0;
}
@@ -734,14 +1049,14 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
* @u_volt: Voltage in uVolts for this OPP
* @dynamic: Dynamically added OPPs.
*
- * This function adds an opp definition to the opp list and returns status.
+ * This function adds an opp definition to the opp table and returns status.
* The opp is made available by default and it can be controlled using
* dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove.
*
* NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table
* and freed by dev_pm_opp_of_remove_table.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -757,14 +1072,15 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
bool dynamic)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *new_opp;
+ unsigned long tol;
int ret;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- new_opp = _allocate_opp(dev, &dev_opp);
+ new_opp = _allocate_opp(dev, &opp_table);
if (!new_opp) {
ret = -ENOMEM;
goto unlock;
@@ -772,33 +1088,36 @@ static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
/* populate the opp table */
new_opp->rate = freq;
+ tol = u_volt * opp_table->voltage_tolerance_v1 / 100;
new_opp->u_volt = u_volt;
+ new_opp->u_volt_min = u_volt - tol;
+ new_opp->u_volt_max = u_volt + tol;
new_opp->available = true;
new_opp->dynamic = dynamic;
- ret = _opp_add(dev, new_opp, dev_opp);
+ ret = _opp_add(dev, new_opp, opp_table);
if (ret)
goto free_opp;
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
/*
* Notify the changes in the availability of the operable
* frequency/voltage list.
*/
- srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+ srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
return 0;
free_opp:
- _opp_remove(dev_opp, new_opp, false);
+ _opp_remove(opp_table, new_opp, false);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
/* TODO: Support multiple regulators */
static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
- struct device_opp *dev_opp)
+ struct opp_table *opp_table)
{
u32 microvolt[3] = {0};
u32 val;
@@ -807,9 +1126,9 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
char name[NAME_MAX];
/* Search for "opp-microvolt-<name>" */
- if (dev_opp->prop_name) {
+ if (opp_table->prop_name) {
snprintf(name, sizeof(name), "opp-microvolt-%s",
- dev_opp->prop_name);
+ opp_table->prop_name);
prop = of_find_property(opp->np, name, NULL);
}
@@ -844,14 +1163,20 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
}
opp->u_volt = microvolt[0];
- opp->u_volt_min = microvolt[1];
- opp->u_volt_max = microvolt[2];
+
+ if (count == 1) {
+ opp->u_volt_min = opp->u_volt;
+ opp->u_volt_max = opp->u_volt;
+ } else {
+ opp->u_volt_min = microvolt[1];
+ opp->u_volt_max = microvolt[2];
+ }
/* Search for "opp-microamp-<name>" */
prop = NULL;
- if (dev_opp->prop_name) {
+ if (opp_table->prop_name) {
snprintf(name, sizeof(name), "opp-microamp-%s",
- dev_opp->prop_name);
+ opp_table->prop_name);
prop = of_find_property(opp->np, name, NULL);
}
@@ -878,7 +1203,7 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
* OPPs, which are available for those versions, based on its 'opp-supported-hw'
* property.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -887,44 +1212,44 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
unsigned int count)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
int ret = 0;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- dev_opp = _add_device_opp(dev);
- if (!dev_opp) {
+ opp_table = _add_opp_table(dev);
+ if (!opp_table) {
ret = -ENOMEM;
goto unlock;
}
- /* Make sure there are no concurrent readers while updating dev_opp */
- WARN_ON(!list_empty(&dev_opp->opp_list));
+ /* Make sure there are no concurrent readers while updating opp_table */
+ WARN_ON(!list_empty(&opp_table->opp_list));
- /* Do we already have a version hierarchy associated with dev_opp? */
- if (dev_opp->supported_hw) {
+ /* Do we already have a version hierarchy associated with opp_table? */
+ if (opp_table->supported_hw) {
dev_err(dev, "%s: Already have supported hardware list\n",
__func__);
ret = -EBUSY;
goto err;
}
- dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions),
+ opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions),
GFP_KERNEL);
- if (!dev_opp->supported_hw) {
+ if (!opp_table->supported_hw) {
ret = -ENOMEM;
goto err;
}
- dev_opp->supported_hw_count = count;
- mutex_unlock(&dev_opp_list_lock);
+ opp_table->supported_hw_count = count;
+ mutex_unlock(&opp_table_lock);
return 0;
err:
- _remove_device_opp(dev_opp);
+ _remove_opp_table(opp_table);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
@@ -932,13 +1257,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
/**
* dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw
- * @dev: Device for which supported-hw has to be set.
+ * @dev: Device for which supported-hw has to be put.
*
* This is required only for the V2 bindings, and is called for a matching
- * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
+ * dev_pm_opp_set_supported_hw(). Until this is called, the opp_table structure
* will not be freed.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -946,42 +1271,43 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
*/
void dev_pm_opp_put_supported_hw(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- /* Check for existing list for 'dev' first */
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+ /* Check for existing table for 'dev' first */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "Failed to find opp_table: %ld\n",
+ PTR_ERR(opp_table));
goto unlock;
}
- /* Make sure there are no concurrent readers while updating dev_opp */
- WARN_ON(!list_empty(&dev_opp->opp_list));
+ /* Make sure there are no concurrent readers while updating opp_table */
+ WARN_ON(!list_empty(&opp_table->opp_list));
- if (!dev_opp->supported_hw) {
+ if (!opp_table->supported_hw) {
dev_err(dev, "%s: Doesn't have supported hardware list\n",
__func__);
goto unlock;
}
- kfree(dev_opp->supported_hw);
- dev_opp->supported_hw = NULL;
- dev_opp->supported_hw_count = 0;
+ kfree(opp_table->supported_hw);
+ opp_table->supported_hw = NULL;
+ opp_table->supported_hw_count = 0;
- /* Try freeing device_opp if this was the last blocking resource */
- _remove_device_opp(dev_opp);
+ /* Try freeing opp_table if this was the last blocking resource */
+ _remove_opp_table(opp_table);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
/**
* dev_pm_opp_set_prop_name() - Set prop-extn name
- * @dev: Device for which the regulator has to be set.
+ * @dev: Device for which the prop-name has to be set.
* @name: name to postfix to properties.
*
* This is required only for the V2 bindings, and it enables a platform to
@@ -989,7 +1315,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
* which the extension will apply are opp-microvolt and opp-microamp. OPP core
* should postfix the property name with -<name> while looking for them.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -997,42 +1323,42 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
*/
int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
int ret = 0;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- dev_opp = _add_device_opp(dev);
- if (!dev_opp) {
+ opp_table = _add_opp_table(dev);
+ if (!opp_table) {
ret = -ENOMEM;
goto unlock;
}
- /* Make sure there are no concurrent readers while updating dev_opp */
- WARN_ON(!list_empty(&dev_opp->opp_list));
+ /* Make sure there are no concurrent readers while updating opp_table */
+ WARN_ON(!list_empty(&opp_table->opp_list));
- /* Do we already have a prop-name associated with dev_opp? */
- if (dev_opp->prop_name) {
+ /* Do we already have a prop-name associated with opp_table? */
+ if (opp_table->prop_name) {
dev_err(dev, "%s: Already have prop-name %s\n", __func__,
- dev_opp->prop_name);
+ opp_table->prop_name);
ret = -EBUSY;
goto err;
}
- dev_opp->prop_name = kstrdup(name, GFP_KERNEL);
- if (!dev_opp->prop_name) {
+ opp_table->prop_name = kstrdup(name, GFP_KERNEL);
+ if (!opp_table->prop_name) {
ret = -ENOMEM;
goto err;
}
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return 0;
err:
- _remove_device_opp(dev_opp);
+ _remove_opp_table(opp_table);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
@@ -1040,13 +1366,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
/**
* dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name
- * @dev: Device for which the regulator has to be set.
+ * @dev: Device for which the prop-name has to be put.
*
* This is required only for the V2 bindings, and is called for a matching
- * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure
+ * dev_pm_opp_set_prop_name(). Until this is called, the opp_table structure
* will not be freed.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -1054,45 +1380,154 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
*/
void dev_pm_opp_put_prop_name(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- /* Check for existing list for 'dev' first */
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+ /* Check for existing table for 'dev' first */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "Failed to find opp_table: %ld\n",
+ PTR_ERR(opp_table));
goto unlock;
}
- /* Make sure there are no concurrent readers while updating dev_opp */
- WARN_ON(!list_empty(&dev_opp->opp_list));
+ /* Make sure there are no concurrent readers while updating opp_table */
+ WARN_ON(!list_empty(&opp_table->opp_list));
- if (!dev_opp->prop_name) {
+ if (!opp_table->prop_name) {
dev_err(dev, "%s: Doesn't have a prop-name\n", __func__);
goto unlock;
}
- kfree(dev_opp->prop_name);
- dev_opp->prop_name = NULL;
+ kfree(opp_table->prop_name);
+ opp_table->prop_name = NULL;
- /* Try freeing device_opp if this was the last blocking resource */
- _remove_device_opp(dev_opp);
+ /* Try freeing opp_table if this was the last blocking resource */
+ _remove_opp_table(opp_table);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
-static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
+/**
+ * dev_pm_opp_set_regulator() - Set regulator name for the device
+ * @dev: Device for which regulator name is being set.
+ * @name: Name of the regulator.
+ *
+ * In order to support OPP switching, OPP layer needs to know the name of the
+ * device's regulator, as the core would be required to switch voltages as well.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+{
+ struct opp_table *opp_table;
+ struct regulator *reg;
+ int ret;
+
+ mutex_lock(&opp_table_lock);
+
+ opp_table = _add_opp_table(dev);
+ if (!opp_table) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ /* This should be called before OPPs are initialized */
+ if (WARN_ON(!list_empty(&opp_table->opp_list))) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ /* Already have a regulator set */
+ if (WARN_ON(!IS_ERR(opp_table->regulator))) {
+ ret = -EBUSY;
+ goto err;
+ }
+ /* Allocate the regulator */
+ reg = regulator_get_optional(dev, name);
+ if (IS_ERR(reg)) {
+ ret = PTR_ERR(reg);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "%s: no regulator (%s) found: %d\n",
+ __func__, name, ret);
+ goto err;
+ }
+
+ opp_table->regulator = reg;
+
+ mutex_unlock(&opp_table_lock);
+ return 0;
+
+err:
+ _remove_opp_table(opp_table);
+unlock:
+ mutex_unlock(&opp_table_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
+
+/**
+ * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
+ * @dev: Device for which regulator was set.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_regulator(struct device *dev)
+{
+ struct opp_table *opp_table;
+
+ mutex_lock(&opp_table_lock);
+
+ /* Check for existing table for 'dev' first */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "Failed to find opp_table: %ld\n",
+ PTR_ERR(opp_table));
+ goto unlock;
+ }
+
+ if (IS_ERR(opp_table->regulator)) {
+ dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
+ goto unlock;
+ }
+
+ /* Make sure there are no concurrent readers while updating opp_table */
+ WARN_ON(!list_empty(&opp_table->opp_list));
+
+ regulator_put(opp_table->regulator);
+ opp_table->regulator = ERR_PTR(-ENXIO);
+
+ /* Try freeing opp_table if this was the last blocking resource */
+ _remove_opp_table(opp_table);
+
+unlock:
+ mutex_unlock(&opp_table_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
+
+static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
struct device_node *np)
{
- unsigned int count = dev_opp->supported_hw_count;
+ unsigned int count = opp_table->supported_hw_count;
u32 version;
int ret;
- if (!dev_opp->supported_hw)
+ if (!opp_table->supported_hw)
return true;
while (count--) {
@@ -1105,7 +1540,7 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
}
/* Both of these are bitwise masks of the versions */
- if (!(version & dev_opp->supported_hw[count]))
+ if (!(version & opp_table->supported_hw[count]))
return false;
}
@@ -1117,11 +1552,11 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
* @dev: device for which we do this operation
* @np: device node
*
- * This function adds an opp definition to the opp list and returns status. The
+ * This function adds an opp definition to the opp table and returns status. The
* opp can be controlled using dev_pm_opp_enable/disable functions and may be
* removed by dev_pm_opp_remove.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -1137,16 +1572,16 @@ static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
*/
static int _opp_add_static_v2(struct device *dev, struct device_node *np)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *new_opp;
u64 rate;
u32 val;
int ret;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- new_opp = _allocate_opp(dev, &dev_opp);
+ new_opp = _allocate_opp(dev, &opp_table);
if (!new_opp) {
ret = -ENOMEM;
goto unlock;
@@ -1159,7 +1594,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
}
/* Check if the OPP supports hardware's hierarchy of versions or not */
- if (!_opp_is_supported(dev, dev_opp, np)) {
+ if (!_opp_is_supported(dev, opp_table, np)) {
dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
goto free_opp;
}
@@ -1179,30 +1614,30 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
if (!of_property_read_u32(np, "clock-latency-ns", &val))
new_opp->clock_latency_ns = val;
- ret = opp_parse_supplies(new_opp, dev, dev_opp);
+ ret = opp_parse_supplies(new_opp, dev, opp_table);
if (ret)
goto free_opp;
- ret = _opp_add(dev, new_opp, dev_opp);
+ ret = _opp_add(dev, new_opp, opp_table);
if (ret)
goto free_opp;
/* OPP to select on device suspend */
if (of_property_read_bool(np, "opp-suspend")) {
- if (dev_opp->suspend_opp) {
+ if (opp_table->suspend_opp) {
dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
- __func__, dev_opp->suspend_opp->rate,
+ __func__, opp_table->suspend_opp->rate,
new_opp->rate);
} else {
new_opp->suspend = true;
- dev_opp->suspend_opp = new_opp;
+ opp_table->suspend_opp = new_opp;
}
}
- if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)
- dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns;
+ if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
+ opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
__func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
@@ -1213,13 +1648,13 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
* Notify the changes in the availability of the operable
* frequency/voltage list.
*/
- srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+ srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
return 0;
free_opp:
- _opp_remove(dev_opp, new_opp, false);
+ _opp_remove(opp_table, new_opp, false);
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
@@ -1229,11 +1664,11 @@ unlock:
* @freq: Frequency in Hz for this OPP
* @u_volt: Voltage in uVolts for this OPP
*
- * This function adds an opp definition to the opp list and returns status.
+ * This function adds an opp definition to the opp table and returns status.
* The opp is made available by default and it can be controlled using
* dev_pm_opp_enable/disable functions.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -1265,7 +1700,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add);
* copy operation, returns 0 if no modification was done OR modification was
* successful.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function internally uses RCU updater strategy with mutex locks to
* keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -1274,7 +1709,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add);
static int _opp_set_availability(struct device *dev, unsigned long freq,
bool availability_req)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
int r = 0;
@@ -1283,18 +1718,18 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
if (!new_opp)
return -ENOMEM;
- mutex_lock(&dev_opp_list_lock);
+ mutex_lock(&opp_table_lock);
- /* Find the device_opp */
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- r = PTR_ERR(dev_opp);
+ /* Find the opp_table */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ r = PTR_ERR(opp_table);
dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
goto unlock;
}
/* Do we have the frequency? */
- list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
+ list_for_each_entry(tmp_opp, &opp_table->opp_list, node) {
if (tmp_opp->rate == freq) {
opp = tmp_opp;
break;
@@ -1315,21 +1750,21 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
new_opp->available = availability_req;
list_replace_rcu(&opp->node, &new_opp->node);
- mutex_unlock(&dev_opp_list_lock);
- call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+ mutex_unlock(&opp_table_lock);
+ call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
/* Notify the change of the OPP availability */
if (availability_req)
- srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
- new_opp);
+ srcu_notifier_call_chain(&opp_table->srcu_head,
+ OPP_EVENT_ENABLE, new_opp);
else
- srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
- new_opp);
+ srcu_notifier_call_chain(&opp_table->srcu_head,
+ OPP_EVENT_DISABLE, new_opp);
return 0;
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
kfree(new_opp);
return r;
}
@@ -1343,7 +1778,7 @@ unlock:
* corresponding error value. It is meant to be used for users an OPP available
* after being temporarily made unavailable with dev_pm_opp_disable.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function indirectly uses RCU and mutex locks to keep the
* integrity of the internal data structures. Callers should ensure that
* this function is *NOT* called under RCU protection or in contexts where
@@ -1369,7 +1804,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
* control by users to make this OPP not available until the circumstances are
* right to make it available again (with a call to dev_pm_opp_enable).
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function indirectly uses RCU and mutex locks to keep the
* integrity of the internal data structures. Callers should ensure that
* this function is *NOT* called under RCU protection or in contexts where
@@ -1387,26 +1822,26 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
/**
* dev_pm_opp_get_notifier() - find notifier_head of the device with opp
- * @dev: device pointer used to lookup device OPPs.
+ * @dev: device pointer used to lookup OPP table.
*
* Return: pointer to notifier head if found, otherwise -ENODEV or
* -EINVAL based on type of error casted as pointer. value must be checked
* with IS_ERR to determine valid pointer or error result.
*
- * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU
- * protected pointer. The reason for the same is that the opp pointer which is
- * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * Locking: This function must be called under rcu_read_lock(). opp_table is a
+ * RCU protected pointer. The reason for the same is that the opp pointer which
+ * is returned will remain valid for use with opp_get_{voltage, freq} only while
* under the locked area. The pointer returned must be used prior to unlocking
* with rcu_read_unlock() to maintain the integrity of the pointer.
*/
struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
{
- struct device_opp *dev_opp = _find_device_opp(dev);
+ struct opp_table *opp_table = _find_opp_table(dev);
- if (IS_ERR(dev_opp))
- return ERR_CAST(dev_opp); /* matching type */
+ if (IS_ERR(opp_table))
+ return ERR_CAST(opp_table); /* matching type */
- return &dev_opp->srcu_head;
+ return &opp_table->srcu_head;
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
@@ -1414,11 +1849,11 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
/**
* dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
* entries
- * @dev: device pointer used to lookup device OPPs.
+ * @dev: device pointer used to lookup OPP table.
*
* Free OPPs created using static entries present in DT.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function indirectly uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
@@ -1426,38 +1861,38 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
*/
void dev_pm_opp_of_remove_table(struct device *dev)
{
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct dev_pm_opp *opp, *tmp;
- /* Hold our list modification lock here */
- mutex_lock(&dev_opp_list_lock);
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
- /* Check for existing list for 'dev' */
- dev_opp = _find_device_opp(dev);
- if (IS_ERR(dev_opp)) {
- int error = PTR_ERR(dev_opp);
+ /* Check for existing table for 'dev' */
+ opp_table = _find_opp_table(dev);
+ if (IS_ERR(opp_table)) {
+ int error = PTR_ERR(opp_table);
if (error != -ENODEV)
- WARN(1, "%s: dev_opp: %d\n",
+ WARN(1, "%s: opp_table: %d\n",
IS_ERR_OR_NULL(dev) ?
"Invalid device" : dev_name(dev),
error);
goto unlock;
}
- /* Find if dev_opp manages a single device */
- if (list_is_singular(&dev_opp->dev_list)) {
+ /* Find if opp_table manages a single device */
+ if (list_is_singular(&opp_table->dev_list)) {
/* Free static OPPs */
- list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
+ list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
if (!opp->dynamic)
- _opp_remove(dev_opp, opp, true);
+ _opp_remove(opp_table, opp, true);
}
} else {
- _remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp);
+ _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table);
}
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
@@ -1478,22 +1913,22 @@ struct device_node *_of_get_opp_desc_node(struct device *dev)
static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
{
struct device_node *np;
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
int ret = 0, count = 0;
- mutex_lock(&dev_opp_list_lock);
+ mutex_lock(&opp_table_lock);
- dev_opp = _managed_opp(opp_np);
- if (dev_opp) {
+ opp_table = _managed_opp(opp_np);
+ if (opp_table) {
/* OPPs are already managed */
- if (!_add_list_dev(dev, dev_opp))
+ if (!_add_opp_dev(dev, opp_table))
ret = -ENOMEM;
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
- /* We have opp-list node now, iterate over it and add OPPs */
+ /* We have opp-table node now, iterate over it and add OPPs */
for_each_available_child_of_node(opp_np, np) {
count++;
@@ -1509,19 +1944,19 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
if (WARN_ON(!count))
return -ENOENT;
- mutex_lock(&dev_opp_list_lock);
+ mutex_lock(&opp_table_lock);
- dev_opp = _find_device_opp(dev);
- if (WARN_ON(IS_ERR(dev_opp))) {
- ret = PTR_ERR(dev_opp);
- mutex_unlock(&dev_opp_list_lock);
+ opp_table = _find_opp_table(dev);
+ if (WARN_ON(IS_ERR(opp_table))) {
+ ret = PTR_ERR(opp_table);
+ mutex_unlock(&opp_table_lock);
goto free_table;
}
- dev_opp->np = opp_np;
- dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+ opp_table->np = opp_np;
+ opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return 0;
@@ -1550,7 +1985,7 @@ static int _of_add_opp_table_v1(struct device *dev)
*/
nr = prop->length / sizeof(u32);
if (nr % 2) {
- dev_err(dev, "%s: Invalid OPP list\n", __func__);
+ dev_err(dev, "%s: Invalid OPP table\n", __func__);
return -EINVAL;
}
@@ -1570,11 +2005,11 @@ static int _of_add_opp_table_v1(struct device *dev)
/**
* dev_pm_opp_of_add_table() - Initialize opp table from device tree
- * @dev: device pointer used to lookup device OPPs.
+ * @dev: device pointer used to lookup OPP table.
*
* Register the initial OPP table with the OPP library for given device.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function indirectly uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 9f0c155..ba2bdbd 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -31,7 +31,7 @@
* @table: Cpufreq table returned back to caller
*
* Generate a cpufreq table for a provided device- this assumes that the
- * opp list is already initialized and ready for usage.
+ * opp table is already initialized and ready for usage.
*
* This function allocates required memory for the cpufreq table. It is
* expected that the caller does the required maintenance such as freeing
@@ -44,7 +44,7 @@
* WARNING: It is important for the callers to ensure refreshing their copy of
* the table if any of the mentioned functions have been invoked in the interim.
*
- * Locking: The internal device_opp and opp structures are RCU protected.
+ * Locking: The internal opp_table and opp structures are RCU protected.
* Since we just use the regular accessor functions to access the internal data
* structures, we use RCU read lock inside this function. As a result, users of
* this function DONOT need to use explicit locks for invoking.
@@ -122,15 +122,15 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
/* Required only for V1 bindings, as v2 can manage it from DT itself */
int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
{
- struct device_list_opp *list_dev;
- struct device_opp *dev_opp;
+ struct opp_device *opp_dev;
+ struct opp_table *opp_table;
struct device *dev;
int cpu, ret = 0;
- mutex_lock(&dev_opp_list_lock);
+ mutex_lock(&opp_table_lock);
- dev_opp = _find_device_opp(cpu_dev);
- if (IS_ERR(dev_opp)) {
+ opp_table = _find_opp_table(cpu_dev);
+ if (IS_ERR(opp_table)) {
ret = -EINVAL;
goto unlock;
}
@@ -146,15 +146,15 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
continue;
}
- list_dev = _add_list_dev(dev, dev_opp);
- if (!list_dev) {
- dev_err(dev, "%s: failed to add list-dev for cpu%d device\n",
+ opp_dev = _add_opp_dev(dev, opp_table);
+ if (!opp_dev) {
+ dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n",
__func__, cpu);
continue;
}
}
unlock:
- mutex_unlock(&dev_opp_list_lock);
+ mutex_unlock(&opp_table_lock);
return ret;
}
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
index ddfe477..ef1ae6b 100644
--- a/drivers/base/power/opp/debugfs.c
+++ b/drivers/base/power/opp/debugfs.c
@@ -34,9 +34,9 @@ void opp_debug_remove_one(struct dev_pm_opp *opp)
debugfs_remove_recursive(opp->dentry);
}
-int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
+int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
{
- struct dentry *pdentry = dev_opp->dentry;
+ struct dentry *pdentry = opp_table->dentry;
struct dentry *d;
char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */
@@ -83,52 +83,52 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
return 0;
}
-static int device_opp_debug_create_dir(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static int opp_list_debug_create_dir(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{
- const struct device *dev = list_dev->dev;
+ const struct device *dev = opp_dev->dev;
struct dentry *d;
- opp_set_dev_name(dev, dev_opp->dentry_name);
+ opp_set_dev_name(dev, opp_table->dentry_name);
/* Create device specific directory */
- d = debugfs_create_dir(dev_opp->dentry_name, rootdir);
+ d = debugfs_create_dir(opp_table->dentry_name, rootdir);
if (!d) {
dev_err(dev, "%s: Failed to create debugfs dir\n", __func__);
return -ENOMEM;
}
- list_dev->dentry = d;
- dev_opp->dentry = d;
+ opp_dev->dentry = d;
+ opp_table->dentry = d;
return 0;
}
-static int device_opp_debug_create_link(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static int opp_list_debug_create_link(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{
- const struct device *dev = list_dev->dev;
+ const struct device *dev = opp_dev->dev;
char name[NAME_MAX];
struct dentry *d;
- opp_set_dev_name(list_dev->dev, name);
+ opp_set_dev_name(opp_dev->dev, name);
/* Create device specific directory link */
- d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name);
+ d = debugfs_create_symlink(name, rootdir, opp_table->dentry_name);
if (!d) {
dev_err(dev, "%s: Failed to create link\n", __func__);
return -ENOMEM;
}
- list_dev->dentry = d;
+ opp_dev->dentry = d;
return 0;
}
/**
* opp_debug_register - add a device opp node to the debugfs 'opp' directory
- * @list_dev: list-dev pointer for device
- * @dev_opp: the device-opp being added
+ * @opp_dev: opp-dev pointer for device
+ * @opp_table: the device-opp being added
*
* Dynamically adds device specific directory in debugfs 'opp' directory. If the
* device-opp is shared with other devices, then links will be created for all
@@ -136,73 +136,72 @@ static int device_opp_debug_create_link(struct device_list_opp *list_dev,
*
* Return: 0 on success, otherwise negative error.
*/
-int opp_debug_register(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table)
{
if (!rootdir) {
pr_debug("%s: Uninitialized rootdir\n", __func__);
return -EINVAL;
}
- if (dev_opp->dentry)
- return device_opp_debug_create_link(list_dev, dev_opp);
+ if (opp_table->dentry)
+ return opp_list_debug_create_link(opp_dev, opp_table);
- return device_opp_debug_create_dir(list_dev, dev_opp);
+ return opp_list_debug_create_dir(opp_dev, opp_table);
}
-static void opp_migrate_dentry(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static void opp_migrate_dentry(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{
- struct device_list_opp *new_dev;
+ struct opp_device *new_dev;
const struct device *dev;
struct dentry *dentry;
- /* Look for next list-dev */
- list_for_each_entry(new_dev, &dev_opp->dev_list, node)
- if (new_dev != list_dev)
+ /* Look for next opp-dev */
+ list_for_each_entry(new_dev, &opp_table->dev_list, node)
+ if (new_dev != opp_dev)
break;
/* new_dev is guaranteed to be valid here */
dev = new_dev->dev;
debugfs_remove_recursive(new_dev->dentry);
- opp_set_dev_name(dev, dev_opp->dentry_name);
+ opp_set_dev_name(dev, opp_table->dentry_name);
- dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir,
- dev_opp->dentry_name);
+ dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir,
+ opp_table->dentry_name);
if (!dentry) {
dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
- __func__, dev_name(list_dev->dev), dev_name(dev));
+ __func__, dev_name(opp_dev->dev), dev_name(dev));
return;
}
new_dev->dentry = dentry;
- dev_opp->dentry = dentry;
+ opp_table->dentry = dentry;
}
/**
* opp_debug_unregister - remove a device opp node from debugfs opp directory
- * @list_dev: list-dev pointer for device
- * @dev_opp: the device-opp being removed
+ * @opp_dev: opp-dev pointer for device
+ * @opp_table: the device-opp being removed
*
* Dynamically removes device specific directory from debugfs 'opp' directory.
*/
-void opp_debug_unregister(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+void opp_debug_unregister(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{
- if (list_dev->dentry == dev_opp->dentry) {
+ if (opp_dev->dentry == opp_table->dentry) {
/* Move the real dentry object under another device */
- if (!list_is_singular(&dev_opp->dev_list)) {
- opp_migrate_dentry(list_dev, dev_opp);
+ if (!list_is_singular(&opp_table->dev_list)) {
+ opp_migrate_dentry(opp_dev, opp_table);
goto out;
}
- dev_opp->dentry = NULL;
+ opp_table->dentry = NULL;
}
- debugfs_remove_recursive(list_dev->dentry);
+ debugfs_remove_recursive(opp_dev->dentry);
out:
- list_dev->dentry = NULL;
+ opp_dev->dentry = NULL;
}
static int __init opp_debug_init(void)
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 690638e..f67f806 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -22,13 +22,16 @@
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+struct clk;
+struct regulator;
+
/* Lock to allow exclusive modification to the device and opp lists */
-extern struct mutex dev_opp_list_lock;
+extern struct mutex opp_table_lock;
/*
* Internal data structure organization with the OPP layer library is as
* follows:
- * dev_opp_list (root)
+ * opp_tables (root)
* |- device 1 (represents voltage domain 1)
* | |- opp 1 (availability, freq, voltage)
* | |- opp 2 ..
@@ -37,18 +40,18 @@ extern struct mutex dev_opp_list_lock;
* |- device 2 (represents the next voltage domain)
* ...
* `- device m (represents mth voltage domain)
- * device 1, 2.. are represented by dev_opp structure while each opp
+ * device 1, 2.. are represented by opp_table structure while each opp
* is represented by the opp structure.
*/
/**
* struct dev_pm_opp - Generic OPP description structure
- * @node: opp list node. The nodes are maintained throughout the lifetime
+ * @node: opp table node. The nodes are maintained throughout the lifetime
* of boot. It is expected only an optimal set of OPPs are
* added to the library by the SoC framework.
- * RCU usage: opp list is traversed with RCU locks. node
+ * RCU usage: opp table is traversed with RCU locks. node
* modification is possible realtime, hence the modifications
- * are protected by the dev_opp_list_lock for integrity.
+ * are protected by the opp_table_lock for integrity.
* IMPORTANT: the opp nodes should be maintained in increasing
* order.
* @available: true/false - marks if this OPP as available or not
@@ -62,7 +65,7 @@ extern struct mutex dev_opp_list_lock;
* @u_amp: Maximum current drawn by the device in microamperes
* @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
* frequency from any other OPP's frequency.
- * @dev_opp: points back to the device_opp struct this opp belongs to
+ * @opp_table: points back to the opp_table struct this opp belongs to
* @rcu_head: RCU callback head used for deferred freeing
* @np: OPP's device node.
* @dentry: debugfs dentry pointer (per opp)
@@ -84,7 +87,7 @@ struct dev_pm_opp {
unsigned long u_amp;
unsigned long clock_latency_ns;
- struct device_opp *dev_opp;
+ struct opp_table *opp_table;
struct rcu_head rcu_head;
struct device_node *np;
@@ -95,16 +98,16 @@ struct dev_pm_opp {
};
/**
- * struct device_list_opp - devices managed by 'struct device_opp'
+ * struct opp_device - devices managed by 'struct opp_table'
* @node: list node
* @dev: device to which the struct object belongs
* @rcu_head: RCU callback head used for deferred freeing
* @dentry: debugfs dentry pointer (per device)
*
- * This is an internal data structure maintaining the list of devices that are
- * managed by 'struct device_opp'.
+ * This is an internal data structure maintaining the devices that are managed
+ * by 'struct opp_table'.
*/
-struct device_list_opp {
+struct opp_device {
struct list_head node;
const struct device *dev;
struct rcu_head rcu_head;
@@ -115,16 +118,16 @@ struct device_list_opp {
};
/**
- * struct device_opp - Device opp structure
- * @node: list node - contains the devices with OPPs that
+ * struct opp_table - Device opp structure
+ * @node: table node - contains the devices with OPPs that
* have been registered. Nodes once added are not modified in this
- * list.
- * RCU usage: nodes are not modified in the list of device_opp,
- * however addition is possible and is secured by dev_opp_list_lock
+ * table.
+ * RCU usage: nodes are not modified in the table of opp_table,
+ * however addition is possible and is secured by opp_table_lock
* @srcu_head: notifier head to notify the OPP availability changes.
* @rcu_head: RCU callback head used for deferred freeing
* @dev_list: list of devices that share these OPPs
- * @opp_list: list of opps
+ * @opp_list: table of opps
* @np: struct device_node pointer for opp's DT node.
* @clock_latency_ns_max: Max clock latency in nanoseconds.
* @shared_opp: OPP is shared between multiple devices.
@@ -132,9 +135,13 @@ struct device_list_opp {
* @supported_hw: Array of version number to support.
* @supported_hw_count: Number of elements in supported_hw array.
* @prop_name: A name to postfix to many DT properties, while parsing them.
+ * @clk: Device's clock handle
+ * @regulator: Supply regulator
* @dentry: debugfs dentry pointer of the real device directory (not links).
* @dentry_name: Name of the real dentry.
*
+ * @voltage_tolerance_v1: In percentage, for v1 bindings only.
+ *
* This is an internal data structure maintaining the link to opps attached to
* a device. This structure is not meant to be shared to users as it is
* meant for book keeping and private to OPP library.
@@ -143,7 +150,7 @@ struct device_list_opp {
* need to wait for the grace period of both of them before freeing any
* resources. And so we have used kfree_rcu() from within call_srcu() handlers.
*/
-struct device_opp {
+struct opp_table {
struct list_head node;
struct srcu_notifier_head srcu_head;
@@ -153,12 +160,18 @@ struct device_opp {
struct device_node *np;
unsigned long clock_latency_ns_max;
+
+ /* For backward compatibility with v1 bindings */
+ unsigned int voltage_tolerance_v1;
+
bool shared_opp;
struct dev_pm_opp *suspend_opp;
unsigned int *supported_hw;
unsigned int supported_hw_count;
const char *prop_name;
+ struct clk *clk;
+ struct regulator *regulator;
#ifdef CONFIG_DEBUG_FS
struct dentry *dentry;
@@ -167,30 +180,27 @@ struct device_opp {
};
/* Routines internal to opp core */
-struct device_opp *_find_device_opp(struct device *dev);
-struct device_list_opp *_add_list_dev(const struct device *dev,
- struct device_opp *dev_opp);
+struct opp_table *_find_opp_table(struct device *dev);
+struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
struct device_node *_of_get_opp_desc_node(struct device *dev);
#ifdef CONFIG_DEBUG_FS
void opp_debug_remove_one(struct dev_pm_opp *opp);
-int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp);
-int opp_debug_register(struct device_list_opp *list_dev,
- struct device_opp *dev_opp);
-void opp_debug_unregister(struct device_list_opp *list_dev,
- struct device_opp *dev_opp);
+int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table);
+int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table);
+void opp_debug_unregister(struct opp_device *opp_dev, struct opp_table *opp_table);
#else
static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {}
static inline int opp_debug_create_one(struct dev_pm_opp *opp,
- struct device_opp *dev_opp)
+ struct opp_table *opp_table)
{ return 0; }
-static inline int opp_debug_register(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static inline int opp_debug_register(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{ return 0; }
-static inline void opp_debug_unregister(struct device_list_opp *list_dev,
- struct device_opp *dev_opp)
+static inline void opp_debug_unregister(struct opp_device *opp_dev,
+ struct opp_table *opp_table)
{ }
#endif /* DEBUG_FS */
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index a311cfa..a697579 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -166,14 +166,14 @@ void generate_pm_trace(const void *tracedata, unsigned int user)
}
EXPORT_SYMBOL(generate_pm_trace);
-extern char __tracedata_start, __tracedata_end;
+extern char __tracedata_start[], __tracedata_end[];
static int show_file_hash(unsigned int value)
{
int match;
char *tracedata;
match = 0;
- for (tracedata = &__tracedata_start ; tracedata < &__tracedata_end ;
+ for (tracedata = __tracedata_start ; tracedata < __tracedata_end ;
tracedata += 2 + sizeof(unsigned long)) {
unsigned short lineno = *(unsigned short *)tracedata;
const char *file = *(const char **)(tracedata + 2);
diff --git a/drivers/base/property.c b/drivers/base/property.c
index a163f2c5..76628a7 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -218,7 +218,8 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
bool ret;
ret = __fwnode_property_present(fwnode, propname);
- if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
+ if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
+ !IS_ERR_OR_NULL(fwnode->secondary))
ret = __fwnode_property_present(fwnode->secondary, propname);
return ret;
}
@@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(device_property_match_string);
int _ret_; \
_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, \
_val_, _nval_); \
- if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary)) \
+ if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) && \
+ !IS_ERR_OR_NULL(_fwnode_->secondary)) \
_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_, \
_proptype_, _val_, _nval_); \
_ret_; \
@@ -593,7 +595,8 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
int ret;
ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
- if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
+ if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+ !IS_ERR_OR_NULL(fwnode->secondary))
ret = __fwnode_property_read_string_array(fwnode->secondary,
propname, val, nval);
return ret;
@@ -621,7 +624,8 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode,
int ret;
ret = __fwnode_property_read_string(fwnode, propname, val);
- if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
+ if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+ !IS_ERR_OR_NULL(fwnode->secondary))
ret = __fwnode_property_read_string(fwnode->secondary,
propname, val);
return ret;
@@ -820,11 +824,16 @@ void device_remove_property_set(struct device *dev)
* the pset. If there is no real firmware node (ACPI/DT) primary
* will hold the pset.
*/
- if (!is_pset_node(fwnode))
- fwnode = fwnode->secondary;
- if (!IS_ERR(fwnode) && is_pset_node(fwnode))
+ if (is_pset_node(fwnode)) {
+ set_primary_fwnode(dev, NULL);
pset_free_set(to_pset_node(fwnode));
- set_secondary_fwnode(dev, NULL);
+ } else {
+ fwnode = fwnode->secondary;
+ if (!IS_ERR(fwnode) && is_pset_node(fwnode)) {
+ set_secondary_fwnode(dev, NULL);
+ pset_free_set(to_pset_node(fwnode));
+ }
+ }
}
EXPORT_SYMBOL_GPL(device_remove_property_set);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index f935110..a7f4585 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -19,6 +19,7 @@ config CPU_FREQ
if CPU_FREQ
config CPU_FREQ_GOV_COMMON
+ select IRQ_WORK
bool
config CPU_FREQ_BOOST_SW
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 51eef87..59a7b38 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
unsigned int cpu_feature;
unsigned int acpi_perf_cpu;
cpumask_var_t freqdomain_cpus;
+ void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
+ u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
};
/* acpi_perf_data is a pointer to percpu data. */
@@ -243,125 +245,119 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
}
}
-struct msr_addr {
- u32 reg;
-};
+u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+{
+ u32 val, dummy;
-struct io_addr {
- u16 port;
- u8 bit_width;
-};
+ rdmsr(MSR_IA32_PERF_CTL, val, dummy);
+ return val;
+}
+
+void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+{
+ u32 lo, hi;
+
+ rdmsr(MSR_IA32_PERF_CTL, lo, hi);
+ lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
+ wrmsr(MSR_IA32_PERF_CTL, lo, hi);
+}
+
+u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+{
+ u32 val, dummy;
+
+ rdmsr(MSR_AMD_PERF_CTL, val, dummy);
+ return val;
+}
+
+void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+{
+ wrmsr(MSR_AMD_PERF_CTL, val, 0);
+}
+
+u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+{
+ u32 val;
+
+ acpi_os_read_port(reg->address, &val, reg->bit_width);
+ return val;
+}
+
+void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+{
+ acpi_os_write_port(reg->address, val, reg->bit_width);
+}
struct drv_cmd {
- unsigned int type;
- const struct cpumask *mask;
- union {
- struct msr_addr msr;
- struct io_addr io;
- } addr;
+ struct acpi_pct_register *reg;
u32 val;
+ union {
+ void (*write)(struct acpi_pct_register *reg, u32 val);
+ u32 (*read)(struct acpi_pct_register *reg);
+ } func;
};
/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
- u32 h;
- switch (cmd->type) {
- case SYSTEM_INTEL_MSR_CAPABLE:
- case SYSTEM_AMD_MSR_CAPABLE:
- rdmsr(cmd->addr.msr.reg, cmd->val, h);
- break;
- case SYSTEM_IO_CAPABLE:
- acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
- &cmd->val,
- (u32)cmd->addr.io.bit_width);
- break;
- default:
- break;
- }
+ cmd->val = cmd->func.read(cmd->reg);
}
-/* Called via smp_call_function_many(), on the target CPUs */
-static void do_drv_write(void *_cmd)
+static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
{
- struct drv_cmd *cmd = _cmd;
- u32 lo, hi;
+ struct acpi_processor_performance *perf = to_perf_data(data);
+ struct drv_cmd cmd = {
+ .reg = &perf->control_register,
+ .func.read = data->cpu_freq_read,
+ };
+ int err;
- switch (cmd->type) {
- case SYSTEM_INTEL_MSR_CAPABLE:
- rdmsr(cmd->addr.msr.reg, lo, hi);
- lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
- wrmsr(cmd->addr.msr.reg, lo, hi);
- break;
- case SYSTEM_AMD_MSR_CAPABLE:
- wrmsr(cmd->addr.msr.reg, cmd->val, 0);
- break;
- case SYSTEM_IO_CAPABLE:
- acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
- cmd->val,
- (u32)cmd->addr.io.bit_width);
- break;
- default:
- break;
- }
+ err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
+ WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */
+ return cmd.val;
}
-static void drv_read(struct drv_cmd *cmd)
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
{
- int err;
- cmd->val = 0;
+ struct drv_cmd *cmd = _cmd;
- err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
- WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */
+ cmd->func.write(cmd->reg, cmd->val);
}
-static void drv_write(struct drv_cmd *cmd)
+static void drv_write(struct acpi_cpufreq_data *data,
+ const struct cpumask *mask, u32 val)
{
+ struct acpi_processor_performance *perf = to_perf_data(data);
+ struct drv_cmd cmd = {
+ .reg = &perf->control_register,
+ .val = val,
+ .func.write = data->cpu_freq_write,
+ };
int this_cpu;
this_cpu = get_cpu();
- if (cpumask_test_cpu(this_cpu, cmd->mask))
- do_drv_write(cmd);
- smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+ if (cpumask_test_cpu(this_cpu, mask))
+ do_drv_write(&cmd);
+
+ smp_call_function_many(mask, do_drv_write, &cmd, 1);
put_cpu();
}
-static u32
-get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
+static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
- struct acpi_processor_performance *perf;
- struct drv_cmd cmd;
+ u32 val;
if (unlikely(cpumask_empty(mask)))
return 0;
- switch (data->cpu_feature) {
- case SYSTEM_INTEL_MSR_CAPABLE:
- cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
- cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
- break;
- case SYSTEM_AMD_MSR_CAPABLE:
- cmd.type = SYSTEM_AMD_MSR_CAPABLE;
- cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
- break;
- case SYSTEM_IO_CAPABLE:
- cmd.type = SYSTEM_IO_CAPABLE;
- perf = to_perf_data(data);
- cmd.addr.io.port = perf->control_register.address;
- cmd.addr.io.bit_width = perf->control_register.bit_width;
- break;
- default:
- return 0;
- }
-
- cmd.mask = mask;
- drv_read(&cmd);
+ val = drv_read(data, mask);
- pr_debug("get_cur_val = %u\n", cmd.val);
+ pr_debug("get_cur_val = %u\n", val);
- return cmd.val;
+ return val;
}
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
@@ -416,7 +412,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
{
struct acpi_cpufreq_data *data = policy->driver_data;
struct acpi_processor_performance *perf;
- struct drv_cmd cmd;
+ const struct cpumask *mask;
unsigned int next_perf_state = 0; /* Index into perf table */
int result = 0;
@@ -434,42 +430,21 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
} else {
pr_debug("Already at target state (P%d)\n",
next_perf_state);
- goto out;
+ return 0;
}
}
- switch (data->cpu_feature) {
- case SYSTEM_INTEL_MSR_CAPABLE:
- cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
- cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
- cmd.val = (u32) perf->states[next_perf_state].control;
- break;
- case SYSTEM_AMD_MSR_CAPABLE:
- cmd.type = SYSTEM_AMD_MSR_CAPABLE;
- cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
- cmd.val = (u32) perf->states[next_perf_state].control;
- break;
- case SYSTEM_IO_CAPABLE:
- cmd.type = SYSTEM_IO_CAPABLE;
- cmd.addr.io.port = perf->control_register.address;
- cmd.addr.io.bit_width = perf->control_register.bit_width;
- cmd.val = (u32) perf->states[next_perf_state].control;
- break;
- default:
- result = -ENODEV;
- goto out;
- }
-
- /* cpufreq holds the hotplug lock, so we are safe from here on */
- if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
- cmd.mask = policy->cpus;
- else
- cmd.mask = cpumask_of(policy->cpu);
+ /*
+ * The core won't allow CPUs to go away until the governor has been
+ * stopped, so we can rely on the stability of policy->cpus.
+ */
+ mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
+ cpumask_of(policy->cpu) : policy->cpus;
- drv_write(&cmd);
+ drv_write(data, mask, perf->states[next_perf_state].control);
if (acpi_pstate_strict) {
- if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
+ if (!check_freqs(mask, data->freq_table[index].frequency,
data)) {
pr_debug("acpi_cpufreq_target failed (%d)\n",
policy->cpu);
@@ -480,7 +455,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
if (!result)
perf->state = next_perf_state;
-out:
return result;
}
@@ -740,15 +714,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
pr_debug("SYSTEM IO addr space\n");
data->cpu_feature = SYSTEM_IO_CAPABLE;
+ data->cpu_freq_read = cpu_freq_read_io;
+ data->cpu_freq_write = cpu_freq_write_io;
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
pr_debug("HARDWARE addr space\n");
if (check_est_cpu(cpu)) {
data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+ data->cpu_freq_read = cpu_freq_read_intel;
+ data->cpu_freq_write = cpu_freq_write_intel;
break;
}
if (check_amd_hwpstate_cpu(cpu)) {
data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+ data->cpu_freq_read = cpu_freq_read_amd;
+ data->cpu_freq_write = cpu_freq_write_amd;
break;
}
result = -ENODEV;
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index f6b79ab..404360c 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -21,7 +21,7 @@
#include <asm/msr.h>
#include <asm/cpufeature.h>
-#include "cpufreq_governor.h"
+#include "cpufreq_ondemand.h"
#define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080
#define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081
@@ -45,10 +45,10 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
long d_actual, d_reference;
struct msr actual, reference;
struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
- struct dbs_data *od_data = policy->governor_data;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *od_data = policy_dbs->dbs_data;
struct od_dbs_tuners *od_tuners = od_data->tuners;
- struct od_cpu_dbs_info_s *od_info =
- od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
+ struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);
if (!od_info->freq_table)
return freq_next;
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 0ca74d0..f951f91 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -31,9 +31,8 @@
struct private_data {
struct device *cpu_dev;
- struct regulator *cpu_reg;
struct thermal_cooling_device *cdev;
- unsigned int voltage_tolerance; /* in percentage */
+ const char *reg_name;
};
static struct freq_attr *cpufreq_dt_attr[] = {
@@ -44,175 +43,128 @@ static struct freq_attr *cpufreq_dt_attr[] = {
static int set_target(struct cpufreq_policy *policy, unsigned int index)
{
- struct dev_pm_opp *opp;
- struct cpufreq_frequency_table *freq_table = policy->freq_table;
- struct clk *cpu_clk = policy->clk;
struct private_data *priv = policy->driver_data;
- struct device *cpu_dev = priv->cpu_dev;
- struct regulator *cpu_reg = priv->cpu_reg;
- unsigned long volt = 0, tol = 0;
- int volt_old = 0;
- unsigned int old_freq, new_freq;
- long freq_Hz, freq_exact;
- int ret;
-
- freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
- if (freq_Hz <= 0)
- freq_Hz = freq_table[index].frequency * 1000;
- freq_exact = freq_Hz;
- new_freq = freq_Hz / 1000;
- old_freq = clk_get_rate(cpu_clk) / 1000;
+ return dev_pm_opp_set_rate(priv->cpu_dev,
+ policy->freq_table[index].frequency * 1000);
+}
- if (!IS_ERR(cpu_reg)) {
- unsigned long opp_freq;
+/*
+ * An earlier version of opp-v1 bindings used to name the regulator
+ * "cpu0-supply", we still need to handle that for backwards compatibility.
+ */
+static const char *find_supply_name(struct device *dev)
+{
+ struct device_node *np;
+ struct property *pp;
+ int cpu = dev->id;
+ const char *name = NULL;
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- dev_err(cpu_dev, "failed to find OPP for %ld\n",
- freq_Hz);
- return PTR_ERR(opp);
- }
- volt = dev_pm_opp_get_voltage(opp);
- opp_freq = dev_pm_opp_get_freq(opp);
- rcu_read_unlock();
- tol = volt * priv->voltage_tolerance / 100;
- volt_old = regulator_get_voltage(cpu_reg);
- dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n",
- opp_freq / 1000, volt);
- }
+ np = of_node_get(dev->of_node);
- dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n",
- old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
- new_freq / 1000, volt ? volt / 1000 : -1);
+ /* This must be valid for sure */
+ if (WARN_ON(!np))
+ return NULL;
- /* scaling up? scale voltage before frequency */
- if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
- ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
- if (ret) {
- dev_err(cpu_dev, "failed to scale voltage up: %d\n",
- ret);
- return ret;
+ /* Try "cpu0" for older DTs */
+ if (!cpu) {
+ pp = of_find_property(np, "cpu0-supply", NULL);
+ if (pp) {
+ name = "cpu0";
+ goto node_put;
}
}
- ret = clk_set_rate(cpu_clk, freq_exact);
- if (ret) {
- dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
- if (!IS_ERR(cpu_reg) && volt_old > 0)
- regulator_set_voltage_tol(cpu_reg, volt_old, tol);
- return ret;
+ pp = of_find_property(np, "cpu-supply", NULL);
+ if (pp) {
+ name = "cpu";
+ goto node_put;
}
- /* scaling down? scale voltage after frequency */
- if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
- ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
- if (ret) {
- dev_err(cpu_dev, "failed to scale voltage down: %d\n",
- ret);
- clk_set_rate(cpu_clk, old_freq * 1000);
- }
- }
-
- return ret;
+ dev_dbg(dev, "no regulator for cpu%d\n", cpu);
+node_put:
+ of_node_put(np);
+ return name;
}
-static int allocate_resources(int cpu, struct device **cdev,
- struct regulator **creg, struct clk **cclk)
+static int resources_available(void)
{
struct device *cpu_dev;
struct regulator *cpu_reg;
struct clk *cpu_clk;
int ret = 0;
- char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg;
+ const char *name;
- cpu_dev = get_cpu_device(cpu);
+ cpu_dev = get_cpu_device(0);
if (!cpu_dev) {
- pr_err("failed to get cpu%d device\n", cpu);
+ pr_err("failed to get cpu0 device\n");
return -ENODEV;
}
- /* Try "cpu0" for older DTs */
- if (!cpu)
- reg = reg_cpu0;
- else
- reg = reg_cpu;
-
-try_again:
- cpu_reg = regulator_get_optional(cpu_dev, reg);
- ret = PTR_ERR_OR_ZERO(cpu_reg);
+ cpu_clk = clk_get(cpu_dev, NULL);
+ ret = PTR_ERR_OR_ZERO(cpu_clk);
if (ret) {
/*
- * If cpu's regulator supply node is present, but regulator is
- * not yet registered, we should try defering probe.
+ * If cpu's clk node is present, but clock is not yet
+ * registered, we should try defering probe.
*/
- if (ret == -EPROBE_DEFER) {
- dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
- cpu);
- return ret;
- }
-
- /* Try with "cpu-supply" */
- if (reg == reg_cpu0) {
- reg = reg_cpu;
- goto try_again;
- }
+ if (ret == -EPROBE_DEFER)
+ dev_dbg(cpu_dev, "clock not ready, retry\n");
+ else
+ dev_err(cpu_dev, "failed to get clock: %d\n", ret);
- dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
+ return ret;
}
- cpu_clk = clk_get(cpu_dev, NULL);
- ret = PTR_ERR_OR_ZERO(cpu_clk);
- if (ret) {
- /* put regulator */
- if (!IS_ERR(cpu_reg))
- regulator_put(cpu_reg);
+ clk_put(cpu_clk);
+ name = find_supply_name(cpu_dev);
+ /* Platform doesn't require regulator */
+ if (!name)
+ return 0;
+
+ cpu_reg = regulator_get_optional(cpu_dev, name);
+ ret = PTR_ERR_OR_ZERO(cpu_reg);
+ if (ret) {
/*
- * If cpu's clk node is present, but clock is not yet
- * registered, we should try defering probe.
+ * If cpu's regulator supply node is present, but regulator is
+ * not yet registered, we should try defering probe.
*/
if (ret == -EPROBE_DEFER)
- dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu);
+ dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n");
else
- dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu,
- ret);
- } else {
- *cdev = cpu_dev;
- *creg = cpu_reg;
- *cclk = cpu_clk;
+ dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret);
+
+ return ret;
}
- return ret;
+ regulator_put(cpu_reg);
+ return 0;
}
static int cpufreq_init(struct cpufreq_policy *policy)
{
struct cpufreq_frequency_table *freq_table;
- struct device_node *np;
struct private_data *priv;
struct device *cpu_dev;
- struct regulator *cpu_reg;
struct clk *cpu_clk;
struct dev_pm_opp *suspend_opp;
- unsigned long min_uV = ~0, max_uV = 0;
unsigned int transition_latency;
- bool need_update = false;
+ bool opp_v1 = false;
+ const char *name;
int ret;
- ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
- if (ret) {
- pr_err("%s: Failed to allocate resources: %d\n", __func__, ret);
- return ret;
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev) {
+ pr_err("failed to get cpu%d device\n", policy->cpu);
+ return -ENODEV;
}
- np = of_node_get(cpu_dev->of_node);
- if (!np) {
- dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu);
- ret = -ENOENT;
- goto out_put_reg_clk;
+ cpu_clk = clk_get(cpu_dev, NULL);
+ if (IS_ERR(cpu_clk)) {
+ ret = PTR_ERR(cpu_clk);
+ dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret);
+ return ret;
}
/* Get OPP-sharing information from "operating-points-v2" bindings */
@@ -223,9 +175,23 @@ static int cpufreq_init(struct cpufreq_policy *policy)
* finding shared-OPPs for backward compatibility.
*/
if (ret == -ENOENT)
- need_update = true;
+ opp_v1 = true;
else
- goto out_node_put;
+ goto out_put_clk;
+ }
+
+ /*
+ * OPP layer will be taking care of regulators now, but it needs to know
+ * the name of the regulator first.
+ */
+ name = find_supply_name(cpu_dev);
+ if (name) {
+ ret = dev_pm_opp_set_regulator(cpu_dev, name);
+ if (ret) {
+ dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
+ policy->cpu, ret);
+ goto out_put_clk;
+ }
}
/*
@@ -246,12 +212,12 @@ static int cpufreq_init(struct cpufreq_policy *policy)
*/
ret = dev_pm_opp_get_opp_count(cpu_dev);
if (ret <= 0) {
- pr_debug("OPP table is not ready, deferring probe\n");
+ dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
ret = -EPROBE_DEFER;
goto out_free_opp;
}
- if (need_update) {
+ if (opp_v1) {
struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
if (!pd || !pd->independent_clocks)
@@ -265,10 +231,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
if (ret)
dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
__func__, ret);
-
- of_property_read_u32(np, "clock-latency", &transition_latency);
- } else {
- transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
}
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -277,62 +239,16 @@ static int cpufreq_init(struct cpufreq_policy *policy)
goto out_free_opp;
}
- of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
-
- if (!transition_latency)
- transition_latency = CPUFREQ_ETERNAL;
-
- if (!IS_ERR(cpu_reg)) {
- unsigned long opp_freq = 0;
-
- /*
- * Disable any OPPs where the connected regulator isn't able to
- * provide the specified voltage and record minimum and maximum
- * voltage levels.
- */
- while (1) {
- struct dev_pm_opp *opp;
- unsigned long opp_uV, tol_uV;
-
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- break;
- }
- opp_uV = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
-
- tol_uV = opp_uV * priv->voltage_tolerance / 100;
- if (regulator_is_supported_voltage(cpu_reg,
- opp_uV - tol_uV,
- opp_uV + tol_uV)) {
- if (opp_uV < min_uV)
- min_uV = opp_uV;
- if (opp_uV > max_uV)
- max_uV = opp_uV;
- } else {
- dev_pm_opp_disable(cpu_dev, opp_freq);
- }
-
- opp_freq++;
- }
-
- ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV);
- if (ret > 0)
- transition_latency += ret * 1000;
- }
+ priv->reg_name = name;
ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
if (ret) {
- pr_err("failed to init cpufreq table: %d\n", ret);
+ dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
goto out_free_priv;
}
priv->cpu_dev = cpu_dev;
- priv->cpu_reg = cpu_reg;
policy->driver_data = priv;
-
policy->clk = cpu_clk;
rcu_read_lock();
@@ -357,9 +273,11 @@ static int cpufreq_init(struct cpufreq_policy *policy)
cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
}
- policy->cpuinfo.transition_latency = transition_latency;
+ transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
+ if (!transition_latency)
+ transition_latency = CPUFREQ_ETERNAL;
- of_node_put(np);
+ policy->cpuinfo.transition_latency = transition_latency;
return 0;
@@ -369,12 +287,10 @@ out_free_priv:
kfree(priv);
out_free_opp:
dev_pm_opp_of_cpumask_remove_table(policy->cpus);
-out_node_put:
- of_node_put(np);
-out_put_reg_clk:
+ if (name)
+ dev_pm_opp_put_regulator(cpu_dev);
+out_put_clk:
clk_put(cpu_clk);
- if (!IS_ERR(cpu_reg))
- regulator_put(cpu_reg);
return ret;
}
@@ -386,9 +302,10 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
cpufreq_cooling_unregister(priv->cdev);
dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+ if (priv->reg_name)
+ dev_pm_opp_put_regulator(priv->cpu_dev);
+
clk_put(policy->clk);
- if (!IS_ERR(priv->cpu_reg))
- regulator_put(priv->cpu_reg);
kfree(priv);
return 0;
@@ -441,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = {
static int dt_cpufreq_probe(struct platform_device *pdev)
{
- struct device *cpu_dev;
- struct regulator *cpu_reg;
- struct clk *cpu_clk;
int ret;
/*
@@ -453,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
*
* FIXME: Is checking this only for CPU0 sufficient ?
*/
- ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk);
+ ret = resources_available();
if (ret)
return ret;
- clk_put(cpu_clk);
- if (!IS_ERR(cpu_reg))
- regulator_put(cpu_reg);
-
dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
ret = cpufreq_register_driver(&dt_cpufreq_driver);
if (ret)
- dev_err(cpu_dev, "failed register driver: %d\n", ret);
+ dev_err(&pdev->dev, "failed register driver: %d\n", ret);
return ret;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e979ec7..4c78258 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -38,48 +38,10 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy)
return cpumask_empty(policy->cpus);
}
-static bool suitable_policy(struct cpufreq_policy *policy, bool active)
-{
- return active == !policy_is_inactive(policy);
-}
-
-/* Finds Next Acive/Inactive policy */
-static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
- bool active)
-{
- do {
- /* No more policies in the list */
- if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
- return NULL;
-
- policy = list_next_entry(policy, policy_list);
- } while (!suitable_policy(policy, active));
-
- return policy;
-}
-
-static struct cpufreq_policy *first_policy(bool active)
-{
- struct cpufreq_policy *policy;
-
- /* No policies in the list */
- if (list_empty(&cpufreq_policy_list))
- return NULL;
-
- policy = list_first_entry(&cpufreq_policy_list, typeof(*policy),
- policy_list);
-
- if (!suitable_policy(policy, active))
- policy = next_policy(policy, active);
-
- return policy;
-}
-
/* Macros to iterate over CPU policies */
-#define for_each_suitable_policy(__policy, __active) \
- for (__policy = first_policy(__active); \
- __policy; \
- __policy = next_policy(__policy, __active))
+#define for_each_suitable_policy(__policy, __active) \
+ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \
+ if ((__active) == !policy_is_inactive(__policy))
#define for_each_active_policy(__policy) \
for_each_suitable_policy(__policy, true)
@@ -102,7 +64,6 @@ static LIST_HEAD(cpufreq_governor_list);
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
static DEFINE_RWLOCK(cpufreq_driver_lock);
-DEFINE_MUTEX(cpufreq_governor_lock);
/* Flag to suspend/resume CPUFreq governors */
static bool cpufreq_suspended;
@@ -113,10 +74,8 @@ static inline bool has_target(void)
}
/* internal prototypes */
-static int __cpufreq_governor(struct cpufreq_policy *policy,
- unsigned int event);
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
-static void handle_update(struct work_struct *work);
/**
* Two notifier lists: the "policy" list is involved in the
@@ -818,12 +777,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
ssize_t ret;
down_read(&policy->rwsem);
-
- if (fattr->show)
- ret = fattr->show(policy, buf);
- else
- ret = -EIO;
-
+ ret = fattr->show(policy, buf);
up_read(&policy->rwsem);
return ret;
@@ -838,18 +792,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
get_online_cpus();
- if (!cpu_online(policy->cpu))
- goto unlock;
-
- down_write(&policy->rwsem);
-
- if (fattr->store)
+ if (cpu_online(policy->cpu)) {
+ down_write(&policy->rwsem);
ret = fattr->store(policy, buf, count);
- else
- ret = -EIO;
+ up_write(&policy->rwsem);
+ }
- up_write(&policy->rwsem);
-unlock:
put_online_cpus();
return ret;
@@ -959,6 +907,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
return cpufreq_add_dev_symlink(policy);
}
+__weak struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return NULL;
+}
+
static int cpufreq_init_policy(struct cpufreq_policy *policy)
{
struct cpufreq_governor *gov = NULL;
@@ -968,11 +921,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
/* Update governor of new_policy to the governor used before hotplug */
gov = find_governor(policy->last_governor);
- if (gov)
+ if (gov) {
pr_debug("Restoring governor %s for cpu %d\n",
policy->governor->name, policy->cpu);
- else
- gov = CPUFREQ_DEFAULT_GOVERNOR;
+ } else {
+ gov = cpufreq_default_governor();
+ if (!gov)
+ return -ENODATA;
+ }
new_policy.governor = gov;
@@ -996,36 +952,45 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
if (cpumask_test_cpu(cpu, policy->cpus))
return 0;
+ down_write(&policy->rwsem);
if (has_target()) {
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret) {
pr_err("%s: Failed to stop governor\n", __func__);
- return ret;
+ goto unlock;
}
}
- down_write(&policy->rwsem);
cpumask_set_cpu(cpu, policy->cpus);
- up_write(&policy->rwsem);
if (has_target()) {
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret)
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
- if (ret) {
+ if (ret)
pr_err("%s: Failed to start governor\n", __func__);
- return ret;
- }
}
- return 0;
+unlock:
+ up_write(&policy->rwsem);
+ return ret;
+}
+
+static void handle_update(struct work_struct *work)
+{
+ struct cpufreq_policy *policy =
+ container_of(work, struct cpufreq_policy, update);
+ unsigned int cpu = policy->cpu;
+ pr_debug("handle_update for cpu %u called\n", cpu);
+ cpufreq_update_policy(cpu);
}
static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
{
struct device *dev = get_cpu_device(cpu);
struct cpufreq_policy *policy;
+ int ret;
if (WARN_ON(!dev))
return NULL;
@@ -1043,7 +1008,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL))
goto err_free_rcpumask;
- kobject_init(&policy->kobj, &ktype_cpufreq);
+ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+ cpufreq_global_kobject, "policy%u", cpu);
+ if (ret) {
+ pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
+ goto err_free_real_cpus;
+ }
+
INIT_LIST_HEAD(&policy->policy_list);
init_rwsem(&policy->rwsem);
spin_lock_init(&policy->transition_lock);
@@ -1054,6 +1025,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
policy->cpu = cpu;
return policy;
+err_free_real_cpus:
+ free_cpumask_var(policy->real_cpus);
err_free_rcpumask:
free_cpumask_var(policy->related_cpus);
err_free_cpumask:
@@ -1158,16 +1131,6 @@ static int cpufreq_online(unsigned int cpu)
cpumask_copy(policy->related_cpus, policy->cpus);
/* Remember CPUs present at the policy creation time. */
cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
-
- /* Name and add the kobject */
- ret = kobject_add(&policy->kobj, cpufreq_global_kobject,
- "policy%u",
- cpumask_first(policy->related_cpus));
- if (ret) {
- pr_err("%s: failed to add policy->kobj: %d\n", __func__,
- ret);
- goto out_exit_policy;
- }
}
/*
@@ -1309,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
return ret;
}
-static void cpufreq_offline_prepare(unsigned int cpu)
+static void cpufreq_offline(unsigned int cpu)
{
struct cpufreq_policy *policy;
+ int ret;
pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
@@ -1321,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu)
return;
}
+ down_write(&policy->rwsem);
if (has_target()) {
- int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret)
pr_err("%s: Failed to stop governor\n", __func__);
}
- down_write(&policy->rwsem);
cpumask_clear_cpu(cpu, policy->cpus);
if (policy_is_inactive(policy)) {
@@ -1340,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu)
/* Nominate new CPU */
policy->cpu = cpumask_any(policy->cpus);
}
- up_write(&policy->rwsem);
/* Start governor again for active policy */
if (!policy_is_inactive(policy)) {
if (has_target()) {
- int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret)
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
if (ret)
pr_err("%s: Failed to start governor\n", __func__);
}
- } else if (cpufreq_driver->stop_cpu) {
- cpufreq_driver->stop_cpu(policy);
- }
-}
-static void cpufreq_offline_finish(unsigned int cpu)
-{
- struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
-
- if (!policy) {
- pr_debug("%s: No cpu_data found\n", __func__);
- return;
+ goto unlock;
}
- /* Only proceed for inactive policies */
- if (!policy_is_inactive(policy))
- return;
+ if (cpufreq_driver->stop_cpu)
+ cpufreq_driver->stop_cpu(policy);
/* If cpu is last user of policy, free policy */
if (has_target()) {
- int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
if (ret)
pr_err("%s: Failed to exit governor\n", __func__);
}
@@ -1386,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu)
cpufreq_driver->exit(policy);
policy->freq_table = NULL;
}
+
+unlock:
+ up_write(&policy->rwsem);
}
/**
@@ -1401,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
if (!policy)
return;
- if (cpu_online(cpu)) {
- cpufreq_offline_prepare(cpu);
- cpufreq_offline_finish(cpu);
- }
+ if (cpu_online(cpu))
+ cpufreq_offline(cpu);
cpumask_clear_cpu(cpu, policy->real_cpus);
remove_cpu_dev_symlink(policy, cpu);
@@ -1413,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
cpufreq_policy_free(policy, true);
}
-static void handle_update(struct work_struct *work)
-{
- struct cpufreq_policy *policy =
- container_of(work, struct cpufreq_policy, update);
- unsigned int cpu = policy->cpu;
- pr_debug("handle_update for cpu %u called\n", cpu);
- cpufreq_update_policy(cpu);
-}
-
/**
* cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
* in deep trouble.
@@ -1584,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend);
void cpufreq_suspend(void)
{
struct cpufreq_policy *policy;
+ int ret;
if (!cpufreq_driver)
return;
@@ -1594,7 +1539,11 @@ void cpufreq_suspend(void)
pr_debug("%s: Suspending Governors\n", __func__);
for_each_active_policy(policy) {
- if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
+ down_write(&policy->rwsem);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ up_write(&policy->rwsem);
+
+ if (ret)
pr_err("%s: Failed to stop governor for policy: %p\n",
__func__, policy);
else if (cpufreq_driver->suspend
@@ -1616,6 +1565,7 @@ suspend:
void cpufreq_resume(void)
{
struct cpufreq_policy *policy;
+ int ret;
if (!cpufreq_driver)
return;
@@ -1628,13 +1578,20 @@ void cpufreq_resume(void)
pr_debug("%s: Resuming Governors\n", __func__);
for_each_active_policy(policy) {
- if (cpufreq_driver->resume && cpufreq_driver->resume(policy))
+ if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) {
pr_err("%s: Failed to resume driver: %p\n", __func__,
policy);
- else if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
- || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
- pr_err("%s: Failed to start governor for policy: %p\n",
- __func__, policy);
+ } else {
+ down_write(&policy->rwsem);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
+ if (!ret)
+ cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ up_write(&policy->rwsem);
+
+ if (ret)
+ pr_err("%s: Failed to start governor for policy: %p\n",
+ __func__, policy);
+ }
}
/*
@@ -1846,7 +1803,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int old_target_freq = target_freq;
- int retval = -EINVAL;
+ struct cpufreq_frequency_table *freq_table;
+ int index, retval;
if (cpufreq_disabled())
return -ENODEV;
@@ -1873,34 +1831,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
policy->restore_freq = policy->cur;
if (cpufreq_driver->target)
- retval = cpufreq_driver->target(policy, target_freq, relation);
- else if (cpufreq_driver->target_index) {
- struct cpufreq_frequency_table *freq_table;
- int index;
-
- freq_table = cpufreq_frequency_get_table(policy->cpu);
- if (unlikely(!freq_table)) {
- pr_err("%s: Unable to find freq_table\n", __func__);
- goto out;
- }
+ return cpufreq_driver->target(policy, target_freq, relation);
- retval = cpufreq_frequency_table_target(policy, freq_table,
- target_freq, relation, &index);
- if (unlikely(retval)) {
- pr_err("%s: Unable to find matching freq\n", __func__);
- goto out;
- }
+ if (!cpufreq_driver->target_index)
+ return -EINVAL;
- if (freq_table[index].frequency == policy->cur) {
- retval = 0;
- goto out;
- }
+ freq_table = cpufreq_frequency_get_table(policy->cpu);
+ if (unlikely(!freq_table)) {
+ pr_err("%s: Unable to find freq_table\n", __func__);
+ return -EINVAL;
+ }
- retval = __target_index(policy, freq_table, index);
+ retval = cpufreq_frequency_table_target(policy, freq_table, target_freq,
+ relation, &index);
+ if (unlikely(retval)) {
+ pr_err("%s: Unable to find matching freq\n", __func__);
+ return retval;
}
-out:
- return retval;
+ if (freq_table[index].frequency == policy->cur)
+ return 0;
+
+ return __target_index(policy, freq_table, index);
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
@@ -1920,20 +1872,14 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
-static int __cpufreq_governor(struct cpufreq_policy *policy,
- unsigned int event)
+__weak struct cpufreq_governor *cpufreq_fallback_governor(void)
{
- int ret;
+ return NULL;
+}
- /* Only must be defined when default governor is known to have latency
- restrictions, like e.g. conservative or ondemand.
- That this is the case is already ensured in Kconfig
- */
-#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
- struct cpufreq_governor *gov = &cpufreq_gov_performance;
-#else
- struct cpufreq_governor *gov = NULL;
-#endif
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
+{
+ int ret;
/* Don't start any governor operations if we are entering suspend */
if (cpufreq_suspended)
@@ -1948,12 +1894,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
if (policy->governor->max_transition_latency &&
policy->cpuinfo.transition_latency >
policy->governor->max_transition_latency) {
- if (!gov)
- return -EINVAL;
- else {
+ struct cpufreq_governor *gov = cpufreq_fallback_governor();
+
+ if (gov) {
pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n",
policy->governor->name, gov->name);
policy->governor = gov;
+ } else {
+ return -EINVAL;
}
}
@@ -1963,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);
- mutex_lock(&cpufreq_governor_lock);
- if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
- || (!policy->governor_enabled
- && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
- mutex_unlock(&cpufreq_governor_lock);
- return -EBUSY;
- }
-
- if (event == CPUFREQ_GOV_STOP)
- policy->governor_enabled = false;
- else if (event == CPUFREQ_GOV_START)
- policy->governor_enabled = true;
-
- mutex_unlock(&cpufreq_governor_lock);
-
ret = policy->governor->governor(policy, event);
if (!ret) {
@@ -1985,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
policy->governor->initialized++;
else if (event == CPUFREQ_GOV_POLICY_EXIT)
policy->governor->initialized--;
- } else {
- /* Restore original values */
- mutex_lock(&cpufreq_governor_lock);
- if (event == CPUFREQ_GOV_STOP)
- policy->governor_enabled = true;
- else if (event == CPUFREQ_GOV_START)
- policy->governor_enabled = false;
- mutex_unlock(&cpufreq_governor_lock);
}
if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
@@ -2147,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
old_gov = policy->governor;
/* end old governor */
if (old_gov) {
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret) {
/* This can happen due to race with other operations */
pr_debug("%s: Failed to Stop Governor: %s (%d)\n",
@@ -2155,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
return ret;
}
- up_write(&policy->rwsem);
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
- down_write(&policy->rwsem);
-
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
if (ret) {
pr_err("%s: Failed to Exit Governor: %s (%d)\n",
__func__, old_gov->name, ret);
@@ -2168,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
/* start new governor */
policy->governor = new_policy->governor;
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
if (!ret) {
- ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+ ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret)
goto out;
- up_write(&policy->rwsem);
- __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
- down_write(&policy->rwsem);
+ cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
}
/* new governor failed, so re-start old one */
pr_debug("starting governor %s failed\n", policy->governor->name);
if (old_gov) {
policy->governor = old_gov;
- if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
+ if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
policy->governor = NULL;
else
- __cpufreq_governor(policy, CPUFREQ_GOV_START);
+ cpufreq_governor(policy, CPUFREQ_GOV_START);
}
return ret;
out:
pr_debug("governor: change or update limits\n");
- return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
}
/**
@@ -2260,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_PREPARE:
- cpufreq_offline_prepare(cpu);
- break;
-
- case CPU_POST_DEAD:
- cpufreq_offline_finish(cpu);
+ cpufreq_offline(cpu);
break;
case CPU_DOWN_FAILED:
@@ -2297,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state)
__func__);
break;
}
+
+ down_write(&policy->rwsem);
policy->user_policy.max = policy->max;
- __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+ up_write(&policy->rwsem);
}
}
@@ -2384,7 +2303,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled);
* submitted by the CPU Frequency driver.
*
* Registers a CPU Frequency driver to this core code. This code
- * returns zero on success, -EBUSY when another driver got here first
+ * returns zero on success, -EEXIST when another driver got here first
* (and isn't unregistered in the meantime).
*
*/
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 606ad74ab..bf4913f 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -14,6 +14,22 @@
#include <linux/slab.h>
#include "cpufreq_governor.h"
+struct cs_policy_dbs_info {
+ struct policy_dbs_info policy_dbs;
+ unsigned int down_skip;
+ unsigned int requested_freq;
+};
+
+static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
+{
+ return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs);
+}
+
+struct cs_dbs_tuners {
+ unsigned int down_threshold;
+ unsigned int freq_step;
+};
+
/* Conservative governor macros */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
@@ -21,21 +37,6 @@
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10)
-static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);
-
-static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
- unsigned int event);
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
-static
-#endif
-struct cpufreq_governor cpufreq_gov_conservative = {
- .name = "conservative",
- .governor = cs_cpufreq_governor_dbs,
- .max_transition_latency = TRANSITION_LATENCY_LIMIT,
- .owner = THIS_MODULE,
-};
-
static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
struct cpufreq_policy *policy)
{
@@ -57,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
* Any frequency increase takes it to the maximum frequency. Frequency reduction
* happens at minimum steps of 5% (default) of maximum frequency
*/
-static void cs_check_cpu(int cpu, unsigned int load)
+static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
{
- struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);
- struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
- struct dbs_data *dbs_data = policy->governor_data;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+ unsigned int load = dbs_update(policy);
/*
* break out if we 'cannot' reduce the speed as the user might
* want freq_step to be zero
*/
if (cs_tuners->freq_step == 0)
- return;
+ goto out;
/* Check for frequency increase */
- if (load > cs_tuners->up_threshold) {
+ if (load > dbs_data->up_threshold) {
dbs_info->down_skip = 0;
/* if we are already at full speed then break out early */
if (dbs_info->requested_freq == policy->max)
- return;
+ goto out;
dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
@@ -86,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load)
__cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_H);
- return;
+ goto out;
}
/* if sampling_down_factor is active break out early */
- if (++dbs_info->down_skip < cs_tuners->sampling_down_factor)
- return;
+ if (++dbs_info->down_skip < dbs_data->sampling_down_factor)
+ goto out;
dbs_info->down_skip = 0;
/* Check for frequency decrease */
@@ -101,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load)
* if we cannot reduce the frequency anymore, break out early
*/
if (policy->cur == policy->min)
- return;
+ goto out;
freq_target = get_freq_target(cs_tuners, policy);
if (dbs_info->requested_freq > freq_target)
@@ -111,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load)
__cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_L);
- return;
}
-}
-
-static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
-{
- struct dbs_data *dbs_data = policy->governor_data;
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-
- if (modify_all)
- dbs_check_cpu(dbs_data, policy->cpu);
- return delay_for_sampling_rate(cs_tuners->sampling_rate);
+ out:
+ return dbs_data->sampling_rate;
}
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
- struct cs_cpu_dbs_info_s *dbs_info =
- &per_cpu(cs_cpu_dbs_info, freq->cpu);
- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
-
- if (!policy)
- return 0;
-
- /* policy isn't governed by conservative governor */
- if (policy->governor != &cpufreq_gov_conservative)
- return 0;
-
- /*
- * we only care if our internally tracked freq moves outside the 'valid'
- * ranges of frequency available to us otherwise we do not change it
- */
- if (dbs_info->requested_freq > policy->max
- || dbs_info->requested_freq < policy->min)
- dbs_info->requested_freq = freq->new;
-
- return 0;
-}
+ void *data);
static struct notifier_block cs_cpufreq_notifier_block = {
.notifier_call = dbs_cpufreq_notifier,
};
/************************** sysfs interface ************************/
-static struct common_dbs_data cs_dbs_cdata;
+static struct dbs_governor cs_dbs_gov;
static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
const char *buf, size_t count)
{
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -170,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
return -EINVAL;
- cs_tuners->sampling_down_factor = input;
- return count;
-}
-
-static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
- size_t count)
-{
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
- unsigned int input;
- int ret;
- ret = sscanf(buf, "%u", &input);
-
- if (ret != 1)
- return -EINVAL;
-
- cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate);
+ dbs_data->sampling_down_factor = input;
return count;
}
@@ -200,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold)
return -EINVAL;
- cs_tuners->up_threshold = input;
+ dbs_data->up_threshold = input;
return count;
}
@@ -214,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
/* cannot be lower than 11 otherwise freq will not fall */
if (ret != 1 || input < 11 || input > 100 ||
- input >= cs_tuners->up_threshold)
+ input >= dbs_data->up_threshold)
return -EINVAL;
cs_tuners->down_threshold = input;
@@ -224,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
const char *buf, size_t count)
{
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
- unsigned int input, j;
+ unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -235,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
if (input > 1)
input = 1;
- if (input == cs_tuners->ignore_nice_load) /* nothing to do */
+ if (input == dbs_data->ignore_nice_load) /* nothing to do */
return count;
- cs_tuners->ignore_nice_load = input;
+ dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */
- for_each_online_cpu(j) {
- struct cs_cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(cs_cpu_dbs_info, j);
- dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
- &dbs_info->cdbs.prev_cpu_wall, 0);
- if (cs_tuners->ignore_nice_load)
- dbs_info->cdbs.prev_cpu_nice =
- kcpustat_cpu(j).cpustat[CPUTIME_NICE];
- }
+ gov_update_cpu_data(dbs_data);
+
return count;
}
@@ -275,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
return count;
}
-show_store_one(cs, sampling_rate);
-show_store_one(cs, sampling_down_factor);
-show_store_one(cs, up_threshold);
-show_store_one(cs, down_threshold);
-show_store_one(cs, ignore_nice_load);
-show_store_one(cs, freq_step);
-declare_show_sampling_rate_min(cs);
-
-gov_sys_pol_attr_rw(sampling_rate);
-gov_sys_pol_attr_rw(sampling_down_factor);
-gov_sys_pol_attr_rw(up_threshold);
-gov_sys_pol_attr_rw(down_threshold);
-gov_sys_pol_attr_rw(ignore_nice_load);
-gov_sys_pol_attr_rw(freq_step);
-gov_sys_pol_attr_ro(sampling_rate_min);
-
-static struct attribute *dbs_attributes_gov_sys[] = {
- &sampling_rate_min_gov_sys.attr,
- &sampling_rate_gov_sys.attr,
- &sampling_down_factor_gov_sys.attr,
- &up_threshold_gov_sys.attr,
- &down_threshold_gov_sys.attr,
- &ignore_nice_load_gov_sys.attr,
- &freq_step_gov_sys.attr,
+gov_show_one_common(sampling_rate);
+gov_show_one_common(sampling_down_factor);
+gov_show_one_common(up_threshold);
+gov_show_one_common(ignore_nice_load);
+gov_show_one_common(min_sampling_rate);
+gov_show_one(cs, down_threshold);
+gov_show_one(cs, freq_step);
+
+gov_attr_rw(sampling_rate);
+gov_attr_rw(sampling_down_factor);
+gov_attr_rw(up_threshold);
+gov_attr_rw(ignore_nice_load);
+gov_attr_ro(min_sampling_rate);
+gov_attr_rw(down_threshold);
+gov_attr_rw(freq_step);
+
+static struct attribute *cs_attributes[] = {
+ &min_sampling_rate.attr,
+ &sampling_rate.attr,
+ &sampling_down_factor.attr,
+ &up_threshold.attr,
+ &down_threshold.attr,
+ &ignore_nice_load.attr,
+ &freq_step.attr,
NULL
};
-static struct attribute_group cs_attr_group_gov_sys = {
- .attrs = dbs_attributes_gov_sys,
- .name = "conservative",
-};
+/************************** sysfs end ************************/
-static struct attribute *dbs_attributes_gov_pol[] = {
- &sampling_rate_min_gov_pol.attr,
- &sampling_rate_gov_pol.attr,
- &sampling_down_factor_gov_pol.attr,
- &up_threshold_gov_pol.attr,
- &down_threshold_gov_pol.attr,
- &ignore_nice_load_gov_pol.attr,
- &freq_step_gov_pol.attr,
- NULL
-};
+static struct policy_dbs_info *cs_alloc(void)
+{
+ struct cs_policy_dbs_info *dbs_info;
-static struct attribute_group cs_attr_group_gov_pol = {
- .attrs = dbs_attributes_gov_pol,
- .name = "conservative",
-};
+ dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
+ return dbs_info ? &dbs_info->policy_dbs : NULL;
+}
-/************************** sysfs end ************************/
+static void cs_free(struct policy_dbs_info *policy_dbs)
+{
+ kfree(to_dbs_info(policy_dbs));
+}
static int cs_init(struct dbs_data *dbs_data, bool notify)
{
@@ -335,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify)
return -ENOMEM;
}
- tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
- tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
- tuners->ignore_nice_load = 0;
tuners->freq_step = DEF_FREQUENCY_STEP;
+ dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
+ dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
+ dbs_data->ignore_nice_load = 0;
dbs_data->tuners = tuners;
dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
@@ -361,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify)
kfree(dbs_data->tuners);
}
-define_get_cpu_dbs_routines(cs_cpu_dbs_info);
+static void cs_start(struct cpufreq_policy *policy)
+{
+ struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
+
+ dbs_info->down_skip = 0;
+ dbs_info->requested_freq = policy->cur;
+}
-static struct common_dbs_data cs_dbs_cdata = {
- .governor = GOV_CONSERVATIVE,
- .attr_group_gov_sys = &cs_attr_group_gov_sys,
- .attr_group_gov_pol = &cs_attr_group_gov_pol,
- .get_cpu_cdbs = get_cpu_cdbs,
- .get_cpu_dbs_info_s = get_cpu_dbs_info_s,
+static struct dbs_governor cs_dbs_gov = {
+ .gov = {
+ .name = "conservative",
+ .governor = cpufreq_governor_dbs,
+ .max_transition_latency = TRANSITION_LATENCY_LIMIT,
+ .owner = THIS_MODULE,
+ },
+ .kobj_type = { .default_attrs = cs_attributes },
.gov_dbs_timer = cs_dbs_timer,
- .gov_check_cpu = cs_check_cpu,
+ .alloc = cs_alloc,
+ .free = cs_free,
.init = cs_init,
.exit = cs_exit,
- .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex),
+ .start = cs_start,
};
-static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
- unsigned int event)
+#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov)
+
+static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
{
- return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event);
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
+ struct cs_policy_dbs_info *dbs_info;
+
+ if (!policy)
+ return 0;
+
+ /* policy isn't governed by conservative governor */
+ if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE)
+ return 0;
+
+ dbs_info = to_dbs_info(policy->governor_data);
+ /*
+ * we only care if our internally tracked freq moves outside the 'valid'
+ * ranges of frequency available to us otherwise we do not change it
+ */
+ if (dbs_info->requested_freq > policy->max
+ || dbs_info->requested_freq < policy->min)
+ dbs_info->requested_freq = freq->new;
+
+ return 0;
}
static int __init cpufreq_gov_dbs_init(void)
{
- return cpufreq_register_governor(&cpufreq_gov_conservative);
+ return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE);
}
static void __exit cpufreq_gov_dbs_exit(void)
{
- cpufreq_unregister_governor(&cpufreq_gov_conservative);
+ cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE);
}
MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
@@ -399,6 +368,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return CPU_FREQ_GOV_CONSERVATIVE;
+}
+
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e0d1110..1c25ef4 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -18,95 +18,193 @@
#include <linux/export.h>
#include <linux/kernel_stat.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include "cpufreq_governor.h"
-static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
-{
- if (have_governor_per_policy())
- return dbs_data->cdata->attr_group_gov_pol;
- else
- return dbs_data->cdata->attr_group_gov_sys;
-}
+static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
+
+static DEFINE_MUTEX(gov_dbs_data_mutex);
-void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
+/* Common sysfs tunables */
+/**
+ * store_sampling_rate - update sampling rate effective immediately if needed.
+ *
+ * If new rate is smaller than the old, simply updating
+ * dbs.sampling_rate might not be appropriate. For example, if the
+ * original sampling_rate was 1 second and the requested new sampling rate is 10
+ * ms because the user needs immediate reaction from ondemand governor, but not
+ * sure if higher frequency will be required or not, then, the governor may
+ * change the sampling rate too late; up to 1 second later. Thus, if we are
+ * reducing the sampling rate, we need to make the new value effective
+ * immediately.
+ *
+ * This must be called with dbs_data->mutex held, otherwise traversing
+ * policy_dbs_list isn't safe.
+ */
+ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ size_t count)
{
- struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
- struct cpufreq_policy *policy = cdbs->shared->policy;
- unsigned int sampling_rate;
- unsigned int max_load = 0;
- unsigned int ignore_nice;
- unsigned int j;
+ struct policy_dbs_info *policy_dbs;
+ unsigned int rate;
+ int ret;
+ ret = sscanf(buf, "%u", &rate);
+ if (ret != 1)
+ return -EINVAL;
- if (dbs_data->cdata->governor == GOV_ONDEMAND) {
- struct od_cpu_dbs_info_s *od_dbs_info =
- dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+ dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate);
+ /*
+ * We are operating under dbs_data->mutex and so the list and its
+ * entries can't be freed concurrently.
+ */
+ list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ mutex_lock(&policy_dbs->timer_mutex);
/*
- * Sometimes, the ondemand governor uses an additional
- * multiplier to give long delays. So apply this multiplier to
- * the 'sampling_rate', so as to keep the wake-up-from-idle
- * detection logic a bit conservative.
+ * On 32-bit architectures this may race with the
+ * sample_delay_ns read in dbs_update_util_handler(), but that
+ * really doesn't matter. If the read returns a value that's
+ * too big, the sample will be skipped, but the next invocation
+ * of dbs_update_util_handler() (when the update has been
+ * completed) will take a sample.
+ *
+ * If this runs in parallel with dbs_work_handler(), we may end
+ * up overwriting the sample_delay_ns value that it has just
+ * written, but it will be corrected next time a sample is
+ * taken, so it shouldn't be significant.
*/
- sampling_rate = od_tuners->sampling_rate;
- sampling_rate *= od_dbs_info->rate_mult;
+ gov_update_sample_delay(policy_dbs, 0);
+ mutex_unlock(&policy_dbs->timer_mutex);
+ }
- ignore_nice = od_tuners->ignore_nice_load;
- } else {
- sampling_rate = cs_tuners->sampling_rate;
- ignore_nice = cs_tuners->ignore_nice_load;
+ return count;
+}
+EXPORT_SYMBOL_GPL(store_sampling_rate);
+
+/**
+ * gov_update_cpu_data - Update CPU load data.
+ * @dbs_data: Top-level governor data pointer.
+ *
+ * Update CPU load data for all CPUs in the domain governed by @dbs_data
+ * (that may be a single policy or a bunch of them if governor tunables are
+ * system-wide).
+ *
+ * Call under the @dbs_data mutex.
+ */
+void gov_update_cpu_data(struct dbs_data *dbs_data)
+{
+ struct policy_dbs_info *policy_dbs;
+
+ list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ unsigned int j;
+
+ for_each_cpu(j, policy_dbs->policy->cpus) {
+ struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
+
+ j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall,
+ dbs_data->io_is_busy);
+ if (dbs_data->ignore_nice_load)
+ j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ }
}
+}
+EXPORT_SYMBOL_GPL(gov_update_cpu_data);
+
+static inline struct dbs_data *to_dbs_data(struct kobject *kobj)
+{
+ return container_of(kobj, struct dbs_data, kobj);
+}
+
+static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+{
+ return container_of(attr, struct governor_attr, attr);
+}
+
+static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct dbs_data *dbs_data = to_dbs_data(kobj);
+ struct governor_attr *gattr = to_gov_attr(attr);
+
+ return gattr->show(dbs_data, buf);
+}
+
+static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dbs_data *dbs_data = to_dbs_data(kobj);
+ struct governor_attr *gattr = to_gov_attr(attr);
+ int ret = -EBUSY;
+
+ mutex_lock(&dbs_data->mutex);
+
+ if (dbs_data->usage_count)
+ ret = gattr->store(dbs_data, buf, count);
+
+ mutex_unlock(&dbs_data->mutex);
+
+ return ret;
+}
+
+/*
+ * Sysfs Ops for accessing governor attributes.
+ *
+ * All show/store invocations for governor specific sysfs attributes, will first
+ * call the below show/store callbacks and the attribute specific callback will
+ * be called from within it.
+ */
+static const struct sysfs_ops governor_sysfs_ops = {
+ .show = governor_show,
+ .store = governor_store,
+};
+
+unsigned int dbs_update(struct cpufreq_policy *policy)
+{
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
+ unsigned int ignore_nice = dbs_data->ignore_nice_load;
+ unsigned int max_load = 0;
+ unsigned int sampling_rate, io_busy, j;
+
+ /*
+ * Sometimes governors may use an additional multiplier to increase
+ * sample delays temporarily. Apply that multiplier to sampling_rate
+ * so as to keep the wake-up-from-idle detection logic a bit
+ * conservative.
+ */
+ sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult;
+ /*
+ * For the purpose of ondemand, waiting for disk IO is an indication
+ * that you're performance critical, and not that the system is actually
+ * idle, so do not add the iowait time to the CPU idle time then.
+ */
+ io_busy = dbs_data->io_is_busy;
/* Get Absolute Load */
for_each_cpu(j, policy->cpus) {
- struct cpu_dbs_info *j_cdbs;
+ struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
u64 cur_wall_time, cur_idle_time;
unsigned int idle_time, wall_time;
unsigned int load;
- int io_busy = 0;
-
- j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
- /*
- * For the purpose of ondemand, waiting for disk IO is
- * an indication that you're performance critical, and
- * not that the system is actually idle. So do not add
- * the iowait time to the cpu idle time.
- */
- if (dbs_data->cdata->governor == GOV_ONDEMAND)
- io_busy = od_tuners->io_is_busy;
cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
- wall_time = (unsigned int)
- (cur_wall_time - j_cdbs->prev_cpu_wall);
+ wall_time = cur_wall_time - j_cdbs->prev_cpu_wall;
j_cdbs->prev_cpu_wall = cur_wall_time;
- if (cur_idle_time < j_cdbs->prev_cpu_idle)
- cur_idle_time = j_cdbs->prev_cpu_idle;
-
- idle_time = (unsigned int)
- (cur_idle_time - j_cdbs->prev_cpu_idle);
- j_cdbs->prev_cpu_idle = cur_idle_time;
+ if (cur_idle_time <= j_cdbs->prev_cpu_idle) {
+ idle_time = 0;
+ } else {
+ idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
+ j_cdbs->prev_cpu_idle = cur_idle_time;
+ }
if (ignore_nice) {
- u64 cur_nice;
- unsigned long cur_nice_jiffies;
-
- cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
- cdbs->prev_cpu_nice;
- /*
- * Assumption: nice time between sampling periods will
- * be less than 2^32 jiffies for 32 bit sys
- */
- cur_nice_jiffies = (unsigned long)
- cputime64_to_jiffies64(cur_nice);
+ u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
- cdbs->prev_cpu_nice =
- kcpustat_cpu(j).cpustat[CPUTIME_NICE];
- idle_time += jiffies_to_usecs(cur_nice_jiffies);
+ idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice);
+ j_cdbs->prev_cpu_nice = cur_nice;
}
if (unlikely(!wall_time || wall_time < idle_time))
@@ -128,10 +226,10 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
* dropped down. So we perform the copy only once, upon the
* first wake-up from idle.)
*
- * Detecting this situation is easy: the governor's deferrable
- * timer would not have fired during CPU-idle periods. Hence
- * an unusually large 'wall_time' (as compared to the sampling
- * rate) indicates this scenario.
+ * Detecting this situation is easy: the governor's utilization
+ * update handler would not have run during CPU-idle periods.
+ * Hence, an unusually large 'wall_time' (as compared to the
+ * sampling rate) indicates this scenario.
*
* prev_load can be zero in two cases and we must recalculate it
* for both cases:
@@ -156,222 +254,224 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
if (load > max_load)
max_load = load;
}
-
- dbs_data->cdata->gov_check_cpu(cpu, max_load);
+ return max_load;
}
-EXPORT_SYMBOL_GPL(dbs_check_cpu);
+EXPORT_SYMBOL_GPL(dbs_update);
-void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
+static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
+ unsigned int delay_us)
{
- struct dbs_data *dbs_data = policy->governor_data;
- struct cpu_dbs_info *cdbs;
+ struct cpufreq_policy *policy = policy_dbs->policy;
int cpu;
+ gov_update_sample_delay(policy_dbs, delay_us);
+ policy_dbs->last_sample_time = 0;
+
for_each_cpu(cpu, policy->cpus) {
- cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
- cdbs->timer.expires = jiffies + delay;
- add_timer_on(&cdbs->timer, cpu);
+ struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
+
+ cpufreq_set_update_util_data(cpu, &cdbs->update_util);
}
}
-EXPORT_SYMBOL_GPL(gov_add_timers);
-static inline void gov_cancel_timers(struct cpufreq_policy *policy)
+static inline void gov_clear_update_util(struct cpufreq_policy *policy)
{
- struct dbs_data *dbs_data = policy->governor_data;
- struct cpu_dbs_info *cdbs;
int i;
- for_each_cpu(i, policy->cpus) {
- cdbs = dbs_data->cdata->get_cpu_cdbs(i);
- del_timer_sync(&cdbs->timer);
- }
-}
+ for_each_cpu(i, policy->cpus)
+ cpufreq_set_update_util_data(i, NULL);
-void gov_cancel_work(struct cpu_common_dbs_info *shared)
-{
- /* Tell dbs_timer_handler() to skip queuing up work items. */
- atomic_inc(&shared->skip_work);
- /*
- * If dbs_timer_handler() is already running, it may not notice the
- * incremented skip_work, so wait for it to complete to prevent its work
- * item from being queued up after the cancel_work_sync() below.
- */
- gov_cancel_timers(shared->policy);
- /*
- * In case dbs_timer_handler() managed to run and spawn a work item
- * before the timers have been canceled, wait for that work item to
- * complete and then cancel all of the timers set up by it. If
- * dbs_timer_handler() runs again at that point, it will see the
- * positive value of skip_work and won't spawn any more work items.
- */
- cancel_work_sync(&shared->work);
- gov_cancel_timers(shared->policy);
- atomic_set(&shared->skip_work, 0);
+ synchronize_sched();
}
-EXPORT_SYMBOL_GPL(gov_cancel_work);
-/* Will return if we need to evaluate cpu load again or not */
-static bool need_load_eval(struct cpu_common_dbs_info *shared,
- unsigned int sampling_rate)
+static void gov_cancel_work(struct cpufreq_policy *policy)
{
- if (policy_is_shared(shared->policy)) {
- ktime_t time_now = ktime_get();
- s64 delta_us = ktime_us_delta(time_now, shared->time_stamp);
-
- /* Do nothing if we recently have sampled */
- if (delta_us < (s64)(sampling_rate / 2))
- return false;
- else
- shared->time_stamp = time_now;
- }
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
- return true;
+ gov_clear_update_util(policy_dbs->policy);
+ irq_work_sync(&policy_dbs->irq_work);
+ cancel_work_sync(&policy_dbs->work);
+ atomic_set(&policy_dbs->work_count, 0);
+ policy_dbs->work_in_progress = false;
}
static void dbs_work_handler(struct work_struct *work)
{
- struct cpu_common_dbs_info *shared = container_of(work, struct
- cpu_common_dbs_info, work);
+ struct policy_dbs_info *policy_dbs;
struct cpufreq_policy *policy;
- struct dbs_data *dbs_data;
- unsigned int sampling_rate, delay;
- bool eval_load;
-
- policy = shared->policy;
- dbs_data = policy->governor_data;
+ struct dbs_governor *gov;
- /* Kill all timers */
- gov_cancel_timers(policy);
+ policy_dbs = container_of(work, struct policy_dbs_info, work);
+ policy = policy_dbs->policy;
+ gov = dbs_governor_of(policy);
- if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-
- sampling_rate = cs_tuners->sampling_rate;
- } else {
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
-
- sampling_rate = od_tuners->sampling_rate;
- }
-
- eval_load = need_load_eval(shared, sampling_rate);
+ /*
+ * Make sure cpufreq_governor_limits() isn't evaluating load or the
+ * ondemand governor isn't updating the sampling rate in parallel.
+ */
+ mutex_lock(&policy_dbs->timer_mutex);
+ gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy));
+ mutex_unlock(&policy_dbs->timer_mutex);
+ /* Allow the utilization update handler to queue up more work. */
+ atomic_set(&policy_dbs->work_count, 0);
/*
- * Make sure cpufreq_governor_limits() isn't evaluating load in
- * parallel.
+ * If the update below is reordered with respect to the sample delay
+ * modification, the utilization update handler may end up using a stale
+ * sample delay value.
*/
- mutex_lock(&shared->timer_mutex);
- delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load);
- mutex_unlock(&shared->timer_mutex);
+ smp_wmb();
+ policy_dbs->work_in_progress = false;
+}
- atomic_dec(&shared->skip_work);
+static void dbs_irq_work(struct irq_work *irq_work)
+{
+ struct policy_dbs_info *policy_dbs;
- gov_add_timers(policy, delay);
+ policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
+ schedule_work(&policy_dbs->work);
}
-static void dbs_timer_handler(unsigned long data)
+static void dbs_update_util_handler(struct update_util_data *data, u64 time,
+ unsigned long util, unsigned long max)
{
- struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data;
- struct cpu_common_dbs_info *shared = cdbs->shared;
+ struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
+ struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
+ u64 delta_ns, lst;
/*
- * Timer handler may not be allowed to queue the work at the moment,
- * because:
- * - Another timer handler has done that
- * - We are stopping the governor
- * - Or we are updating the sampling rate of the ondemand governor
+ * The work may not be allowed to be queued up right now.
+ * Possible reasons:
+ * - Work has already been queued up or is in progress.
+ * - It is too early (too little time from the previous sample).
*/
- if (atomic_inc_return(&shared->skip_work) > 1)
- atomic_dec(&shared->skip_work);
- else
- queue_work(system_wq, &shared->work);
-}
+ if (policy_dbs->work_in_progress)
+ return;
-static void set_sampling_rate(struct dbs_data *dbs_data,
- unsigned int sampling_rate)
-{
- if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
- cs_tuners->sampling_rate = sampling_rate;
- } else {
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- od_tuners->sampling_rate = sampling_rate;
+ /*
+ * If the reads below are reordered before the check above, the value
+ * of sample_delay_ns used in the computation may be stale.
+ */
+ smp_rmb();
+ lst = READ_ONCE(policy_dbs->last_sample_time);
+ delta_ns = time - lst;
+ if ((s64)delta_ns < policy_dbs->sample_delay_ns)
+ return;
+
+ /*
+ * If the policy is not shared, the irq_work may be queued up right away
+ * at this point. Otherwise, we need to ensure that only one of the
+ * CPUs sharing the policy will do that.
+ */
+ if (policy_dbs->is_shared) {
+ if (!atomic_add_unless(&policy_dbs->work_count, 1, 1))
+ return;
+
+ /*
+ * If another CPU updated last_sample_time in the meantime, we
+ * shouldn't be here, so clear the work counter and bail out.
+ */
+ if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) {
+ atomic_set(&policy_dbs->work_count, 0);
+ return;
+ }
}
+
+ policy_dbs->last_sample_time = time;
+ policy_dbs->work_in_progress = true;
+ irq_work_queue(&policy_dbs->irq_work);
}
-static int alloc_common_dbs_info(struct cpufreq_policy *policy,
- struct common_dbs_data *cdata)
+static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
+ struct dbs_governor *gov)
{
- struct cpu_common_dbs_info *shared;
+ struct policy_dbs_info *policy_dbs;
int j;
- /* Allocate memory for the common information for policy->cpus */
- shared = kzalloc(sizeof(*shared), GFP_KERNEL);
- if (!shared)
- return -ENOMEM;
+ /* Allocate memory for per-policy governor data. */
+ policy_dbs = gov->alloc();
+ if (!policy_dbs)
+ return NULL;
- /* Set shared for all CPUs, online+offline */
- for_each_cpu(j, policy->related_cpus)
- cdata->get_cpu_cdbs(j)->shared = shared;
+ policy_dbs->policy = policy;
+ mutex_init(&policy_dbs->timer_mutex);
+ atomic_set(&policy_dbs->work_count, 0);
+ init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
+ INIT_WORK(&policy_dbs->work, dbs_work_handler);
- mutex_init(&shared->timer_mutex);
- atomic_set(&shared->skip_work, 0);
- INIT_WORK(&shared->work, dbs_work_handler);
- return 0;
+ /* Set policy_dbs for all CPUs, online+offline */
+ for_each_cpu(j, policy->related_cpus) {
+ struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
+
+ j_cdbs->policy_dbs = policy_dbs;
+ j_cdbs->update_util.func = dbs_update_util_handler;
+ }
+ return policy_dbs;
}
-static void free_common_dbs_info(struct cpufreq_policy *policy,
- struct common_dbs_data *cdata)
+static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
+ struct dbs_governor *gov)
{
- struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
- struct cpu_common_dbs_info *shared = cdbs->shared;
int j;
- mutex_destroy(&shared->timer_mutex);
+ mutex_destroy(&policy_dbs->timer_mutex);
- for_each_cpu(j, policy->cpus)
- cdata->get_cpu_cdbs(j)->shared = NULL;
+ for_each_cpu(j, policy_dbs->policy->related_cpus) {
+ struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
- kfree(shared);
+ j_cdbs->policy_dbs = NULL;
+ j_cdbs->update_util.func = NULL;
+ }
+ gov->free(policy_dbs);
}
-static int cpufreq_governor_init(struct cpufreq_policy *policy,
- struct dbs_data *dbs_data,
- struct common_dbs_data *cdata)
+static int cpufreq_governor_init(struct cpufreq_policy *policy)
{
+ struct dbs_governor *gov = dbs_governor_of(policy);
+ struct dbs_data *dbs_data;
+ struct policy_dbs_info *policy_dbs;
unsigned int latency;
- int ret;
+ int ret = 0;
/* State should be equivalent to EXIT */
if (policy->governor_data)
return -EBUSY;
- if (dbs_data) {
- if (WARN_ON(have_governor_per_policy()))
- return -EINVAL;
+ policy_dbs = alloc_policy_dbs_info(policy, gov);
+ if (!policy_dbs)
+ return -ENOMEM;
- ret = alloc_common_dbs_info(policy, cdata);
- if (ret)
- return ret;
+ /* Protect gov->gdbs_data against concurrent updates. */
+ mutex_lock(&gov_dbs_data_mutex);
+ dbs_data = gov->gdbs_data;
+ if (dbs_data) {
+ if (WARN_ON(have_governor_per_policy())) {
+ ret = -EINVAL;
+ goto free_policy_dbs_info;
+ }
+ policy_dbs->dbs_data = dbs_data;
+ policy->governor_data = policy_dbs;
+
+ mutex_lock(&dbs_data->mutex);
dbs_data->usage_count++;
- policy->governor_data = dbs_data;
- return 0;
+ list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
+ mutex_unlock(&dbs_data->mutex);
+ goto out;
}
dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
- if (!dbs_data)
- return -ENOMEM;
-
- ret = alloc_common_dbs_info(policy, cdata);
- if (ret)
- goto free_dbs_data;
+ if (!dbs_data) {
+ ret = -ENOMEM;
+ goto free_policy_dbs_info;
+ }
- dbs_data->cdata = cdata;
- dbs_data->usage_count = 1;
+ INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
+ mutex_init(&dbs_data->mutex);
- ret = cdata->init(dbs_data, !policy->governor->initialized);
+ ret = gov->init(dbs_data, !policy->governor->initialized);
if (ret)
- goto free_common_dbs_info;
+ goto free_policy_dbs_info;
/* policy latency is in ns. Convert it to us first */
latency = policy->cpuinfo.transition_latency / 1000;
@@ -381,216 +481,156 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
/* Bring kernel and HW constraints together */
dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
MIN_LATENCY_MULTIPLIER * latency);
- set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
- latency * LATENCY_MULTIPLIER));
+ dbs_data->sampling_rate = max(dbs_data->min_sampling_rate,
+ LATENCY_MULTIPLIER * latency);
if (!have_governor_per_policy())
- cdata->gdbs_data = dbs_data;
+ gov->gdbs_data = dbs_data;
- policy->governor_data = dbs_data;
+ policy->governor_data = policy_dbs;
- ret = sysfs_create_group(get_governor_parent_kobj(policy),
- get_sysfs_attr(dbs_data));
- if (ret)
- goto reset_gdbs_data;
+ policy_dbs->dbs_data = dbs_data;
+ dbs_data->usage_count = 1;
+ list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
- return 0;
+ gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
+ ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
+ get_governor_parent_kobj(policy),
+ "%s", gov->gov.name);
+ if (!ret)
+ goto out;
+
+ /* Failure, so roll back. */
+ pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret);
-reset_gdbs_data:
policy->governor_data = NULL;
if (!have_governor_per_policy())
- cdata->gdbs_data = NULL;
- cdata->exit(dbs_data, !policy->governor->initialized);
-free_common_dbs_info:
- free_common_dbs_info(policy, cdata);
-free_dbs_data:
+ gov->gdbs_data = NULL;
+ gov->exit(dbs_data, !policy->governor->initialized);
kfree(dbs_data);
+
+free_policy_dbs_info:
+ free_policy_dbs_info(policy_dbs, gov);
+
+out:
+ mutex_unlock(&gov_dbs_data_mutex);
return ret;
}
-static int cpufreq_governor_exit(struct cpufreq_policy *policy,
- struct dbs_data *dbs_data)
+static int cpufreq_governor_exit(struct cpufreq_policy *policy)
{
- struct common_dbs_data *cdata = dbs_data->cdata;
- struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu);
+ struct dbs_governor *gov = dbs_governor_of(policy);
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
+ int count;
- /* State should be equivalent to INIT */
- if (!cdbs->shared || cdbs->shared->policy)
- return -EBUSY;
+ /* Protect gov->gdbs_data against concurrent updates. */
+ mutex_lock(&gov_dbs_data_mutex);
+
+ mutex_lock(&dbs_data->mutex);
+ list_del(&policy_dbs->list);
+ count = --dbs_data->usage_count;
+ mutex_unlock(&dbs_data->mutex);
- if (!--dbs_data->usage_count) {
- sysfs_remove_group(get_governor_parent_kobj(policy),
- get_sysfs_attr(dbs_data));
+ if (!count) {
+ kobject_put(&dbs_data->kobj);
policy->governor_data = NULL;
if (!have_governor_per_policy())
- cdata->gdbs_data = NULL;
+ gov->gdbs_data = NULL;
- cdata->exit(dbs_data, policy->governor->initialized == 1);
+ gov->exit(dbs_data, policy->governor->initialized == 1);
+ mutex_destroy(&dbs_data->mutex);
kfree(dbs_data);
} else {
policy->governor_data = NULL;
}
- free_common_dbs_info(policy, cdata);
+ free_policy_dbs_info(policy_dbs, gov);
+
+ mutex_unlock(&gov_dbs_data_mutex);
return 0;
}
-static int cpufreq_governor_start(struct cpufreq_policy *policy,
- struct dbs_data *dbs_data)
+static int cpufreq_governor_start(struct cpufreq_policy *policy)
{
- struct common_dbs_data *cdata = dbs_data->cdata;
- unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu;
- struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
- struct cpu_common_dbs_info *shared = cdbs->shared;
- int io_busy = 0;
+ struct dbs_governor *gov = dbs_governor_of(policy);
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
+ unsigned int sampling_rate, ignore_nice, j;
+ unsigned int io_busy;
if (!policy->cur)
return -EINVAL;
- /* State should be equivalent to INIT */
- if (!shared || shared->policy)
- return -EBUSY;
+ policy_dbs->is_shared = policy_is_shared(policy);
+ policy_dbs->rate_mult = 1;
- if (cdata->governor == GOV_CONSERVATIVE) {
- struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
-
- sampling_rate = cs_tuners->sampling_rate;
- ignore_nice = cs_tuners->ignore_nice_load;
- } else {
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
-
- sampling_rate = od_tuners->sampling_rate;
- ignore_nice = od_tuners->ignore_nice_load;
- io_busy = od_tuners->io_is_busy;
- }
-
- shared->policy = policy;
- shared->time_stamp = ktime_get();
+ sampling_rate = dbs_data->sampling_rate;
+ ignore_nice = dbs_data->ignore_nice_load;
+ io_busy = dbs_data->io_is_busy;
for_each_cpu(j, policy->cpus) {
- struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
+ struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
unsigned int prev_load;
- j_cdbs->prev_cpu_idle =
- get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
+ j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
- prev_load = (unsigned int)(j_cdbs->prev_cpu_wall -
- j_cdbs->prev_cpu_idle);
- j_cdbs->prev_load = 100 * prev_load /
- (unsigned int)j_cdbs->prev_cpu_wall;
+ prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle;
+ j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall;
if (ignore_nice)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
-
- __setup_timer(&j_cdbs->timer, dbs_timer_handler,
- (unsigned long)j_cdbs,
- TIMER_DEFERRABLE | TIMER_IRQSAFE);
}
- if (cdata->governor == GOV_CONSERVATIVE) {
- struct cs_cpu_dbs_info_s *cs_dbs_info =
- cdata->get_cpu_dbs_info_s(cpu);
-
- cs_dbs_info->down_skip = 0;
- cs_dbs_info->requested_freq = policy->cur;
- } else {
- struct od_ops *od_ops = cdata->gov_ops;
- struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu);
-
- od_dbs_info->rate_mult = 1;
- od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
- od_ops->powersave_bias_init_cpu(cpu);
- }
+ gov->start(policy);
- gov_add_timers(policy, delay_for_sampling_rate(sampling_rate));
+ gov_set_update_util(policy_dbs, sampling_rate);
return 0;
}
-static int cpufreq_governor_stop(struct cpufreq_policy *policy,
- struct dbs_data *dbs_data)
+static int cpufreq_governor_stop(struct cpufreq_policy *policy)
{
- struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu);
- struct cpu_common_dbs_info *shared = cdbs->shared;
-
- /* State should be equivalent to START */
- if (!shared || !shared->policy)
- return -EBUSY;
-
- gov_cancel_work(shared);
- shared->policy = NULL;
-
+ gov_cancel_work(policy);
return 0;
}
-static int cpufreq_governor_limits(struct cpufreq_policy *policy,
- struct dbs_data *dbs_data)
+static int cpufreq_governor_limits(struct cpufreq_policy *policy)
{
- struct common_dbs_data *cdata = dbs_data->cdata;
- unsigned int cpu = policy->cpu;
- struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu);
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
- /* State should be equivalent to START */
- if (!cdbs->shared || !cdbs->shared->policy)
- return -EBUSY;
+ mutex_lock(&policy_dbs->timer_mutex);
+
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
+
+ gov_update_sample_delay(policy_dbs, 0);
- mutex_lock(&cdbs->shared->timer_mutex);
- if (policy->max < cdbs->shared->policy->cur)
- __cpufreq_driver_target(cdbs->shared->policy, policy->max,
- CPUFREQ_RELATION_H);
- else if (policy->min > cdbs->shared->policy->cur)
- __cpufreq_driver_target(cdbs->shared->policy, policy->min,
- CPUFREQ_RELATION_L);
- dbs_check_cpu(dbs_data, cpu);
- mutex_unlock(&cdbs->shared->timer_mutex);
+ mutex_unlock(&policy_dbs->timer_mutex);
return 0;
}
-int cpufreq_governor_dbs(struct cpufreq_policy *policy,
- struct common_dbs_data *cdata, unsigned int event)
+int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
- struct dbs_data *dbs_data;
- int ret;
-
- /* Lock governor to block concurrent initialization of governor */
- mutex_lock(&cdata->mutex);
-
- if (have_governor_per_policy())
- dbs_data = policy->governor_data;
- else
- dbs_data = cdata->gdbs_data;
-
- if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) {
- ret = -EINVAL;
- goto unlock;
- }
-
- switch (event) {
- case CPUFREQ_GOV_POLICY_INIT:
- ret = cpufreq_governor_init(policy, dbs_data, cdata);
- break;
- case CPUFREQ_GOV_POLICY_EXIT:
- ret = cpufreq_governor_exit(policy, dbs_data);
- break;
- case CPUFREQ_GOV_START:
- ret = cpufreq_governor_start(policy, dbs_data);
- break;
- case CPUFREQ_GOV_STOP:
- ret = cpufreq_governor_stop(policy, dbs_data);
- break;
- case CPUFREQ_GOV_LIMITS:
- ret = cpufreq_governor_limits(policy, dbs_data);
- break;
- default:
- ret = -EINVAL;
+ if (event == CPUFREQ_GOV_POLICY_INIT) {
+ return cpufreq_governor_init(policy);
+ } else if (policy->governor_data) {
+ switch (event) {
+ case CPUFREQ_GOV_POLICY_EXIT:
+ return cpufreq_governor_exit(policy);
+ case CPUFREQ_GOV_START:
+ return cpufreq_governor_start(policy);
+ case CPUFREQ_GOV_STOP:
+ return cpufreq_governor_stop(policy);
+ case CPUFREQ_GOV_LIMITS:
+ return cpufreq_governor_limits(policy);
+ }
}
-
-unlock:
- mutex_unlock(&cdata->mutex);
-
- return ret;
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 91e767a..61ff82f 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -18,6 +18,7 @@
#define _CPUFREQ_GOVERNOR_H
#include <linux/atomic.h>
+#include <linux/irq_work.h>
#include <linux/cpufreq.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
@@ -41,96 +42,68 @@
enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
/*
- * Macro for creating governors sysfs routines
- *
- * - gov_sys: One governor instance per whole system
- * - gov_pol: One governor instance per policy
+ * Abbreviations:
+ * dbs: used as a shortform for demand based switching It helps to keep variable
+ * names smaller, simpler
+ * cdbs: common dbs
+ * od_*: On-demand governor
+ * cs_*: Conservative governor
*/
-/* Create attributes */
-#define gov_sys_attr_ro(_name) \
-static struct global_attr _name##_gov_sys = \
-__ATTR(_name, 0444, show_##_name##_gov_sys, NULL)
-
-#define gov_sys_attr_rw(_name) \
-static struct global_attr _name##_gov_sys = \
-__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)
-
-#define gov_pol_attr_ro(_name) \
-static struct freq_attr _name##_gov_pol = \
-__ATTR(_name, 0444, show_##_name##_gov_pol, NULL)
-
-#define gov_pol_attr_rw(_name) \
-static struct freq_attr _name##_gov_pol = \
-__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
+/* Governor demand based switching data (per-policy or global). */
+struct dbs_data {
+ int usage_count;
+ void *tuners;
+ unsigned int min_sampling_rate;
+ unsigned int ignore_nice_load;
+ unsigned int sampling_rate;
+ unsigned int sampling_down_factor;
+ unsigned int up_threshold;
+ unsigned int io_is_busy;
-#define gov_sys_pol_attr_rw(_name) \
- gov_sys_attr_rw(_name); \
- gov_pol_attr_rw(_name)
+ struct kobject kobj;
+ struct list_head policy_dbs_list;
+ /*
+ * Protect concurrent updates to governor tunables from sysfs,
+ * policy_dbs_list and usage_count.
+ */
+ struct mutex mutex;
+};
-#define gov_sys_pol_attr_ro(_name) \
- gov_sys_attr_ro(_name); \
- gov_pol_attr_ro(_name)
+/* Governor's specific attributes */
+struct dbs_data;
+struct governor_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
+ ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
+ size_t count);
+};
-/* Create show/store routines */
-#define show_one(_gov, file_name) \
-static ssize_t show_##file_name##_gov_sys \
-(struct kobject *kobj, struct attribute *attr, char *buf) \
+#define gov_show_one(_gov, file_name) \
+static ssize_t show_##file_name \
+(struct dbs_data *dbs_data, char *buf) \
{ \
- struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \
- return sprintf(buf, "%u\n", tuners->file_name); \
-} \
- \
-static ssize_t show_##file_name##_gov_pol \
-(struct cpufreq_policy *policy, char *buf) \
-{ \
- struct dbs_data *dbs_data = policy->governor_data; \
struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \
}
-#define store_one(_gov, file_name) \
-static ssize_t store_##file_name##_gov_sys \
-(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \
-{ \
- struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
- return store_##file_name(dbs_data, buf, count); \
-} \
- \
-static ssize_t store_##file_name##_gov_pol \
-(struct cpufreq_policy *policy, const char *buf, size_t count) \
+#define gov_show_one_common(file_name) \
+static ssize_t show_##file_name \
+(struct dbs_data *dbs_data, char *buf) \
{ \
- struct dbs_data *dbs_data = policy->governor_data; \
- return store_##file_name(dbs_data, buf, count); \
+ return sprintf(buf, "%u\n", dbs_data->file_name); \
}
-#define show_store_one(_gov, file_name) \
-show_one(_gov, file_name); \
-store_one(_gov, file_name)
+#define gov_attr_ro(_name) \
+static struct governor_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
-/* create helper routines */
-#define define_get_cpu_dbs_routines(_dbs_info) \
-static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \
-{ \
- return &per_cpu(_dbs_info, cpu).cdbs; \
-} \
- \
-static void *get_cpu_dbs_info_s(int cpu) \
-{ \
- return &per_cpu(_dbs_info, cpu); \
-}
-
-/*
- * Abbreviations:
- * dbs: used as a shortform for demand based switching It helps to keep variable
- * names smaller, simpler
- * cdbs: common dbs
- * od_*: On-demand governor
- * cs_*: Conservative governor
- */
+#define gov_attr_rw(_name) \
+static struct governor_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
/* Common to all CPUs of a policy */
-struct cpu_common_dbs_info {
+struct policy_dbs_info {
struct cpufreq_policy *policy;
/*
* Per policy mutex that serializes load evaluation from limit-change
@@ -138,11 +111,27 @@ struct cpu_common_dbs_info {
*/
struct mutex timer_mutex;
- ktime_t time_stamp;
- atomic_t skip_work;
+ u64 last_sample_time;
+ s64 sample_delay_ns;
+ atomic_t work_count;
+ struct irq_work irq_work;
struct work_struct work;
+ /* dbs_data may be shared between multiple policy objects */
+ struct dbs_data *dbs_data;
+ struct list_head list;
+ /* Multiplier for increasing sample delay temporarily. */
+ unsigned int rate_mult;
+ /* Status indicators */
+ bool is_shared; /* This object is used by multiple CPUs */
+ bool work_in_progress; /* Work is being queued up or in progress */
};
+static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
+ unsigned int delay_us)
+{
+ policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC;
+}
+
/* Per cpu structures */
struct cpu_dbs_info {
u64 prev_cpu_idle;
@@ -155,54 +144,14 @@ struct cpu_dbs_info {
* wake-up from idle.
*/
unsigned int prev_load;
- struct timer_list timer;
- struct cpu_common_dbs_info *shared;
-};
-
-struct od_cpu_dbs_info_s {
- struct cpu_dbs_info cdbs;
- struct cpufreq_frequency_table *freq_table;
- unsigned int freq_lo;
- unsigned int freq_lo_jiffies;
- unsigned int freq_hi_jiffies;
- unsigned int rate_mult;
- unsigned int sample_type:1;
-};
-
-struct cs_cpu_dbs_info_s {
- struct cpu_dbs_info cdbs;
- unsigned int down_skip;
- unsigned int requested_freq;
-};
-
-/* Per policy Governors sysfs tunables */
-struct od_dbs_tuners {
- unsigned int ignore_nice_load;
- unsigned int sampling_rate;
- unsigned int sampling_down_factor;
- unsigned int up_threshold;
- unsigned int powersave_bias;
- unsigned int io_is_busy;
-};
-
-struct cs_dbs_tuners {
- unsigned int ignore_nice_load;
- unsigned int sampling_rate;
- unsigned int sampling_down_factor;
- unsigned int up_threshold;
- unsigned int down_threshold;
- unsigned int freq_step;
+ struct update_util_data update_util;
+ struct policy_dbs_info *policy_dbs;
};
/* Common Governor data across policies */
-struct dbs_data;
-struct common_dbs_data {
- /* Common across governors */
- #define GOV_ONDEMAND 0
- #define GOV_CONSERVATIVE 1
- int governor;
- struct attribute_group *attr_group_gov_sys; /* one governor - system */
- struct attribute_group *attr_group_gov_pol; /* one governor - policy */
+struct dbs_governor {
+ struct cpufreq_governor gov;
+ struct kobj_type kobj_type;
/*
* Common data for platforms that don't set
@@ -210,74 +159,32 @@ struct common_dbs_data {
*/
struct dbs_data *gdbs_data;
- struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
- void *(*get_cpu_dbs_info_s)(int cpu);
- unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy,
- bool modify_all);
- void (*gov_check_cpu)(int cpu, unsigned int load);
+ unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
+ struct policy_dbs_info *(*alloc)(void);
+ void (*free)(struct policy_dbs_info *policy_dbs);
int (*init)(struct dbs_data *dbs_data, bool notify);
void (*exit)(struct dbs_data *dbs_data, bool notify);
-
- /* Governor specific ops, see below */
- void *gov_ops;
-
- /*
- * Protects governor's data (struct dbs_data and struct common_dbs_data)
- */
- struct mutex mutex;
+ void (*start)(struct cpufreq_policy *policy);
};
-/* Governor Per policy data */
-struct dbs_data {
- struct common_dbs_data *cdata;
- unsigned int min_sampling_rate;
- int usage_count;
- void *tuners;
-};
+static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy)
+{
+ return container_of(policy->governor, struct dbs_governor, gov);
+}
-/* Governor specific ops, will be passed to dbs_data->gov_ops */
+/* Governor specific operations */
struct od_ops {
- void (*powersave_bias_init_cpu)(int cpu);
unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation);
- void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq);
};
-static inline int delay_for_sampling_rate(unsigned int sampling_rate)
-{
- int delay = usecs_to_jiffies(sampling_rate);
-
- /* We want all CPUs to do sampling nearly on same jiffy */
- if (num_online_cpus() > 1)
- delay -= jiffies % delay;
-
- return delay;
-}
-
-#define declare_show_sampling_rate_min(_gov) \
-static ssize_t show_sampling_rate_min_gov_sys \
-(struct kobject *kobj, struct attribute *attr, char *buf) \
-{ \
- struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
- return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
-} \
- \
-static ssize_t show_sampling_rate_min_gov_pol \
-(struct cpufreq_policy *policy, char *buf) \
-{ \
- struct dbs_data *dbs_data = policy->governor_data; \
- return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
-}
-
-extern struct mutex cpufreq_governor_lock;
-
-void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
-void gov_cancel_work(struct cpu_common_dbs_info *shared);
-void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-int cpufreq_governor_dbs(struct cpufreq_policy *policy,
- struct common_dbs_data *cdata, unsigned int event);
+unsigned int dbs_update(struct cpufreq_policy *policy);
+int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
void od_register_powersave_bias_handler(unsigned int (*f)
(struct cpufreq_policy *, unsigned int, unsigned int),
unsigned int powersave_bias);
void od_unregister_powersave_bias_handler(void);
+ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ size_t count);
+void gov_update_cpu_data(struct dbs_data *dbs_data);
#endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index eae5107..acd8027 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -16,7 +16,8 @@
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/tick.h>
-#include "cpufreq_governor.h"
+
+#include "cpufreq_ondemand.h"
/* On-demand governor macros */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
@@ -27,24 +28,10 @@
#define MIN_FREQUENCY_UP_THRESHOLD (11)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
-static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
-
static struct od_ops od_ops;
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
-static struct cpufreq_governor cpufreq_gov_ondemand;
-#endif
-
static unsigned int default_powersave_bias;
-static void ondemand_powersave_bias_init_cpu(int cpu)
-{
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-
- dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
- dbs_info->freq_lo = 0;
-}
-
/*
* Not all CPUs want IO time to be accounted as busy; this depends on how
* efficient idling at a higher frequency/voltage is.
@@ -70,8 +57,8 @@ static int should_io_be_busy(void)
/*
* Find right freq to be set now with powersave_bias on.
- * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
- * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ * Returns the freq_hi to be used right now and will set freq_hi_delay_us,
+ * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs.
*/
static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation)
@@ -79,15 +66,15 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_req, freq_reduc, freq_avg;
unsigned int freq_hi, freq_lo;
unsigned int index = 0;
- unsigned int jiffies_total, jiffies_hi, jiffies_lo;
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
- policy->cpu);
- struct dbs_data *dbs_data = policy->governor_data;
+ unsigned int delay_hi_us;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
if (!dbs_info->freq_table) {
dbs_info->freq_lo = 0;
- dbs_info->freq_lo_jiffies = 0;
+ dbs_info->freq_lo_delay_us = 0;
return freq_next;
}
@@ -110,31 +97,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
/* Find out how long we have to be in hi and lo freqs */
if (freq_hi == freq_lo) {
dbs_info->freq_lo = 0;
- dbs_info->freq_lo_jiffies = 0;
+ dbs_info->freq_lo_delay_us = 0;
return freq_lo;
}
- jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate);
- jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
- jiffies_hi += ((freq_hi - freq_lo) / 2);
- jiffies_hi /= (freq_hi - freq_lo);
- jiffies_lo = jiffies_total - jiffies_hi;
+ delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate;
+ delay_hi_us += (freq_hi - freq_lo) / 2;
+ delay_hi_us /= freq_hi - freq_lo;
+ dbs_info->freq_hi_delay_us = delay_hi_us;
dbs_info->freq_lo = freq_lo;
- dbs_info->freq_lo_jiffies = jiffies_lo;
- dbs_info->freq_hi_jiffies = jiffies_hi;
+ dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us;
return freq_hi;
}
-static void ondemand_powersave_bias_init(void)
+static void ondemand_powersave_bias_init(struct cpufreq_policy *policy)
{
- int i;
- for_each_online_cpu(i) {
- ondemand_powersave_bias_init_cpu(i);
- }
+ struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
+
+ dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu);
+ dbs_info->freq_lo = 0;
}
static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
{
- struct dbs_data *dbs_data = policy->governor_data;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
if (od_tuners->powersave_bias)
@@ -152,21 +138,21 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
* (default), then we try to increase frequency. Else, we adjust the frequency
* proportional to load.
*/
-static void od_check_cpu(int cpu, unsigned int load)
+static void od_update(struct cpufreq_policy *policy)
{
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
- struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy;
- struct dbs_data *dbs_data = policy->governor_data;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+ unsigned int load = dbs_update(policy);
dbs_info->freq_lo = 0;
/* Check for frequency increase */
- if (load > od_tuners->up_threshold) {
+ if (load > dbs_data->up_threshold) {
/* If switching to max speed, apply sampling_down_factor */
if (policy->cur < policy->max)
- dbs_info->rate_mult =
- od_tuners->sampling_down_factor;
+ policy_dbs->rate_mult = dbs_data->sampling_down_factor;
dbs_freq_increase(policy, policy->max);
} else {
/* Calculate the next frequency proportional to load */
@@ -177,177 +163,70 @@ static void od_check_cpu(int cpu, unsigned int load)
freq_next = min_f + load * (max_f - min_f) / 100;
/* No longer fully busy, reset rate_mult */
- dbs_info->rate_mult = 1;
+ policy_dbs->rate_mult = 1;
- if (!od_tuners->powersave_bias) {
- __cpufreq_driver_target(policy, freq_next,
- CPUFREQ_RELATION_C);
- return;
- }
+ if (od_tuners->powersave_bias)
+ freq_next = od_ops.powersave_bias_target(policy,
+ freq_next,
+ CPUFREQ_RELATION_L);
- freq_next = od_ops.powersave_bias_target(policy, freq_next,
- CPUFREQ_RELATION_L);
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
}
}
-static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
+static unsigned int od_dbs_timer(struct cpufreq_policy *policy)
{
- struct dbs_data *dbs_data = policy->governor_data;
- unsigned int cpu = policy->cpu;
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
- cpu);
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- int delay = 0, sample_type = dbs_info->sample_type;
-
- if (!modify_all)
- goto max_delay;
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct dbs_data *dbs_data = policy_dbs->dbs_data;
+ struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+ int sample_type = dbs_info->sample_type;
/* Common NORMAL_SAMPLE setup */
dbs_info->sample_type = OD_NORMAL_SAMPLE;
- if (sample_type == OD_SUB_SAMPLE) {
- delay = dbs_info->freq_lo_jiffies;
+ /*
+ * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore
+ * it then.
+ */
+ if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
__cpufreq_driver_target(policy, dbs_info->freq_lo,
CPUFREQ_RELATION_H);
- } else {
- dbs_check_cpu(dbs_data, cpu);
- if (dbs_info->freq_lo) {
- /* Setup timer for SUB_SAMPLE */
- dbs_info->sample_type = OD_SUB_SAMPLE;
- delay = dbs_info->freq_hi_jiffies;
- }
+ return dbs_info->freq_lo_delay_us;
}
-max_delay:
- if (!delay)
- delay = delay_for_sampling_rate(od_tuners->sampling_rate
- * dbs_info->rate_mult);
-
- return delay;
-}
-
-/************************** sysfs interface ************************/
-static struct common_dbs_data od_dbs_cdata;
-
-/**
- * update_sampling_rate - update sampling rate effective immediately if needed.
- * @new_rate: new sampling rate
- *
- * If new rate is smaller than the old, simply updating
- * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
- * original sampling_rate was 1 second and the requested new sampling rate is 10
- * ms because the user needs immediate reaction from ondemand governor, but not
- * sure if higher frequency will be required or not, then, the governor may
- * change the sampling rate too late; up to 1 second later. Thus, if we are
- * reducing the sampling rate, we need to make the new value effective
- * immediately.
- */
-static void update_sampling_rate(struct dbs_data *dbs_data,
- unsigned int new_rate)
-{
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- struct cpumask cpumask;
- int cpu;
-
- od_tuners->sampling_rate = new_rate = max(new_rate,
- dbs_data->min_sampling_rate);
-
- /*
- * Lock governor so that governor start/stop can't execute in parallel.
- */
- mutex_lock(&od_dbs_cdata.mutex);
-
- cpumask_copy(&cpumask, cpu_online_mask);
-
- for_each_cpu(cpu, &cpumask) {
- struct cpufreq_policy *policy;
- struct od_cpu_dbs_info_s *dbs_info;
- struct cpu_dbs_info *cdbs;
- struct cpu_common_dbs_info *shared;
- unsigned long next_sampling, appointed_at;
-
- dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
- cdbs = &dbs_info->cdbs;
- shared = cdbs->shared;
-
- /*
- * A valid shared and shared->policy means governor hasn't
- * stopped or exited yet.
- */
- if (!shared || !shared->policy)
- continue;
-
- policy = shared->policy;
-
- /* clear all CPUs of this policy */
- cpumask_andnot(&cpumask, &cpumask, policy->cpus);
+ od_update(policy);
- /*
- * Update sampling rate for CPUs whose policy is governed by
- * dbs_data. In case of governor_per_policy, only a single
- * policy will be governed by dbs_data, otherwise there can be
- * multiple policies that are governed by the same dbs_data.
- */
- if (dbs_data != policy->governor_data)
- continue;
-
- /*
- * Checking this for any CPU should be fine, timers for all of
- * them are scheduled together.
- */
- next_sampling = jiffies + usecs_to_jiffies(new_rate);
- appointed_at = dbs_info->cdbs.timer.expires;
-
- if (time_before(next_sampling, appointed_at)) {
- gov_cancel_work(shared);
- gov_add_timers(policy, usecs_to_jiffies(new_rate));
-
- }
+ if (dbs_info->freq_lo) {
+ /* Setup timer for SUB_SAMPLE */
+ dbs_info->sample_type = OD_SUB_SAMPLE;
+ return dbs_info->freq_hi_delay_us;
}
- mutex_unlock(&od_dbs_cdata.mutex);
+ return dbs_data->sampling_rate * policy_dbs->rate_mult;
}
-static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
- size_t count)
-{
- unsigned int input;
- int ret;
- ret = sscanf(buf, "%u", &input);
- if (ret != 1)
- return -EINVAL;
-
- update_sampling_rate(dbs_data, input);
- return count;
-}
+/************************** sysfs interface ************************/
+static struct dbs_governor od_dbs_gov;
static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
size_t count)
{
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
unsigned int input;
int ret;
- unsigned int j;
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
- od_tuners->io_is_busy = !!input;
+ dbs_data->io_is_busy = !!input;
/* we need to re-evaluate prev_cpu_idle */
- for_each_online_cpu(j) {
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
- j);
- dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
- &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
- }
+ gov_update_cpu_data(dbs_data);
+
return count;
}
static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
size_t count)
{
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -357,40 +236,43 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
return -EINVAL;
}
- od_tuners->up_threshold = input;
+ dbs_data->up_threshold = input;
return count;
}
static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
const char *buf, size_t count)
{
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
- unsigned int input, j;
+ struct policy_dbs_info *policy_dbs;
+ unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
return -EINVAL;
- od_tuners->sampling_down_factor = input;
+
+ dbs_data->sampling_down_factor = input;
/* Reset down sampling multiplier in case it was active */
- for_each_online_cpu(j) {
- struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
- j);
- dbs_info->rate_mult = 1;
+ list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ /*
+ * Doing this without locking might lead to using different
+ * rate_mult values in od_update() and od_dbs_timer().
+ */
+ mutex_lock(&policy_dbs->timer_mutex);
+ policy_dbs->rate_mult = 1;
+ mutex_unlock(&policy_dbs->timer_mutex);
}
+
return count;
}
static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
const char *buf, size_t count)
{
- struct od_dbs_tuners *od_tuners = dbs_data->tuners;
unsigned int input;
int ret;
- unsigned int j;
-
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
@@ -398,22 +280,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
if (input > 1)
input = 1;
- if (input == od_tuners->ignore_nice_load) { /* nothing to do */
+ if (input == dbs_data->ignore_nice_load) { /* nothing to do */
return count;
}
- od_tuners->ignore_nice_load = input;
+ dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */
- for_each_online_cpu(j) {
- struct od_cpu_dbs_info_s *dbs_info;
- dbs_info = &per_cpu(od_cpu_dbs_info, j);
- dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
- &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
- if (od_tuners->ignore_nice_load)
- dbs_info->cdbs.prev_cpu_nice =
- kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+ gov_update_cpu_data(dbs_data);
- }
return count;
}
@@ -421,6 +295,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
size_t count)
{
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+ struct policy_dbs_info *policy_dbs;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -432,59 +307,54 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
input = 1000;
od_tuners->powersave_bias = input;
- ondemand_powersave_bias_init();
+
+ list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list)
+ ondemand_powersave_bias_init(policy_dbs->policy);
+
return count;
}
-show_store_one(od, sampling_rate);
-show_store_one(od, io_is_busy);
-show_store_one(od, up_threshold);
-show_store_one(od, sampling_down_factor);
-show_store_one(od, ignore_nice_load);
-show_store_one(od, powersave_bias);
-declare_show_sampling_rate_min(od);
-
-gov_sys_pol_attr_rw(sampling_rate);
-gov_sys_pol_attr_rw(io_is_busy);
-gov_sys_pol_attr_rw(up_threshold);
-gov_sys_pol_attr_rw(sampling_down_factor);
-gov_sys_pol_attr_rw(ignore_nice_load);
-gov_sys_pol_attr_rw(powersave_bias);
-gov_sys_pol_attr_ro(sampling_rate_min);
-
-static struct attribute *dbs_attributes_gov_sys[] = {
- &sampling_rate_min_gov_sys.attr,
- &sampling_rate_gov_sys.attr,
- &up_threshold_gov_sys.attr,
- &sampling_down_factor_gov_sys.attr,
- &ignore_nice_load_gov_sys.attr,
- &powersave_bias_gov_sys.attr,
- &io_is_busy_gov_sys.attr,
+gov_show_one_common(sampling_rate);
+gov_show_one_common(up_threshold);
+gov_show_one_common(sampling_down_factor);
+gov_show_one_common(ignore_nice_load);
+gov_show_one_common(min_sampling_rate);
+gov_show_one_common(io_is_busy);
+gov_show_one(od, powersave_bias);
+
+gov_attr_rw(sampling_rate);
+gov_attr_rw(io_is_busy);
+gov_attr_rw(up_threshold);
+gov_attr_rw(sampling_down_factor);
+gov_attr_rw(ignore_nice_load);
+gov_attr_rw(powersave_bias);
+gov_attr_ro(min_sampling_rate);
+
+static struct attribute *od_attributes[] = {
+ &min_sampling_rate.attr,
+ &sampling_rate.attr,
+ &up_threshold.attr,
+ &sampling_down_factor.attr,
+ &ignore_nice_load.attr,
+ &powersave_bias.attr,
+ &io_is_busy.attr,
NULL
};
-static struct attribute_group od_attr_group_gov_sys = {
- .attrs = dbs_attributes_gov_sys,
- .name = "ondemand",
-};
+/************************** sysfs end ************************/
-static struct attribute *dbs_attributes_gov_pol[] = {
- &sampling_rate_min_gov_pol.attr,
- &sampling_rate_gov_pol.attr,
- &up_threshold_gov_pol.attr,
- &sampling_down_factor_gov_pol.attr,
- &ignore_nice_load_gov_pol.attr,
- &powersave_bias_gov_pol.attr,
- &io_is_busy_gov_pol.attr,
- NULL
-};
+static struct policy_dbs_info *od_alloc(void)
+{
+ struct od_policy_dbs_info *dbs_info;
-static struct attribute_group od_attr_group_gov_pol = {
- .attrs = dbs_attributes_gov_pol,
- .name = "ondemand",
-};
+ dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
+ return dbs_info ? &dbs_info->policy_dbs : NULL;
+}
-/************************** sysfs end ************************/
+static void od_free(struct policy_dbs_info *policy_dbs)
+{
+ kfree(to_dbs_info(policy_dbs));
+}
static int od_init(struct dbs_data *dbs_data, bool notify)
{
@@ -503,7 +373,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify)
put_cpu();
if (idle_time != -1ULL) {
/* Idle micro accounting is supported. Use finer thresholds */
- tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
+ dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
/*
* In nohz/micro accounting case we set the minimum frequency
* not depending on HZ, but fixed (very low). The deferred
@@ -511,17 +381,17 @@ static int od_init(struct dbs_data *dbs_data, bool notify)
*/
dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
} else {
- tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
+ dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
/* For correct statistics, we need 10 ticks for each measure */
dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
jiffies_to_usecs(10);
}
- tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
- tuners->ignore_nice_load = 0;
+ dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
+ dbs_data->ignore_nice_load = 0;
tuners->powersave_bias = default_powersave_bias;
- tuners->io_is_busy = should_io_be_busy();
+ dbs_data->io_is_busy = should_io_be_busy();
dbs_data->tuners = tuners;
return 0;
@@ -532,33 +402,38 @@ static void od_exit(struct dbs_data *dbs_data, bool notify)
kfree(dbs_data->tuners);
}
-define_get_cpu_dbs_routines(od_cpu_dbs_info);
+static void od_start(struct cpufreq_policy *policy)
+{
+ struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
+
+ dbs_info->sample_type = OD_NORMAL_SAMPLE;
+ ondemand_powersave_bias_init(policy);
+}
static struct od_ops od_ops = {
- .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
.powersave_bias_target = generic_powersave_bias_target,
- .freq_increase = dbs_freq_increase,
};
-static struct common_dbs_data od_dbs_cdata = {
- .governor = GOV_ONDEMAND,
- .attr_group_gov_sys = &od_attr_group_gov_sys,
- .attr_group_gov_pol = &od_attr_group_gov_pol,
- .get_cpu_cdbs = get_cpu_cdbs,
- .get_cpu_dbs_info_s = get_cpu_dbs_info_s,
+static struct dbs_governor od_dbs_gov = {
+ .gov = {
+ .name = "ondemand",
+ .governor = cpufreq_governor_dbs,
+ .max_transition_latency = TRANSITION_LATENCY_LIMIT,
+ .owner = THIS_MODULE,
+ },
+ .kobj_type = { .default_attrs = od_attributes },
.gov_dbs_timer = od_dbs_timer,
- .gov_check_cpu = od_check_cpu,
- .gov_ops = &od_ops,
+ .alloc = od_alloc,
+ .free = od_free,
.init = od_init,
.exit = od_exit,
- .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex),
+ .start = od_start,
};
+#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov)
+
static void od_set_powersave_bias(unsigned int powersave_bias)
{
- struct cpufreq_policy *policy;
- struct dbs_data *dbs_data;
- struct od_dbs_tuners *od_tuners;
unsigned int cpu;
cpumask_t done;
@@ -567,22 +442,25 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
get_online_cpus();
for_each_online_cpu(cpu) {
- struct cpu_common_dbs_info *shared;
+ struct cpufreq_policy *policy;
+ struct policy_dbs_info *policy_dbs;
+ struct dbs_data *dbs_data;
+ struct od_dbs_tuners *od_tuners;
if (cpumask_test_cpu(cpu, &done))
continue;
- shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared;
- if (!shared)
+ policy = cpufreq_cpu_get_raw(cpu);
+ if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND)
continue;
- policy = shared->policy;
- cpumask_or(&done, &done, policy->cpus);
-
- if (policy->governor != &cpufreq_gov_ondemand)
+ policy_dbs = policy->governor_data;
+ if (!policy_dbs)
continue;
- dbs_data = policy->governor_data;
+ cpumask_or(&done, &done, policy->cpus);
+
+ dbs_data = policy_dbs->dbs_data;
od_tuners = dbs_data->tuners;
od_tuners->powersave_bias = default_powersave_bias;
}
@@ -605,30 +483,14 @@ void od_unregister_powersave_bias_handler(void)
}
EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
-static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
- unsigned int event)
-{
- return cpufreq_governor_dbs(policy, &od_dbs_cdata, event);
-}
-
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
-static
-#endif
-struct cpufreq_governor cpufreq_gov_ondemand = {
- .name = "ondemand",
- .governor = od_cpufreq_governor_dbs,
- .max_transition_latency = TRANSITION_LATENCY_LIMIT,
- .owner = THIS_MODULE,
-};
-
static int __init cpufreq_gov_dbs_init(void)
{
- return cpufreq_register_governor(&cpufreq_gov_ondemand);
+ return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND);
}
static void __exit cpufreq_gov_dbs_exit(void)
{
- cpufreq_unregister_governor(&cpufreq_gov_ondemand);
+ cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND);
}
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
@@ -638,6 +500,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return CPU_FREQ_GOV_ONDEMAND;
+}
+
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h
new file mode 100644
index 0000000..f0121db
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_ondemand.h
@@ -0,0 +1,30 @@
+/*
+ * Header file for CPUFreq ondemand governor and related code.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpufreq_governor.h"
+
+struct od_policy_dbs_info {
+ struct policy_dbs_info policy_dbs;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int freq_lo;
+ unsigned int freq_lo_delay_us;
+ unsigned int freq_hi_delay_us;
+ unsigned int sample_type:1;
+};
+
+static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
+{
+ return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs);
+}
+
+struct od_dbs_tuners {
+ unsigned int powersave_bias;
+};
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index cf117de..af9f4b9 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy,
return 0;
}
-#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
-static
-#endif
-struct cpufreq_governor cpufreq_gov_performance = {
+static struct cpufreq_governor cpufreq_gov_performance = {
.name = "performance",
.governor = cpufreq_governor_performance,
.owner = THIS_MODULE,
@@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void)
cpufreq_unregister_governor(&cpufreq_gov_performance);
}
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return &cpufreq_gov_performance;
+}
+#endif
+#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
+struct cpufreq_governor *cpufreq_fallback_governor(void)
+{
+ return &cpufreq_gov_performance;
+}
+#endif
+
MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
MODULE_DESCRIPTION("CPUfreq policy governor 'performance'");
MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index e3b874c..b8b4002 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
return 0;
}
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
-static
-#endif
-struct cpufreq_governor cpufreq_gov_powersave = {
+static struct cpufreq_governor cpufreq_gov_powersave = {
.name = "powersave",
.governor = cpufreq_governor_powersave,
.owner = THIS_MODULE,
@@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'");
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return &cpufreq_gov_powersave;
+}
+
fs_initcall(cpufreq_gov_powersave_init);
#else
module_init(cpufreq_gov_powersave_init);
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 4dbf1db..4d16f45 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
return rc;
}
-#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
-static
-#endif
-struct cpufreq_governor cpufreq_gov_userspace = {
+static struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace",
.governor = cpufreq_governor_userspace,
.store_setspeed = cpufreq_set,
@@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'");
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return &cpufreq_gov_userspace;
+}
+
fs_initcall(cpufreq_gov_userspace_init);
#else
module_init(cpufreq_gov_userspace_init);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3a4b39a..cb560749 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -71,7 +71,7 @@ struct sample {
u64 mperf;
u64 tsc;
int freq;
- ktime_t time;
+ u64 time;
};
struct pstate_data {
@@ -103,13 +103,13 @@ struct _pid {
struct cpudata {
int cpu;
- struct timer_list timer;
+ struct update_util_data update_util;
struct pstate_data pstate;
struct vid_data vid;
struct _pid pid;
- ktime_t last_sample_time;
+ u64 last_sample_time;
u64 prev_aperf;
u64 prev_mperf;
u64 prev_tsc;
@@ -120,6 +120,7 @@ struct cpudata {
static struct cpudata **all_cpu_data;
struct pstate_adjust_policy {
int sample_rate_ms;
+ s64 sample_rate_ns;
int deadband;
int setpoint;
int p_gain_pct;
@@ -197,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits;
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) {
- pid->setpoint = setpoint;
- pid->deadband = deadband;
+ pid->setpoint = int_tofp(setpoint);
+ pid->deadband = int_tofp(deadband);
pid->integral = int_tofp(integral);
pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}
@@ -224,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
int32_t pterm, dterm, fp_error;
int32_t integral_limit;
- fp_error = int_tofp(pid->setpoint) - busy;
+ fp_error = pid->setpoint - busy;
- if (abs(fp_error) <= int_tofp(pid->deadband))
+ if (abs(fp_error) <= pid->deadband)
return 0;
pterm = mul_fp(pid->p_gain, fp_error);
@@ -286,7 +287,7 @@ static inline void update_turbo_state(void)
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
-static void intel_pstate_hwp_set(void)
+static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
u64 value, cap;
@@ -296,9 +297,7 @@ static void intel_pstate_hwp_set(void)
hw_max = HWP_HIGHEST_PERF(cap);
range = hw_max - hw_min;
- get_online_cpus();
-
- for_each_online_cpu(cpu) {
+ for_each_cpu(cpu, cpumask) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
adj_range = limits->min_perf_pct * range / 100;
min = hw_min + adj_range;
@@ -317,7 +316,12 @@ static void intel_pstate_hwp_set(void)
value |= HWP_MAX_PERF(max);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
+}
+static void intel_pstate_hwp_set_online_cpus(void)
+{
+ get_online_cpus();
+ intel_pstate_hwp_set(cpu_online_mask);
put_online_cpus();
}
@@ -439,7 +443,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
limits->no_turbo = clamp_t(int, input, 0, 1);
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -465,7 +469,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -490,7 +494,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -531,6 +535,9 @@ static void __init intel_pstate_sysfs_expose_params(void)
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
+ /* First disable HWP notification interrupt as we don't process them */
+ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
@@ -712,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
if (limits->no_turbo && !limits->turbo_disabled)
val |= (u64)1 << 32;
- wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+ wrmsrl(MSR_IA32_PERF_CTL, val);
}
static int knl_get_turbo_pstate(void)
@@ -824,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
* policy, or by cpu specific default values determined through
* experimentation.
*/
- max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+ max_perf_adj = fp_toint(max_perf * limits->max_perf);
*max = clamp_t(int, max_perf_adj,
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
- min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+ min_perf = fp_toint(max_perf * limits->min_perf);
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
@@ -874,16 +881,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
core_pct = int_tofp(sample->aperf) * int_tofp(100);
core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
- sample->freq = fp_toint(
- mul_fp(int_tofp(
- cpu->pstate.max_pstate_physical *
- cpu->pstate.scaling / 100),
- core_pct));
-
sample->core_pct_busy = (int32_t)core_pct;
}
-static inline void intel_pstate_sample(struct cpudata *cpu)
+static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
u64 aperf, mperf;
unsigned long flags;
@@ -893,14 +894,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
tsc = rdtsc();
- if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
+ if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags);
- return;
+ return false;
}
local_irq_restore(flags);
cpu->last_sample_time = cpu->sample.time;
- cpu->sample.time = ktime_get();
+ cpu->sample.time = time;
cpu->sample.aperf = aperf;
cpu->sample.mperf = mperf;
cpu->sample.tsc = tsc;
@@ -908,27 +909,16 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
cpu->sample.mperf -= cpu->prev_mperf;
cpu->sample.tsc -= cpu->prev_tsc;
- intel_pstate_calc_busy(cpu);
-
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
+ return true;
}
-static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
-{
- int delay;
-
- delay = msecs_to_jiffies(50);
- mod_timer_pinned(&cpu->timer, jiffies + delay);
-}
-
-static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
+static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
- int delay;
-
- delay = msecs_to_jiffies(pid_params.sample_rate_ms);
- mod_timer_pinned(&cpu->timer, jiffies + delay);
+ return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf *
+ cpu->pstate.scaling, cpu->sample.mperf);
}
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
@@ -954,7 +944,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait;
-
/*
* The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods
@@ -970,8 +959,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
- s64 duration_us;
- u32 sample_time;
+ u64 duration_ns;
+
+ intel_pstate_calc_busy(cpu);
/*
* core_busy is the ratio of actual performance to max
@@ -990,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
/*
- * Since we have a deferred timer, it will not fire unless
- * we are in C0. So, determine if the actual elapsed time
- * is significantly greater (3x) than our sample interval. If it
- * is, then we were idle for a long enough period of time
- * to adjust our busyness.
+ * Since our utilization update callback will not run unless we are
+ * in C0, check if the actual elapsed time is significantly greater (3x)
+ * than our sample interval. If it is, then we were idle for a long
+ * enough period of time to adjust our busyness.
*/
- sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
- duration_us = ktime_us_delta(cpu->sample.time,
- cpu->last_sample_time);
- if (duration_us > sample_time * 3) {
- sample_ratio = div_fp(int_tofp(sample_time),
- int_tofp(duration_us));
+ duration_ns = cpu->sample.time - cpu->last_sample_time;
+ if ((s64)duration_ns > pid_params.sample_rate_ns * 3
+ && cpu->last_sample_time > 0) {
+ sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
+ int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
}
@@ -1028,26 +1016,21 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
sample->mperf,
sample->aperf,
sample->tsc,
- sample->freq);
+ get_avg_frequency(cpu));
}
-static void intel_hwp_timer_func(unsigned long __data)
+static void intel_pstate_update_util(struct update_util_data *data, u64 time,
+ unsigned long util, unsigned long max)
{
- struct cpudata *cpu = (struct cpudata *) __data;
+ struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+ u64 delta_ns = time - cpu->sample.time;
- intel_pstate_sample(cpu);
- intel_hwp_set_sample_time(cpu);
-}
+ if ((s64)delta_ns >= pid_params.sample_rate_ns) {
+ bool sample_taken = intel_pstate_sample(cpu, time);
-static void intel_pstate_timer_func(unsigned long __data)
-{
- struct cpudata *cpu = (struct cpudata *) __data;
-
- intel_pstate_sample(cpu);
-
- intel_pstate_adjust_busy_pstate(cpu);
-
- intel_pstate_set_sample_time(cpu);
+ if (sample_taken && !hwp_active)
+ intel_pstate_adjust_busy_pstate(cpu);
+ }
}
#define ICPU(model, policy) \
@@ -1095,24 +1078,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
cpu->cpu = cpunum;
- if (hwp_active)
+ if (hwp_active) {
intel_pstate_hwp_enable(cpu);
+ pid_params.sample_rate_ms = 50;
+ pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+ }
intel_pstate_get_cpu_pstates(cpu);
- init_timer_deferrable(&cpu->timer);
- cpu->timer.data = (unsigned long)cpu;
- cpu->timer.expires = jiffies + HZ/100;
-
- if (!hwp_active)
- cpu->timer.function = intel_pstate_timer_func;
- else
- cpu->timer.function = intel_hwp_timer_func;
-
intel_pstate_busy_pid_reset(cpu);
- intel_pstate_sample(cpu);
+ intel_pstate_sample(cpu, 0);
- add_timer_on(&cpu->timer, cpunum);
+ cpu->update_util.func = intel_pstate_update_util;
+ cpufreq_set_update_util_data(cpunum, &cpu->update_util);
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
@@ -1128,7 +1106,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
if (!cpu)
return 0;
sample = &cpu->sample;
- return sample->freq;
+ return get_avg_frequency(cpu);
}
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -1141,7 +1119,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
pr_debug("intel_pstate: set performance\n");
limits = &performance_limits;
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set(policy->cpus);
return 0;
}
@@ -1173,7 +1151,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set(policy->cpus);
return 0;
}
@@ -1196,7 +1174,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
- del_timer_sync(&all_cpu_data[cpu_num]->timer);
+ cpufreq_set_update_util_data(cpu_num, NULL);
+ synchronize_sched();
+
if (hwp_active)
return;
@@ -1260,6 +1240,7 @@ static int intel_pstate_msrs_not_valid(void)
static void copy_pid_params(struct pstate_adjust_policy *policy)
{
pid_params.sample_rate_ms = policy->sample_rate_ms;
+ pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
pid_params.p_gain_pct = policy->p_gain_pct;
pid_params.i_gain_pct = policy->i_gain_pct;
pid_params.d_gain_pct = policy->d_gain_pct;
@@ -1397,6 +1378,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */
+static const struct x86_cpu_id hwp_support_ids[] __initconst = {
+ { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+ {}
+};
+
static int __init intel_pstate_init(void)
{
int cpu, rc = 0;
@@ -1406,17 +1392,16 @@ static int __init intel_pstate_init(void)
if (no_load)
return -ENODEV;
+ if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
+ copy_cpu_funcs(&core_params.funcs);
+ hwp_active++;
+ goto hwp_cpu_matched;
+ }
+
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id)
return -ENODEV;
- /*
- * The Intel pstate driver will be ignored if the platform
- * firmware has its own power management modes.
- */
- if (intel_pstate_platform_pwr_mgmt_exists())
- return -ENODEV;
-
cpu_def = (struct cpu_defaults *)id->driver_data;
copy_pid_params(&cpu_def->pid_policy);
@@ -1425,17 +1410,20 @@ static int __init intel_pstate_init(void)
if (intel_pstate_msrs_not_valid())
return -ENODEV;
+hwp_cpu_matched:
+ /*
+ * The Intel pstate driver will be ignored if the platform
+ * firmware has its own power management modes.
+ */
+ if (intel_pstate_platform_pwr_mgmt_exists())
+ return -ENODEV;
+
pr_info("Intel P-state driver initializing.\n");
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data)
return -ENOMEM;
- if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
- pr_info("intel_pstate: HWP enabled\n");
- hwp_active++;
- }
-
if (!hwp_active && hwp_only)
goto out;
@@ -1446,12 +1434,16 @@ static int __init intel_pstate_init(void)
intel_pstate_debug_expose_params();
intel_pstate_sysfs_expose_params();
+ if (hwp_active)
+ pr_info("intel_pstate: HWP enabled\n");
+
return rc;
out:
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
- del_timer_sync(&all_cpu_data[cpu]->timer);
+ cpufreq_set_update_util_data(cpu, NULL);
+ synchronize_sched();
kfree(all_cpu_data[cpu]);
}
}
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 547890f..50bf120 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -28,6 +28,8 @@
#include <linux/of.h>
#include <linux/reboot.h>
#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <trace/events/power.h>
#include <asm/cputhreads.h>
#include <asm/firmware.h>
@@ -42,13 +44,24 @@
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static bool rebooting, throttled, occ_reset;
+static unsigned int *core_to_chip_map;
+
+static const char * const throttle_reason[] = {
+ "No throttling",
+ "Power Cap",
+ "Processor Over Temperature",
+ "Power Supply Failure",
+ "Over Current",
+ "OCC Reset"
+};
static struct chip {
unsigned int id;
bool throttled;
+ bool restore;
+ u8 throttle_reason;
cpumask_t mask;
struct work_struct throttle;
- bool restore;
} *chips;
static int nr_chips;
@@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void)
static void powernv_cpufreq_throttle_check(void *data)
{
unsigned int cpu = smp_processor_id();
+ unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
unsigned long pmsr;
int pmsr_pmax, i;
pmsr = get_pmspr(SPRN_PMSR);
for (i = 0; i < nr_chips; i++)
- if (chips[i].id == cpu_to_chip_id(cpu))
+ if (chips[i].id == chip_id)
break;
/* Check for Pmax Capping */
@@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data)
goto next;
chips[i].throttled = true;
if (pmsr_pmax < powernv_pstate_info.nominal)
- pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
- cpu, chips[i].id, pmsr_pmax,
- powernv_pstate_info.nominal);
- else
- pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n",
- cpu, chips[i].id, pmsr_pmax,
- powernv_pstate_info.max);
+ pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
+ cpu, chips[i].id, pmsr_pmax,
+ powernv_pstate_info.nominal);
+ trace_powernv_throttle(chips[i].id,
+ throttle_reason[chips[i].throttle_reason],
+ pmsr_pmax);
} else if (chips[i].throttled) {
chips[i].throttled = false;
- pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu,
- chips[i].id, pmsr_pmax);
+ trace_powernv_throttle(chips[i].id,
+ throttle_reason[chips[i].throttle_reason],
+ pmsr_pmax);
}
/* Check if Psafe_mode_active is set in PMSR. */
@@ -356,7 +370,7 @@ next:
if (throttled) {
pr_info("PMSR = %16lx\n", pmsr);
- pr_crit("CPU Frequency could be throttled\n");
+ pr_warn("CPU Frequency could be throttled\n");
}
}
@@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work)
{
struct chip *chip = container_of(work, struct chip, throttle);
unsigned int cpu;
- cpumask_var_t mask;
+ cpumask_t mask;
- smp_call_function_any(&chip->mask,
+ get_online_cpus();
+ cpumask_and(&mask, &chip->mask, cpu_online_mask);
+ smp_call_function_any(&mask,
powernv_cpufreq_throttle_check, NULL, 0);
if (!chip->restore)
- return;
+ goto out;
chip->restore = false;
- cpumask_copy(mask, &chip->mask);
- for_each_cpu_and(cpu, mask, cpu_online_mask) {
- int index, tcpu;
+ for_each_cpu(cpu, &mask) {
+ int index;
struct cpufreq_policy policy;
cpufreq_get_policy(&policy, cpu);
@@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work)
policy.cur,
CPUFREQ_RELATION_C, &index);
powernv_cpufreq_target_index(&policy, index);
- for_each_cpu(tcpu, policy.cpus)
- cpumask_clear_cpu(tcpu, mask);
+ cpumask_andnot(&mask, &mask, policy.cpus);
}
+out:
+ put_online_cpus();
}
-static char throttle_reason[][30] = {
- "No throttling",
- "Power Cap",
- "Processor Over Temperature",
- "Power Supply Failure",
- "Over Current",
- "OCC Reset"
- };
-
static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
unsigned long msg_type, void *_msg)
{
@@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
*/
if (!throttled) {
throttled = true;
- pr_crit("CPU frequency is throttled for duration\n");
+ pr_warn("CPU frequency is throttled for duration\n");
}
break;
@@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
return 0;
}
- if (omsg.throttle_status &&
+ for (i = 0; i < nr_chips; i++)
+ if (chips[i].id == omsg.chip)
+ break;
+
+ if (omsg.throttle_status >= 0 &&
omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
- pr_info("OCC: Chip %u Pmax reduced due to %s\n",
- (unsigned int)omsg.chip,
- throttle_reason[omsg.throttle_status]);
- else if (!omsg.throttle_status)
- pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip,
- throttle_reason[omsg.throttle_status]);
- else
- return 0;
+ chips[i].throttle_reason = omsg.throttle_status;
- for (i = 0; i < nr_chips; i++)
- if (chips[i].id == omsg.chip) {
- if (!omsg.throttle_status)
- chips[i].restore = true;
- schedule_work(&chips[i].throttle);
- }
+ if (!omsg.throttle_status)
+ chips[i].restore = true;
+
+ schedule_work(&chips[i].throttle);
}
return 0;
}
@@ -556,29 +558,54 @@ static int init_chip_info(void)
unsigned int chip[256];
unsigned int cpu, i;
unsigned int prev_chip_id = UINT_MAX;
+ cpumask_t cpu_mask;
+ int ret = -ENOMEM;
+
+ core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!core_to_chip_map)
+ goto out;
- for_each_possible_cpu(cpu) {
+ cpumask_copy(&cpu_mask, cpu_possible_mask);
+ for_each_cpu(cpu, &cpu_mask) {
unsigned int id = cpu_to_chip_id(cpu);
if (prev_chip_id != id) {
prev_chip_id = id;
chip[nr_chips++] = id;
}
+ core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
+ cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
}
- chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL);
+ chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
if (!chips)
- return -ENOMEM;
+ goto free_chip_map;
for (i = 0; i < nr_chips; i++) {
chips[i].id = chip[i];
- chips[i].throttled = false;
cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
- chips[i].restore = false;
}
return 0;
+free_chip_map:
+ kfree(core_to_chip_map);
+out:
+ return ret;
+}
+
+static inline void clean_chip_info(void)
+{
+ kfree(chips);
+ kfree(core_to_chip_map);
+}
+
+static inline void unregister_all_notifiers(void)
+{
+ opal_message_notifier_unregister(OPAL_MSG_OCC,
+ &powernv_cpufreq_opal_nb);
+ unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
}
static int __init powernv_cpufreq_init(void)
@@ -591,28 +618,35 @@ static int __init powernv_cpufreq_init(void)
/* Discover pstates from device tree and init */
rc = init_powernv_pstates();
- if (rc) {
- pr_info("powernv-cpufreq disabled. System does not support PState control\n");
- return rc;
- }
+ if (rc)
+ goto out;
/* Populate chip info */
rc = init_chip_info();
if (rc)
- return rc;
+ goto out;
register_reboot_notifier(&powernv_cpufreq_reboot_nb);
opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
- return cpufreq_register_driver(&powernv_cpufreq_driver);
+
+ rc = cpufreq_register_driver(&powernv_cpufreq_driver);
+ if (!rc)
+ return 0;
+
+ pr_info("Failed to register the cpufreq driver (%d)\n", rc);
+ unregister_all_notifiers();
+ clean_chip_info();
+out:
+ pr_info("Platform driver disabled. System does not support PState control\n");
+ return rc;
}
module_init(powernv_cpufreq_init);
static void __exit powernv_cpufreq_exit(void)
{
- unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
- opal_message_notifier_unregister(OPAL_MSG_OCC,
- &powernv_cpufreq_opal_nb);
cpufreq_unregister_driver(&powernv_cpufreq_driver);
+ unregister_all_notifiers();
+ clean_chip_info();
}
module_exit(powernv_cpufreq_exit);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 0742b32..27fc733 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -199,8 +199,8 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
static void get_typical_interval(struct menu_device *data)
{
int i, divisor;
- unsigned int max, thresh;
- uint64_t avg, stddev;
+ unsigned int max, thresh, avg;
+ uint64_t sum, variance;
thresh = UINT_MAX; /* Discard outliers above this value */
@@ -208,52 +208,51 @@ again:
/* First calculate the average of past intervals */
max = 0;
- avg = 0;
+ sum = 0;
divisor = 0;
for (i = 0; i < INTERVALS; i++) {
unsigned int value = data->intervals[i];
if (value <= thresh) {
- avg += value;
+ sum += value;
divisor++;
if (value > max)
max = value;
}
}
if (divisor == INTERVALS)
- avg >>= INTERVAL_SHIFT;
+ avg = sum >> INTERVAL_SHIFT;
else
- do_div(avg, divisor);
+ avg = div_u64(sum, divisor);
- /* Then try to determine standard deviation */
- stddev = 0;
+ /* Then try to determine variance */
+ variance = 0;
for (i = 0; i < INTERVALS; i++) {
unsigned int value = data->intervals[i];
if (value <= thresh) {
- int64_t diff = value - avg;
- stddev += diff * diff;
+ int64_t diff = (int64_t)value - avg;
+ variance += diff * diff;
}
}
if (divisor == INTERVALS)
- stddev >>= INTERVAL_SHIFT;
+ variance >>= INTERVAL_SHIFT;
else
- do_div(stddev, divisor);
+ do_div(variance, divisor);
/*
- * The typical interval is obtained when standard deviation is small
- * or standard deviation is small compared to the average interval.
- *
- * int_sqrt() formal parameter type is unsigned long. When the
- * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor)
- * the resulting squared standard deviation exceeds the input domain
- * of int_sqrt on platforms where unsigned long is 32 bits in size.
- * In such case reject the candidate average.
+ * The typical interval is obtained when standard deviation is
+ * small (stddev <= 20 us, variance <= 400 us^2) or standard
+ * deviation is small compared to the average interval (avg >
+ * 6*stddev, avg^2 > 36*variance). The average is smaller than
+ * UINT_MAX aka U32_MAX, so computing its square does not
+ * overflow a u64. We simply reject this candidate average if
+ * the standard deviation is greater than 715 s (which is
+ * rather unlikely).
*
* Use this result only if there is no timer to wake us up sooner.
*/
- if (likely(stddev <= ULONG_MAX)) {
- stddev = int_sqrt(stddev);
- if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
- || stddev <= 20) {
+ if (likely(variance <= U64_MAX/36)) {
+ if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
+ || variance <= 400) {
if (data->next_timer_us > avg)
data->predicted_us = avg;
return;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 438f1b4..d656657 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -123,6 +123,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = {
{ "80860F41", 0 },
{ "808622C1", 0 },
{ "AMD0010", ACCESS_INTR_MASK },
+ { "AMDI0010", ACCESS_INTR_MASK },
{ "AMDI0510", 0 },
{ "APMC0D0F", 0 },
{ }
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 8f779a1..0ddf638 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -63,6 +63,7 @@
#include <linux/platform_device.h>
#include <linux/mailbox_controller.h>
#include <linux/mailbox_client.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include "mailbox.h"
@@ -70,6 +71,9 @@
static struct mbox_chan *pcc_mbox_channels;
+/* Array of cached virtual address for doorbell registers */
+static void __iomem **pcc_doorbell_vaddr;
+
static struct mbox_controller pcc_mbox_ctrl = {};
/**
* get_pcc_channel - Given a PCC subspace idx, get
@@ -160,6 +164,66 @@ void pcc_mbox_free_channel(struct mbox_chan *chan)
}
EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
+/*
+ * PCC can be used with perf critical drivers such as CPPC
+ * So it makes sense to locally cache the virtual address and
+ * use it to read/write to PCC registers such as doorbell register
+ *
+ * The below read_register and write_registers are used to read and
+ * write from perf critical registers such as PCC doorbell register
+ */
+static int read_register(void __iomem *vaddr, u64 *val, unsigned int bit_width)
+{
+ int ret_val = 0;
+
+ switch (bit_width) {
+ case 8:
+ *val = readb(vaddr);
+ break;
+ case 16:
+ *val = readw(vaddr);
+ break;
+ case 32:
+ *val = readl(vaddr);
+ break;
+ case 64:
+ *val = readq(vaddr);
+ break;
+ default:
+ pr_debug("Error: Cannot read register of %u bit width",
+ bit_width);
+ ret_val = -EFAULT;
+ break;
+ }
+ return ret_val;
+}
+
+static int write_register(void __iomem *vaddr, u64 val, unsigned int bit_width)
+{
+ int ret_val = 0;
+
+ switch (bit_width) {
+ case 8:
+ writeb(val, vaddr);
+ break;
+ case 16:
+ writew(val, vaddr);
+ break;
+ case 32:
+ writel(val, vaddr);
+ break;
+ case 64:
+ writeq(val, vaddr);
+ break;
+ default:
+ pr_debug("Error: Cannot write register of %u bit width",
+ bit_width);
+ ret_val = -EFAULT;
+ break;
+ }
+ return ret_val;
+}
+
/**
* pcc_send_data - Called from Mailbox Controller code. Used
* here only to ring the channel doorbell. The PCC client
@@ -175,21 +239,39 @@ EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
static int pcc_send_data(struct mbox_chan *chan, void *data)
{
struct acpi_pcct_hw_reduced *pcct_ss = chan->con_priv;
- struct acpi_generic_address doorbell;
+ struct acpi_generic_address *doorbell;
u64 doorbell_preserve;
u64 doorbell_val;
u64 doorbell_write;
+ u32 id = chan - pcc_mbox_channels;
+ int ret = 0;
+
+ if (id >= pcc_mbox_ctrl.num_chans) {
+ pr_debug("pcc_send_data: Invalid mbox_chan passed\n");
+ return -ENOENT;
+ }
- doorbell = pcct_ss->doorbell_register;
+ doorbell = &pcct_ss->doorbell_register;
doorbell_preserve = pcct_ss->preserve_mask;
doorbell_write = pcct_ss->write_mask;
/* Sync notification from OS to Platform. */
- acpi_read(&doorbell_val, &doorbell);
- acpi_write((doorbell_val & doorbell_preserve) | doorbell_write,
- &doorbell);
-
- return 0;
+ if (pcc_doorbell_vaddr[id]) {
+ ret = read_register(pcc_doorbell_vaddr[id], &doorbell_val,
+ doorbell->bit_width);
+ if (ret)
+ return ret;
+ ret = write_register(pcc_doorbell_vaddr[id],
+ (doorbell_val & doorbell_preserve) | doorbell_write,
+ doorbell->bit_width);
+ } else {
+ ret = acpi_read(&doorbell_val, doorbell);
+ if (ret)
+ return ret;
+ ret = acpi_write((doorbell_val & doorbell_preserve) | doorbell_write,
+ doorbell);
+ }
+ return ret;
}
static const struct mbox_chan_ops pcc_chan_ops = {
@@ -265,14 +347,29 @@ static int __init acpi_pcc_probe(void)
return -ENOMEM;
}
+ pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL);
+ if (!pcc_doorbell_vaddr) {
+ kfree(pcc_mbox_channels);
+ return -ENOMEM;
+ }
+
/* Point to the first PCC subspace entry */
pcct_entry = (struct acpi_subtable_header *) (
(unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct));
for (i = 0; i < count; i++) {
+ struct acpi_generic_address *db_reg;
+ struct acpi_pcct_hw_reduced *pcct_ss;
pcc_mbox_channels[i].con_priv = pcct_entry;
pcct_entry = (struct acpi_subtable_header *)
((unsigned long) pcct_entry + pcct_entry->length);
+
+ /* If doorbell is in system memory cache the virt address */
+ pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry;
+ db_reg = &pcct_ss->doorbell_register;
+ if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address,
+ db_reg->bit_width/8);
}
pcc_mbox_ctrl.num_chans = count;
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 0579649..4b717c6 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -252,6 +252,10 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
break;
+ case ACPI_RESOURCE_TYPE_SERIAL_BUS:
+ /* serial bus connections (I2C/SPI/UART) are not pnp */
+ break;
+
default:
dev_warn(&dev->dev, "unknown resource type %d in _CRS\n",
res->type);
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 6c592dc..cdfd01f0 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -133,6 +133,12 @@ struct rapl_domain_data {
unsigned long timestamp;
};
+struct msrl_action {
+ u32 msr_no;
+ u64 clear_mask;
+ u64 set_mask;
+ int err;
+};
#define DOMAIN_STATE_INACTIVE BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
@@ -149,6 +155,7 @@ struct rapl_power_limit {
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
+struct rapl_package;
struct rapl_domain {
const char *name;
enum rapl_domain_type id;
@@ -159,7 +166,7 @@ struct rapl_domain {
u64 attr_map; /* track capabilities */
unsigned int state;
unsigned int domain_energy_unit;
- int package_id;
+ struct rapl_package *rp;
};
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
@@ -184,6 +191,7 @@ struct rapl_package {
* notify interrupt enable status.
*/
struct list_head plist;
+ int lead_cpu; /* one active cpu per package for access */
};
struct rapl_defaults {
@@ -231,10 +239,10 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
-static u64 rapl_unit_xlate(struct rapl_domain *rd, int package,
+static u64 rapl_unit_xlate(struct rapl_domain *rd,
enum unit_type type, u64 value,
int to_raw);
-static void package_power_limit_irq_save(int package_id);
+static void package_power_limit_irq_save(struct rapl_package *rp);
static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
@@ -260,20 +268,6 @@ static struct rapl_package *find_package_by_id(int id)
return NULL;
}
-/* caller to ensure CPU hotplug lock is held */
-static int find_active_cpu_on_package(int package_id)
-{
- int i;
-
- for_each_online_cpu(i) {
- if (topology_physical_package_id(i) == package_id)
- return i;
- }
- /* all CPUs on this package are offline */
-
- return -ENODEV;
-}
-
/* caller must hold cpu hotplug lock */
static void rapl_cleanup_data(void)
{
@@ -312,25 +306,19 @@ static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
- *energy = rapl_unit_xlate(rd, 0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
+ *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
return 0;
}
static int release_zone(struct powercap_zone *power_zone)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
- struct rapl_package *rp;
+ struct rapl_package *rp = rd->rp;
/* package zone is the last zone of a package, we can free
* memory here since all children has been unregistered.
*/
if (rd->id == RAPL_DOMAIN_PACKAGE) {
- rp = find_package_by_id(rd->package_id);
- if (!rp) {
- dev_warn(&power_zone->dev, "no package id %s\n",
- rd->name);
- return -ENODEV;
- }
kfree(rd);
rp->domains = NULL;
}
@@ -432,11 +420,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int id,
get_online_cpus();
rd = power_zone_to_rapl_domain(power_zone);
- rp = find_package_by_id(rd->package_id);
- if (!rp) {
- ret = -ENODEV;
- goto set_exit;
- }
+ rp = rd->rp;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
dev_warn(&power_zone->dev, "%s locked by BIOS, monitoring only\n",
@@ -456,7 +440,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int id,
ret = -EINVAL;
}
if (!ret)
- package_power_limit_irq_save(rd->package_id);
+ package_power_limit_irq_save(rp);
set_exit:
put_online_cpus();
return ret;
@@ -655,24 +639,19 @@ static void rapl_init_domains(struct rapl_package *rp)
break;
}
if (mask) {
- rd->package_id = rp->id;
+ rd->rp = rp;
rd++;
}
}
}
-static u64 rapl_unit_xlate(struct rapl_domain *rd, int package,
- enum unit_type type, u64 value,
- int to_raw)
+static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
+ u64 value, int to_raw)
{
u64 units = 1;
- struct rapl_package *rp;
+ struct rapl_package *rp = rd->rp;
u64 scale = 1;
- rp = find_package_by_id(package);
- if (!rp)
- return value;
-
switch (type) {
case POWER_UNIT:
units = rp->power_unit;
@@ -769,10 +748,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
msr = rd->msrs[rp->id];
if (!msr)
return -EINVAL;
- /* use physical package id to look up active cpus */
- cpu = find_active_cpu_on_package(rd->package_id);
- if (cpu < 0)
- return cpu;
+
+ cpu = rd->rp->lead_cpu;
/* special-case package domain, which uses a different bit*/
if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) {
@@ -793,42 +770,66 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
final = value & rp->mask;
final = final >> rp->shift;
if (xlate)
- *data = rapl_unit_xlate(rd, rd->package_id, rp->unit, final, 0);
+ *data = rapl_unit_xlate(rd, rp->unit, final, 0);
else
*data = final;
return 0;
}
+
+static int msrl_update_safe(u32 msr_no, u64 clear_mask, u64 set_mask)
+{
+ int err;
+ u64 val;
+
+ err = rdmsrl_safe(msr_no, &val);
+ if (err)
+ goto out;
+
+ val &= ~clear_mask;
+ val |= set_mask;
+
+ err = wrmsrl_safe(msr_no, val);
+
+out:
+ return err;
+}
+
+static void msrl_update_func(void *info)
+{
+ struct msrl_action *ma = info;
+
+ ma->err = msrl_update_safe(ma->msr_no, ma->clear_mask, ma->set_mask);
+}
+
/* Similar use of primitive info in the read counterpart */
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value)
{
- u64 msr_val;
- u32 msr;
struct rapl_primitive_info *rp = &rpi[prim];
int cpu;
+ u64 bits;
+ struct msrl_action ma;
+ int ret;
- cpu = find_active_cpu_on_package(rd->package_id);
- if (cpu < 0)
- return cpu;
- msr = rd->msrs[rp->id];
- if (rdmsrl_safe_on_cpu(cpu, msr, &msr_val)) {
- dev_dbg(&rd->power_zone.dev,
- "failed to read msr 0x%x on cpu %d\n", msr, cpu);
- return -EIO;
- }
- value = rapl_unit_xlate(rd, rd->package_id, rp->unit, value, 1);
- msr_val &= ~rp->mask;
- msr_val |= value << rp->shift;
- if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) {
- dev_dbg(&rd->power_zone.dev,
- "failed to write msr 0x%x on cpu %d\n", msr, cpu);
- return -EIO;
- }
+ cpu = rd->rp->lead_cpu;
+ bits = rapl_unit_xlate(rd, rp->unit, value, 1);
+ bits |= bits << rp->shift;
+ memset(&ma, 0, sizeof(ma));
- return 0;
+ ma.msr_no = rd->msrs[rp->id];
+ ma.clear_mask = rp->mask;
+ ma.set_mask = bits;
+
+ ret = smp_call_function_single(cpu, msrl_update_func, &ma, 1);
+ if (ret)
+ WARN_ON_ONCE(ret);
+ else
+ ret = ma.err;
+
+ return ret;
}
/*
@@ -893,6 +894,21 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
return 0;
}
+static void power_limit_irq_save_cpu(void *info)
+{
+ u32 l, h = 0;
+ struct rapl_package *rp = (struct rapl_package *)info;
+
+ /* save the state of PLN irq mask bit before disabling it */
+ rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
+ if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
+ rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
+ rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
+ }
+ l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
+ wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+}
+
/* REVISIT:
* When package power limit is set artificially low by RAPL, LVT
@@ -904,61 +920,40 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
* to do by adding an atomic notifier.
*/
-static void package_power_limit_irq_save(int package_id)
+static void package_power_limit_irq_save(struct rapl_package *rp)
{
- u32 l, h = 0;
- int cpu;
- struct rapl_package *rp;
-
- rp = find_package_by_id(package_id);
- if (!rp)
- return;
-
if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
- cpu = find_active_cpu_on_package(package_id);
- if (cpu < 0)
- return;
- /* save the state of PLN irq mask bit before disabling it */
- rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
- if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
- rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
- rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
- }
- l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
- wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
}
-/* restore per package power limit interrupt enable state */
-static void package_power_limit_irq_restore(int package_id)
+static void power_limit_irq_restore_cpu(void *info)
{
- u32 l, h;
- int cpu;
- struct rapl_package *rp;
+ u32 l, h = 0;
+ struct rapl_package *rp = (struct rapl_package *)info;
- rp = find_package_by_id(package_id);
- if (!rp)
- return;
+ rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
- if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
- return;
+ if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
+ l |= PACKAGE_THERM_INT_PLN_ENABLE;
+ else
+ l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
+
+ wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+}
- cpu = find_active_cpu_on_package(package_id);
- if (cpu < 0)
+/* restore per package power limit interrupt enable state */
+static void package_power_limit_irq_restore(struct rapl_package *rp)
+{
+ if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
/* irq enable state not saved, nothing to restore */
if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
return;
- rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
-
- if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
- l |= PACKAGE_THERM_INT_PLN_ENABLE;
- else
- l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
- wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ smp_call_function_single(rp->lead_cpu, power_limit_irq_restore_cpu, rp, 1);
}
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
@@ -1141,7 +1136,7 @@ static int rapl_unregister_powercap(void)
* hotplug lock held
*/
list_for_each_entry(rp, &rapl_packages, plist) {
- package_power_limit_irq_restore(rp->id);
+ package_power_limit_irq_restore(rp);
for (rd = rp->domains; rd < rp->domains + rp->nr_domains;
rd++) {
@@ -1392,7 +1387,8 @@ static int rapl_detect_topology(void)
/* add the new package to the list */
new_package->id = phy_package_id;
new_package->nr_cpus = 1;
-
+ /* use the first active cpu of the package to access */
+ new_package->lead_cpu = i;
/* check if the package contains valid domains */
if (rapl_detect_domains(new_package, i) ||
rapl_defaults->check_unit(new_package, i)) {
@@ -1448,6 +1444,8 @@ static int rapl_add_package(int cpu)
/* add the new package to the list */
rp->id = phy_package_id;
rp->nr_cpus = 1;
+ rp->lead_cpu = cpu;
+
/* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) ||
rapl_defaults->check_unit(rp, cpu)) {
@@ -1480,6 +1478,7 @@ static int rapl_cpu_callback(struct notifier_block *nfb,
unsigned long cpu = (unsigned long)hcpu;
int phy_package_id;
struct rapl_package *rp;
+ int lead_cpu;
phy_package_id = topology_physical_package_id(cpu);
switch (action) {
@@ -1500,6 +1499,15 @@ static int rapl_cpu_callback(struct notifier_block *nfb,
break;
if (--rp->nr_cpus == 0)
rapl_remove_package(rp);
+ else if (cpu == rp->lead_cpu) {
+ /* choose another active cpu in the package */
+ lead_cpu = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ if (lead_cpu < nr_cpu_ids)
+ rp->lead_cpu = lead_cpu;
+ else /* should never go here */
+ pr_err("no active cpu available for package %d\n",
+ phy_package_id);
+ }
}
return NOTIFY_OK;
OpenPOWER on IntegriCloud