diff options
Diffstat (limited to 'common/recipes-core/fan-ctrl')
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl/Makefile | 26 | ||||
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl/README | 5 | ||||
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl/fand.cpp | 1030 | ||||
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl/watchdog.cpp | 201 | ||||
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl/watchdog.h | 60 | ||||
-rw-r--r-- | common/recipes-core/fan-ctrl/fan-ctrl_0.1.bb | 61 |
6 files changed, 1383 insertions, 0 deletions
diff --git a/common/recipes-core/fan-ctrl/fan-ctrl/Makefile b/common/recipes-core/fan-ctrl/fan-ctrl/Makefile new file mode 100644 index 0000000..da74a34 --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl/Makefile @@ -0,0 +1,26 @@ +# Copyright 2014-present Facebook. All Rights Reserved. +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program in a file named COPYING; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA + +all: fand + +fand: fand.cpp watchdog.cpp + $(CXX) $(CXXFLAGS) -pthread -o $@ $^ $(LDFLAGS) + +.PHONY: clean + +clean: + rm -rf *.o fand diff --git a/common/recipes-core/fan-ctrl/fan-ctrl/README b/common/recipes-core/fan-ctrl/fan-ctrl/README new file mode 100644 index 0000000..2a92b9d --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl/README @@ -0,0 +1,5 @@ +The AST PWM/Tach driver is in the kernel sources at drivers/hwmon/ast_pwm_fan.c + +There are 7 PWM output pins. Each PWM can be configured in one of 3 types (M, +N, or O). The clock settings for each type are configurable. See init_pwm.sh +for more comments about how we configure the settings. diff --git a/common/recipes-core/fan-ctrl/fan-ctrl/fand.cpp b/common/recipes-core/fan-ctrl/fan-ctrl/fand.cpp new file mode 100644 index 0000000..49c6f6b --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl/fand.cpp @@ -0,0 +1,1030 @@ +/* + * fand + * + * Copyright 2014-present Facebook. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Daemon to manage the fan speed to ensure that we stay within a reasonable + * temperature range. We're using a simplistic algorithm to get started: + * + * If the fan is already on high, we'll move it to medium if we fall below + * a top temperature. If we're on medium, we'll move it to high + * if the temperature goes over the top value, and to low if the + * temperature falls to a bottom level. If the fan is on low, + * we'll increase the speed if the temperature rises to the top level. + * + * To ensure that we're not just turning the fans up, then back down again, + * we'll require an extra few degrees of temperature drop before we lower + * the fan speed. + * + * We check the RPM of the fans against the requested RPMs to determine + * whether the fans are failing, in which case we'll turn up all of + * the other fans and report the problem.. + * + * TODO: Implement a PID algorithm to closely track the ideal temperature. + * TODO: Determine if the daemon is already started. + */ + +/* Yeah, the file ends in .cpp, but it's a C program. Deal. */ + +#if !defined(CONFIG_YOSEMITE) && !defined(CONFIG_WEDGE) +#error "No hardware platform defined!" +#endif +#if defined(CONFIG_YOSEMITE) && defined(CONFIG_WEDGE) +#error "Both hardware platform defined!" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <signal.h> +#include <syslog.h> +#ifdef CONFIG_YOSEMITE +#include <openbmc/ipmi.h> +#include <facebook/bic.h> +#include <facebook/yosemite_sensor.h> +#endif +#ifdef CONFIG_WEDGE +#include <facebook/wedge_eeprom.h> +#endif + +#include "watchdog.h" + +/* Sensor definitions */ + +#ifdef CONFIG_WEDGE +#define INTERNAL_TEMPS(x) ((x) * 1000) // stored a C * 1000 +#define EXTERNAL_TEMPS(x) ((x) / 1000) +#elif CONFIG_YOSEMITE +#define INTERNAL_TEMPS(x) (x) +#define EXTERNAL_TEMPS(x) (x) +#define TOTAL_1S_SERVERS 4 +#endif + +#define I2C_BUS_3_DIR "/sys/class/i2c-adapter/i2c-3/" +#define I2C_BUS_4_DIR "/sys/class/i2c-adapter/i2c-4/" + +#define INTAKE_TEMP_DEVICE I2C_BUS_3_DIR "3-0048" +#define T2_TEMP_DEVICE I2C_BUS_3_DIR "3-0049" +#define EXHAUST_TEMP_DEVICE I2C_BUS_3_DIR "3-004a" +#define USERVER_TEMP_DEVICE I2C_BUS_4_DIR "4-0040" + +/* + * The sensor for the uServer CPU is not on the CPU itself, so it reads + * a little low. We are special casing this, but we should obviously + * be thinking about a way to generalize these tweaks, and perhaps + * the entire configuration. JSON file? + */ + +#ifdef CONFIG_WEDGE +#define USERVER_TEMP_FUDGE INTERNAL_TEMPS(10) +#else +#define USERVER_TEMP_FUDGE INTERNAL_TEMPS(1) +#endif + +#define BAD_TEMP INTERNAL_TEMPS(-60) + +#define BAD_READ_THRESHOLD 4 /* How many times can reads fail */ +#define FAN_FAILURE_THRESHOLD 4 /* How many times can a fan fail */ +#define FAN_SHUTDOWN_THRESHOLD 20 /* How long fans can be failed before */ + /* we just shut down the whole thing. */ + + +#define PWM_DIR "/sys/devices/platform/ast_pwm_tacho.0" + +#define PWM_UNIT_MAX 96 + +#define LARGEST_DEVICE_NAME 120 + +#define GPIO_USERVER_POWER_DIRECTION "/sys/class/gpio/gpio25/direction" +#define GPIO_USERVER_POWER "/sys/class/gpio/gpio25/value" +#define GPIO_T2_POWER_DIRECTION "/tmp/gpionames/T2_POWER_UP/direction" +#define GPIO_T2_POWER "/tmp/gpionames/T2_POWER_UP/value" + +#define GPIO_FAN0_LED "/sys/class/gpio/gpio53/value" +#define GPIO_FAN1_LED "/sys/class/gpio/gpio54/value" +#define GPIO_FAN2_LED "/sys/class/gpio/gpio55/value" +#define GPIO_FAN3_LED "/sys/class/gpio/gpio72/value" + +const char *fan_led[] = {GPIO_FAN0_LED, GPIO_FAN1_LED, + GPIO_FAN2_LED, GPIO_FAN3_LED}; + +#define FAN_LED_RED "0" +#define FAN_LED_BLUE "1" + +#define GPIO_PIN_ID "/sys/class/gpio/gpio%d/value" +#define REV_IDS 3 +#define GPIO_REV_ID_START 192 + +#define BOARD_IDS 4 +#define GPIO_BOARD_ID_START 160 + +/* + * With hardware revisions after 3, we use a different set of pins for + * the BOARD_ID. + */ + +#define REV_ID_NEW_BOARD_ID 3 +#define GPIO_BOARD_ID_START_NEW 166 + +#define REPORT_TEMP 720 /* Report temp every so many cycles */ + +/* Sensor limits and tuning parameters */ + +#define INTAKE_LIMIT INTERNAL_TEMPS(60) +#define T2_LIMIT INTERNAL_TEMPS(80) +#define USERVER_LIMIT INTERNAL_TEMPS(90) + +#define TEMP_TOP INTERNAL_TEMPS(70) +#define TEMP_BOTTOM INTERNAL_TEMPS(40) + +/* + * Toggling the fan constantly will wear it out (and annoy anyone who + * can hear it), so we'll only turn down the fan after the temperature + * has dipped a bit below the point at which we'd otherwise switch + * things up. + */ + +#define COOLDOWN_SLOP INTERNAL_TEMPS(6) + +#define WEDGE_FAN_LOW 35 +#define WEDGE_FAN_MEDIUM 50 +#define WEDGE_FAN_HIGH 70 +#define WEDGE_FAN_MAX 99 + +#define SIXPACK_FAN_LOW 35 +#define SIXPACK_FAN_MEDIUM 55 +#define SIXPACK_FAN_HIGH 75 +#define SIXPACK_FAN_MAX 99 + +/* + * Mapping physical to hardware addresses for fans; it's different for + * RPM measuring and PWM setting, naturally. Doh. + */ + +#ifdef CONFIG_WEDGE +int fan_to_rpm_map[] = {3, 2, 0, 1}; +int fan_to_pwm_map[] = {7, 6, 0, 1}; +#define FANS 4 +// Tacho offset between front and rear fans: +#define REAR_FAN_OFFSET 4 +#define BACK_TO_BACK_FANS +#else +int fan_to_rpm_map[] = {0, 1}; +int fan_to_pwm_map[] = {0, 1}; +#define FANS 2 +// Tacho offset between front and rear fans: +#define REAR_FAN_OFFSET 1 +#endif + + +/* + * The measured RPM of the fans doesn't match linearly to the requested + * rate. In addition, there are coaxially mounted fans, so the rear fans + * feed into the front fans. The rear fans will run slower since they're + * grabbing still air, and the front fants are getting an extra boost. + * + * We'd like to measure the fan RPM and compare it to the expected RPM + * so that we can detect failed fans, so we have a table (derived from + * hardware testing): + */ + +struct rpm_to_pct_map { + ushort pct; + ushort rpm; +}; + +#ifdef CONFIG_WEDGE +struct rpm_to_pct_map rpm_front_map[] = {{30, 6150}, + {35, 7208}, + {40, 8195}, + {45, 9133}, + {50, 10017}, + {55, 10847}, + {60, 11612}, + {65, 12342}, + {70, 13057}, + {75, 13717}, + {80, 14305}, + {85, 14869}, + {90, 15384}, + {95, 15871}, + {100, 16095}}; +#define FRONT_MAP_SIZE (sizeof(rpm_front_map) / sizeof(struct rpm_to_pct_map)) + +struct rpm_to_pct_map rpm_rear_map[] = {{30, 3911}, + {35, 4760}, + {40, 5587}, + {45, 6434}, + {50, 7295}, + {55, 8187}, + {60, 9093}, + {65, 10008}, + {70, 10949}, + {75, 11883}, + {80, 12822}, + {85, 13726}, + {90, 14690}, + {95, 15516}, + {100, 15897}}; +#define REAR_MAP_SIZE (sizeof(rpm_rear_map) / sizeof(struct rpm_to_pct_map)) +#else +struct rpm_to_pct_map rpm_map[] = {{30, 3413}, + {35, 3859}, + {40, 4305}, + {45, 4686}, + {50, 5032}, + {55, 5432}, + {60, 5991}, + {65, 6460}, + {70, 6927}, + {75, 7379}, + {80, 7733}, + {85, 8156}, + {90, 8599}, + {95, 9049}, + {100, 9265}}; +struct rpm_to_pct_map *rpm_front_map = rpm_map; +struct rpm_to_pct_map *rpm_rear_map = rpm_map; +#define MAP_SIZE (sizeof(rpm_map) / sizeof(struct rpm_to_pct_map)) +#define FRONT_MAP_SIZE MAP_SIZE +#define REAR_MAP_SIZE MAP_SIZE + +#endif + +#define FAN_FAILURE_OFFSET 30 + +int fan_low = WEDGE_FAN_LOW; +int fan_medium = WEDGE_FAN_MEDIUM; +int fan_high = WEDGE_FAN_HIGH; +int fan_max = WEDGE_FAN_MAX; +int total_fans = FANS; +int fan_offset = 0; + +int temp_bottom = TEMP_BOTTOM; +int temp_top = TEMP_TOP; + +int report_temp = REPORT_TEMP; +bool verbose = false; + +void usage() { + fprintf(stderr, + "fand [-v] [-l <low-pct>] [-m <medium-pct>] " + "[-h <high-pct>]\n" + "\t[-b <temp-bottom>] [-t <temp-top>] [-r <report-temp>]\n\n" + "\tlow-pct defaults to %d%% fan\n" + "\tmedium-pct defaults to %d%% fan\n" + "\thigh-pct defaults to %d%% fan\n" + "\ttemp-bottom defaults to %dC\n" + "\ttemp-top defaults to %dC\n" + "\treport-temp defaults to every %d measurements\n\n" + "fand compensates for uServer temperature reading %d degrees low\n" + "kill with SIGUSR1 to stop watchdog\n", + fan_low, + fan_medium, + fan_high, + EXTERNAL_TEMPS(temp_bottom), + EXTERNAL_TEMPS(temp_top), + report_temp, + EXTERNAL_TEMPS(USERVER_TEMP_FUDGE)); + exit(1); +} + +/* We need to open the device each time to read a value */ + +int read_device(const char *device, int *value) { + FILE *fp; + int rc; + + fp = fopen(device, "r"); + if (!fp) { + int err = errno; + + syslog(LOG_INFO, "failed to open device %s", device); + return err; + } + + rc = fscanf(fp, "%d", value); + fclose(fp); + + if (rc != 1) { + syslog(LOG_INFO, "failed to read device %s", device); + return ENOENT; + } else { + return 0; + } +} + +/* We need to open the device again each time to write a value */ + +int write_device(const char *device, const char *value) { + FILE *fp; + int rc; + + fp = fopen(device, "w"); + if (!fp) { + int err = errno; + + syslog(LOG_INFO, "failed to open device for write %s", device); + return err; + } + + rc = fputs(value, fp); + fclose(fp); + + if (rc < 0) { + syslog(LOG_INFO, "failed to write device %s", device); + return ENOENT; + } else { + return 0; + } +} + +#ifdef CONFIG_WEDGE +int read_temp(const char *device, int *value) { + char full_name[LARGEST_DEVICE_NAME + 1]; + + /* We set an impossible value to check for errors */ + *value = BAD_TEMP; + snprintf( + full_name, LARGEST_DEVICE_NAME, "%s/temp1_input", device); + return read_device(full_name, value); +} + +int read_gpio_value(const int id, const char *device, int *value) { + char full_name[LARGEST_DEVICE_NAME]; + + snprintf(full_name, LARGEST_DEVICE_NAME, device, id); + return read_device(full_name, value); +} + +int read_gpio_values(const int start, const int count, + const char *device, int *result) { + int status = 0; + int value; + + *result = 0; + for (int i = 0; i < count; i++) { + status |= read_gpio_value(start + i, GPIO_PIN_ID, &value); + *result |= value << i; + } + return status; +} + +int read_ids(int *rev_id, int *board_id) { + int status = 0; + int value; + + status = read_gpio_values(GPIO_REV_ID_START, REV_IDS, GPIO_PIN_ID, rev_id); + if (status != 0) { + syslog(LOG_INFO, "failed to read rev_id"); + return status; + } + + int board_id_start; + if (*rev_id >= REV_ID_NEW_BOARD_ID) { + board_id_start = GPIO_BOARD_ID_START_NEW; + } else { + board_id_start = GPIO_BOARD_ID_START; + } + + status = read_gpio_values(board_id_start, BOARD_IDS, GPIO_PIN_ID, board_id); + if (status != 0) { + syslog(LOG_INFO, "failed to read board_id"); + } + return status; +} + +bool is_two_fan_board(bool verbose) { + struct wedge_eeprom_st eeprom; + /* Retrieve the board type from EEPROM */ + if (wedge_eeprom_parse(NULL, &eeprom) == 0) { + /* able to parse EEPROM */ + if (verbose) { + syslog(LOG_INFO, "board type is %s", eeprom.fbw_location); + } + /* only WEDGE is NOT two-fan board */ + return strncasecmp(eeprom.fbw_location, "wedge", + sizeof(eeprom.fbw_location)); + } else { + int status; + int board_id = 0; + int rev_id = 0; + /* + * Could not parse EEPROM. Most likely, it is an old HW without EEPROM. + * In this case, use board ID to distinguish if it is wedge or 6-pack. + */ + status = read_ids(&rev_id, &board_id); + if (verbose) { + syslog(LOG_INFO, "rev ID %d, board id %d", rev_id, board_id); + } + if (status == 0 && board_id != 0xf) { + return true; + } else { + return false; + } + } +} +#endif + +int read_fan_value(const int fan, const char *device, int *value) { + char device_name[LARGEST_DEVICE_NAME]; + char output_value[LARGEST_DEVICE_NAME]; + char full_name[LARGEST_DEVICE_NAME]; + + snprintf(device_name, LARGEST_DEVICE_NAME, device, fan); + snprintf(full_name, LARGEST_DEVICE_NAME, "%s/%s", PWM_DIR, device_name); + return read_device(full_name, value); +} + +int write_fan_value(const int fan, const char *device, const int value) { + char full_name[LARGEST_DEVICE_NAME]; + char device_name[LARGEST_DEVICE_NAME]; + char output_value[LARGEST_DEVICE_NAME]; + + snprintf(device_name, LARGEST_DEVICE_NAME, device, fan); + snprintf(full_name, LARGEST_DEVICE_NAME, "%s/%s", PWM_DIR, device_name); + snprintf(output_value, LARGEST_DEVICE_NAME, "%d", value); + return write_device(full_name, output_value); +} + +/* Return fan speed as a percentage of maximum -- not necessarily linear. */ + +int fan_rpm_to_pct(const struct rpm_to_pct_map *table, + const int table_len, + int rpm) { + int i; + + for (i = 0; i < table_len; i++) { + if (table[i].rpm > rpm) { + break; + } + } + + /* + * If the fan RPM is lower than the lowest value in the table, + * we may have a problem -- fans can only go so slow, and it might + * have stopped. In this case, we'll return an interpolated + * percentage, as just returning zero is even more problematic. + */ + + if (i == 0) { + return (rpm * table[i].pct) / table[i].rpm; + } else if (i == table_len) { // Fell off the top? + return table[i - 1].pct; + } + + // Interpolate the right percentage value: + + int percent_diff = table[i].pct - table[i - 1].pct; + int rpm_diff = table[i].rpm - table[i - 1].rpm; + int fan_diff = table[i].rpm - rpm; + + return table[i].pct - (fan_diff * percent_diff / rpm_diff); +} + +int fan_speed_okay(const int fan, const int speed, const int slop) { + int front_fan, rear_fan; + int front_pct, rear_pct; + int real_fan; + int okay; + + /* + * The hardware fan numbers are different from the physical order + * in the box, so we have to map them: + */ + + real_fan = fan_to_rpm_map[fan]; + + front_fan = 0; + read_fan_value(real_fan, "tacho%d_rpm", &front_fan); + front_pct = fan_rpm_to_pct(rpm_front_map, FRONT_MAP_SIZE, front_fan); +#ifdef BACK_TO_BACK_FANS + rear_fan = 0; + read_fan_value(real_fan + REAR_FAN_OFFSET, "tacho%d_rpm", &rear_fan); + rear_pct = fan_rpm_to_pct(rpm_rear_map, REAR_MAP_SIZE, rear_fan); +#endif + + + /* + * If the fans are broken, the measured rate will be rather + * different from the requested rate, and we can turn up the + * rest of the fans to compensate. The slop is the percentage + * of error that we'll tolerate. + * + * XXX: I suppose that we should only measure negative values; + * running too fast isn't really a problem. + */ + +#ifdef BACK_TO_BACK_FANS + okay = (abs(front_pct - speed) * 100 / speed < slop && + abs(rear_pct - speed) * 100 / speed < slop); +#else + okay = (abs(front_pct - speed) * 100 / speed < slop); +#endif + + if (!okay || verbose) { + syslog(!okay ? LOG_ALERT : LOG_INFO, +#ifdef BACK_TO_BACK_FANS + "fan %d rear %d (%d%%), front %d (%d%%), expected %d", +#else + "fan %d %d RPM (%d%%), expected %d", +#endif + fan, +#ifdef BACK_TO_BACK_FANS + rear_fan, + rear_pct, +#endif + front_fan, + front_pct, + speed); + } + + return okay; +} + +/* Set fan speed as a percentage */ + +int write_fan_speed(const int fan, const int value) { + /* + * The hardware fan numbers for pwm control are different from + * both the physical order in the box, and the mapping for reading + * the RPMs per fan, above. + */ + + int real_fan = fan_to_pwm_map[fan]; + + if (value == 0) { + return write_fan_value(real_fan, "pwm%d_en", 0); + } else { + int unit = value * PWM_UNIT_MAX / 100; + int status; + + if (unit == PWM_UNIT_MAX) + unit = 0; + + if ((status = write_fan_value(real_fan, "pwm%d_type", 0)) != 0 || + (status = write_fan_value(real_fan, "pwm%d_rising", 0)) != 0 || + (status = write_fan_value(real_fan, "pwm%d_falling", unit)) != 0 || + (status = write_fan_value(real_fan, "pwm%d_en", 1)) != 0) { + return status; + } + } +} + +/* Set up fan LEDs */ + +int write_fan_led(const int fan, const char *color) +{ +#ifdef CONFIG_WEDGE + return write_device(fan_led[fan], color); +#else + return 0; +#endif +} + +int server_shutdown(const char *why) { + int fan; + for (fan = 0; fan < total_fans; fan++) { + write_fan_speed(fan + fan_offset, fan_max); + } + + syslog(LOG_EMERG, "Shutting down: %s", why); + write_device(GPIO_USERVER_POWER_DIRECTION, "out"); + write_device(GPIO_USERVER_POWER, "0"); +#ifdef CONFIG_WEDGE + /* + * Putting T2 in reset generates a non-maskable interrupt to uS, + * the kernel running on uS might panic depending on its version. + * sleep 5s here to make sure uS is completely down. + */ + sleep(5); + + if (write_device(GPIO_T2_POWER_DIRECTION, "out") || + write_device(GPIO_T2_POWER, "1")) { + /* + * We're here because something has gone badly wrong. If we + * didn't manage to shut down the T2, cut power to the whole box, + * using the PMBus OPERATION register. This will require a power + * cycle (removal of both power inputs) to recover. + */ + syslog(LOG_EMERG, "T2 power off failed; turning off via ADM1278"); + system("rmmod adm1275"); + system("i2cset -y 12 0x10 0x01 00"); + } + +#else + // TODO(7088822): try throttling, then shutting down server. + syslog(LOG_EMERG, "Need to implement actual shutdown!\n"); +#endif + + /* + * We have to stop the watchdog, or the system will be automatically + * rebooted some seconds after fand exits (and stops kicking the + * watchdog). + */ + + stop_watchdog(); + + sleep(2); + exit(2); +} + +/* Gracefully shut down on receipt of a signal */ + +void fand_interrupt(int sig) +{ + int fan; + for (fan = 0; fan < total_fans; fan++) { + write_fan_speed(fan + fan_offset, fan_max); + } + + syslog(LOG_ALERT, "Shutting down fand on signal %s", strsignal(sig)); + if (sig == SIGUSR1) { + stop_watchdog(); + } + exit(3); +} + +int main(int argc, char **argv) { + /* Sensor values */ + +#ifdef CONFIG_WEDGE + int intake_temp; + int exhaust_temp; + int t2_temp; + int userver_temp; +#else + float intake_temp; + float exhaust_temp; + float userver_temp; +#endif + + int fan_speed = fan_high; + int bad_reads = 0; + int fan_failure = 0; + int fan_speed_changes = 0; + int old_speed; + + int fan_bad[FANS]; + int fan; + + unsigned log_count = 0; // How many times have we logged our temps? + int opt; + int prev_fans_bad = 0; + + struct sigaction sa; + + sa.sa_handler = fand_interrupt; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + sigaction(SIGUSR1, &sa, NULL); + + // Start writing to syslog as early as possible for diag purposes. + + openlog("fand", LOG_CONS, LOG_DAEMON); + +#ifdef CONFIG_WEDGE + if (is_two_fan_board(false)) { + /* Alternate, two fan configuration */ + total_fans = 2; + fan_offset = 2; /* fan 3 is the first */ + + fan_low = SIXPACK_FAN_LOW; + fan_medium = SIXPACK_FAN_MEDIUM; + fan_high = SIXPACK_FAN_HIGH; + fan_max = SIXPACK_FAN_MAX; + fan_speed = fan_high; + } +#endif + + while ((opt = getopt(argc, argv, "l:m:h:b:t:r:v")) != -1) { + switch (opt) { + case 'l': + fan_low = atoi(optarg); + break; + case 'm': + fan_medium = atoi(optarg); + break; + case 'h': + fan_high = atoi(optarg); + break; + case 'b': + temp_bottom = INTERNAL_TEMPS(atoi(optarg)); + break; + case 't': + temp_top = INTERNAL_TEMPS(atoi(optarg)); + break; + case 'r': + report_temp = atoi(optarg); + break; + case 'v': + verbose = true; + break; + default: + usage(); + break; + } + } + + if (optind > argc) { + usage(); + } + + if (temp_bottom > temp_top) { + fprintf(stderr, + "Should temp-bottom (%d) be higher than " + "temp-top (%d)? Starting anyway.\n", + EXTERNAL_TEMPS(temp_bottom), + EXTERNAL_TEMPS(temp_top)); + } + + if (fan_low > fan_medium || fan_low > fan_high || fan_medium > fan_high) { + fprintf(stderr, + "fan RPMs not strictly increasing " + "-- %d, %d, %d, starting anyway\n", + fan_low, + fan_medium, + fan_high); + } + + daemon(1, 0); + + if (verbose) { + syslog(LOG_DEBUG, "Starting up; system should have %d fans.", + total_fans); + } + + for (fan = 0; fan < total_fans; fan++) { + fan_bad[fan] = 0; + write_fan_speed(fan + fan_offset, fan_speed); + write_fan_led(fan + fan_offset, FAN_LED_BLUE); + } + +#ifdef CONFIG_YOSEMITE + /* Ensure that we can read from sensors before proceeding. */ + + while (yosemite_sensor_read(1, BIC_SENSOR_SOC_TEMP, &userver_temp)) + syslog(LOG_DEBUG, "Failed reading of SOC_TEMP."); +#endif + + /* Start watchdog in manual mode */ + start_watchdog(0); + + /* Set watchdog to persistent mode so timer expiry will happen independent + * of this process's liveliness. */ + set_persistent_watchdog(WATCHDOG_SET_PERSISTENT); + + sleep(5); /* Give the fans time to come up to speed */ + + while (1) { + int max_temp; + old_speed = fan_speed; + + /* Read sensors */ + +#ifdef CONFIG_WEDGE + read_temp(INTAKE_TEMP_DEVICE, &intake_temp); + read_temp(EXHAUST_TEMP_DEVICE, &exhaust_temp); + read_temp(T2_TEMP_DEVICE, &t2_temp); + read_temp(USERVER_TEMP_DEVICE, &userver_temp); + + /* + * uServer can be powered down, but all of the rest of the sensors + * should be readable at any time. + */ + + if ((intake_temp == BAD_TEMP || exhaust_temp == BAD_TEMP || + t2_temp == BAD_TEMP)) { + bad_reads++; + } +#else + userver_temp = BAD_TEMP; + if (yosemite_sensor_read(1, SP_SENSOR_INLET_TEMP, &intake_temp) || + yosemite_sensor_read(1, SP_SENSOR_OUTLET_TEMP, &exhaust_temp)) + bad_reads++; + + /* + * There are a number of 1S servers; any or all of them + * could be powered off and returning no values. Ignore these + * invalid values. + */ + for (int node = 1; node <= TOTAL_1S_SERVERS; node++) { + float new_temp; + if (!yosemite_sensor_read(node, BIC_SENSOR_SOC_TEMP, &new_temp)) { + if (userver_temp < new_temp) { + userver_temp = new_temp; + } + } + } +#endif + + if (bad_reads > BAD_READ_THRESHOLD) { + server_shutdown("Some sensors couldn't be read"); + } + + if (log_count++ % report_temp == 0) { + syslog(LOG_DEBUG, +#ifdef CONFIG_WEDGE + "Temp intake %d, t2 %d, " + " userver %d, exhaust %d, " + "fan speed %d, speed changes %d", +#else + "Temp intake %f, max server %f, exhaust %f, " + "fan speed %d, speed changes %d", +#endif + intake_temp, +#ifdef CONFIG_WEDGE + t2_temp, +#endif + userver_temp, + exhaust_temp, + fan_speed, + fan_speed_changes); + } + + /* Protection heuristics */ + + if (intake_temp > INTAKE_LIMIT) { + server_shutdown("Intake temp limit reached"); + } + +#ifdef CONFIG_WEDGE + if (t2_temp > T2_LIMIT) { + server_shutdown("T2 temp limit reached"); + } +#endif + + if (userver_temp + USERVER_TEMP_FUDGE > USERVER_LIMIT) { + server_shutdown("uServer temp limit reached"); + } + + /* + * Calculate change needed -- we should eventually + * do something more sophisticated, like PID. + * + * We should use the intake temperature to adjust this + * as well. + */ + +#ifdef CONFIG_WEDGE + if (t2_temp > userver_temp + USERVER_TEMP_FUDGE) { + max_temp = t2_temp; + } else { + max_temp = userver_temp + USERVER_TEMP_FUDGE; + } +#else + /* Yosemite could have no servers turned on, so ignore that case. */ + if (userver_temp + USERVER_TEMP_FUDGE > exhaust_temp) { + max_temp = userver_temp + USERVER_TEMP_FUDGE; + } else { + max_temp = exhaust_temp; + } +#endif + + /* + * If recovering from a fan problem, spin down fans gradually in case + * temperatures are still high. Gradual spin down also reduces wear on + * the fans. + */ + if (fan_speed == fan_max) { + if (fan_failure == 0) { + fan_speed = fan_high; + } + } else if (fan_speed == fan_high) { + if (max_temp + COOLDOWN_SLOP < temp_top) { + fan_speed = fan_medium; + } + } else if (fan_speed == fan_medium) { + if (max_temp > temp_top) { + fan_speed = fan_high; + } else if (max_temp + COOLDOWN_SLOP < temp_bottom) { + fan_speed = fan_low; + } + } else {/* low */ + if (max_temp > temp_bottom) { + fan_speed = fan_medium; + } + } + + /* + * Update fans only if there are no failed ones. If any fans failed + * earlier, all remaining fans should continue to run at max speed. + */ + + if (fan_failure == 0 && fan_speed != old_speed) { + syslog(LOG_NOTICE, + "Fan speed changing from %d to %d", + old_speed, + fan_speed); + fan_speed_changes++; + for (fan = 0; fan < total_fans; fan++) { + write_fan_speed(fan + fan_offset, fan_speed); + } + } + + /* + * Wait for some change. Typical I2C temperature sensors + * only provide a new value every second and a half, so + * checking again more quickly than that is a waste. + * + * We also have to wait for the fan changes to take effect + * before measuring them. + */ + + sleep(5); + + /* Check fan RPMs */ + + for (fan = 0; fan < total_fans; fan++) { + /* + * Make sure that we're within some percentage + * of the requested speed. + */ + if (fan_speed_okay(fan + fan_offset, fan_speed, FAN_FAILURE_OFFSET)) { + if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { + write_fan_led(fan + fan_offset, FAN_LED_BLUE); + syslog(LOG_NOTICE, + "Fan %d has recovered", + fan); + } + fan_bad[fan] = 0; + } else { + fan_bad[fan]++; + } + } + + fan_failure = 0; + for (fan = 0; fan < total_fans; fan++) { + if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { + fan_failure++; + write_fan_led(fan + fan_offset, FAN_LED_RED); + } + } + + if (fan_failure > 0) { + if (prev_fans_bad != fan_failure) { + syslog(LOG_ALERT, "%d fans failed", fan_failure); + } + + /* + * If fans are bad, we need to blast all of the + * fans at 100%; we don't bother to turn off + * the bad fans, in case they are all that is left. + * + * Note that we have a temporary bug with setting fans to + * 100% so we only do fan_max = 99%. + */ + + fan_speed = fan_max; + for (fan = 0; fan < total_fans; fan++) { + write_fan_speed(fan + fan_offset, fan_speed); + } + + if (fan_failure == total_fans) { + int count = 0; + for (fan = 0; fan < total_fans; fan++) { + if (fan_bad[fan] > FAN_SHUTDOWN_THRESHOLD) + count++; + } + if (count == total_fans) { + server_shutdown("all fans are bad for more than 12 cycles"); + } + } + + + /* + * Fans can be hot swapped and replaced; in which case the fan daemon + * will automatically detect the new fan and (assuming the new fan isn't + * itself faulty), automatically readjust the speeds for all fans down + * to a more suitable rpm. The fan daemon does not need to be restarted. + */ + } + + /* Suppress multiple warnings for similar number of fan failures. */ + prev_fans_bad = fan_failure; + + /* if everything is fine, restart the watchdog countdown. If this process + * is terminated, the persistent watchdog setting will cause the system + * to reboot after the watchdog timeout. */ + kick_watchdog(); + } +} diff --git a/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.cpp b/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.cpp new file mode 100644 index 0000000..ebb390a --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.cpp @@ -0,0 +1,201 @@ +/* + * watchdog + * + * Copyright 2014-present Facebook. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "watchdog.h" + +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <syslog.h> +#include <unistd.h> + +#define WATCHDOG_START_KEY "x" +#define WATCHDOG_STOP_KEY "X" +/* The "magic close" character (as defined in Linux watchdog specs). */ +#define WATCHDOG_PERSISTENT_KEY "V" +#define WATCHDOG_NON_PERSISTENT_KEY "a" + +static int watchdog_dev = -1; + +/* This is needed to prevent rapid consecutive stop/start watchdog calls from + * generating multiple threads. */ +static int watchdog_started = 0; + +static const char* watchdog_kick_key = WATCHDOG_PERSISTENT_KEY; +static pthread_t watchdog_tid; +static pthread_mutex_t watchdog_lock = PTHREAD_MUTEX_INITIALIZER; + +/* Forward declarations. */ +static void* watchdog_thread(void* args); +static int kick_watchdog_unsafe(); + +/* + * When started, this background thread will constantly reset the watchdog + * at every 5 second interval. + */ + +static void* watchdog_thread(void* args) { + + pthread_detach(pthread_self()); + + /* Make sure another instance of the thread hasn't already been started. */ + pthread_mutex_lock(&watchdog_lock); + if (watchdog_started) { + goto done; + } else { + watchdog_started = 1; + } + pthread_mutex_unlock(&watchdog_lock); + + /* Actual loop for refreshing the watchdog timer. */ + while (1) { + pthread_mutex_lock(&watchdog_lock); + if (watchdog_dev != -1) { + kick_watchdog_unsafe(); + } else { + break; + } + pthread_mutex_unlock(&watchdog_lock); + sleep(5); + } + + /* Broke out of loop because watchdog was stopped. */ + watchdog_started = 0; +done: + pthread_mutex_unlock(&watchdog_lock); + return NULL; +} + +/* + * Starts the watchdog timer. timer counts down and restarts the ARM chip + * upon timeout. use kick_watchdog() to restart the timer. + * + * Returns: 1 on success; 0 otherwise. + */ + +int start_watchdog(const int auto_mode) { + int status; + + pthread_mutex_lock(&watchdog_lock); + + /* Don't start the watchdog again if it has already been started. */ + if (watchdog_dev != -1) { + while ((status = write(watchdog_dev, WATCHDOG_START_KEY, 1)) == 0 + && errno == EINTR); + pthread_mutex_unlock(&watchdog_lock); + syslog(LOG_ALERT, "system watchdog already started.\n"); + return 0; + } + + while (((watchdog_dev = open("/dev/watchdog", O_WRONLY)) == -1) && + errno == EINTR); + + /* Fail if watchdog device is invalid or if the user asked for auto + * mode and the thread failed to spawn. */ + if ((watchdog_dev == -1) || + (auto_mode == 1 && watchdog_started == 0 && + pthread_create(&watchdog_tid, NULL, watchdog_thread, NULL) != 0)) { + goto fail; + } + + while ((status = write(watchdog_dev, WATCHDOG_START_KEY, 1)) == 0 + && errno == EINTR); + pthread_mutex_unlock(&watchdog_lock); + syslog(LOG_INFO, "system watchdog started.\n"); + return 1; + +fail: + if (watchdog_dev != -1) { + close(watchdog_dev); + watchdog_dev = -1; + } + + pthread_mutex_unlock(&watchdog_lock); + syslog(LOG_ALERT, "system watchdog failed to start!\n"); + return 0; +} + +/* + * Toggles between watchdog persistent modes. In persistent mode, the watchdog + * timer will continue to tick even after process shutdown. Under non- + * persistent mode, the watchdog timer will automatically be disabled when the + * process shuts down. + */ +void set_persistent_watchdog(enum watchdog_persistent_en persistent) { + switch (persistent) { + case WATCHDOG_SET_PERSISTENT: + watchdog_kick_key = WATCHDOG_PERSISTENT_KEY; + break; + default: + watchdog_kick_key = WATCHDOG_NON_PERSISTENT_KEY; + break; + } + kick_watchdog(); +} + +/* + * Restarts the countdown timer on the watchdog, delaying restart by another + * timeout period (default: 11 seconds as configured in the device driver). + * + * This function assumes the watchdog lock has already been acquired and is + * only used internally within the watchdog code. + * + * Returns 1 on success; 0 or -1 indicates failure (check errno). + */ + +static int kick_watchdog_unsafe() { + int status = 0; + if (watchdog_dev != -1) { + while ((status = write(watchdog_dev, watchdog_kick_key, 1)) == 0 + && errno == EINTR); + } + return status; +} + +/* + * Acquires the watchdog lock and resets the watchdog atomically. For use by + * library users. + */ + +int kick_watchdog() { + int result; + pthread_mutex_lock(&watchdog_lock); + result = kick_watchdog_unsafe(); + pthread_mutex_unlock(&watchdog_lock); + + return result; +} + +/* Shuts down the watchdog gracefully and disables the watchdog timer so that + * restarts no longer happen. + */ + +void stop_watchdog() { + int status; + pthread_mutex_lock(&watchdog_lock); + if (watchdog_dev != -1) { + while ((status = write(watchdog_dev, WATCHDOG_STOP_KEY, 1)) == 0 + && errno == EINTR); + close(watchdog_dev); + watchdog_dev = -1; + syslog(LOG_INFO, "system watchdog stopped.\n"); + } + pthread_mutex_unlock(&watchdog_lock); +} diff --git a/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.h b/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.h new file mode 100644 index 0000000..19b9944 --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl/watchdog.h @@ -0,0 +1,60 @@ +/* + * watchdog + * + * Copyright 2014-present Facebook. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Utility library to handle the aspeed watchdog. Only one watchdog + * should be in use at any time throughout the entire system (multiple users + * will not cause adverse effects but the behavior of the watchdog becomes + * undefined). + * + * The watchdog can be started in either manual or automatic mode. In manual + * mode, the watchdog has to be constantly reset by the user via the + * kick_watchdog() function. Under automatic mode, the watchdog will + * run in a separate thread and reset the timer on its own so no intervention + * is required from the user. + * + * In both modes, the watchdog timer will not stop when the process is + * terminated, unless a call to stop_watchdog() has been made beforehand, or + * if the user runs in manual mode and uses a non persistent watchdog kick. + * + * The default timeout for the watchdog is 11 seconds. When this time period + * elapses, the ARM chip is restarted and the kernel is rebooted. Other + * hardware state is not reset, so this may introduce strange behavior on + * reboot (example: an I2C bus may be left in the open state, triggering + * constant interrupts). In rare cases, this could result in the kernel + * failing to fully restart itself and thus preclude the possibility of + * reinitializing the watchdog timer. Someone will then have to go over and + * physically restart the machine. + * + * The alternative to the soft reset is to request the watchdog device driver + * for a hard reset on timeout. However this will stop the fans. If the + * kernel fails to fully boot and restart the fan daemon, the system could + * overheat. For this reason, we've chosen to take the risk of a stuck soft + * reset instead. + * + */ + +/* Forward declarations. */ +int start_watchdog(const int auto_mode); +enum watchdog_persistent_en { + WATCHDOG_SET_PERSISTENT, + WATCHDOG_SET_NONPERSISTENT, +}; +void set_persistent_watchdog(enum watchdog_persistent_en persistent); +int kick_watchdog(); +void stop_watchdog(); diff --git a/common/recipes-core/fan-ctrl/fan-ctrl_0.1.bb b/common/recipes-core/fan-ctrl/fan-ctrl_0.1.bb new file mode 100644 index 0000000..1abfaaa --- /dev/null +++ b/common/recipes-core/fan-ctrl/fan-ctrl_0.1.bb @@ -0,0 +1,61 @@ +# Copyright 2014-present Facebook. All Rights Reserved. +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program in a file named COPYING; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA +SUMMARY = "Fan controller" +DESCRIPTION = "The utilities to control fan." +SECTION = "base" +PR = "r1" +LICENSE = "GPLv2" +LIC_FILES_CHKSUM = "file://fand.cpp;beginline=6;endline=18;md5=da35978751a9d71b73679307c4d296ec" + +SRC_URI = "file://README \ + file://Makefile \ + file://fand.cpp \ + file://watchdog.h \ + file://watchdog.cpp \ + " + +S = "${WORKDIR}" + +binfiles = "fand \ + " + +otherfiles = "README" + +pkgdir = "fan_ctrl" + +do_install() { + dst="${D}/usr/local/fbpackages/${pkgdir}" + bin="${D}/usr/local/bin" + install -d $dst + install -d $bin + for f in ${binfiles}; do + install -m 755 $f ${dst}/$f + ln -snf ../fbpackages/${pkgdir}/$f ${bin}/$f + done + for f in ${otherfiles}; do + install -m 644 $f ${dst}/$f + done +} + +FBPACKAGEDIR = "${prefix}/local/fbpackages" + +FILES_${PN} = "${FBPACKAGEDIR}/fan_ctrl ${prefix}/local/bin" + +# Inhibit complaints about .debug directories for the fand binary: + +INHIBIT_PACKAGE_DEBUG_SPLIT = "1" +INHIBIT_PACKAGE_STRIP = "1" |