diff options
Diffstat (limited to 'meta-facebook/meta-wedge/recipes-wedge/fan-ctrl/fan-ctrl/fand.cpp')
-rw-r--r-- | meta-facebook/meta-wedge/recipes-wedge/fan-ctrl/fan-ctrl/fand.cpp | 851 |
1 files changed, 0 insertions, 851 deletions
diff --git a/meta-facebook/meta-wedge/recipes-wedge/fan-ctrl/fan-ctrl/fand.cpp b/meta-facebook/meta-wedge/recipes-wedge/fan-ctrl/fan-ctrl/fand.cpp deleted file mode 100644 index 24e107c..0000000 --- a/meta-facebook/meta-wedge/recipes-wedge/fan-ctrl/fan-ctrl/fand.cpp +++ /dev/null @@ -1,851 +0,0 @@ -/* - * fand - * - * Copyright 2014-present Facebook. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Daemon to manage the fan speed to ensure that we stay within a reasonable - * temperature range. We're using a simplistic algorithm to get started: - * - * If the fan is already on high, we'll move it to medium if we fall below - * a top temperature. If we're on medium, we'll move it to high - * if the temperature goes over the top value, and to low if the - * temperature falls to a bottom level. If the fan is on low, - * we'll increase the speed if the temperature rises to the top level. - * - * To ensure that we're not just turning the fans up, then back down again, - * we'll require an extra few degrees of temperature drop before we lower - * the fan speed. - * - * We check the RPM of the fans against the requested RPMs to determine - * whether the fans are failing, in which case we'll turn up all of - * the other fans and report the problem.. - * - * TODO: Implement a PID algorithm to closely track the ideal temperature. - * TODO: Determine if the daemon is already started. - */ - -/* Yeah, the file ends in .cpp, but it's a C program. Deal. */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <errno.h> -#include <signal.h> -#include <syslog.h> -#include "watchdog.h" - -#include "facebook/wedge_eeprom.h" - -/* Sensor definitions */ - -#define INTERNAL_TEMPS(x) ((x) * 1000) // stored a C * 1000 -#define EXTERNAL_TEMPS(x) ((x) / 1000) - -#define I2C_BUS_3_DIR "/sys/class/i2c-adapter/i2c-3/" -#define I2C_BUS_4_DIR "/sys/class/i2c-adapter/i2c-4/" - -#define INTAKE_TEMP_DEVICE I2C_BUS_3_DIR "3-0048" -#define T2_TEMP_DEVICE I2C_BUS_3_DIR "3-0049" -#define EXHAUST_TEMP_DEVICE I2C_BUS_3_DIR "3-004a" -#define USERVER_TEMP_DEVICE I2C_BUS_4_DIR "4-0040" - -/* - * The sensor for the uServer CPU is not on the CPU itself, so it reads - * a little low. We are special casing this, but we should obviously - * be thinking about a way to generalize these tweaks, and perhaps - * the entire configuration. JSON file? - */ - -#define USERVER_TEMP_FUDGE INTERNAL_TEMPS(10) - -#define BAD_TEMP INTERNAL_TEMPS(-60) - -#define BAD_READ_THRESHOLD 4 /* How many times can reads fail */ -#define FAN_FAILURE_THRESHOLD 4 /* How many times can a fan fail */ - -#define PWM_DIR "/sys/devices/platform/ast_pwm_tacho.0" - -#define PWM_UNIT_MAX 96 - -#define LARGEST_DEVICE_NAME 120 - -#define GPIO_USERVER_POWER_DIRECTION "/sys/class/gpio/gpio25/direction" -#define GPIO_USERVER_POWER "/sys/class/gpio/gpio25/value" -#define GPIO_T2_POWER_DIRECTION "/sys/class/gpio/gpio41/direction" -#define GPIO_T2_POWER "/sys/class/gpio/gpio41/value" - -#define GPIO_FAN0_LED "/sys/class/gpio/gpio53/value" -#define GPIO_FAN1_LED "/sys/class/gpio/gpio54/value" -#define GPIO_FAN2_LED "/sys/class/gpio/gpio55/value" -#define GPIO_FAN3_LED "/sys/class/gpio/gpio72/value" - -const char *fan_led[] = {GPIO_FAN0_LED, GPIO_FAN1_LED, - GPIO_FAN2_LED, GPIO_FAN3_LED}; - -#define FAN_LED_RED "0" -#define FAN_LED_BLUE "1" - -#define GPIO_PIN_ID "/sys/class/gpio/gpio%d/value" -#define REV_IDS 3 -#define GPIO_REV_ID_START 192 - -#define BOARD_IDS 4 -#define GPIO_BOARD_ID_START 160 - -/* - * With hardware revisions after 3, we use a different set of pins for - * the BOARD_ID. - */ - -#define REV_ID_NEW_BOARD_ID 3 -#define GPIO_BOARD_ID_START_NEW 166 - -#define REPORT_TEMP 720 /* Report temp every so many cycles */ - -/* Sensor limits and tuning parameters */ - -#define INTAKE_LIMIT INTERNAL_TEMPS(60) -#define T2_LIMIT INTERNAL_TEMPS(95) -#define USERVER_LIMIT INTERNAL_TEMPS(75) - -#define TEMP_TOP INTERNAL_TEMPS(70) -#define TEMP_BOTTOM INTERNAL_TEMPS(40) - -/* - * Toggling the fan constantly will wear it out (and annoy anyone who - * can hear it), so we'll only turn down the fan after the temperature - * has dipped a bit below the point at which we'd otherwise switch - * things up. - */ - -#define COOLDOWN_SLOP INTERNAL_TEMPS(6) - -#define FAN_LOW 35 -#define FAN_MEDIUM 50 -#define FAN_HIGH 70 -#define FAN_MAX 99 - -/* - * Mapping physical to hardware addresses for fans; it's different for - * RPM measuring and PWM setting, naturally. Doh. - */ - -int fan_to_rpm_map[] = {3, 2, 0, 1}; -int fan_to_pwm_map[] = {7, 6, 0, 1}; - -#define FANS 4 - -/* - * The measured RPM of the fans doesn't match linearly to the requested - * rate. In addition, there are coaxially mounted fans, so the rear fans - * feed into the front fans. The rear fans will run slower since they're - * grabbing still air, and the front fants are getting an extra boost. - * - * We'd like to measure the fan RPM and compare it to the expected RPM - * so that we can detect failed fans, so we have a table (derived from - * hardware testing): - */ - -struct rpm_to_pct_map { - ushort pct; - ushort rpm; -}; - -struct rpm_to_pct_map rpm_front_map[] = {{30, 6150}, - {35, 7208}, - {40, 8195}, - {45, 9133}, - {50, 10017}, - {55, 10847}, - {60, 11612}, - {65, 12342}, - {70, 13057}, - {75, 13717}, - {80, 14305}, - {85, 14869}, - {90, 15384}, - {95, 15871}, - {100, 16095}}; - -struct rpm_to_pct_map rpm_rear_map[] = {{30, 3911}, - {35, 4760}, - {40, 5587}, - {45, 6434}, - {50, 7295}, - {55, 8187}, - {60, 9093}, - {65, 10008}, - {70, 10949}, - {75, 11883}, - {80, 12822}, - {85, 13726}, - {90, 14690}, - {95, 15516}, - {100, 15897}}; - -#define FAN_FAILURE_OFFSET 30 - -int fan_low = FAN_LOW; -int fan_medium = FAN_MEDIUM; -int fan_high = FAN_HIGH; -int fan_max = FAN_MAX; -int total_fans = FANS; -int fan_offset = 0; - -int temp_bottom = TEMP_BOTTOM; -int temp_top = TEMP_TOP; - -int report_temp = REPORT_TEMP; -bool verbose = false; - -void usage() { - fprintf(stderr, - "fand [-v] [-l <low-pct>] [-m <medium-pct>] " - "[-h <high-pct>]\n" - "\t[-b <temp-bottom>] [-t <temp-top>] [-r <report-temp>]\n\n" - "\tlow-pct defaults to %d%% fan\n" - "\tmedium-pct defaults to %d%% fan\n" - "\thigh-pct defaults to %d%% fan\n" - "\ttemp-bottom defaults to %dC\n" - "\ttemp-top defaults to %dC\n" - "\treport-temp defaults to every %d measurements\n\n" - "fand compensates for uServer temperature reading %d degrees low\n" - "kill with SIGUSR1 to stop watchdog\n", - fan_low, - fan_medium, - fan_high, - EXTERNAL_TEMPS(temp_bottom), - EXTERNAL_TEMPS(temp_top), - report_temp, - EXTERNAL_TEMPS(USERVER_TEMP_FUDGE)); - exit(1); -} - -/* We need to open the device each time to read a value */ - -int read_device(const char *device, int *value) { - FILE *fp; - int rc; - - fp = fopen(device, "r"); - if (!fp) { - int err = errno; - - syslog(LOG_INFO, "failed to open device %s", device); - return err; - } - - rc = fscanf(fp, "%d", value); - fclose(fp); - - if (rc != 1) { - syslog(LOG_INFO, "failed to read device %s", device); - return ENOENT; - } else { - return 0; - } -} - -/* We need to open the device again each time to write a value */ - -int write_device(const char *device, const char *value) { - FILE *fp; - int rc; - - fp = fopen(device, "w"); - if (!fp) { - int err = errno; - - syslog(LOG_INFO, "failed to open device for write %s", device); - return err; - } - - rc = fputs(value, fp); - fclose(fp); - - if (rc < 0) { - syslog(LOG_INFO, "failed to write device %s", device); - return ENOENT; - } else { - return 0; - } -} - -int read_temp(const char *device, int *value) { - char full_name[LARGEST_DEVICE_NAME + 1]; - - /* We set an impossible value to check for errors */ - *value = BAD_TEMP; - snprintf( - full_name, LARGEST_DEVICE_NAME, "%s/temp1_input", device); - return read_device(full_name, value); -} - -int read_gpio_value(const int id, const char *device, int *value) { - char full_name[LARGEST_DEVICE_NAME]; - - snprintf(full_name, LARGEST_DEVICE_NAME, device, id); - return read_device(full_name, value); -} - -int read_gpio_values(const int start, const int count, - const char *device, int *result) { - int status = 0; - int value; - - *result = 0; - for (int i = 0; i < count; i++) { - status |= read_gpio_value(start + i, GPIO_PIN_ID, &value); - *result |= value << i; - } - return status; -} - -int read_ids(int *rev_id, int *board_id) { - int status = 0; - int value; - - status = read_gpio_values(GPIO_REV_ID_START, REV_IDS, GPIO_PIN_ID, rev_id); - if (status != 0) { - syslog(LOG_INFO, "failed to read rev_id"); - return status; - } - - int board_id_start; - if (*rev_id >= REV_ID_NEW_BOARD_ID) { - board_id_start = GPIO_BOARD_ID_START_NEW; - } else { - board_id_start = GPIO_BOARD_ID_START; - } - - status = read_gpio_values(board_id_start, BOARD_IDS, GPIO_PIN_ID, board_id); - if (status != 0) { - syslog(LOG_INFO, "failed to read board_id"); - } - return status; -} - -bool is_two_fan_board(bool verbose) { - struct wedge_eeprom_st eeprom; - /* Retrieve the board type from EEPROM */ - if (wedge_eeprom_parse(NULL, &eeprom) == 0) { - /* able to parse EEPROM */ - if (verbose) { - syslog(LOG_INFO, "board type is %s", eeprom.fbw_location); - } - /* only WEDGE is NOT two-fan board */ - return strncasecmp(eeprom.fbw_location, "wedge", - sizeof(eeprom.fbw_location)); - } else { - int status; - int board_id = 0; - int rev_id = 0; - /* - * Could not parse EEPROM. Most likely, it is an old HW without EEPROM. - * In this case, use board ID to distinguish if it is wedge or 6-pack. - */ - status = read_ids(&rev_id, &board_id); - if (verbose) { - syslog(LOG_INFO, "rev ID %d, board id %d", rev_id, board_id); - } - if (status == 0 && board_id != 0xf) { - return true; - } else { - return false; - } - } -} - -int read_fan_value(const int fan, const char *device, int *value) { - char device_name[LARGEST_DEVICE_NAME]; - char output_value[LARGEST_DEVICE_NAME]; - char full_name[LARGEST_DEVICE_NAME]; - - snprintf(device_name, LARGEST_DEVICE_NAME, device, fan); - snprintf(full_name, LARGEST_DEVICE_NAME, "%s/%s", PWM_DIR, device_name); - return read_device(full_name, value); -} - -int write_fan_value(const int fan, const char *device, const int value) { - char full_name[LARGEST_DEVICE_NAME]; - char device_name[LARGEST_DEVICE_NAME]; - char output_value[LARGEST_DEVICE_NAME]; - - snprintf(device_name, LARGEST_DEVICE_NAME, device, fan); - snprintf(full_name, LARGEST_DEVICE_NAME, "%s/%s", PWM_DIR, device_name); - snprintf(output_value, LARGEST_DEVICE_NAME, "%d", value); - return write_device(full_name, output_value); -} - -/* Return fan speed as a percentage of maximum -- not necessarily linear. */ - -int fan_rpm_to_pct(const struct rpm_to_pct_map *table, - const int table_len, - int rpm) { - int i; - - for (i = 0; i < table_len; i++) { - if (table[i].rpm > rpm) { - break; - } - } - - /* - * If the fan RPM is lower than the lowest value in the table, - * we may have a problem -- fans can only go so slow, and it might - * have stopped. In this case, we'll return an interpolated - * percentage, as just returning zero is even more problematic. - */ - - if (i == 0) { - return (rpm * table[i].pct) / table[i].rpm; - } else if (i == table_len) { // Fell off the top? - return table[i - 1].pct; - } - - // Interpolate the right percentage value: - - int percent_diff = table[i].pct - table[i - 1].pct; - int rpm_diff = table[i].rpm - table[i - 1].rpm; - int fan_diff = table[i].rpm - rpm; - - return table[i].pct - (fan_diff * percent_diff / rpm_diff); -} - -int fan_speed_okay(const int fan, const int speed, const int slop) { - int front_fan, rear_fan; - int front_pct, rear_pct; - int real_fan; - int okay; - - /* - * The hardware fan numbers are different from the physical order - * in the box, so we have to map them: - */ - - real_fan = fan_to_rpm_map[fan]; - - front_fan = 0; - rear_fan = 0; - read_fan_value(real_fan, "tacho%d_rpm", &front_fan); - read_fan_value(real_fan + 4, "tacho%d_rpm", &rear_fan); - - front_pct = - fan_rpm_to_pct(rpm_front_map, - sizeof(rpm_front_map) / sizeof(struct rpm_to_pct_map), - front_fan); - rear_pct = - fan_rpm_to_pct(rpm_rear_map, - sizeof(rpm_rear_map) / sizeof(struct rpm_to_pct_map), - rear_fan); - - /* - * If the fans are broken, the measured rate will be rather - * different from the requested rate, and we can turn up the - * rest of the fans to compensate. The slop is the percentage - * of error that we'll tolerate. - * - * XXX: I suppose that we should only measure negative values; - * running too fast isn't really a problem. - */ - - okay = (abs(front_pct - speed) * 100 / speed < slop && - abs(rear_pct - speed) * 100 / speed < slop); - - if (!okay || verbose) { - syslog(!okay ? LOG_ALERT : LOG_INFO, - "fan %d rear %d (%d%%), front %d (%d%%), expected %d", - fan, - rear_fan, - rear_pct, - front_fan, - front_pct, - speed); - } - - return okay; -} - -/* Set fan speed as a percentage */ - -int write_fan_speed(const int fan, const int value) { - /* - * The hardware fan numbers for pwm control are different from - * both the physical order in the box, and the mapping for reading - * the RPMs per fan, above. - */ - - int real_fan = fan_to_pwm_map[fan]; - - if (value == 0) { - return write_fan_value(real_fan, "pwm%d_en", 0); - } else { - int unit = value * PWM_UNIT_MAX / 100; - int status; - - if (unit == PWM_UNIT_MAX) - unit = 0; - - if ((status = write_fan_value(real_fan, "pwm%d_type", 0)) != 0 || - (status = write_fan_value(real_fan, "pwm%d_rising", 0)) != 0 || - (status = write_fan_value(real_fan, "pwm%d_falling", unit)) != 0 || - (status = write_fan_value(real_fan, "pwm%d_en", 1)) != 0) { - return status; - } - } -} - -/* Set up fan LEDs */ - -int write_fan_led(const int fan, const char *color) -{ - return write_device(fan_led[fan], color); -} - -int server_shutdown(const char *why) { - int fan; - for (fan = 0; fan < total_fans; fan++) { - write_fan_speed(fan + fan_offset, fan_max); - } - - syslog(LOG_EMERG, "Shutting down: %s", why); - write_device(GPIO_USERVER_POWER_DIRECTION, "out"); - write_device(GPIO_USERVER_POWER, "0"); - /* - * Putting T2 in reset generating a non-maskable interrupt to uS, - * the kernel running on uS might panic depending on its version. - * sleep 5s here to make sure uS is completely down. - */ - sleep(5); - write_device(GPIO_T2_POWER_DIRECTION, "out"); - write_device(GPIO_T2_POWER, "0"); - - /* - * We have to stop the watchdog, or the system will be automatically - * rebooted some seconds after fand exits (and stops kicking the - * watchdog). - */ - - stop_watchdog(); - - sleep(2); - exit(2); -} - -/* Gracefully shut down on receipt of a signal */ - -void fand_interrupt(int sig) -{ - int fan; - for (fan = 0; fan < total_fans; fan++) { - write_fan_speed(fan + fan_offset, fan_max); - } - - syslog(LOG_ALERT, "Shutting down fand on signal %s", strsignal(sig)); - if (sig == SIGUSR1) { - stop_watchdog(); - } - exit(3); -} - -int main(int argc, char **argv) { - /* Sensor values */ - - int intake_temp; - int exhaust_temp; - int t2_temp; - int userver_temp; - - int fan_speed = FAN_HIGH; - int bad_reads = 0; - int fan_failure = 0; - int fan_speed_changes = 0; - int old_speed; - - int fan_bad[FANS]; - int fan; - - unsigned log_count = 0; // How many times have we logged our temps? - int opt; - int prev_fans_bad = 0; - - struct sigaction sa; - - sa.sa_handler = fand_interrupt; - sa.sa_flags = 0; - sigemptyset(&sa.sa_mask); - - sigaction(SIGTERM, &sa, NULL); - sigaction(SIGINT, &sa, NULL); - sigaction(SIGUSR1, &sa, NULL); - - while ((opt = getopt(argc, argv, "l:m:h:b:t:r:v")) != -1) { - switch (opt) { - case 'l': - fan_low = atoi(optarg); - break; - case 'm': - fan_medium = atoi(optarg); - break; - case 'h': - fan_high = atoi(optarg); - break; - case 'b': - temp_bottom = INTERNAL_TEMPS(atoi(optarg)); - break; - case 't': - temp_top = INTERNAL_TEMPS(atoi(optarg)); - break; - case 'r': - report_temp = atoi(optarg); - break; - case 'v': - verbose = true; - break; - default: - usage(); - break; - } - } - - if (optind > argc) { - usage(); - } - - if (temp_bottom > temp_top) { - fprintf(stderr, - "Should temp-bottom (%d) be higher than " - "temp-top (%d)? Starting anyway.\n", - EXTERNAL_TEMPS(temp_bottom), - EXTERNAL_TEMPS(temp_top)); - } - - if (fan_low > fan_medium || fan_low > fan_high || fan_medium > fan_high) { - fprintf(stderr, - "fan RPMs not strictly increasing " - "-- %d, %d, %d, starting anyway\n", - fan_low, - fan_medium, - fan_high); - } - - daemon(1, 0); - openlog("fand", LOG_CONS, LOG_DAEMON); - - /* Start watchdog in manual mode */ - start_watchdog(0); - - /* Set watchdog to persistent mode so timer expiry will happen independent - * of this process's liveliness. */ - set_persistent_watchdog(WATCHDOG_SET_PERSISTENT); - - if (is_two_fan_board(verbose)) { - /* Alternate, two fan configuration */ - total_fans = 2; - fan_offset = 2; /* fan 3 is the first */ - } - - for (fan = 0; fan < total_fans; fan++) { - fan_bad[fan] = 0; - write_fan_speed(fan + fan_offset, fan_speed); - write_fan_led(fan + fan_offset, FAN_LED_BLUE); - } - - sleep(5); /* Give the fans time to come up to speed */ - - while (1) { - int max_temp; - old_speed = fan_speed; - - /* Read sensors */ - - read_temp(INTAKE_TEMP_DEVICE, &intake_temp); - read_temp(EXHAUST_TEMP_DEVICE, &exhaust_temp); - read_temp(T2_TEMP_DEVICE, &t2_temp); - read_temp(USERVER_TEMP_DEVICE, &userver_temp); - - /* - * uServer can be powered down, but all of the rest of the sensors - * should be readable at any time. - */ - - if ((intake_temp == BAD_TEMP || exhaust_temp == BAD_TEMP || - t2_temp == BAD_TEMP)) { - bad_reads++; - } - - if (bad_reads > BAD_READ_THRESHOLD) { - server_shutdown("Some sensors couldn't be read"); - } - - if (log_count++ % report_temp == 0) { - syslog(LOG_DEBUG, - "Temp intake %d, t2 %d, userver %d, exhaust %d, " - "fan speed %d, speed changes %d", - intake_temp, - t2_temp, - userver_temp, - exhaust_temp, - fan_speed, - fan_speed_changes); - } - - /* Protection heuristics */ - - if (intake_temp > INTAKE_LIMIT) { - server_shutdown("Intake temp limit reached"); - } - - if (t2_temp > T2_LIMIT) { - server_shutdown("T2 temp limit reached"); - } - - if (userver_temp + USERVER_TEMP_FUDGE > USERVER_LIMIT) { - server_shutdown("uServer temp limit reached"); - } - - /* - * Calculate change needed -- we should eventually - * do something more sophisticated, like PID. - * - * We should use the intake temperature to adjust this - * as well. - */ - - if (t2_temp > userver_temp + USERVER_TEMP_FUDGE) { - max_temp = t2_temp; - } else { - max_temp = userver_temp + USERVER_TEMP_FUDGE; - } - - /* - * If recovering from a fan problem, spin down fans gradually in case - * temperatures are still high. Gradual spin down also reduces wear on - * the fans. - */ - if (fan_speed == fan_max) { - if (fan_failure == 0) { - fan_speed = fan_high; - } - } else if (fan_speed == fan_high) { - if (max_temp + COOLDOWN_SLOP < temp_top) { - fan_speed = fan_medium; - } - } else if (fan_speed == fan_medium) { - if (max_temp > temp_top) { - fan_speed = fan_high; - } else if (max_temp + COOLDOWN_SLOP < temp_bottom) { - fan_speed = fan_low; - } - } else {/* low */ - if (max_temp > temp_bottom) { - fan_speed = fan_medium; - } - } - - /* - * Update fans only if there are no failed ones. If any fans failed - * earlier, all remaining fans should continue to run at max speed. - */ - - if (fan_failure == 0 && fan_speed != old_speed) { - syslog(LOG_NOTICE, - "Fan speed changing from %d to %d", - old_speed, - fan_speed); - fan_speed_changes++; - for (fan = 0; fan < total_fans; fan++) { - write_fan_speed(fan + fan_offset, fan_speed); - } - } - - /* - * Wait for some change. Typical I2C temperature sensors - * only provide a new value every second and a half, so - * checking again more quickly than that is a waste. - * - * We also have to wait for the fan changes to take effect - * before measuring them. - */ - - sleep(5); - - /* Check fan RPMs */ - - for (fan = 0; fan < total_fans; fan++) { - /* - * Make sure that we're within some percentage - * of the requested speed. - */ - if (fan_speed_okay(fan + fan_offset, fan_speed, FAN_FAILURE_OFFSET)) { - if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { - write_fan_led(fan + fan_offset, FAN_LED_BLUE); - syslog(LOG_NOTICE, - "Fan %d has recovered", - fan); - } - fan_bad[fan] = 0; - } else { - fan_bad[fan]++; - } - } - - fan_failure = 0; - for (fan = 0; fan < total_fans; fan++) { - if (fan_bad[fan] > FAN_FAILURE_THRESHOLD) { - fan_failure++; - write_fan_led(fan + fan_offset, FAN_LED_RED); - } - } - - if (fan_failure > 0) { - if (prev_fans_bad != fan_failure) { - syslog(LOG_ALERT, "%d fans failed", fan_failure); - } - - /* - * If fans are bad, we need to blast all of the - * fans at 100%; we don't bother to turn off - * the bad fans, in case they are all that is left. - * - * Note that we have a temporary bug with setting fans to - * 100% so we only do fan_max = 99%. - */ - - fan_speed = fan_max; - for (fan = 0; fan < total_fans; fan++) { - write_fan_speed(fan + fan_offset, fan_speed); - } - - /* - * Fans can be hot swapped and replaced; in which case the fan daemon - * will automatically detect the new fan and (assuming the new fan isn't - * itself faulty), automatically readjust the speeds for all fans down - * to a more suitable rpm. The fan daemon does not need to be restarted. - */ - } - - /* Suppress multiple warnings for similar number of fan failures. */ - prev_fans_bad = fan_failure; - - /* if everything is fine, restart the watchdog countdown. If this process - * is terminated, the persistent watchdog setting will cause the system - * to reboot after the watchdog timeout. */ - kick_watchdog(); - } -} |