summaryrefslogtreecommitdiffstats
path: root/sbin/hastd
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-09-28 13:08:51 +0000
committerpjd <pjd@FreeBSD.org>2011-09-28 13:08:51 +0000
commit374501b4952a809fac64c99d244ca1236ae5d5ca (patch)
tree6a9d1cc1d75b048ee6b4589e34493d484a6f71ab /sbin/hastd
parent444642eb38a412e2dc6dc790a370f2c6ea209ebf (diff)
downloadFreeBSD-src-374501b4952a809fac64c99d244ca1236ae5d5ca.zip
FreeBSD-src-374501b4952a809fac64c99d244ca1236ae5d5ca.tar.gz
After every activemap change flush disk's write cache, so that write
reordering won't cause the actual write to be committed before the corresponding extent is marked as dirty. It can be disabled in the configuration file. If BIO_FLUSH is not supported by the underlying file system, we log a warning and never send BIO_FLUSH again to that GEOM provider. MFC after: 3 days
Diffstat (limited to 'sbin/hastd')
-rw-r--r--sbin/hastd/hast.conf.523
-rw-r--r--sbin/hastd/hast.h2
-rw-r--r--sbin/hastd/hastd.c21
-rw-r--r--sbin/hastd/parse.y58
-rw-r--r--sbin/hastd/primary.c19
-rw-r--r--sbin/hastd/token.l2
6 files changed, 118 insertions, 7 deletions
diff --git a/sbin/hastd/hast.conf.5 b/sbin/hastd/hast.conf.5
index f53e6e3..6410ae7 100644
--- a/sbin/hastd/hast.conf.5
+++ b/sbin/hastd/hast.conf.5
@@ -63,6 +63,7 @@ checksum <algorithm>
compression <algorithm>
timeout <seconds>
exec <path>
+metaflush "on" | "off"
on <node> {
# Node section
@@ -85,12 +86,14 @@ resource <name> {
local <path>
timeout <seconds>
exec <path>
+ metaflush "on" | "off"
on <node> {
# Resource-node section
name <name>
# Required
local <path>
+ metaflush "on" | "off"
# Required
remote <addr>
source <addr>
@@ -100,6 +103,7 @@ resource <name> {
name <name>
# Required
local <path>
+ metaflush "on" | "off"
# Required
remote <addr>
source <addr>
@@ -318,6 +322,25 @@ It can be one of:
.Ar secondary ,
.Ar primary .
.Pp
+.It Ic metaflush on | off
+.Pp
+When set to
+.Va on ,
+flush the write cache of the local provider after every metadata (activemap) update.
+Flushing the write cache ensures that the provider will not reorder writes and that
+metadata will be properly updated before real data is stored.
+If the local provider does not support flushing the write cache (it returns
+.Er EOPNOTSUPP
+on the
+.Cm BIO_FLUSH
+request),
+.Nm hastd
+will disable
+.Ic metaflush
+automatically.
+The default value is
+.Va on .
+.Pp
.It Ic name Aq name
.Pp
GEOM provider name that will appear as
diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h
index a62b63a..7bfef4c 100644
--- a/sbin/hastd/hast.h
+++ b/sbin/hastd/hast.h
@@ -167,6 +167,8 @@ struct hast_resource {
off_t hr_local_mediasize;
/* Sector size of local provider. */
unsigned int hr_local_sectorsize;
+ /* Flush write cache on metadata updates? */
+ int hr_metaflush;
/* Descriptor for /dev/ggctl communication. */
int hr_ggatefd;
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index 6518f0c..d21f7f6 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -386,6 +386,12 @@ resource_needs_restart(const struct hast_resource *res0,
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
return (true);
+ /*
+ * When metaflush has changed we don't really need restart,
+ * but it is just easier this way.
+ */
+ if (res0->hr_metaflush != res1->hr_metaflush)
+ return (true);
}
return (false);
}
@@ -416,6 +422,8 @@ resource_needs_reload(const struct hast_resource *res0,
return (true);
if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
return (true);
+ if (res0->hr_metaflush != res1->hr_metaflush)
+ return (true);
return (false);
}
@@ -436,6 +444,7 @@ resource_reload(const struct hast_resource *res)
nv_add_int32(nvout, (int32_t)res->hr_compression, "compression");
nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
nv_add_string(nvout, res->hr_exec, "exec");
+ nv_add_int32(nvout, (int32_t)res->hr_metaflush, "metaflush");
if (nv_error(nvout) != 0) {
nv_free(nvout);
pjdlog_error("Unable to allocate header for reload message.");
@@ -591,12 +600,13 @@ hastd_reload(void)
* recreating it.
*
* We do just reload (send SIGHUP to worker process) if we act as
- * PRIMARY, but only if remote address, replication mode, timeout or
- * execution path has changed. For those, there is no need to restart
- * worker process.
+ * PRIMARY, but only if remote address, source address, replication
+ * mode, timeout, execution path or metaflush has changed.
+ * For those, there is no need to restart worker process.
* If PRIMARY receives SIGHUP, it will reconnect if remote address or
- * replication mode has changed or simply set new timeout if only
- * timeout has changed.
+ * source address has changed or it will set new timeout if only timeout
+ * has changed or it will update metaflush if only metaflush has
+ * changed.
*/
TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
@@ -627,6 +637,7 @@ hastd_reload(void)
cres->hr_timeout = nres->hr_timeout;
strlcpy(cres->hr_exec, nres->hr_exec,
sizeof(cres->hr_exec));
+ cres->hr_metaflush = nres->hr_metaflush;
if (cres->hr_workerpid != 0)
resource_reload(cres);
}
diff --git a/sbin/hastd/parse.y b/sbin/hastd/parse.y
index 01e8593a..e548a85 100644
--- a/sbin/hastd/parse.y
+++ b/sbin/hastd/parse.y
@@ -68,9 +68,11 @@ static int depth0_checksum;
static int depth0_compression;
static int depth0_timeout;
static char depth0_exec[PATH_MAX];
+static int depth0_metaflush;
static char depth1_provname[PATH_MAX];
static char depth1_localpath[PATH_MAX];
+static int depth1_metaflush;
extern void yyrestart(FILE *);
@@ -197,6 +199,7 @@ yy_config_parse(const char *config, bool exitonerror)
strlcpy(depth0_listen_tcp6, HASTD_LISTEN_TCP6,
sizeof(depth0_listen_tcp6));
depth0_exec[0] = '\0';
+ depth0_metaflush = 1;
lconfig = calloc(1, sizeof(*lconfig));
if (lconfig == NULL) {
@@ -328,6 +331,13 @@ yy_config_parse(const char *config, bool exitonerror)
strlcpy(curres->hr_exec, depth0_exec,
sizeof(curres->hr_exec));
}
+ if (curres->hr_metaflush == -1) {
+ /*
+ * Metaflush is not set at resource-level.
+ * Use global or default setting.
+ */
+ curres->hr_metaflush = depth0_metaflush;
+ }
}
return (lconfig);
@@ -355,8 +365,8 @@ yy_config_free(struct hastd_config *config)
}
%}
-%token CONTROL LISTEN PORT REPLICATION CHECKSUM COMPRESSION
-%token TIMEOUT EXEC EXTENTSIZE RESOURCE NAME LOCAL REMOTE SOURCE ON
+%token CONTROL LISTEN PORT REPLICATION CHECKSUM COMPRESSION METAFLUSH
+%token TIMEOUT EXEC EXTENTSIZE RESOURCE NAME LOCAL REMOTE SOURCE ON OFF
%token FULLSYNC MEMSYNC ASYNC NONE CRC32 SHA256 HOLE LZF
%token NUM STR OB CB
@@ -364,6 +374,7 @@ yy_config_free(struct hastd_config *config)
%type <num> replication_type
%type <num> checksum_type
%type <num> compression_type
+%type <num> boolean
%union
{
@@ -396,6 +407,8 @@ statement:
|
exec_statement
|
+ metaflush_statement
+ |
node_statement
|
resource_statement
@@ -585,6 +598,34 @@ exec_statement: EXEC STR
}
;
+metaflush_statement: METAFLUSH boolean
+ {
+ switch (depth) {
+ case 0:
+ depth0_metaflush = $2;
+ break;
+ case 1:
+ PJDLOG_ASSERT(curres != NULL);
+ depth1_metaflush = $2;
+ break;
+ case 2:
+ if (!mynode)
+ break;
+ PJDLOG_ASSERT(curres != NULL);
+ curres->hr_metaflush = $2;
+ break;
+ default:
+ PJDLOG_ABORT("metaflush at wrong depth level");
+ }
+ }
+ ;
+
+boolean:
+ ON { $$ = 1; }
+ |
+ OFF { $$ = 0; }
+ ;
+
node_statement: ON node_start OB node_entries CB
{
mynode = false;
@@ -660,6 +701,13 @@ resource_statement: RESOURCE resource_start OB resource_entries CB
strlcpy(curres->hr_localpath, depth1_localpath,
sizeof(curres->hr_localpath));
}
+ if (curres->hr_metaflush == -1 && depth1_metaflush != -1) {
+ /*
+ * Metaflush is not set at node-level,
+ * but is set at resource-level, use it.
+ */
+ curres->hr_metaflush = depth1_metaflush;
+ }
/*
* If provider name is not given, use resource name
@@ -713,6 +761,7 @@ resource_start: STR
*/
depth1_provname[0] = '\0';
depth1_localpath[0] = '\0';
+ depth1_metaflush = -1;
hadmynode = false;
curres = calloc(1, sizeof(*curres));
@@ -739,6 +788,7 @@ resource_start: STR
curres->hr_provname[0] = '\0';
curres->hr_localpath[0] = '\0';
curres->hr_localfd = -1;
+ curres->hr_metaflush = -1;
curres->hr_remoteaddr[0] = '\0';
curres->hr_sourceaddr[0] = '\0';
curres->hr_ggateunit = -1;
@@ -761,6 +811,8 @@ resource_entry:
|
exec_statement
|
+ metaflush_statement
+ |
name_statement
|
local_statement
@@ -869,6 +921,8 @@ resource_node_entry:
remote_statement
|
source_statement
+ |
+ metaflush_statement
;
remote_statement: REMOTE remote_str
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index ebb758e..08c3329 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -296,6 +296,17 @@ hast_activemap_flush(struct hast_resource *res)
pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
return (-1);
}
+ if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
+ if (errno == EOPNOTSUPP) {
+ pjdlog_warning("The %s provider doesn't support flushing write cache. Disabling it.",
+ res->hr_localpath);
+ res->hr_metaflush = 0;
+ } else {
+ pjdlog_errno(LOG_ERR,
+ "Unable to flush disk cache on activemap update");
+ return (-1);
+ }
+ }
return (0);
}
@@ -1999,6 +2010,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
nv_assert(nv, "compression");
nv_assert(nv, "timeout");
nv_assert(nv, "exec");
+ nv_assert(nv, "metaflush");
ncomps = HAST_NCOMPONENTS;
@@ -2009,6 +2021,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
#define MODIFIED_COMPRESSION 0x10
#define MODIFIED_TIMEOUT 0x20
#define MODIFIED_EXEC 0x40
+#define MODIFIED_METAFLUSH 0x80
modified = 0;
vstr = nv_get_string(nv, "remoteaddr");
@@ -2050,6 +2063,11 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
strlcpy(gres->hr_exec, vstr, sizeof(gres->hr_exec));
modified |= MODIFIED_EXEC;
}
+ vint = nv_get_int32(nv, "metaflush");
+ if (gres->hr_metaflush != vint) {
+ gres->hr_metaflush = vint;
+ modified |= MODIFIED_METAFLUSH;
+ }
/*
* Change timeout for connected sockets.
@@ -2099,6 +2117,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv)
#undef MODIFIED_COMPRESSION
#undef MODIFIED_TIMEOUT
#undef MODIFIED_EXEC
+#undef MODIFIED_METAFLUSH
pjdlog_info("Configuration reloaded successfully.");
}
diff --git a/sbin/hastd/token.l b/sbin/hastd/token.l
index 67c1e13..3a868d7 100644
--- a/sbin/hastd/token.l
+++ b/sbin/hastd/token.l
@@ -53,12 +53,14 @@ checksum { DP; return CHECKSUM; }
compression { DP; return COMPRESSION; }
timeout { DP; return TIMEOUT; }
exec { DP; return EXEC; }
+metaflush { DP; return METAFLUSH; }
resource { DP; return RESOURCE; }
name { DP; return NAME; }
local { DP; return LOCAL; }
remote { DP; return REMOTE; }
source { DP; return SOURCE; }
on { DP; return ON; }
+off { DP; return OFF; }
fullsync { DP; return FULLSYNC; }
memsync { DP; return MEMSYNC; }
async { DP; return ASYNC; }
OpenPOWER on IntegriCloud