diff options
author | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-12 10:21:01 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-12 10:21:01 -0800 |
commit | 741441ab7800f1eb031e74fd720f4f8f361678ed (patch) | |
tree | cd265afa96c3753116f570e483408ed8a94fe1d7 | |
parent | 659dba34807692a6ebd55e7859dff2c7cb1b005d (diff) | |
parent | 828ae6afbef03bfe107a4a8cc38798419d6a2765 (diff) | |
download | op-kernel-dev-741441ab7800f1eb031e74fd720f4f8f361678ed.zip op-kernel-dev-741441ab7800f1eb031e74fd720f4f8f361678ed.tar.gz |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2:
[patch 3/3] OCFS2 Configurable timeouts - Protocol changes
[patch 2/3] OCFS2 Configurable timeouts
[patch 1/3] OCFS2 - Expose struct o2nm_cluster
ocfs2: Synchronize feature incompat flags in ocfs2_fs.h
ocfs2: update mount option documentation
ocfs2: local mounts
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 3 | ||||
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 192 | ||||
-rw-r--r-- | fs/ocfs2/cluster/nodemanager.h | 17 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 152 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 15 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 79 | ||||
-rw-r--r-- | fs/ocfs2/heartbeat.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 46 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 14 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 90 | ||||
-rw-r--r-- | fs/ocfs2/vote.c | 3 |
17 files changed, 549 insertions, 106 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index af6defd..8ccf0c1 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -54,3 +54,6 @@ errors=panic Panic and halt the machine if an error occurs. intr (*) Allow signals to interrupt cluster operations. nointr Do not allow signals to interrupt cluster operations. +atime_quantum=60(*) OCFS2 will not update atime unless this number + of seconds has passed since the last update. + Set to zero to always update atime. diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index d11753c..357f1d5 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -35,7 +35,7 @@ /* for now we operate under the assertion that there can be only one * cluster active at a time. Changing this will require trickling * cluster references throughout where nodes are looked up */ -static struct o2nm_cluster *o2nm_single_cluster = NULL; +struct o2nm_cluster *o2nm_single_cluster = NULL; #define OCFS2_MAX_HB_CTL_PATH 256 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; @@ -97,17 +97,6 @@ const char *o2nm_get_hb_ctl_path(void) } EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); -struct o2nm_cluster { - struct config_group cl_group; - unsigned cl_has_local:1; - u8 cl_local_node; - rwlock_t cl_nodes_lock; - struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; - struct rb_root cl_node_ip_tree; - /* this bitmap is part of a hack for disk bitmap.. will go eventually. 
- zab */ - unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; -}; - struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { struct o2nm_node *node = NULL; @@ -543,6 +532,179 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) } #endif +struct o2nm_cluster_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct o2nm_cluster *, char *); + ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); +}; + +static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, + unsigned int *val) +{ + unsigned long tmp; + char *p = (char *)page; + + tmp = simple_strtoul(p, &p, 0); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp == 0) + return -EINVAL; + if (tmp >= (u32)-1) + return -ERANGE; + + *val = tmp; + + return count; +} + +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( + struct o2nm_cluster *cluster, char *page) +{ + return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); +} + +static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) +{ + ssize_t ret; + unsigned int val; + + ret = o2nm_cluster_attr_write(page, count, &val); + + if (ret > 0) { + if (cluster->cl_idle_timeout_ms != val + && o2net_num_connected_peers()) { + mlog(ML_NOTICE, + "o2net: cannot change idle timeout after " + "the first peer has agreed to it." 
+ " %d connected peers\n", + o2net_num_connected_peers()); + ret = -EINVAL; + } else if (val <= cluster->cl_keepalive_delay_ms) { + mlog(ML_NOTICE, "o2net: idle timeout must be larger " + "than keepalive delay\n"); + ret = -EINVAL; + } else { + cluster->cl_idle_timeout_ms = val; + } + } + + return ret; +} + +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( + struct o2nm_cluster *cluster, char *page) +{ + return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); +} + +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) +{ + ssize_t ret; + unsigned int val; + + ret = o2nm_cluster_attr_write(page, count, &val); + + if (ret > 0) { + if (cluster->cl_keepalive_delay_ms != val + && o2net_num_connected_peers()) { + mlog(ML_NOTICE, + "o2net: cannot change keepalive delay after" + " the first peer has agreed to it." + " %d connected peers\n", + o2net_num_connected_peers()); + ret = -EINVAL; + } else if (val >= cluster->cl_idle_timeout_ms) { + mlog(ML_NOTICE, "o2net: keepalive delay must be " + "smaller than idle timeout\n"); + ret = -EINVAL; + } else { + cluster->cl_keepalive_delay_ms = val; + } + } + + return ret; +} + +static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( + struct o2nm_cluster *cluster, char *page) +{ + return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); +} + +static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) +{ + return o2nm_cluster_attr_write(page, count, + &cluster->cl_reconnect_delay_ms); +} +static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "idle_timeout_ms", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = o2nm_cluster_attr_idle_timeout_ms_read, + .store = o2nm_cluster_attr_idle_timeout_ms_write, +}; + +static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { + .attr = { .ca_owner = 
THIS_MODULE, + .ca_name = "keepalive_delay_ms", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = o2nm_cluster_attr_keepalive_delay_ms_read, + .store = o2nm_cluster_attr_keepalive_delay_ms_write, +}; + +static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "reconnect_delay_ms", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = o2nm_cluster_attr_reconnect_delay_ms_read, + .store = o2nm_cluster_attr_reconnect_delay_ms_write, +}; + +static struct configfs_attribute *o2nm_cluster_attrs[] = { + &o2nm_cluster_attr_idle_timeout_ms.attr, + &o2nm_cluster_attr_keepalive_delay_ms.attr, + &o2nm_cluster_attr_reconnect_delay_ms.attr, + NULL, +}; +static ssize_t o2nm_cluster_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct o2nm_cluster *cluster = to_o2nm_cluster(item); + struct o2nm_cluster_attribute *o2nm_cluster_attr = + container_of(attr, struct o2nm_cluster_attribute, attr); + ssize_t ret = 0; + + if (o2nm_cluster_attr->show) + ret = o2nm_cluster_attr->show(cluster, page); + return ret; +} + +static ssize_t o2nm_cluster_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct o2nm_cluster *cluster = to_o2nm_cluster(item); + struct o2nm_cluster_attribute *o2nm_cluster_attr = + container_of(attr, struct o2nm_cluster_attribute, attr); + ssize_t ret; + + if (o2nm_cluster_attr->store == NULL) { + ret = -EINVAL; + goto out; + } + + ret = o2nm_cluster_attr->store(cluster, page, count); + if (ret < count) + goto out; +out: + return ret; +} + static struct config_item *o2nm_node_group_make_item(struct config_group *group, const char *name) { @@ -624,10 +786,13 @@ static void o2nm_cluster_release(struct config_item *item) static struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, + .show_attribute = o2nm_cluster_show, + .store_attribute = o2nm_cluster_store, }; static struct 
config_item_type o2nm_cluster_type = { .ct_item_ops = &o2nm_cluster_item_ops, + .ct_attrs = o2nm_cluster_attrs, .ct_owner = THIS_MODULE, }; @@ -678,6 +843,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g cluster->cl_group.default_groups[2] = NULL; rwlock_init(&cluster->cl_nodes_lock); cluster->cl_node_ip_tree = RB_ROOT; + cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; + cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; + cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; ret = &cluster->cl_group; o2nm_single_cluster = cluster; diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index fce8033..8fb23ca 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -53,6 +53,23 @@ struct o2nm_node { unsigned long nd_set_attributes; }; +struct o2nm_cluster { + struct config_group cl_group; + unsigned cl_has_local:1; + u8 cl_local_node; + rwlock_t cl_nodes_lock; + struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; + struct rb_root cl_node_ip_tree; + unsigned int cl_idle_timeout_ms; + unsigned int cl_keepalive_delay_ms; + unsigned int cl_reconnect_delay_ms; + + /* this bitmap is part of a hack for disk bitmap.. will go eventually. 
- zab */ + unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; +}; + +extern struct o2nm_cluster *o2nm_single_cluster; + u8 o2nm_this_node(void); int o2nm_configured_node_map(unsigned long *map, unsigned bytes); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9b3209d..457753d 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes); static void o2net_sc_send_keep_req(struct work_struct *work); static void o2net_idle_timer(unsigned long data); static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); +static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); + +/* + * FIXME: These should use to_o2nm_cluster_from_node(), but we end up + * losing our parent link to the cluster during shutdown. This can be + * solved by adding a pre-removal callback to configfs, or passing + * around the cluster with the node. -jeffm + */ +static inline int o2net_reconnect_delay(struct o2nm_node *node) +{ + return o2nm_single_cluster->cl_reconnect_delay_ms; +} + +static inline int o2net_keepalive_delay(struct o2nm_node *node) +{ + return o2nm_single_cluster->cl_keepalive_delay_ms; +} + +static inline int o2net_idle_timeout(struct o2nm_node *node) +{ + return o2nm_single_cluster->cl_idle_timeout_ms; +} static inline int o2net_sys_err_to_errno(enum o2net_system_error err) { @@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref) { struct o2net_sock_container *sc = container_of(kref, struct o2net_sock_container, sc_kref); + BUG_ON(timer_pending(&sc->sc_idle_timeout)); + sclog(sc, "releasing\n"); if (sc->sc_sock) { @@ -356,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc, sc_put(sc); } +static atomic_t o2net_connected_peers = ATOMIC_INIT(0); + +int o2net_num_connected_peers(void) +{ + return atomic_read(&o2net_connected_peers); +} + static void o2net_set_nn_state(struct o2net_node *nn, struct 
o2net_sock_container *sc, unsigned valid, int err) @@ -366,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn, assert_spin_locked(&nn->nn_lock); + if (old_sc && !sc) + atomic_dec(&o2net_connected_peers); + else if (!old_sc && sc) + atomic_inc(&o2net_connected_peers); + /* the node num comparison and single connect/accept path should stop * an non-null sc from being overwritten with another */ BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); @@ -424,9 +460,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, /* delay if we're withing a RECONNECT_DELAY of the * last attempt */ delay = (nn->nn_last_connect_attempt + - msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) + msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) - jiffies; - if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) + if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node))) delay = 0; mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); @@ -1099,13 +1135,51 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) return -1; } + /* + * Ensure timeouts are consistent with other nodes, otherwise + * we can end up with one node thinking that the other must be down, + * but isn't. This can ultimately cause corruption. + */ + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != + o2net_idle_timeout(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_idle_timeout_ms), + o2net_idle_timeout(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != + o2net_keepalive_delay(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " + "%u ms, but we use %u ms locally. 
disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + o2net_keepalive_delay(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != + O2HB_MAX_WRITE_TIMEOUT_MS) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + sc->sc_handshake_ok = 1; spin_lock(&nn->nn_lock); /* set valid and queue the idle timers only if it hasn't been * shut down already */ if (nn->nn_sc == sc) { - o2net_sc_postpone_idle(sc); + o2net_sc_reset_idle_timer(sc); o2net_set_nn_state(nn, sc, 1, 0); } spin_unlock(&nn->nn_lock); @@ -1131,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) sclog(sc, "receiving\n"); do_gettimeofday(&sc->sc_tv_advance_start); + if (unlikely(sc->sc_handshake_ok == 0)) { + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { + data = page_address(sc->sc_page) + sc->sc_page_off; + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); + if (ret > 0) + sc->sc_page_off += ret; + } + + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { + o2net_check_handshake(sc); + if (unlikely(sc->sc_handshake_ok == 0)) + ret = -EPROTO; + } + goto out; + } + /* do we need more header? 
*/ if (sc->sc_page_off < sizeof(struct o2net_msg)) { data = page_address(sc->sc_page) + sc->sc_page_off; @@ -1138,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); if (ret > 0) { sc->sc_page_off += ret; - - /* this working relies on the handshake being - * smaller than the normal message header */ - if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& - !sc->sc_handshake_ok && o2net_check_handshake(sc)) { - ret = -EPROTO; - goto out; - } - /* only swab incoming here.. we can * only get here once as we cross from * being under to over */ @@ -1248,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock) return ret; } +static void o2net_initialize_handshake(void) +{ + o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( + o2net_idle_timeout(NULL)); + o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( + o2net_keepalive_delay(NULL)); + o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( + o2net_reconnect_delay(NULL)); +} + /* ------------------------------------------------------------ */ /* called when a connect completes and after a sock is accepted. 
the @@ -1262,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work) (unsigned long long)O2NET_PROTOCOL_VERSION, (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); sc_put(sc); } @@ -1287,8 +1382,10 @@ static void o2net_idle_timer(unsigned long data) do_gettimeofday(&now); - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " - "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); + printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " + "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), + o2net_idle_timeout(sc->sc_node) / 1000, + o2net_idle_timeout(sc->sc_node) % 1000); mlog(ML_NOTICE, "here are some times that might help debug the " "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", @@ -1306,14 +1403,21 @@ static void o2net_idle_timer(unsigned long data) o2net_sc_queue_work(sc, &sc->sc_shutdown_work); } -static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) +static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) { o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, - O2NET_KEEPALIVE_DELAY_SECS * HZ); + msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); do_gettimeofday(&sc->sc_tv_timer); mod_timer(&sc->sc_idle_timeout, - jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); + jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); +} + +static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) +{ + /* Only push out an existing timer */ + if (timer_pending(&sc->sc_idle_timeout)) + o2net_sc_reset_idle_timer(sc); } /* this work func is kicked whenever a path sets the nn state which doesn't @@ -1435,9 +1539,12 @@ static void o2net_connect_expired(struct work_struct *work) spin_lock(&nn->nn_lock); if (!nn->nn_sc_valid) { + struct o2nm_node *node = 
nn->nn_sc->sc_node; mlog(ML_ERROR, "no connection established with node %u after " - "%u seconds, giving up and returning errors.\n", - o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); + "%u.%u seconds, giving up and returning errors.\n", + o2net_num_from_nn(nn), + o2net_idle_timeout(node) / 1000, + o2net_idle_timeout(node) % 1000); o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); } @@ -1478,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, if (node_num != o2nm_this_node()) o2net_disconnect_node(node); + + BUG_ON(atomic_read(&o2net_connected_peers) < 0); } static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, @@ -1489,14 +1598,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, /* ensure an immediate connect attempt */ nn->nn_last_connect_attempt = jiffies - - (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); + (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); if (node_num != o2nm_this_node()) { /* heartbeat doesn't work unless a local node number is * configured and doing so brings up the o2net_wq, so we can * use it.. */ queue_delayed_work(o2net_wq, &nn->nn_connect_expired, - O2NET_IDLE_TIMEOUT_SECS * HZ); + msecs_to_jiffies(o2net_idle_timeout(node))); /* believe it or not, accept and node hearbeating testing * can succeed for this node before we got here.. 
so @@ -1641,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock) o2net_register_callbacks(sc->sc_sock->sk, sc); o2net_sc_queue_work(sc, &sc->sc_rx_work); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); out: diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 616ff2b..21a4e43 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data) #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) +/* same as hb delay, we're waiting for another node to recognize our hb */ +#define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 + +#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 +#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 + + /* TODO: figure this out.... */ static inline int o2net_link_down(int err, struct socket *sock) { @@ -101,6 +108,7 @@ void o2net_unregister_hb_callbacks(void); int o2net_start_listening(struct o2nm_node *node); void o2net_stop_listening(struct o2nm_node *node); void o2net_disconnect_node(struct o2nm_node *node); +int o2net_num_connected_peers(void); int o2net_init(void); void o2net_exit(void); diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index daebbd3..b700dc9 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -27,23 +27,20 @@ #define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) -/* same as hb delay, we're waiting for another node to recognize our hb */ -#define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS - /* we're delaying our quorum decision so that heartbeat will have timed * out truly dead nodes by the time we come around to making decisions * on their number */ #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) -#define O2NET_KEEPALIVE_DELAY_SECS 5 -#define O2NET_IDLE_TIMEOUT_SECS 10 - /* * This version number represents quite a lot, 
unfortunately. It not * only represents the raw network message protocol on the wire but also * locking semantics of the file system using the protocol. It should * be somewhere else, I'm sure, but right now it isn't. * + * New in version 5: + * - Network timeout checking protocol + * * New in version 4: * - Remove i_generation from lock names for better stat performance. * @@ -54,10 +51,14 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 4ULL +#define O2NET_PROTOCOL_VERSION 5ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; + __be32 o2hb_heartbeat_timeout_ms; + __be32 o2net_idle_timeout_ms; + __be32 o2net_keepalive_delay_ms; + __be32 o2net_reconnect_delay_ms; }; struct o2net_node { diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 69fba16..e622013 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -770,7 +770,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, int dlm_flags) { int ret = 0; - enum dlm_status status; + enum dlm_status status = DLM_NORMAL; unsigned long flags; mlog_entry_void(); @@ -1138,6 +1138,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) { int status, level; struct ocfs2_lock_res *lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!inode); @@ -1147,6 +1148,9 @@ int ocfs2_rw_lock(struct inode *inode, int write) (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); + if (ocfs2_mount_local(osb)) + return 0; + lockres = &OCFS2_I(inode)->ip_rw_lockres; level = write ? LKM_EXMODE : LKM_PRMODE; @@ -1164,6 +1168,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write) { int level = write ? 
LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); @@ -1171,7 +1176,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write) (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); } @@ -1182,6 +1188,7 @@ int ocfs2_data_lock_full(struct inode *inode, { int status = 0, level; struct ocfs2_lock_res *lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!inode); @@ -1201,6 +1208,9 @@ int ocfs2_data_lock_full(struct inode *inode, goto out; } + if (ocfs2_mount_local(osb)) + goto out; + lockres = &OCFS2_I(inode)->ip_data_lockres; level = write ? LKM_EXMODE : LKM_PRMODE; @@ -1269,6 +1279,7 @@ void ocfs2_data_unlock(struct inode *inode, { int level = write ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); @@ -1276,7 +1287,8 @@ void ocfs2_data_unlock(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? 
"EXMODE" : "PRMODE"); - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && + !ocfs2_mount_local(osb)) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); @@ -1467,8 +1479,9 @@ static int ocfs2_meta_lock_update(struct inode *inode, { int status = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_lock_res *lockres; + struct ocfs2_lock_res *lockres = NULL; struct ocfs2_dinode *fe; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); @@ -1483,10 +1496,12 @@ static int ocfs2_meta_lock_update(struct inode *inode, } spin_unlock(&oi->ip_lock); - lockres = &oi->ip_meta_lockres; + if (!ocfs2_mount_local(osb)) { + lockres = &oi->ip_meta_lockres; - if (!ocfs2_should_refresh_lock_res(lockres)) - goto bail; + if (!ocfs2_should_refresh_lock_res(lockres)) + goto bail; + } /* This will discard any caching information we might have had * for the inode metadata. */ @@ -1496,7 +1511,7 @@ static int ocfs2_meta_lock_update(struct inode *inode, * map (directories, bitmap files, etc) */ ocfs2_extent_map_trunc(inode, 0); - if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { + if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) { mlog(0, "Trusting LVB on inode %llu\n", (unsigned long long)oi->ip_blkno); ocfs2_refresh_inode_from_lvb(inode); @@ -1543,7 +1558,8 @@ static int ocfs2_meta_lock_update(struct inode *inode, status = 0; bail_refresh: - ocfs2_complete_lock_res_refresh(lockres, status); + if (lockres) + ocfs2_complete_lock_res_refresh(lockres, status); bail: mlog_exit(status); return status; @@ -1585,7 +1601,7 @@ int ocfs2_meta_lock_full(struct inode *inode, int arg_flags) { int status, level, dlm_flags, acquired; - struct ocfs2_lock_res *lockres; + struct ocfs2_lock_res *lockres = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *local_bh = NULL; @@ -1607,6 +1623,9 @@ int ocfs2_meta_lock_full(struct inode *inode, goto bail; } + if 
(ocfs2_mount_local(osb)) + goto local; + if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) wait_event(osb->recovery_event, ocfs2_node_map_is_empty(osb, &osb->recovery_map)); @@ -1636,6 +1655,7 @@ int ocfs2_meta_lock_full(struct inode *inode, wait_event(osb->recovery_event, ocfs2_node_map_is_empty(osb, &osb->recovery_map)); +local: /* * We only see this flag if we're being called from * ocfs2_read_locked_inode(). It means we're locking an inode @@ -1644,7 +1664,8 @@ int ocfs2_meta_lock_full(struct inode *inode, */ if (inode->i_state & I_NEW) { status = 0; - ocfs2_complete_lock_res_refresh(lockres, 0); + if (lockres) + ocfs2_complete_lock_res_refresh(lockres, 0); goto bail; } @@ -1767,6 +1788,7 @@ void ocfs2_meta_unlock(struct inode *inode, { int level = ex ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); @@ -1774,7 +1796,8 @@ void ocfs2_meta_unlock(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? "EXMODE" : "PRMODE"); - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && + !ocfs2_mount_local(osb)) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); @@ -1783,7 +1806,7 @@ void ocfs2_meta_unlock(struct inode *inode, int ocfs2_super_lock(struct ocfs2_super *osb, int ex) { - int status; + int status = 0; int level = ex ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; struct buffer_head *bh; @@ -1794,6 +1817,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb, if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ocfs2_mount_local(osb)) + goto bail; + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); if (status < 0) { mlog_errno(status); @@ -1832,7 +1858,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, int level = ex ? 
LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; - ocfs2_cluster_unlock(osb, lockres, level); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, level); } int ocfs2_rename_lock(struct ocfs2_super *osb) @@ -1843,6 +1870,9 @@ int ocfs2_rename_lock(struct ocfs2_super *osb) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ocfs2_mount_local(osb)) + return 0; + status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); if (status < 0) mlog_errno(status); @@ -1854,7 +1884,8 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) { struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; - ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); } int ocfs2_dentry_lock(struct dentry *dentry, int ex) @@ -1869,6 +1900,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ocfs2_mount_local(osb)) + return 0; + ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); if (ret < 0) mlog_errno(ret); @@ -1882,7 +1916,8 @@ void ocfs2_dentry_unlock(struct dentry *dentry, int ex) struct ocfs2_dentry_lock *dl = dentry->d_fsdata; struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); - ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); } /* Reference counting of the dlm debug structure. 
We want this because @@ -2145,12 +2180,15 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) int ocfs2_dlm_init(struct ocfs2_super *osb) { - int status; + int status = 0; u32 dlm_key; - struct dlm_ctxt *dlm; + struct dlm_ctxt *dlm = NULL; mlog_entry_void(); + if (ocfs2_mount_local(osb)) + goto local; + status = ocfs2_dlm_init_debug(osb); if (status < 0) { mlog_errno(status); @@ -2178,11 +2216,12 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) goto bail; } + dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); + +local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); - dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); - osb->dlm = dlm; status = 0; diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index cbfd45a..8fc52d6 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -154,6 +154,9 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) { int status; + if (ocfs2_mount_local(osb)) + return 0; + status = o2hb_register_callback(&osb->osb_hb_down); if (status < 0) { mlog_errno(status); @@ -172,6 +175,9 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) { int status; + if (ocfs2_mount_local(osb)) + return; + status = o2hb_unregister_callback(&osb->osb_hb_down); if (status < 0) mlog_errno(status); @@ -186,6 +192,9 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb) int ret; char *argv[5], *envp[3]; + if (ocfs2_mount_local(osb)) + return; + if (!osb->uuid_str) { /* This can happen if we don't get far enough in mount... */ mlog(0, "No UUID with which to stop heartbeat!\n\n"); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 42e361f..e4d9149 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -423,7 +423,8 @@ static int ocfs2_read_locked_inode(struct inode *inode, * cluster lock before trusting anything anyway. 
*/ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) - && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); + && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) + && !ocfs2_mount_local(osb); /* * To maintain backwards compatibility with older versions of diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 1d7f4ab..825cb0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -144,8 +144,10 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) ocfs2_abort(osb->sb, "Detected aborted journal"); handle = ERR_PTR(-EROFS); } - } else - atomic_inc(&(osb->journal->j_num_trans)); + } else { + if (!ocfs2_mount_local(osb)) + atomic_inc(&(osb->journal->j_num_trans)); + } return handle; } @@ -507,9 +509,23 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); - status = ocfs2_journal_toggle_dirty(osb, 0); - if (status < 0) - mlog_errno(status); + if (ocfs2_mount_local(osb)) { + journal_lock_updates(journal->j_journal); + status = journal_flush(journal->j_journal); + journal_unlock_updates(journal->j_journal); + if (status < 0) + mlog_errno(status); + } + + if (status == 0) { + /* + * Do not toggle if flush was unsuccessful otherwise + * will leave dirty metadata in a "clean" journal + */ + status = ocfs2_journal_toggle_dirty(osb, 0); + if (status < 0) + mlog_errno(status); + } /* Shutdown the kernel journal system */ journal_destroy(journal->j_journal); @@ -549,7 +565,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb, } } -int ocfs2_journal_load(struct ocfs2_journal *journal) +int ocfs2_journal_load(struct ocfs2_journal *journal, int local) { int status = 0; struct ocfs2_super *osb; @@ -576,14 +592,18 @@ int ocfs2_journal_load(struct ocfs2_journal *journal) } /* Launch the commit thread */ - osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt"); - if (IS_ERR(osb->commit_task)) { - status = PTR_ERR(osb->commit_task); + if (!local) { + osb->commit_task = 
kthread_run(ocfs2_commit_thread, osb, + "ocfs2cmt"); + if (IS_ERR(osb->commit_task)) { + status = PTR_ERR(osb->commit_task); + osb->commit_task = NULL; + mlog(ML_ERROR, "unable to launch ocfs2commit thread, " + "error=%d", status); + goto done; + } + } else osb->commit_task = NULL; - mlog(ML_ERROR, "unable to launch ocfs2commit thread, error=%d", - status); - goto done; - } done: mlog_exit(status); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 899112a..e121636 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -157,7 +157,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, void ocfs2_journal_shutdown(struct ocfs2_super *osb); int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full); -int ocfs2_journal_load(struct ocfs2_journal *journal); +int ocfs2_journal_load(struct ocfs2_journal *journal, int local); int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num); @@ -174,6 +174,9 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + if (ocfs2_mount_local(osb)) + return; + if (!ocfs2_inode_fully_checkpointed(inode)) { /* WARNING: This only kicks off a single * checkpoint. If someone races you and adds more diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 69f85ae..51b0204 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -83,10 +83,12 @@ static struct vm_operations_struct ocfs2_file_vm_ops = { int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { int ret = 0, lock_level = 0; + struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); /* We don't want to support shared writable mappings yet. 
*/ - if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) - && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { + if (!ocfs2_mount_local(osb) && + ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && + ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); /* This is -EINVAL because generic_file_readonly_mmap * returns it in a similar situation. */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 21db45d..9637039 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -587,9 +587,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } ocfs2_inode_set_new(osb, inode); - status = ocfs2_create_new_inode_locks(inode); - if (status < 0) - mlog_errno(status); + if (!ocfs2_mount_local(osb)) { + status = ocfs2_create_new_inode_locks(inode); + if (status < 0) + mlog_errno(status); + } status = 0; /* error in ocfs2_create_new_inode_locks is not * critical */ diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index b767fd7..db8e77c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -349,6 +349,11 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) return ret; } +static inline int ocfs2_mount_local(struct ocfs2_super *osb) +{ + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); +} + #define OCFS2_IS_VALID_DINODE(ptr) \ (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3330a5d..b5c6856 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -86,7 +86,7 @@ OCFS2_SB(sb)->s_feature_incompat &= ~(mask) #define OCFS2_FEATURE_COMPAT_SUPP 0 -#define OCFS2_FEATURE_INCOMPAT_SUPP 0 +#define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT #define OCFS2_FEATURE_RO_COMPAT_SUPP 0 /* @@ -96,6 +96,18 @@ */ #define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV 0x0002 +/* + * tunefs sets this incompat flag before starting the resize and clears it + * at 
the end. This flag protects users from inadvertently mounting the fs + * after an aborted run without fsck-ing. + */ +#define OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG 0x0004 + +/* Used to denote a non-clustered volume */ +#define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT 0x0008 + +/* Support for sparse allocation in b-trees */ +#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 /* * Flags on ocfs2_dinode.i_flags diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 4bf3954..a6d2f8c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -508,6 +508,27 @@ bail: return status; } +static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) +{ + if (ocfs2_mount_local(osb)) { + if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + mlog(ML_ERROR, "Cannot heartbeat on a locally " + "mounted device.\n"); + return -EINVAL; + } + } + + if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { + mlog(ML_ERROR, "Heartbeat has to be started to mount " + "a read-write clustered device.\n"); + return -EINVAL; + } + } + + return 0; +} + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -516,16 +537,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; + char nodestr[8]; mlog_entry("%p, %p, %i", sb, data, silent); - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (!o2hb_check_local_node_heartbeating()) { + if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { status = -EINVAL; goto read_super_error; } + /* for now we only have one cluster/node, make sure we see it + * in the heartbeat universe */ + if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) { + if (!o2hb_check_local_node_heartbeating()) { + status = -EINVAL; + goto read_super_error; + } + } + /* probe for superblock */ status = ocfs2_sb_probe(sb, &bh, &sector_size); if (status < 
0) { @@ -541,11 +570,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } brelse(bh); bh = NULL; - - if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) { - status = -EINVAL; - goto read_super_error; - } osb->s_mount_opt = parsed_opt; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -588,21 +612,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } if (!ocfs2_is_hard_readonly(osb)) { - /* If this isn't a hard readonly mount, then we need - * to make sure that heartbeat is in a valid state, - * and that we mark ourselves soft readonly is -oro - * was specified. */ - if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { - mlog(ML_ERROR, "No heartbeat for device (%s)\n", - sb->s_id); - status = -EINVAL; - goto read_super_error; - } - if (sb->s_flags & MS_RDONLY) ocfs2_set_ro_flag(osb, 0); } + status = ocfs2_verify_heartbeat(osb); + if (status < 0) { + mlog_errno(status); + goto read_super_error; + } + osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); if (!osb->osb_debug_root) { @@ -635,9 +654,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ocfs2_complete_mount_recovery(osb); - printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " + if (ocfs2_mount_local(osb)) + snprintf(nodestr, sizeof(nodestr), "local"); + else + snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); + + printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " "with %s data mode.\n", - osb->dev_str, osb->node_num, osb->slot_num, + osb->dev_str, nodestr, osb->slot_num, osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); @@ -999,7 +1023,11 @@ static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) /* XXX hold a ref on the node while mounte? easy enough, if * desirable. 
*/ - osb->node_num = o2nm_this_node(); + if (ocfs2_mount_local(osb)) + osb->node_num = 0; + else + osb->node_num = o2nm_this_node(); + if (osb->node_num == O2NM_MAX_NODES) { mlog(ML_ERROR, "could not find this host's node number\n"); status = -ENOENT; @@ -1084,6 +1112,9 @@ static int ocfs2_mount_volume(struct super_block *sb) goto leave; } + if (ocfs2_mount_local(osb)) + goto leave; + /* This should be sent *after* we recovered our journal as it * will cause other nodes to unmark us as needing * recovery. However, we need to send it *before* dropping the @@ -1114,6 +1145,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { int tmp; struct ocfs2_super *osb = NULL; + char nodestr[8]; mlog_entry("(0x%p)\n", sb); @@ -1177,8 +1209,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); - printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n", - osb->dev_str, osb->node_num); + if (ocfs2_mount_local(osb)) + snprintf(nodestr, sizeof(nodestr), "local"); + else + snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); + + printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", + osb->dev_str, nodestr); ocfs2_delete_osb(osb); kfree(osb); @@ -1536,6 +1573,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) { int status = 0; int dirty; + int local; struct ocfs2_dinode *local_alloc = NULL; /* only used if we * recover * ourselves. */ @@ -1563,8 +1601,10 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) "recovering volume.\n"); } + local = ocfs2_mount_local(osb); + /* will play back anything left in the journal. */ - ocfs2_journal_load(osb->journal); + ocfs2_journal_load(osb->journal, local); if (dirty) { /* recover my local alloc if we didn't unmount cleanly. 
*/ diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 5b4dca7..0315a8b 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -1000,6 +1000,9 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb) { int status = 0; + if (ocfs2_mount_local(osb)) + return 0; + status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, osb->net_key, sizeof(struct ocfs2_response_msg), |