From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Sep 2009 16:07:19 -0700 Subject: The DRBD driver Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- .../blockdev/drbd/DRBD-8.3-data-packets.svg | 588 +++++++++++++++++++++ Documentation/blockdev/drbd/DRBD-data-packets.svg | 459 ++++++++++++++++ Documentation/blockdev/drbd/README.txt | 16 + Documentation/blockdev/drbd/conn-states-8.dot | 18 + Documentation/blockdev/drbd/disk-states-8.dot | 16 + .../drbd/drbd-connection-state-overview.dot | 85 +++ Documentation/blockdev/drbd/node-states-8.dot | 14 + 7 files changed, 1196 insertions(+) create mode 100644 Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg create mode 100644 Documentation/blockdev/drbd/DRBD-data-packets.svg create mode 100644 Documentation/blockdev/drbd/README.txt create mode 100644 Documentation/blockdev/drbd/conn-states-8.dot create mode 100644 Documentation/blockdev/drbd/disk-states-8.dot create mode 100644 Documentation/blockdev/drbd/drbd-connection-state-overview.dot create mode 100644 Documentation/blockdev/drbd/node-states-8.dot (limited to 'Documentation') diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg new file mode 100644 index 0000000..f87cfa0 --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -0,0 +1,588 @@ + + + + + + Master slide + + + + + + + + + + RSDataReply + + + + + + + CsumRSRequest + + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + + WriteAck + + + + got_BlockAck() + + + Checksum based Resync, case not in sync + + + DRBD-8.3 data flow + + + w_e_send_csum() + + + + + + + + RSIsInSync + + + + + + + CsumRSRequest + + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + got_IsInSync() + + + Checksum based Resync, case in sync + + + + + + + + + + OVReply + + + + + + + OVRequest + + + + receive_OVRequest() + + + drbd_endio_read_sec() + + + w_e_end_ov_req() + + + receive_OVReply() + + + drbd_endio_read_sec() + + + w_e_end_ov_reply() + + + + + + OVResult + + + + got_OVResult() + + + Online verify + + + w_make_ov_request() + + + + + + + + drbd_endio_read_sec() + + + w_make_resync_request() + + + w_e_send_csum() + + + + + drbd_endio_read_sec() + + + + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg new file mode 100644 index 0000000..48a1e21 --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg @@ -0,0 +1,459 @@ + + + + + + Master slide + + + + + + + + + RSDataReply + + + + + RSDataRequest + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_rsdata_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + WriteAck + + + got_BlockAck() + + + Resync blocks, 4-32K + + + + + + + WriteAck + + + + + Data + + + drbd_make_request() + + + receive_Data() + + + drbd_endio_write_sec() + + + e_end_block() + + + got_BlockAck() + + + Regular mirrored write, 512-32K + + + w_send_dblock() + + + + + drbd_endio_write_pri() + + + + + + + DataReply + + + + + DataRequest + + + drbd_make_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_data_req() + + + Drawing + + receive_DataReply() + + + + Diskless read, 512-32K + + + w_send_read_req() + + + DRBD 8 data flow + + + + + + al_begin_io() + + + al_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt new file mode 100644 index 0000000..627b0a1 --- /dev/null +++ b/Documentation/blockdev/drbd/README.txt @@ -0,0 +1,16 @@ +Description + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Please visit http://www.drbd.org to find out more. + +The here included files are intended to help understand the implementation + +DRBD-8.3-data-packets.svg, DRBD-data-packets.svg + relates some functions, and write packets. + +conn-states-8.dot, disk-states-8.dot, node-states-8.dot + The sub graphs of DRBD's state transitions diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot new file mode 100644 index 0000000..025e8cf --- /dev/null +++ b/Documentation/blockdev/drbd/conn-states-8.dot @@ -0,0 +1,18 @@ +digraph conn_states { + StandAllone -> WFConnection [ label = "ioctl_set_net()" ] + WFConnection -> Unconnected [ label = "unable to bind()" ] + WFConnection -> WFReportParams [ label = "in connect() after accept" ] + WFReportParams -> StandAllone [ label = "checks in receive_param()" ] + WFReportParams -> Connected [ label = "in receive_param()" ] + WFReportParams -> WFBitMapS [ label = "sync_handshake()" ] + WFReportParams -> WFBitMapT [ label = "sync_handshake()" ] + WFBitMapS -> SyncSource [ label = "receive_bitmap()" ] + WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ] + SyncSource -> Connected + SyncTarget -> Connected + SyncSource -> PausedSyncS + SyncTarget -> PausedSyncT + PausedSyncS -> SyncSource + PausedSyncT -> SyncTarget + Connected -> WFConnection [ label = "* on network error" ] +} diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot new file mode 100644 index 0000000..d06cfb4 --- /dev/null +++ b/Documentation/blockdev/drbd/disk-states-8.dot @@ -0,0 +1,16 @@ +digraph disk_states { + Diskless -> Inconsistent [ label = "ioctl_set_disk()" ] + Diskless -> Consistent [ label = "ioctl_set_disk()" ] + Diskless -> Outdated [ label = "ioctl_set_disk()" ] + Consistent -> Outdated [ label = "receive_param()" ] + Consistent -> UpToDate [ label = "receive_param()" ] + Consistent -> Inconsistent [ label = "start resync" ] + Outdated -> Inconsistent [ label = "start resync" ] + UpToDate -> Inconsistent [ label = "ioctl_replicate" ] + Inconsistent -> UpToDate [ label = "resync completed" ] + Consistent -> Failed [ label = "io completion error" ] + Outdated -> Failed [ label = "io completion error" ] + UpToDate -> Failed [ label = "io completion error" ] + Inconsistent -> Failed [ label = "io completion error" ] + Failed -> Diskless [ label = "sending notify to peer" ] +} diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot new file mode 100644 index 0000000..6d9cf0a --- /dev/null +++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot @@ -0,0 +1,85 @@ +// vim: set sw=2 sts=2 : +digraph { + rankdir=BT + bgcolor=white + + node [shape=plaintext] + node [fontcolor=black] + + StandAlone [ style=filled,fillcolor=gray,label=StandAlone ] + + node [fontcolor=lightgray] + + Unconnected [ label=Unconnected ] + + CommTrouble [ shape=record, + label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ] + + node [fontcolor=gray] + + subgraph cluster_try_connect { + label="try to connect, handshake" + rank=max + WFConnection [ label=WFConnection ] + WFReportParams [ label=WFReportParams ] + } + + TearDown [ label=TearDown ] + + Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ] + + node [fontcolor=lightblue] + + StartingSyncS [ label=StartingSyncS ] + StartingSyncT [ label=StartingSyncT ] + + subgraph cluster_bitmap_exchange { + node [fontcolor=red] + fontcolor=red + label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged" + + WFBitMapT [ label=WFBitMapT ] + WFSyncUUID [ label=WFSyncUUID ] + WFBitMapS [ label=WFBitMapS ] + } + + node [fontcolor=blue] + + cluster_resync [ shape=record,label="{resynchronisation process running\l'concurrent' application requests allowed|{{PausedSyncT\nSyncTarget}|{PausedSyncS\nSyncSource}}}" ] + + node [shape=box,fontcolor=black] + + // drbdadm [label="drbdadm connect"] + // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."] + // comm_error [label="communication trouble"] + + // + // edges + // -------------------------------------- + + StandAlone -> Unconnected [ label="drbdadm connect" ] + Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ] + Unconnected -> WFConnection [ label="receiver thread is started" ] + WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ] + + WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ] + WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ] + + WFReportParams -> WFBitMapS + WFReportParams -> WFBitMapT + WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false] + + WFBitMapS -> cluster_resync:S + WFSyncUUID -> cluster_resync:T + + edge [color=green] + cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ] + + edge [color=red] + WFReportParams -> CommTrouble + Connected -> CommTrouble + cluster_resync:any -> CommTrouble + edge [color=black] + CommTrouble -> Unconnected [label="receiver thread is stopped" ] + +} diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot new file mode 100644 index 0000000..4a2b00c --- /dev/null +++ b/Documentation/blockdev/drbd/node-states-8.dot @@ -0,0 +1,14 @@ +digraph node_states { + Secondary -> Primary [ label = "ioctl_set_state()" ] + Primary -> Secondary [ label = "ioctl_set_state()" ] +} + +digraph peer_states { + Secondary -> Primary [ label = "recv state packet" ] + Primary -> Secondary [ label = "recv state packet" ] + Primary -> Unknown [ label = "connection lost" ] + Secondary -> Unknown [ label = "connection lost" ] + Unknown -> Primary [ label = "connected" ] + Unknown -> Secondary [ label = "connected" ] +} + -- cgit v1.1