60.nfs:

author Ronnie Sahlberg <sahlberg@ronnie>

Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)

committer Ronnie Sahlberg <sahlberg@ronnie>

Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)
author Ronnie Sahlberg <sahlberg@ronnie>
Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)
committer Ronnie Sahlberg <sahlberg@ronnie>
Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)
diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs

index 311e9f2ef4982b408cffb5c4b4fe32fa3ca3c573..ef0d80fb2401e301abfd62d61d5083bab1d4e3c3 100755 (executable)
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@@ -41,6 +41,7 @@ case $cmd in
       takeip)
         ip=$2
  
+       echo $ip >> /etc/ctdb/state/nfs/restart
         echo $ip >> /etc/ctdb/state/statd/restart
  
         # having a list of what IPs we have allows statd to do the right 
@@ -55,8 +56,8 @@ case $cmd in
         maskbits=$3
  
         echo $ip >> /etc/ctdb/state/nfs/restart
-
         echo $ip >> /etc/ctdb/state/statd/restart
+
         /bin/rm -f /etc/ctdb/state/statd/ip/$ip
         exit 0
         ;;
@@ -72,7 +73,7 @@ case $cmd in
                         destip=`echo $dest | cut -d: -f1`
                         destport=`echo $dest | cut -d: -f2`
                         ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 
-#                      ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1
+                       ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1
                 done
         } > /dev/null 2>&1
  
@@ -91,9 +92,9 @@ case $cmd in
         } > /dev/null 2>&1
         /bin/rm -f /etc/ctdb/state/nfs/restart
  
-       # if we have taken or released any ips we must send out
-       # statd notifications to recover lost nfs locks
-       [ -x /etc/ctdb/statd-callout ] && [ -f /etc/ctdb/state/statd/restart ] && {
+       # always restart the lockmanager so that we start with a clusterwide
+       # graceperiod when ip addresses has changed
+       [ -x /etc/ctdb/statd-callout ] && {
                 /etc/ctdb/statd-callout notify &
         } >/dev/null 2>&1
  
diff --git a/ctdb/config/statd-callout b/ctdb/config/statd-callout

index 6f097be9f65eb7e5279486c2e3e01964b0aca4d3..0be496ef7d040c084269a58c7d375b4aec3e0d61 100755 (executable)
--- a/ctdb/config/statd-callout
+++ b/ctdb/config/statd-callout
@@ -8,10 +8,18 @@
  . /etc/ctdb/functions
  loadconfig nfs
  
-[ -z "$STATD_SHARED_DIRECTORY" ] && exit 0
+[ -z "$STATD_SHARED_DIRECTORY" ] && {
+       echo STATD_SHARED_DIRECTORY not configured. statd-callout failed.
+       exit 0
+}
  
  [ -d $STATD_SHARED_DIRECTORY ] || exit 0
  
+[ -z $NFS_HOSTNAME ] && {
+       echo NFS_HOSTNAME is not configured. statd-callout failed.
+       exit 0
+}
+
  case "$1" in
    add-client)
         # the callout does not tell us to which ip the client connected
@@ -31,19 +39,107 @@ case "$1" in
         done
         ;;
    notify)
-       # send out notifications to any additional ips we now serve
-        for f in `/bin/ls /etc/ctdb/state/statd/ip/*`; do
+       # we must restart the lockmanager (on all nodes) so that we get
+       # a clusterwide grace period (so other clients dont take out
+       # conflicting locks through other nodes before all locks have been
+       # reclaimed)
+       # we must also let some time pass between stopping and restarting the
+       # lockmanager since othervise there is a window where the lockmanager
+       # will respond "strangely" immediately after restarting it, which
+       # causes clients to fail to reclaim the locks.
+       # 
+       service nfslock stop > /dev/null 2>&1
+
+       # we need these settings to make sure that no tcp connections survive
+       # across a very fast failover/failback
+       echo 0 > /proc/sys/net/ipv4/tcp_max_tw_buckets
+       echo 0 > /proc/sys/net/ipv4/tcp_fin_timeout
+       echo 0 > /proc/sys/net/ipv4/tcp_max_orphans
+
+       # rebuild the state directory for the local statd to use the correct
+       # state value and to initally send notifications to all clients
+       rm -f /var/lib/nfs/statd/sm/*
+       rm -f /var/lib/nfs/statd/sm.bak/*
+       cat $STATD_SHARED_DIRECTORY/state >/var/lib/nfs/statd/state
+
+       # we must keep a monotonically increasing state variable for the entire
+       # cluster  so state always increases when ip addresses fail from one
+       # node to another
+       [ ! -f $STATD_SHARED_DIRECTORY/state ] && {
+               echo 1 | awk '{printf("%c%c%c%c", $0, $0/256, $0/256/256, $0/256/256/256);}' >$STATD_SHARED_DIRECTORY/state
+       }
+       # read current state
+       STATE=`od -t d4 $STATD_SHARED_DIRECTORY/state | head -1 | sed -e "s/^[0-9]*[^0-9]*//"`
+       # write current state+2 back to the state file
+       # the /2 *2 are to ensure that state is odd. state must be odd.
+       STATE=`expr $STATE "/" 2 "*" 2 "+" 3`
+       echo $STATE | awk '{printf("%c%c%c%c", $0, $0/256, $0/256/256, $0/256/256/256);}' >$STATD_SHARED_DIRECTORY/state
+       
+
+
+       # we need to restart lockmanager to make sure we get a clusterwide
+       # lock grace period. but to stop lockmanager completely
+       # we must also stop nfs (on linux)
+#      service nfs stop > /dev/null 2>&1
+#      service nfs start > /dev/null 2>&1
+
+       # copy all monitored clients on this node to the local lockmanager
+       for f in `/bin/ls /etc/ctdb/state/statd/ip/* 2>/dev/null`; do
+           ip=`/bin/basename $f`
+           [ -d $STATD_SHARED_DIRECTORY/$ip ] && [ -x /usr/bin/smnotify ] && {
+               for g in `/bin/ls $STATD_SHARED_DIRECTORY/$ip/* 2>/dev/null`; do
+                       client=`/bin/basename $g`
+                       touch /var/lib/nfs/statd/sm/$client
+               done
+           }
+       done
+
+       # now start lockmanager again with the new state directory.
+       service nfslock start > /dev/null 2>&1
+
+       # we now need to send out additional statd notifications to ensure
+       # that clients understand that the lockmanager has restarted.
+       # we have three cases:
+       # 1, clients that ignore the ip address the stat notification came from
+       #    and ONLY care about the 'name' in the notify packet.
+       #    these clients ONLY work with lock failover IFF that name
+       #    can be resolved into an ipaddress that matches the one used
+       #    to mount the share.  (==linux clients)
+       #    This is handled when starting lockmanager above,  but those
+       #    packets are sent from the "wrong" ip address, something linux
+       #    clients are ok with, buth other clients will barf at.
+       # 2, Some clients only accept statd packets IFF they come from the
+       #    'correct' ip address.
+       # 2a,Send out the notification using the 'correct' ip address and also
+       #    specify the 'correct' hostname in the statd packet.
+       #    Some clients require both the correct source address and also the
+       #    correct name. (these clients also ONLY work if the ip addresses
+       #    used to map the share can be resolved into the name returned in
+       #    the notify packet.)
+       # 2b,Other clients require that the source ip address of the notify
+       #    packet matches the ip address used to take out the lock.
+       #    I.e. that the correct source address is used.
+       #    These clients also require that the statd notify packet contains
+       #    the name as the ip address used when the lock was taken out.
+       #
+       # Both 2a and 2b are commonly used in lockmanagers since they maximize
+       # probability that the client will accept the statd notify packet and
+       # not just ignore it.
+        for f in `/bin/ls /etc/ctdb/state/statd/ip/* 2>/dev/null`; do
             ip=`/bin/basename $f`
             [ -d $STATD_SHARED_DIRECTORY/$ip ] && [ -x /usr/bin/smnotify ] && {
-               for g in `/bin/ls $STATD_SHARED_DIRECTORY/$ip/*`; do
+               for g in `/bin/ls $STATD_SHARED_DIRECTORY/$ip/* 2>/dev/null`; do
                         client=`/bin/basename $g`
-                       # send the notifications twice so we force the other
-                       # end to recognice a state change has occured and that
-                       # it MUST reclaim the locks.
-                       #
-                       /usr/bin/smnotify --client=$client --ip=$ip --server=$ip --stateval=4
-                       /usr/bin/smnotify --client=$client --ip=$ip --server=$ip --stateval=5
-                       /bin/rm -f $g
+#                      /bin/rm -f $g
+                       # send out notifications from the "correct" address
+                       # (the same addresse as where the lock was taken out
+                       # on)   some clients require that the source address
+                       # matches where the lock was taken out.
+                       # also send it both as a name that the client
+                       # hopefully can resolve into the server ip and
+                       # and also by specifying the raw ip address as name.
+                       /usr/bin/smnotify --client=$client --ip=$ip --server=$ip --stateval=$STATE
+                       /usr/bin/smnotify --client=$client --ip=$ip --server=$NFS_HOSTNAME --stateval=$STATE
                 done
             }
         done
author	Ronnie Sahlberg <sahlberg@ronnie>
	Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)
committer	Ronnie Sahlberg <sahlberg@ronnie>
	Thu, 6 Sep 2007 22:52:56 +0000 (08:52 +1000)
ctdb/config/events.d/60.nfs		patch \| blob \| blame \| history
ctdb/config/statd-callout		patch \| blob \| blame \| history