]> git.ipfire.org Git - ipfire-2.x.git/commitdiff
core168: Add script to automatically repair MDRAID arrays
authorMichael Tremer <michael.tremer@ipfire.org>
Thu, 19 May 2022 08:56:34 +0000 (08:56 +0000)
committerPeter Müller <peter.mueller@ipfire.org>
Mon, 30 May 2022 18:59:34 +0000 (18:59 +0000)
Please see the header of the script for more details.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
config/rootfiles/common/aarch64/stage2
config/rootfiles/common/armv6l/stage2
config/rootfiles/common/x86_64/stage2
config/rootfiles/core/168/update.sh
src/scripts/repair-mdraid [new file with mode: 0644]

index 352c704d4e7a85e9b7341db39fb5ad13a1fb3d35..e328a4526a1935b7099f1a187ab5ee93f2cdde72 100644 (file)
@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
 usr/local/bin/makegraphs
 usr/local/bin/qosd
 usr/local/bin/readhash
+usr/local/bin/repair-mdraid
 usr/local/bin/run-parts
 usr/local/bin/scanhd
 usr/local/bin/settime
index 198461a01d4bfe4fb10539ac574aa8d78f13f1b1..2bd00d9683f7b07daf2430f1b25ab04284b9ee9a 100644 (file)
@@ -97,6 +97,7 @@ usr/local/bin/ipsec-interfaces
 usr/local/bin/makegraphs
 usr/local/bin/qosd
 usr/local/bin/readhash
+usr/local/bin/repair-mdraid
 usr/local/bin/run-parts
 usr/local/bin/scanhd
 usr/local/bin/settime
index b03a7fecf30428a17d7712987b43ad0245e48ff7..586b88e3d005b11844700b6d3eb624023fb55ee3 100644 (file)
@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
 usr/local/bin/makegraphs
 usr/local/bin/qosd
 usr/local/bin/readhash
+usr/local/bin/repair-mdraid
 usr/local/bin/run-parts
 usr/local/bin/scanhd
 usr/local/bin/settime
index c4005dba9c1b9e8422d1ac9a04324f0b08e54b69..84dec941cfff74d52fed9511ae4ad54a3bd743de 100644 (file)
@@ -125,6 +125,9 @@ if ! grep -q rd.auto /etc/default/grub; then
        sed -e "s/panic=10/& rd.auto/" -i /etc/default/grub
 fi
 
+# Repair any broken MDRAID arrays
+/usr/local/bin/repair-mdraid
+
 # Start services
 /etc/init.d/fcron restart
 /etc/init.d/sshd restart
diff --git a/src/scripts/repair-mdraid b/src/scripts/repair-mdraid
new file mode 100644 (file)
index 0000000..a622ff7
--- /dev/null
@@ -0,0 +1,169 @@
+#!/bin/bash
+###############################################################################
+#                                                                             #
+# IPFire.org - A linux based firewall                                         #
+# Copyright (C) 2022 IPFire Team  <info@ipfire.org>                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+#
+# This script is supposed to repair any broken RAID installations
+# where the system has been booted from only one of the RAID devices
+# without the software RAID being activated first.
+#
+# This script does as follows:
+#
+# * It tries to find an inactive RAID called "ipfire:0"
+# * It will then destroy any devices that are still part of this RAID.
+#   This is required because if the RAID were assembled correctly,
+#   data from the disk that has NOT been mounted would be replicated
+#   back to the device that has been changed. As a result, any
+#   data that has been written to the mounted disk would be lost.
+#   To avoid this, we will partially destroy the RAID.
+# * We will then erase any partition tables and destroy any filesystems
+#   on the devices so that they do not get accidentally mounted again.
+# * The system will then need to be rebooted where the RAID will be
+#   mounted again in a degraded state which might take some extra
+#   time at boot (the system stands still for about a minute).
+# * After the system has been booted up correctly, we will re-add
+#   the devices back to the RAID which will resync and the system
+#   will be back to its intended configuration.
+
+find_inactive_raid() {
+       local status
+       local device
+       local arg
+       local args
+
+       while read -r status device args; do
+               if [ "${status}" = "INACTIVE-ARRAY" ]; then
+                       for arg in ${args}; do
+                               case "${arg}" in
+                                       name=ipfire:0)
+                                               echo "${device}"
+                                               return 0
+                                               ;;
+                               esac
+                       done
+               fi
+       done <<< "$(mdadm --detail --scan)"
+
+       return 1
+}
+
+find_root() {
+       local device
+       local mp
+       local fs
+       local args
+
+       while read -r device mp fs args; do
+               if [ "${mp}" = "/" ]; then
+                       echo "${device:0:-1}"
+                       return 0
+               fi
+       done < /proc/mounts
+
+       return 1
+}
+
+find_raid_devices() {
+       local raid="${1}"
+
+       local IFS=,
+
+       local device
+       for device in $(mdadm -v --detail --scan "${raid}" | awk -F= '/^[ ]+devices/ { print $2 }'); do
+               echo "${device}"
+       done
+
+       return 0
+}
+
+destroy_everything() {
+       local device="${1}"
+       local part
+
+       # Destroy the RAID superblock
+       mdadm --zero-superblock "${device}"
+
+       # Wipe the partition table
+       wipefs -a "${device}"
+
+       # Wipe any partition signatures
+       for part in ${device}*; do
+               wipefs -a "${part}"
+       done
+}
+
+# Writes the script /etc/rc.d/rcsysinit.d/S99fix-raid and makes it executable.
+#
+# Arguments: $@ - the devices that should be added back to the RAID
+#
+# When the generated script is called with "start" and /dev/md/ipfire:0
+# exists, it runs "mdadm --add" for every device and then deletes itself.
+# Presumably it is executed during system initialisation on the next boot.
+raid_rebuild() {
+       local devices=( "$@" )
+
+       # The delimiter of the here-document is not quoted: ${devices[@]} is
+       # expanded right now and the list of devices is written into the script.
+       # Everything that is escaped with a backslash is evaluated only when
+       # the generated script is executed.
+       cat > /etc/rc.d/rcsysinit.d/S99fix-raid <<EOF
+#!/bin/bash
+
+case "\${1}" in
+       start)
+               if [ -e "/dev/md/ipfire:0" ]; then
+                       for device in ${devices[@]}; do
+                               mdadm --add "/dev/md/ipfire:0" "\${device}"
+                       done
+
+                       # Delete this script
+                       rm "\${0}"
+               fi
+               ;;
+esac
+EOF
+
+       # Allow everybody to execute the generated script
+       chmod a+x /etc/rc.d/rcsysinit.d/S99fix-raid
+}
+
+main() {
+       local raid="$(find_inactive_raid)"
+
+       # Nothing to do if no RAID device found
+       if [ -z "${raid}" ]; then
+               return 0
+       fi
+
+       echo "Fixing RAID ${raid}..."
+
+       local root="$(find_root)"
+
+       # Finding any devices in this RAID
+       local devices=(
+               $(find_raid_devices "${raid}")
+       )
+
+       # Stop the RAID
+       mdadm --stop "${raid}" &>/dev/null
+
+       # Destroy any useful data on all remaining RAID devices
+       local device
+       for device in ${devices[@]}; do
+               # Skip root
+               [ "${device}" = "${root}" ] && continue
+
+               destroy_everything "${device}"
+       done &>/dev/null
+
+       # Re-add devices to the RAID
+       raid_rebuild "${device}"
+
+       return 0
+}
+
+# "return" is only valid inside a function or a sourced script, so a failure
+# of main has to be propagated with "exit" when this file is executed.
+main "$@" || exit $?