]> git.ipfire.org Git - ipfire-2.x.git/blame - src/scripts/repair-mdraid
core168: Add script to automatically repair MDRAID arrays
[ipfire-2.x.git] / src / scripts / repair-mdraid
CommitLineData
71d53192
MT
1#!/bin/bash
2###############################################################################
3# #
4# IPFire.org - A linux based firewall #
5# Copyright (C) 2022 IPFire Team <info@ipfire.org> #
6# #
7# This program is free software: you can redistribute it and/or modify #
8# it under the terms of the GNU General Public License as published by #
9# the Free Software Foundation, either version 3 of the License, or #
10# (at your option) any later version. #
11# #
12# This program is distributed in the hope that it will be useful, #
13# but WITHOUT ANY WARRANTY; without even the implied warranty of #
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
15# GNU General Public License for more details. #
16# #
17# You should have received a copy of the GNU General Public License #
18# along with this program. If not, see <http://www.gnu.org/licenses/>. #
19# #
20###############################################################################
21#
22# This script is supposed to repair any broken RAID installations
23# where the system has been booted from only one of the RAID devices
24# without the software RAID being activated first.
25#
26# This script does as follows:
27#
28# * It tries to find an inactive RAID called "ipfire:0"
29# * It will then destroy any devices that are still part of this RAID.
30# This is required because if the RAID is being assembled correctly,
31# data from the disk that has NOT been mounted will be replicated
32# back to the device that has been changed. As a result, any
33# data that has been written to the mounted disk would be lost.
34# To avoid this, we will partially destroy the RAID.
35# * We will then erase any partition tables and destroy any filesystems
36# on the devices so that they do not get accidentally mounted again.
37# * The system will then need to be rebooted where the RAID will be
38# mounted again in a degraded state which might take some extra
39# time at boot (the system stands still for about a minute).
40# * After the system has been booted up correctly, we will re-add
41# the devices back to the RAID which will resync and the system
42# will be back to its intended configuration.
43
# Prints the device node of the inactive RAID named "ipfire:0".
#
# Reads the output of "mdadm --detail --scan" line by line and looks for
# an INACTIVE-ARRAY entry that carries the "name=ipfire:0" attribute.
#
# Outputs: the device (second column of the matching line) on stdout
# Returns: 0 if such an array was found, 1 otherwise
find_inactive_raid() {
	local state
	local node
	local rest
	local token
	local scan

	scan="$(mdadm --detail --scan)"

	while read -r state node rest; do
		# Only inactive arrays are of interest
		[ "${state}" = "INACTIVE-ARRAY" ] || continue

		# The remaining columns are whitespace-separated key=value
		# pairs, so word-splitting them here is intentional
		for token in ${rest}; do
			if [ "${token}" = "name=ipfire:0" ]; then
				echo "${node}"
				return 0
			fi
		done
	done <<< "${scan}"

	return 1
}
65
# Prints the disk that holds the root filesystem.
#
# The first entry mounted on "/" is looked up in the mounts table and the
# partition suffix is removed from its device name, so that the result
# can be compared against whole-disk RAID members:
#
#   /dev/sda3      -> /dev/sda
#   /dev/sda12     -> /dev/sda
#   /dev/nvme0n1p3 -> /dev/nvme0n1
#   /dev/mmcblk0p2 -> /dev/mmcblk0
#
# A device name without a numeric suffix is printed unchanged.
#
# Arguments: $1 - mounts table to read (optional, defaults to /proc/mounts)
# Outputs:   the disk device on stdout
# Returns:   0 if a root mount was found, 1 otherwise
find_root() {
	local mounts="${1:-/proc/mounts}"
	local device
	local mp
	local fs
	local args

	while read -r device mp fs args; do
		[ "${mp}" = "/" ] || continue

		if [[ "${device}" =~ ^(.*[0-9])p[0-9]+$ ]]; then
			# Disk names that end in a digit separate the
			# partition number with a "p"
			echo "${BASH_REMATCH[1]}"
		elif [[ "${device}" =~ ^(.*[^0-9])[0-9]+$ ]]; then
			# Plain <disk><number>; the number may have
			# more than one digit
			echo "${BASH_REMATCH[1]}"
		else
			echo "${device}"
		fi

		return 0
	done < "${mounts}"

	return 1
}
81
# Prints the member devices of a RAID, one per line.
#
# The list is taken from the "devices=" line that
# "mdadm -v --detail --scan" prints for the array; that line holds a
# comma-separated list.
#
# Arguments: $1 - the RAID device to query
# Outputs:   one member device per line on stdout
# Returns:   always 0
find_raid_devices() {
	local raid="${1}"
	local members
	local member

	# Everything after the "=" of the indented "devices=" line
	members="$(mdadm -v --detail --scan "${raid}" | awk -F= '/^[ ]+devices/ { print $2 }')"

	# Split the list on commas only
	local IFS=,

	for member in ${members}; do
		echo "${member}"
	done

	return 0
}
94
# Makes a former RAID member unusable so that it can neither be assembled
# into the RAID nor be mounted by accident.
#
# The RAID superblock is zeroed, all signatures on the partitions of the
# device are wiped and finally the signatures (including the partition
# table) on the device itself are wiped.
#
# Arguments: $1 - the disk to wipe (e.g. /dev/sdb)
# Returns:   1 if no device was given, otherwise the status of the final
#            wipefs call
destroy_everything() {
	local device="${1}"
	local prefix
	local part

	# Refuse to run without a device: with an empty prefix the glob
	# below would match files in the current directory
	if [ -z "${device}" ]; then
		echo "destroy_everything: no device given" >&2
		return 1
	fi

	# Destroy the RAID superblock
	mdadm --zero-superblock "${device}"

	# Partitions are named <disk><N>, or <disk>p<N> if the name of the
	# disk itself ends in a digit. Matching only these avoids touching
	# other disks that merely share the prefix (sda vs. sdaa).
	prefix="${device}"
	case "${device}" in
		*[0-9])
			prefix="${device}p"
			;;
	esac

	# Wipe any partition signatures. This has to happen before the
	# partition table is erased, because the kernel may drop the
	# partition nodes once the table is gone.
	for part in "${prefix}"[0-9]*; do
		# The pattern stays literal if nothing matched
		[ -e "${part}" ] || continue

		wipefs -a "${part}"
	done

	# Wipe the partition table
	wipefs -a "${device}"
}
110
# Installs a one-shot boot script that adds the given devices back to
# the RAID.
#
# The script is written to /etc/rc.d/rcsysinit.d/S99fix-raid. When it is
# called with "start" and /dev/md/ipfire:0 exists, it runs "mdadm --add"
# for every device and then removes itself.
#
# Arguments: $@ - the devices to add back to the RAID
# Returns:   the status of the final chmod
raid_rebuild() {
	local members=( "$@" )
	local initscript="/etc/rc.d/rcsysinit.d/S99fix-raid"

	# The heredoc delimiter is unquoted on purpose: the device list is
	# expanded right now, while everything escaped with a backslash is
	# left to be expanded when the generated script runs
	cat > "${initscript}" <<EOF
#!/bin/bash

case "\${1}" in
	start)
		if [ -e "/dev/md/ipfire:0" ]; then
			for device in ${members[@]}; do
				mdadm --add "/dev/md/ipfire:0" "\${device}"
			done

			# Delete this script
			rm "\${0}"
		fi
		;;
esac
EOF

	chmod a+x "${initscript}"
}
133
# Entry point: repairs the inactive RAID "ipfire:0", if there is one.
#
# Steps:
#   1. Look up the inactive RAID; return right away if there is none.
#   2. Determine the disk holding the root filesystem and the members
#      of the RAID.
#   3. Stop the RAID.
#   4. Wipe every member except the root disk.
#   5. Install the boot script that adds all wiped devices back.
#
# Returns: always 0
main() {
	local raid
	raid="$(find_inactive_raid)"

	# Nothing to do if no RAID device found
	if [ -z "${raid}" ]; then
		return 0
	fi

	echo "Fixing RAID ${raid}..."

	local root
	root="$(find_root)"

	# Finding any devices in this RAID (one per line)
	local devices=()
	local device
	while read -r device; do
		[ -n "${device}" ] && devices+=( "${device}" )
	done <<< "$(find_raid_devices "${raid}")"

	# Stop the RAID
	mdadm --stop "${raid}" &>/dev/null

	# Destroy any useful data on all remaining RAID devices and
	# remember which ones have been wiped
	local destroyed=()
	for device in "${devices[@]}"; do
		# Skip root
		[ "${device}" = "${root}" ] && continue

		destroy_everything "${device}"
		destroyed+=( "${device}" )
	done &>/dev/null

	# Re-add all wiped devices to the RAID. Passing the leftover loop
	# variable instead would only cover the last member of the list,
	# which could even be the root disk that was skipped above.
	if [ "${#destroyed[@]}" -gt 0 ]; then
		raid_rebuild "${destroyed[@]}"
	fi

	return 0
}
168
# Run main. Being the last command, its status becomes the status of the
# script, no matter whether the script is executed or sourced ("return"
# would fail with an error when the script is executed directly).
main "$@"