Feb 25, 2020
2  scripts/crontab.txt  Normal file
@@ -0,0 +1,2 @@
00 08,12,16,20 * * * /root/FreeBSD/scripts/zfs_health.sh
00 6 * * 0 /usr/local/sbin/zfSnap -d -s -S -a 1m -p weekly_ -r zroot ship data base
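For reference, crontab fields are minute, hour, day-of-month, month and day-of-week, followed by the command. Read that way (and assuming the usual zfSnap flag meanings, which this commit does not document), the two entries decode as:

# 00 08,12,16,20 * * *  -> run zfs_health.sh at 08:00, 12:00, 16:00 and 20:00 every day
# 00 6 * * 0            -> at 06:00 every Sunday, take recursive (-r) snapshots of the
#                          zroot, ship, data and base pools, prefixed weekly_ (-p),
#                          with a one-month TTL (-a 1m), deleting expired ones (-d)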
411  scripts/gstat_exporter.py  Executable file
@@ -0,0 +1,411 @@
from prometheus_client import start_http_server, Gauge  # type: ignore
from subprocess import Popen, PIPE
from typing import Dict


def get_deviceinfo(name: str) -> Dict[str, str]:
    """
    Return a dict of GEOM device info for GEOM devices in class DISK,
    for use as labels for the metrics.

    Sample output from the geom command:

    $ geom -p ada0
    Geom class: DISK
    Geom name: ada0
    Providers:
    1. Name: ada0
       Mediasize: 250059350016 (233G)
       Sectorsize: 512
       Mode: r2w2e4
       descr: Samsung SSD 860 EVO mSATA 250GB
       lunid: 5002538e700b753f
       ident: S41MNG0K907238X
       rotationrate: 0
       fwsectors: 63
       fwheads: 16
    $
    """
    with Popen(
        ["geom", "-p", name], stdout=PIPE, bufsize=1, universal_newlines=True
    ) as p:
        result = {}
        for line in p.stdout:
            # remove excess whitespace
            line = line.strip()
            # we only care about the DISK class for now
            if line[0:12] == "Geom class: " and line[-4:] != "DISK":
                break

            if line[0:11] == "Mediasize: ":
                result["mediasize"] = line[11:]
            if line[0:12] == "Sectorsize: ":
                result["sectorsize"] = line.split(" ")[1]
            if line[0:7] == "descr: ":
                result["descr"] = " ".join(line.split(" ")[1:])
            if line[0:7] == "lunid: ":
                result["lunid"] = line.split(" ")[1]
            if line[0:7] == "ident: ":
                result["ident"] = line.split(" ")[1]
            if line[0:14] == "rotationrate: ":
                result["rotationrate"] = line.split(" ")[1]
            if line[0:11] == "fwsectors: ":
                result["fwsectors"] = line.split(" ")[1]
            if line[0:9] == "fwheads: ":
                result["fwheads"] = line.split(" ")[1]
        return result


def process_request() -> None:
    """
    Run gstat in a loop and update stats per line
    """
    # start with an empty deviceinfo dict and add devices as we see them
    deviceinfo: Dict[str, Dict[str, str]] = {}

    with Popen(
        ["gstat", "-pdosCI", "5s"], stdout=PIPE, bufsize=1, universal_newlines=True
    ) as p:
        for line in p.stdout:
            # the CSV header line splits into the same 19 fields as a data
            # line, so it can be unpacked first and skipped afterwards
            (
                timestamp,
                name,
                queue_depth,
                total_operations_per_second,
                read_operations_per_second,
                read_size_kilobytes,
                read_kilobytes_per_second,
                miliseconds_per_read,
                write_operations_per_second,
                write_size_kilobytes,
                write_kilobytes_per_second,
                miliseconds_per_write,
                delete_operations_per_second,
                delete_size_kilobytes,
                delete_kilobytes_per_second,
                miliseconds_per_delete,
                other_operations_per_second,
                miliseconds_per_other,
                percent_busy,
            ) = line.split(",")
            if timestamp == "timestamp":
                # skip header line
                continue

            if name not in deviceinfo:
                # this is the first time we see this GEOM
                deviceinfo[name] = {}
                # we always need a value for all labels
                # (LABELS is defined below, next to the metrics)
                for key in LABELS:
                    deviceinfo[name][key] = ""
                # get real info from the device if it is class DISK
                deviceinfo[name].update(get_deviceinfo(name))

            deviceinfo[name].update({"name": name})

            # up is always.. up
            up.set(1)

            queue.labels(**deviceinfo[name]).set(queue_depth)
            totalops.labels(**deviceinfo[name]).set(total_operations_per_second)

            readops.labels(**deviceinfo[name]).set(read_operations_per_second)
            readsize.labels(**deviceinfo[name]).set(read_size_kilobytes)
            readkbs.labels(**deviceinfo[name]).set(read_kilobytes_per_second)
            readms.labels(**deviceinfo[name]).set(miliseconds_per_read)

            writeops.labels(**deviceinfo[name]).set(write_operations_per_second)
            writesize.labels(**deviceinfo[name]).set(write_size_kilobytes)
            writekbs.labels(**deviceinfo[name]).set(write_kilobytes_per_second)
            writems.labels(**deviceinfo[name]).set(miliseconds_per_write)

            deleteops.labels(**deviceinfo[name]).set(delete_operations_per_second)
            deletesize.labels(**deviceinfo[name]).set(delete_size_kilobytes)
            deletekbs.labels(**deviceinfo[name]).set(delete_kilobytes_per_second)
            deletems.labels(**deviceinfo[name]).set(miliseconds_per_delete)

            otherops.labels(**deviceinfo[name]).set(other_operations_per_second)
            otherms.labels(**deviceinfo[name]).set(miliseconds_per_other)

            busy.labels(**deviceinfo[name]).set(percent_busy)


# all metrics share the same set of GEOM device labels
LABELS = [
    "name",
    "descr",
    "mediasize",
    "sectorsize",
    "lunid",
    "ident",
    "rotationrate",
    "fwsectors",
    "fwheads",
]

# define metrics
up = Gauge(
    "gstat_up", "The value of this Gauge is always 1 when the gstat_exporter is up"
)

queue = Gauge(
    "gstat_queue_depth",
    "The queue depth for this GEOM",
    LABELS,
)
totalops = Gauge(
    "gstat_total_operations_per_second",
    "The total number of operations/second for this GEOM",
    LABELS,
)

readops = Gauge(
    "gstat_read_operations_per_second",
    "The number of read operations/second for this GEOM",
    LABELS,
)
readsize = Gauge(
    "gstat_read_size_kilobytes",
    "The size in kilobytes of read operations for this GEOM",
    LABELS,
)
readkbs = Gauge(
    "gstat_read_kilobytes_per_second",
    "The speed in kilobytes/second of read operations for this GEOM",
    LABELS,
)
# the published metric names keep the historical "miliseconds" spelling;
# renaming them would break existing scrapes and dashboards
readms = Gauge(
    "gstat_miliseconds_per_read",
    "The time in milliseconds per read operation for this GEOM",
    LABELS,
)

writeops = Gauge(
    "gstat_write_operations_per_second",
    "The number of write operations/second for this GEOM",
    LABELS,
)
writesize = Gauge(
    "gstat_write_size_kilobytes",
    "The size in kilobytes of write operations for this GEOM",
    LABELS,
)
writekbs = Gauge(
    "gstat_write_kilobytes_per_second",
    "The speed in kilobytes/second of write operations for this GEOM",
    LABELS,
)
writems = Gauge(
    "gstat_miliseconds_per_write",
    "The time in milliseconds per write operation for this GEOM",
    LABELS,
)

deleteops = Gauge(
    "gstat_delete_operations_per_second",
    "The number of delete operations/second for this GEOM",
    LABELS,
)
deletesize = Gauge(
    "gstat_delete_size_kilobytes",
    "The size in kilobytes of delete operations for this GEOM",
    LABELS,
)
deletekbs = Gauge(
    "gstat_delete_kilobytes_per_second",
    "The speed in kilobytes/second of delete operations for this GEOM",
    LABELS,
)
deletems = Gauge(
    "gstat_miliseconds_per_delete",
    "The time in milliseconds per delete operation for this GEOM",
    LABELS,
)

otherops = Gauge(
    "gstat_other_operations_per_second",
    "The number of other operations (BIO_FLUSH)/second for this GEOM",
    LABELS,
)
otherms = Gauge(
    "gstat_miliseconds_per_other",
    "The time in milliseconds per other operation (BIO_FLUSH) for this GEOM",
    LABELS,
)

busy = Gauge(
    "gstat_percent_busy",
    "The percent of the time this GEOM is busy",
    LABELS,
)

start_http_server(9248)
while True:
    process_request()
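A quick way to check the exporter end to end, assuming it is already running on the same host (port 9248 is hard-coded above) and curl is installed; prometheus_client serves the standard Prometheus text format:

$ curl -s http://localhost:9248/metrics | grep '^gstat_percent_busy'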
172  scripts/zfs-prune-snapshots  Executable file
@@ -0,0 +1,172 @@
#!/usr/bin/env bash
#
# script to prune zfs snapshots over a given age
#
# Author: Dave Eddy <dave@daveeddy.com>
# Date: November 20, 2015
# License: MIT

VERSION='v1.0.1'

usage() {
	local prog=${0##*/}
	cat <<-EOF
	usage: $prog [-hnqvV] [-p <prefix>] <time> [[dataset1] ...]

	remove snapshots from one or more zpools that match given criteria

	examples
	    # $prog 1w
	    remove snapshots older than a week across all zpools

	    # $prog -vn 1w
	    same as above, but with increased verbosity and without
	    actually deleting any snapshots (dry-run)

	    # $prog 3w tank1 tank2/backup
	    remove snapshots older than 3 weeks on tank1 and tank2/backup.
	    note that this script will recurse through *all* of tank1 and
	    *all* datasets below tank2/backup

	    # $prog -p 'autosnap_' 1M zones
	    remove snapshots older than a month on the zones pool that start
	    with the string "autosnap_"

	timespec
	    the first argument denotes how old a snapshot must be for it to
	    be considered for deletion - possible specifiers are

	        s seconds
	        m minutes
	        h hours
	        d days
	        w weeks
	        M months
	        y years

	options
	    -h            print this message and exit
	    -n            dry-run, don't actually delete snapshots
	    -p <prefix>   snapshot prefix string to match
	    -q            quiet, do not print removed snapshots
	    -v            increase verbosity
	    -V            print the version number and exit
	EOF
}

debug() {
	((verbosity >= 1)) && echo "$@"
	return 0
}

# given a time in seconds, return the "human readable" string
human() {
	local seconds=$1
	if ((seconds < 0)); then
		((seconds *= -1))
	fi

	local times=(
		$((seconds / 60 / 60 / 24 / 365)) # years
		$((seconds / 60 / 60 / 24 / 30))  # months
		$((seconds / 60 / 60 / 24 / 7))   # weeks
		$((seconds / 60 / 60 / 24))       # days
		$((seconds / 60 / 60))            # hours
		$((seconds / 60))                 # minutes
		$((seconds))                      # seconds
	)
	local names=(year month week day hour minute second)

	local i
	for ((i = 0; i < ${#names[@]}; i++)); do
		if ((${times[$i]} > 1)); then
			echo "${times[$i]} ${names[$i]}s"
			return
		elif ((${times[$i]} == 1)); then
			echo "${times[$i]} ${names[$i]}"
			return
		fi
	done
	echo '0 seconds'
}

dryrun=false
verbosity=0
prefix=
quiet=false
while getopts 'hnqp:vV' option; do
	case "$option" in
		h) usage; exit 0;;
		n) dryrun=true;;
		p) prefix=$OPTARG;;
		q) quiet=true;;
		v) ((verbosity++));;
		V) echo "$VERSION"; exit 0;;
		*) usage; exit 1;;
	esac
done
shift "$((OPTIND - 1))"

# extract the first argument - the timespec - and
# convert it to seconds
t=$1
time_re='^([0-9]+)([smhdwMy])$'
seconds=
if [[ $t =~ $time_re ]]; then
	# ex: "21d" becomes num=21 spec=d
	num=${BASH_REMATCH[1]}
	spec=${BASH_REMATCH[2]}

	case "$spec" in
		s) seconds=$((num));;
		m) seconds=$((num * 60));;
		h) seconds=$((num * 60 * 60));;
		d) seconds=$((num * 60 * 60 * 24));;
		w) seconds=$((num * 60 * 60 * 24 * 7));;
		M) seconds=$((num * 60 * 60 * 24 * 30));;
		y) seconds=$((num * 60 * 60 * 24 * 365));;
		*) echo "error: unknown spec '$spec'" >&2; exit 1;;
	esac
elif [[ -z $t ]]; then
	echo 'error: timespec must be specified as the first argument' >&2
	exit 1
else
	echo "error: failed to parse timespec '$t'" >&2
	exit 1
fi

shift
pools=("$@")

now=$(date +%s)
code=0
while read -r creation snapshot; do
	# ensure optional prefix matches
	snapname=${snapshot#*@}
	if [[ -n $prefix && $prefix != "${snapname:0:${#prefix}}" ]]; then
		debug "skipping $snapshot: doesn't match prefix $prefix"
		continue
	fi

	# ensure snapshot is older than the cutoff time
	delta=$((now - creation))
	human=$(human "$delta")
	if ((delta <= seconds)); then
		debug "skipping $snapshot: $human old"
		continue
	fi

	# remove the snapshot
	if ! $quiet || $dryrun; then
		echo -n "removing $snapshot: $human old"
	fi
	if $dryrun; then
		echo ' <dry-run: no action taken>'
	else
		if ! $quiet; then
			echo
		fi
		zfs destroy "$snapshot" || code=1
	fi
done < <(zfs list -Hpo creation,name -t snapshot -r "${pools[@]}")
exit "$code"
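As a concrete example, a dry run against the pools named in scripts/crontab.txt above could look like this (illustrative invocation; pick the age and pool names that fit your system):

$ ./zfs-prune-snapshots -vn -p 'weekly_' 2M zroot ship data base

With -n nothing is destroyed; each matching snapshot is printed with its age and a "<dry-run: no action taken>" tag.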
131  scripts/zfs_health.sh  Executable file
@@ -0,0 +1,131 @@
#! /bin/sh
#
# Calomel.org
# https://calomel.org/zfs_health_check_script.html
# FreeBSD ZFS Health Check script
# zfs_health.sh @ Version 0.18

# Check health of ZFS volumes and drives. On any faults send email.


# 99 problems but ZFS ain't one
problems=0


# Health - Check if all zfs volumes are in good condition. We are looking for
# any keyword signifying a degraded or broken array.

condition=$(/sbin/zpool status | egrep -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)')
if [ "${condition}" ]; then
	emailSubject="`hostname` - ZFS pool - HEALTH fault"
	problems=1
fi


# Capacity - Make sure the pool capacity is below 80% for best performance. The
# percentage really depends on how large your volume is. If you have a 128GB
# SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you can
# probably set the warning closer to 95%.
#
# ZFS uses a copy-on-write scheme. The file system writes new data to
# sequential free blocks first and when the uberblock has been updated the new
# inode pointers become valid. This method is true only when the pool has
# enough free sequential blocks. If the pool is at capacity and space limited,
# ZFS will have to write blocks randomly. This means ZFS cannot create an
# optimal set of sequential writes and write performance is severely impacted.

maxCapacity=80

if [ ${problems} -eq 0 ]; then
	capacity=$(/sbin/zpool list -H -o capacity | cut -d'%' -f1)
	for line in ${capacity}
	do
		if [ "$line" -ge "$maxCapacity" ]; then
			emailSubject="`hostname` - ZFS pool - Capacity Exceeded"
			problems=1
		fi
	done
fi


# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported an email will be sent out. You should then look to replace the
# faulty drive and run "zpool scrub" on the affected volume after resilvering.

if [ ${problems} -eq 0 ]; then
	errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000)
	if [ "${errors}" ]; then
		emailSubject="`hostname` - ZFS pool - Drive Errors"
		problems=1
	fi
fi


# Scrub Expired - Check if all volumes have been scrubbed in at least the last
# 40 days. The general guide is to scrub volumes on desktop quality drives once
# a week and volumes on enterprise class drives once a month. You can always
# use cron to schedule "zpool scrub" in off hours. We scrub our volumes every
# Sunday morning for example.
#
# Scrubbing traverses all the data in the pool once and verifies all blocks can
# be read. Scrubbing proceeds as fast as the devices allow, though the
# priority of any I/O remains below that of normal calls. This operation might
# negatively impact performance, but the file system will remain usable and
# responsive while scrubbing occurs. To initiate an explicit scrub, use the
# "zpool scrub" command.
#
# The scrubExpire variable is in seconds. So for 40 days we calculate 40 days
# times 24 hours times 3600 seconds to equal 3456000 seconds.

# 10 days
#scrubExpire=864000
# 40 days
scrubExpire=3456000

if [ ${problems} -eq 0 ]; then
	currentDate=$(date +%s)
	zfsVolumes=$(/sbin/zpool list -H -o name)

	for volume in ${zfsVolumes}
	do
		if [ $(/sbin/zpool status $volume | egrep -c "none requested") -ge 1 ]; then
			printf "ERROR: You need to run \"zpool scrub $volume\" before this script can monitor the scrub expiration time.\n"
			break
		fi
		if [ $(/sbin/zpool status $volume | egrep -c "scrub in progress|resilver") -ge 1 ]; then
			break
		fi

		### Ubuntu with GNU supported date format
		#scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $11" "$12" " $13" " $14" "$15}')
		#scrubDate=$(date -d "$scrubRawDate" +%s)

		### FreeBSD 11.2 with *nix supported date format
		#scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $15 $12 $13}')
		#scrubDate=$(date -j -f '%Y%b%e-%H%M%S' $scrubRawDate'-000000' +%s)

		### FreeBSD 12.0 with *nix supported date format
		scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $17 $14 $15}')
		scrubDate=$(date -j -f '%Y%b%e-%H%M%S' $scrubRawDate'-000000' +%s)

		if [ $(($currentDate - $scrubDate)) -ge $scrubExpire ]; then
			emailSubject="`hostname` - ZFS pool - Scrub Time Expired. Scrub Needed on Volume(s)"
			problems=1
		fi
	done
fi


# Email - On any problems send email with drive status information and
# capacities including a helpful subject line. Also use logger to write the
# email subject to the local logs. This is also the place you may want to put
# any other notifications like playing a sound file, beeping the internal
# speaker, paging someone or updating Nagios or even BigBrother.

if [ "$problems" -ne 0 ]; then
	printf '%s\n' "$emailSubject" "" "`/sbin/zpool list`" "" "`/sbin/zpool status`" | /usr/bin/mail -s "$emailSubject" jail-root@ahlawat.com
	logger "$emailSubject"
fi

### EOF ###
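For reference, this is what the FreeBSD 12.0 branch of the scrub-date parsing does, sketched against a made-up status line (the exact wording of the scan line differs between zpool versions, which is why the script carries three variants):

$ line='  scan: scrub repaired 0 in 0 days 00:10:00 with 0 errors on Sun Feb 23 04:10:00 2020'
$ echo "$line" | awk '{print $17 $14 $15}'
2020Feb23
$ date -j -f '%Y%b%e-%H%M%S' '2020Feb23-000000' +%s   # epoch seconds for local midnight, Feb 23 2020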