Feb 25, 2020
This commit is contained in:
		
							
								
								
									
										2
									
								
								scripts/crontab.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								scripts/crontab.txt
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
00 08,12,16,20 *   *   *   /root/FreeBSD/scripts/zfs_health.sh
 | 
			
		||||
00   6    *   *   0   /usr/local/sbin/zfSnap -d -s -S -a 1m -p weekly_ -r zroot ship data base
 | 
			
		||||
							
								
								
									
										411
									
								
								scripts/gstat_exporter.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										411
									
								
								scripts/gstat_exporter.py
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,411 @@
 | 
			
		||||
from prometheus_client import start_http_server, Gauge  # type: ignore
 | 
			
		||||
from subprocess import Popen, PIPE
 | 
			
		||||
from typing import Dict
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_deviceinfo(name: str) -> Dict[str, str]:
    """
    Return a dict of GEOM device info for GEOM devices in class DISK,
    for use as labels for the metrics.

    Runs ``geom -p <name>`` and copies the provider properties it prints
    into the result. Only the keys actually present in the output are
    returned; the result is empty when the GEOM is not of class DISK, when
    the command prints nothing, or when the command cannot be started
    (a warning is written to stderr in that last case, since the labels
    are only best-effort decoration of the metrics).

    Sample output from the geom command:

    $ geom -p ada0
    Geom class: DISK
    Geom name: ada0
    Providers:
    1. Name: ada0
       Mediasize: 250059350016 (233G)
       Sectorsize: 512
       Mode: r2w2e4
       descr: Samsung SSD 860 EVO mSATA 250GB
       lunid: 5002538e700b753f
       ident: S41MNG0K907238X
       rotationrate: 0
       fwsectors: 63
       fwheads: 16
    $
    """
    # (line prefix, result key, keep the whole remainder of the line?)
    # Mediasize and descr legitimately contain spaces; for every other field
    # only the first space-separated token after the prefix is kept.
    fields = (
        ("Mediasize: ", "mediasize", True),
        ("Sectorsize: ", "sectorsize", False),
        ("descr: ", "descr", True),
        ("lunid: ", "lunid", False),
        ("ident: ", "ident", False),
        ("rotationrate: ", "rotationrate", False),
        ("fwsectors: ", "fwsectors", False),
        ("fwheads: ", "fwheads", False),
    )
    result: Dict[str, str] = {}
    try:
        with Popen(
            ["geom", "-p", name], stdout=PIPE, bufsize=1, universal_newlines=True
        ) as p:
            for raw_line in p.stdout:
                # remove excess whitespace
                line = raw_line.strip()
                # we only care about the DISK class for now
                if line.startswith("Geom class: ") and not line.endswith("DISK"):
                    break

                for prefix, key, keep_remainder in fields:
                    if line.startswith(prefix):
                        value = line[len(prefix):]
                        result[key] = value if keep_remainder else value.split(" ")[0]
                        break
    except OSError as exc:
        # geom could not be spawned (missing binary, fork failure, ...):
        # report it, but let the caller carry on with empty labels.
        import sys

        print(f"gstat_exporter: unable to run geom for {name}: {exc}", file=sys.stderr)
        return {}
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process_request() -> None:
    """
    Run gstat in a loop and update stats per line.

    Spawns ``gstat -pdosCI 5s`` and reads its comma-separated output line by
    line. Each data line carries a timestamp, the GEOM name and 17 numeric
    columns; every numeric column is written to its gauge, labelled with the
    device info for that GEOM. Device info is looked up once per GEOM name
    (via get_deviceinfo) and cached for the lifetime of this call.

    Lines that do not have the expected number of fields, or whose numeric
    columns do not parse as numbers, are skipped instead of aborting the
    exporter. The function returns when gstat closes its output.
    """
    # we always need a value for all labels
    label_keys = (
        "name",
        "descr",
        "mediasize",
        "sectorsize",
        "lunid",
        "ident",
        "rotationrate",
        "fwsectors",
        "fwheads",
    )
    # one gauge per numeric column, in the order the columns follow the
    # timestamp and name fields of a gstat line
    gauges = (
        queue,
        totalops,
        readops,
        readsize,
        readkbs,
        readms,
        writeops,
        writesize,
        writekbs,
        writems,
        deleteops,
        deletesize,
        deletekbs,
        deletems,
        otherops,
        otherms,
        busy,
    )
    expected_fields = len(gauges) + 2

    # start with an empty deviceinfo dict and add devices as we see them
    deviceinfo: Dict[str, Dict[str, str]] = {}

    with Popen(
        ["gstat", "-pdosCI", "5s"], stdout=PIPE, bufsize=1, universal_newlines=True
    ) as p:
        for line in p.stdout:
            # strip the trailing newline so the last column is clean
            fields = line.strip().split(",")
            if len(fields) != expected_fields:
                # blank or truncated line
                continue

            timestamp, name = fields[0], fields[1]
            if timestamp == "timestamp":
                # skip header line
                continue

            try:
                values = [float(field) for field in fields[2:]]
            except ValueError:
                # not a data line after all
                continue

            if name not in deviceinfo:
                # this is the first time we see this GEOM
                labels = dict.fromkeys(label_keys, "")
                # get real info from the device if it is class DISK
                labels.update(get_deviceinfo(name))
                labels["name"] = name
                deviceinfo[name] = labels

            # up is always.. up
            up.set(1)

            labels = deviceinfo[name]
            for gauge, value in zip(gauges, values):
                gauge.labels(**labels).set(value)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# define metrics
up = Gauge(
    "gstat_up", "The value of this Gauge is always 1 when the gstat_exporter is up"
)

# Every per-GEOM gauge is declared with exactly this label set. Keeping it in
# one place guarantees that all gauges accept the same labels.
_GEOM_LABELS = [
    "name",
    "descr",
    "mediasize",
    "sectorsize",
    "lunid",
    "ident",
    "rotationrate",
    "fwsectors",
    "fwheads",
]


def _geom_gauge(metric_name: str, documentation: str) -> Gauge:
    """Create a Gauge labelled with the shared per-GEOM label set."""
    return Gauge(metric_name, documentation, _GEOM_LABELS)


queue = _geom_gauge("gstat_queue_depth", "The queue depth for this GEOM")
totalops = _geom_gauge(
    "gstat_total_operations_per_second",
    "The total number of operations/second for this GEOM",
)

readops = _geom_gauge(
    "gstat_read_operations_per_second",
    "The number of read operations/second for this GEOM",
)
readsize = _geom_gauge(
    "gstat_read_size_kilobytes",
    "The size in kilobytes of read operations for this GEOM",
)
readkbs = _geom_gauge(
    "gstat_read_kilobytes_per_second",
    "The speed in kilobytes/second of read operations for this GEOM",
)
readms = _geom_gauge(
    "gstat_miliseconds_per_read",
    "The speed in miliseconds/read operation for this GEOM",
)

writeops = _geom_gauge(
    "gstat_write_operations_per_second",
    "The number of write operations/second for this GEOM",
)
writesize = _geom_gauge(
    "gstat_write_size_kilobytes",
    "The size in kilobytes of write operations for this GEOM",
)
writekbs = _geom_gauge(
    "gstat_write_kilobytes_per_second",
    "The speed in kilobytes/second of write operations for this GEOM",
)
writems = _geom_gauge(
    "gstat_miliseconds_per_write",
    "The speed in miliseconds/write operation for this GEOM",
)

deleteops = _geom_gauge(
    "gstat_delete_operations_per_second",
    "The number of delete operations/second for this GEOM",
)
deletesize = _geom_gauge(
    "gstat_delete_size_kilobytes",
    "The size in kilobytes of delete operations for this GEOM",
)
deletekbs = _geom_gauge(
    "gstat_delete_kilobytes_per_second",
    "The speed in kilobytes/second of delete operations for this GEOM",
)
deletems = _geom_gauge(
    "gstat_miliseconds_per_delete",
    "The speed in miliseconds/delete operation for this GEOM",
)

otherops = _geom_gauge(
    "gstat_other_operations_per_second",
    "The number of other operations (BIO_FLUSH)/second for this GEOM",
)
otherms = _geom_gauge(
    "gstat_miliseconds_per_other",
    "The speed in miliseconds/other operation (BIO_FLUSH) for this GEOM",
)

busy = _geom_gauge("gstat_percent_busy", "The percent of the time this GEOM is busy")
 | 
			
		||||
 | 
			
		||||
import time

# serve the metrics over HTTP on port 9248
start_http_server(9248)
while True:
    process_request()
    # process_request() is called again as soon as it returns; pause briefly
    # so that a gstat which exits right away is not respawned in a tight loop.
    time.sleep(1)
 | 
			
		||||
							
								
								
									
										172
									
								
								scripts/zfs-prune-snapshots
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										172
									
								
								scripts/zfs-prune-snapshots
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,172 @@
 | 
			
		||||
#!/usr/bin/env bash
 | 
			
		||||
#
 | 
			
		||||
# script to prune zfs snapshots over a given age
 | 
			
		||||
#
 | 
			
		||||
# Author: Dave Eddy <dave@daveeddy.com>
 | 
			
		||||
# Date: November 20, 2015
 | 
			
		||||
# License: MIT
 | 
			
		||||
 | 
			
		||||
VERSION='v1.0.1'
 | 
			
		||||
 | 
			
		||||
# Print the help text to stdout. The here-document uses <<- so its body
# (and the closing EOF) must stay indented with TAB characters only.
usage() {
	local prog=${0##*/}
	cat <<-EOF
	usage: $prog [-hnqvV] [-p <prefix>] <time> [[dataset1] ...]

	remove snapshots from one or more zpools that match given criteria

	examples
	    # $prog 1w
	    remove snapshots older than a week across all zpools

	    # $prog -vn 1w
	    same as above, but with increased verbosity and without
	    actually deleting any snapshots (dry-run)

	    # $prog 3w tank1 tank2/backup
	    remove snapshots older than 3 weeks on tank1 and tank2/backup.
	    note that this script will recurse through *all* of tank1 and
	    *all* datasets below tank2/backup

	    # $prog -p 'autosnap_' 1M zones
	    remove snapshots older than a month on the zones pool that start
	    with the string "autosnap_"

	timespec
	    the first argument denotes how old a snapshot must be for it to
	    be considered for deletion - possible specifiers are

	        s seconds
	        m minutes
	        h hours
	        d days
	        w weeks
	        M months
	        y years

	options
	    -h             print this message and exit
	    -n             dry-run, don't actually delete snapshots
	    -p <prefix>    snapshot prefix string to match
	    -q             quiet, do not printout removed snapshots
	    -v             increase verbosity
	    -V             print the version number and exit
	EOF
}
 | 
			
		||||
 | 
			
		||||
# Echo the arguments when the global verbosity level is at least 1.
# Always returns 0 so callers can use it freely.
debug() {
	if ((verbosity >= 1)); then
		echo "$@"
	fi
	return 0
}
 | 
			
		||||
 | 
			
		||||
# Turn a duration in seconds into a coarse human-readable string.
#
# The sign is ignored. The largest unit (year, month, week, day, hour,
# minute, second) that fits at least once is chosen and the whole number of
# those units is printed, pluralised when greater than one. A month counts
# as 30 days and a year as 365 days. Prints "0 seconds" for zero.
human() {
	local span=$1
	if ((span < 0)); then
		((span = span * -1))
	fi

	local units=(year month week day hour minute second)
	# length of each unit in seconds, same order as "units"
	local unit_seconds=(
		$((60 * 60 * 24 * 365))
		$((60 * 60 * 24 * 30))
		$((60 * 60 * 24 * 7))
		$((60 * 60 * 24))
		$((60 * 60))
		60
		1
	)

	local idx count
	for idx in "${!units[@]}"; do
		count=$((span / unit_seconds[idx]))
		if ((count > 1)); then
			echo "$count ${units[idx]}s"
			return
		fi
		if ((count == 1)); then
			echo "$count ${units[idx]}"
			return
		fi
	done
	echo '0 seconds'
}
 | 
			
		||||
 | 
			
		||||
# defaults, overridden by the command line options below
dryrun=false
verbosity=0
prefix=
quiet=false
while getopts 'hnqp:vV' option; do
	case "$option" in
		h) usage; exit 0;;
		n) dryrun=true;;
		p) prefix=$OPTARG;;
		q) quiet=true;;
		v) ((verbosity++));;
		V) echo "$VERSION"; exit 0;;
		*) usage; exit 1;;
	esac
done
shift "$((OPTIND - 1))"

# extract the first argument - the timespec - and
# convert it to seconds
t=$1
time_re='^([0-9]+)([smhdwMy])$'
seconds=
if [[ $t =~ $time_re ]]; then
	# ex: "21d" becomes num=21 spec=d
	num=${BASH_REMATCH[1]}
	spec=${BASH_REMATCH[2]}

	case "$spec" in
		s) seconds=$((num));;
		m) seconds=$((num * 60));;
		h) seconds=$((num * 60 * 60));;
		d) seconds=$((num * 60 * 60 * 24));;
		w) seconds=$((num * 60 * 60 * 24 * 7));;
		M) seconds=$((num * 60 * 60 * 24 * 30));;
		y) seconds=$((num * 60 * 60 * 24 * 365));;
		*) echo "error: unknown spec '$spec'" >&2; exit 1;;
	esac
elif [[ -z $t ]]; then
	echo 'error: timespec must be specified as the first argument' >&2
	exit 1
else
	echo "error: failed to parse timespec '$t'" >&2
	exit 1
fi

# everything after the timespec is a pool/dataset to recurse into
shift
pools=("$@")

now=$(date +%s)
code=0

# List the snapshots up front so that a failing "zfs list" is reflected in
# the exit code. Whatever was listed is still processed.
snapshots=$(zfs list -Hpo creation,name -t snapshot -r "${pools[@]}") || code=1

while read -r creation snapshot; do
	# a here-string always supplies at least one line, so an empty
	# listing arrives here as a single blank line
	[[ -n $snapshot ]] || continue

	# ensure optional prefix matches
	snapname=${snapshot#*@}
	if [[ -n $prefix && $prefix != "${snapname:0:${#prefix}}" ]]; then
		debug "skipping $snapshot: doesn't match prefix $prefix"
		continue
	fi

	# ensure snapshot is older than the cutoff time
	delta=$((now - creation))
	age=$(human "$delta")
	if ((delta <= seconds)); then
		debug "skipping $snapshot: $age old"
		continue
	fi

	# remove the snapshot; a dry run always announces what it would do,
	# even with -q
	if ! $quiet || $dryrun; then
		echo -n "removing $snapshot: $age old"
	fi
	if $dryrun; then
		echo ' <dry-run: no action taken>'
	else
		if ! $quiet; then
			echo
		fi
		# remember the failure but keep going with the other snapshots
		zfs destroy "$snapshot" || code=1
	fi
done <<< "$snapshots"
exit "$code"
 | 
			
		||||
							
								
								
									
										131
									
								
								scripts/zfs_health.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										131
									
								
								scripts/zfs_health.sh
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,131 @@
 | 
			
		||||
#! /bin/sh
#
# Calomel.org
#     https://calomel.org/zfs_health_check_script.html
#     FreeBSD ZFS Health Check script
#     zfs_health.sh @ Version 0.18

# Check health of ZFS volumes and drives. On any faults send email.
#
# The checks run in order (health, capacity, drive errors, scrub age) and
# each later check is skipped once an earlier one has found a problem, so
# the email subject always names the first problem found.


# 99 problems but ZFS aint one
problems=0


# Health - Check if all zfs volumes are in good condition. We are looking for
# any keyword signifying a degraded or broken array.

condition=$(/sbin/zpool status | grep -E -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)')
if [ "${condition}" ]; then
        emailSubject="$(hostname) - ZFS pool - HEALTH fault"
        problems=1
fi


# Capacity - Make sure the pool capacity is below 80% for best performance. The
# percentage really depends on how large your volume is. If you have a 128GB
# SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you can
# probably set the warning closer to 95%.
#
# ZFS uses a copy-on-write scheme. The file system writes new data to
# sequential free blocks first and when the uberblock has been updated the new
# inode pointers become valid. This method is true only when the pool has
# enough free sequential blocks. If the pool is at capacity and space limited,
# ZFS will have to randomly write blocks. This means ZFS can not create an
# optimal set of sequential writes and write performance is severely impacted.

maxCapacity=80

if [ "${problems}" -eq 0 ]; then
   capacity=$(/sbin/zpool list -H -o capacity | cut -d'%' -f1)
   for line in ${capacity}
     do
       if [ "$line" -ge "$maxCapacity" ]; then
         emailSubject="$(hostname) - ZFS pool - Capacity Exceeded"
         problems=1
       fi
     done
fi


# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported an email will be sent out. You should then look to replace the
# faulty drive and run "zpool scrub" on the affected volume after resilvering.
#
# The three counters are concatenated and compared against exactly "000";
# a substring match would wrongly treat counts such as 1000/0/0 as clean.

if [ "${problems}" -eq 0 ]; then
   errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '($3 $4 $5) != "000" {print $3 $4 $5}')
   if [ "${errors}" ]; then
        emailSubject="$(hostname) - ZFS pool - Drive Errors"
        problems=1
   fi
fi


# Scrub Expired - Check if all volumes have been scrubbed in at least the last
# 40 days. The general guide is to scrub volumes on desktop quality drives once
# a week and volumes on enterprise class drives once a month. You can always
# use cron to schedule "zpool scrub" in off hours. We scrub our volumes every
# Sunday morning for example.
#
# Scrubbing traverses all the data in the pool once and verifies all blocks can
# be read. Scrubbing proceeds as fast as the devices allows, though the
# priority of any I/O remains below that of normal calls. This operation might
# negatively impact performance, but the file system will remain usable and
# responsive while scrubbing occurs. To initiate an explicit scrub, use the
# "zpool scrub" command.
#
# The scrubExpire variable is in seconds. So for 40 days we calculate 40 days
# times 24 hours times 3600 seconds to equal 3456000 seconds.

# 10 days
#scrubExpire=864000
# 40 days
scrubExpire=3456000

if [ "${problems}" -eq 0 ]; then
   currentDate=$(date +%s)
   zfsVolumes=$(/sbin/zpool list -H -o name)

  for volume in ${zfsVolumes}
   do
    # query the pool once and reuse the output for all checks below
    volumeStatus=$(/sbin/zpool status "$volume")

    # A pool that cannot be evaluated is skipped with "continue" so that the
    # remaining pools are still checked.
    if [ "$(printf '%s\n' "$volumeStatus" | grep -E -c "none requested")" -ge 1 ]; then
        printf 'ERROR: You need to run "zpool scrub %s" before this script can monitor the scrub expiration time.\n' "$volume"
        continue
    fi
    if [ "$(printf '%s\n' "$volumeStatus" | grep -E -c "scrub in progress|resilver")" -ge 1 ]; then
        continue
    fi

    ### Ubuntu with GNU supported date format
    #scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $11" "$12" " $13" " $14" "$15}')
    #scrubDate=$(date -d "$scrubRawDate" +%s)

    ### FreeBSD 11.2 with *nix supported date format
    #scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $15 $12 $13}')
    #scrubDate=$(date -j -f '%Y%b%e-%H%M%S' $scrubRawDate'-000000' +%s)

    ### FreeBSD 12.0 with *nix supported date format
    scrubRawDate=$(printf '%s\n' "$volumeStatus" | grep scrub | awk '{print $17 $14 $15}')
    scrubDate=$(date -j -f '%Y%b%e-%H%M%S' "${scrubRawDate}-000000" +%s)

    # without a parsed date the age arithmetic below would be a syntax error
    if [ -z "$scrubDate" ]; then
        printf 'ERROR: could not determine the last scrub date of "%s".\n' "$volume" >&2
        continue
    fi

     if [ $(($currentDate - $scrubDate)) -ge "$scrubExpire" ]; then
        emailSubject="$(hostname) - ZFS pool - Scrub Time Expired. Scrub Needed on Volume(s)"
        problems=1
     fi
   done
fi


# Email - On any problems send email with drive status information and
# capacities including a helpful subject line. Also use logger to write the
# email subject to the local logs. This is also the place you may want to put
# any other notifications like playing a sound file, beeping the internal
# speaker, paging someone or updating Nagios or even BigBrother.

if [ "$problems" -ne 0 ]; then
  printf '%s\n' "$emailSubject" "" "$(/sbin/zpool list)" "" "$(/sbin/zpool status)" | /usr/bin/mail -s "$emailSubject" jail-root@ahlawat.com
  logger "$emailSubject"
fi

### EOF ###
 | 
			
		||||
		Reference in New Issue
	
	Block a user