Initial commit.

author Andre Noll <maan@tuebingen.mpg.de>

Sun, 14 Jan 2024 22:55:26 +0000 (23:55 +0100)

committer Andre Noll <maan@tuebingen.mpg.de>

Sun, 14 Jan 2024 22:56:01 +0000 (23:56 +0100)
author Andre Noll <maan@tuebingen.mpg.de>
Sun, 14 Jan 2024 22:55:26 +0000 (23:55 +0100)
committer Andre Noll <maan@tuebingen.mpg.de>
Sun, 14 Jan 2024 22:56:01 +0000 (23:56 +0100)
diff --git a/.gitignore b/.gitignore

new file mode 100644 (file)

index 0000000..7a9c605
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+misma
+misma.8
+build
+*.swp
+Makefile.local
diff --git a/Makefile b/Makefile

new file mode 100644 (file)

index 0000000..32b2c10
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,261 @@
+# SPDX-License-Identifier: GPL-2.0+
+.SUFFIXES:
+MAKEFLAGS += -Rr
+# Only replace make's built-in default for CC; a value from the environment
+# or the command line is left alone.
+ifeq ("$(origin CC)", "default")
+        CC := cc
+endif
+# SAY is used as $(call SAY, <text>) on the first line of a recipe. If V was
+# given on the command line it expands to nothing, otherwise to a silenced
+# echo of the stripped text. Since .ONESHELL is in effect, the @ prefix on
+# the first recipe line applies to the whole recipe.
+ifeq ("$(origin V)", "command line")
+       SAY =
+else
+       SAY = @echo '$(strip $(1))'
+endif
+
+# Run every recipe in a single shell so that "cd" carries over to the
+# following recipe lines, and let that shell stop at the first failing
+# command (-e).
+.ONESHELL:
+.SHELLFLAGS := -ec
+# Remove the target when its recipe fails. Several recipes redirect their
+# output into $@, which would otherwise leave a truncated file behind that
+# looks up to date on the next run.
+.DELETE_ON_ERROR:
+PREFIX ?= /usr/local
+INSTALL ?= install
+MKDIR_P := mkdir -p
+RM := rm -f
+CHMOD := chmod
+GROFF := groff
+B := build
+all := misma misma.8
+all: $(all)
+
+PACKAGE := misma
+SLOGAN := the minimal snapshot manager
+AUTHOR := Andre Noll
+EMAIL := maan@tuebingen.mpg.de
+COPYRIGHT_YEAR := 2024
+URL := http://people.tuebingen.mpg.de/maan/$(PACKAGE)/
+CLONE_URL := git://git.tuebingen.mpg.de/$(PACKAGE)
+GITWEB_URL := http://git.tuebingen.mpg.de/$(PACKAGE).git
+HOME_URL := http://people.tuebingen.mpg.de/maan/
+LICENSE := GPL-2.0+
+LICENSE_URL := https://www.gnu.org/licenses/gpl-3.0-standalone.html
+LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG
+
+units := misma util version misma.lsg
+deps := $(addprefix $(B)/, $(addsuffix .d, $(units)))
+objs := $(addprefix $(B)/, $(addsuffix .o, $(units)))
+
+# Do not pull in the generated dependency files and the configure output if
+# one of the goals contains "clean" or "README". The leading dash makes it OK
+# for the files to be missing.
+ifeq ($(findstring clean, $(MAKECMDGOALS)),)
+ifeq ($(findstring README, $(MAKECMDGOALS)),)
+-include $(deps)
+-include $(B)/config.mak
+endif
+endif
+
+XCPPFLAGS :=
+XCPPFLAGS += -I$(B)
+XCPPFLAGS += -Wunused-macros
+XCPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"'
+XCPPFLAGS += -DPACKAGE='"$(PACKAGE)"'
+XCPPFLAGS += -DAUTHOR='"$(AUTHOR)"'
+XCPPFLAGS += -DEMAIL='"$(EMAIL)"'
+XCPPFLAGS += -DURL='"$(URL)"'
+XCPPFLAGS += -DCLONE_URL='"$(CLONE_URL)"'
+XCPPFLAGS += -DGITWEB_URL='"$(GITWEB_URL)"'
+XCPPFLAGS += -DHOME_URL='"$(HOME_URL)"'
+XCPPFLAGS += -DGET_VERSION='$(PACKAGE)_version'
+XCPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)'
+XCPPFLAGS += -DBUILD_DATE='"$(build_date)"'
+XCPPFLAGS += -DCC_VERSION='"$(cc_version)"'
+XCPPFLAGS += -DUNAME_RS='"$(uname_rs)"'
+XCPPFLAGS += -DLICENSE='"$(LICENSE)"'
+XCPPFLAGS += -DLICENSE_URL='"$(LICENSE_URL)"'
+
+XCFLAGS :=
+XCFLAGS += -fno-strict-aliasing
+XCFLAGS += -g
+XCFLAGS += -Os
+XCFLAGS += -Wundef -W -Wuninitialized
+XCFLAGS += -Wchar-subscripts
+XCFLAGS += -Werror-implicit-function-declaration
+XCFLAGS += -Wmissing-noreturn
+XCFLAGS += -Wbad-function-cast
+XCFLAGS += -Wredundant-decls
+XCFLAGS += -Wno-sign-compare -Wno-unknown-pragmas
+XCFLAGS += -Wdeclaration-after-statement
+XCFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute
+XCFLAGS += -fdata-sections -ffunction-sections
+XCFLAGS += -Wstrict-prototypes
+XCFLAGS += -Wshadow
+XCFLAGS += -Wunused -Wall
+XCFLAGS += -Wformat-signedness
+XCFLAGS += -Wdiscarded-qualifiers
+
+XLDFLAGS := -Wl,--gc-sections
+version_file := $(B)/version.c
+# Evaluated once while the Makefile is parsed: create $(B) if necessary, run
+# version-gen.sh with the package name and the path of the version file, and
+# store whatever the script prints in GIT_VERSION.
+GIT_VERSION := $(shell $(MKDIR_P) $(B) && ./version-gen.sh $(PACKAGE) $(version_file))
+
+# Compiler invocation shared by the object rules, which append the source
+# file. -MMD -MF writes the header dependencies of the object to
+# $(B)/<stem>.d, -MT names the object as the target in that file.
+CC_CMD = $(CC) -c -o $@ $(XCPPFLAGS) $(CPPFLAGS) \
+       $(XCFLAGS) $(CFLAGS) -MMD -MF $(B)/$(*F).d -MT $@
+
+$(objs): misma.h $(B)/misma.lsg.h Makefile
+
+# The build directory. Used as an order-only prerequisite below.
+$(B):
+       @$(MKDIR_P) $@
+
+# The recipes below change into $(B) first. This relies on .ONESHELL: all
+# lines of a recipe are run by the same shell.
+
+# Create the config.h template from configure.ac.
+$(B)/config.h.in: configure.ac | $(B)
+       $(call SAY, AH $<)
+       cd $(B)
+       autoheader -f ../configure.ac
+# Create the executable configure script from configure.ac.
+$(B)/configure.sh: configure.ac | $(B)
+       $(call SAY, AC $<)
+       cd $(B)
+       autoconf ../configure.ac > configure.sh
+       $(CHMOD) 755 configure.sh
+# Re-check an existing configuration, or run the configure script without
+# creating the output files if config.status is not there yet.
+$(B)/config.status: $(B)/configure.sh | $(B)
+       $(call SAY, SH $<)
+       cd $(B)
+       if test -x config.status; then \
+               ./config.status --quiet --recheck; \
+       else \
+               ./configure.sh --no-create; \
+       fi
+# Let config.status create config.mak and config.h. The template for
+# config.mak is hard-linked into $(B) first. The recipe fails if config.h
+# does not exist afterwards, otherwise its timestamp is updated.
+$(B)/config.mak $(B)/config.h: $(B)/config.status config.mak.in $(B)/config.h.in
+       $(call SAY, CS $@)
+       cd $(B)
+       ln -f ../config.mak.in
+       ./config.status -q
+       test -f config.h && touch config.h
+
+define DESCRIPTION1 :=
+       PACKAGE() is an open source application which maintains snapshots of one
+       or more thin provisioned logical volumes on Linux systems.
+endef
+
+define DESCRIPTION2 :=
+       Snapshots are created and removed automatically according to the
+       configured schedule. Old snapshots are replaced so that the time
+       between two consecutive snapshots doubles at each step. To prevent data
+       or metadata space exhaustion, the available space of the underlying
+       thin pools is monitored periodically and snapshots are removed early
+       when space gets tight.
+endef
+
+define DESCRIPTION3 :=
+       Besides the run subcommand which implements snapshot scheduling and
+       free space monitoring, PACKAGE() supports additional subcommands
+       to list existing snapshots and the utilization of the thin pools,
+       or to create/remove snapshots manually.
+endef
+
+define M4_CMD =
+       $(call SAY, M4 $<)
+       $(M4) -D "AUTHOR=$(AUTHOR)" -D "COPYRIGHT_YEAR=$(COPYRIGHT_YEAR)" \
+               -D "PACKAGE=$(PACKAGE)" \
+               -D "SLOGAN=$(SLOGAN)" \
+               -D "EMAIL=$(EMAIL)" \
+               -D "URL=$(URL)" \
+               -D "CLONE_URL=$(CLONE_URL)" \
+               -D "GITWEB_URL=$(GITWEB_URL)" \
+               -D "HOME_URL=$(HOME_URL)" \
+               -D "LICENSE=$(LICENSE)" \
+               -D "LICENSE_URL=$(LICENSE_URL)" \
+               -D "DESCRIPTION1=$(DESCRIPTION1)" \
+               -D "DESCRIPTION2=$(DESCRIPTION2)" \
+               -D "DESCRIPTION3=$(DESCRIPTION3)"
+endef
+
+# Files generated by m4. M4_CMD defines the package metadata as m4 macros;
+# additional options and the input file are appended here.
+$(B)/logo.svg: index.html.m4 Makefile
+       $(M4_CMD) -D MODE=svg $< > $@
+$(B)/index.html: index.html.m4 Makefile
+       $(M4_CMD) $< > $@
+$(B)/misma.suite: misma.suite.m4 Makefile
+       $(M4_CMD) $< > $@
+# lopsubgen reads the suite from stdin and writes the C source or the
+# header to the given output directory.
+$(B)/%.lsg.c: $(B)/%.suite
+       $(call SAY, LSGC $<)
+       $(LOPSUBGEN) --gen-c --output-dir $(B) < $<
+$(B)/%.lsg.h: $(B)/%.suite
+       $(call SAY, LSGH $<)
+       $(LOPSUBGEN) --gen-header --output-dir $(B) < $<
+# The man page is created in the top level directory and carries the
+# version string computed at parse time.
+%.8: $(B)/%.suite $(B)/version.c
+       $(call SAY, LSGM $<)
+       $(LOPSUBGEN) --gen-man=$(*F).8 --version-string $(GIT_VERSION) < $<
+$(B)/%.8.html: %.8
+       $(GROFF) -m man -Thtml -Wbreak < $< > $@
+
+# Objects are built from sources in the top level directory and from
+# generated sources in $(B).
+$(B)/%.o: %.c | $(B)
+       $(call SAY, CC $<)
+       $(CC_CMD) $<
+$(B)/%.o: $(B)/%.c
+       $(call SAY, CC $<)
+       $(CC_CMD) $<
+# Dynamically linked executable in the top level directory.
+$(PACKAGE): $(objs)
+       $(call SAY, LD $@)
+       $(CC) -o $@ $^ $(XLDFLAGS) $(LDFLAGS) -llopsub -lm
+# Statically linked variant of the same objects, stored in $(B).
+$(B)/$(PACKAGE): $(objs)
+       $(call SAY, LD-STATIC $@)
+       $(CC) -static -o $@ $^ $(XLDFLAGS) $(LDFLAGS) -llopsub -lm
+
+# Installation. sbindir and datarootdir are set in $(B)/config.mak, which is
+# created from config.mak.in. INSTALL is defined near the top of this file,
+# so it is not repeated here.
+mandir := $(datarootdir)/man/man8
+INSTALL_PROGRAM ?= $(INSTALL) -m 755
+INSTALL_DATA ?= $(INSTALL) -m 644
+# Let install(1) strip the executable if one of the goals contains "strip".
+ifneq ($(findstring strip, $(MAKECMDGOALS)),)
+       strip_option := -s
+endif
+install install-strip: all
+       $(MKDIR_P) $(DESTDIR)$(sbindir) $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) $(strip_option) misma $(DESTDIR)$(sbindir)
+       $(INSTALL_DATA) misma.8 $(DESTDIR)$(mandir)
+
+# clean: remove the objects, the executable and the man page.
+clean:
+       $(RM) $(B)/*.o $(all)
+# distclean: additionally remove the whole build directory.
+distclean: clean
+       $(RM) -r $(B)
+# maintainer-clean: let git remove all untracked and ignored files and
+# directories.
+maintainer-clean:
+       git clean -dfqx > /dev/null 2>&1
+
+define README :=
+$(PACKAGE) -  $(SLOGAN)
+
+$(DESCRIPTION1)
+
+$(DESCRIPTION2)
+
+$(DESCRIPTION3)
+
+Resources
+~~~~~~~~~
+|      web page: $(URL)
+|      git clone URL: $(CLONE_URL)
+|      gitweb: $(GITWEB_URL)
+|      author's home page: $(HOME_URL)
+|      Send feedback to: $(AUTHOR) <$(EMAIL)>
+
+License
+~~~~~~~
+Open source, licensed under the $(LICENSE) license.
+
+Documentation
+~~~~~~~~~~~~~
+See misma.suite.m4. Or build the man page with \"make\" and run
+\"man -l misma.8\".
+
+Dependencies
+~~~~~~~~~~~~
+This package requires m4, autoconf, gnu make, gcc or clang, and
+lopsub. The configure script checks if all dependencies are installed
+and prints a meaningful error message if one of them is missing.
+
+Building
+~~~~~~~~
+Run \"make\" to build the package with the default settings. Run
+\"./configure -h\" to list configuration options.
+
+Installation
+~~~~~~~~~~~~
+Run \"sudo make install\" to install to /usr/local. To install to
+/somewhere/else, run \"./configure --prefix /somewhere/else && make\"
+first.
+endef
+
+README:
+       @printf '%s\n' "$(README)"
+
+.PRECIOUS: $(B)/%.lsg.c $(B)/%.lsg.h $(B)/%.8
+# None of these goals names a file. install-strip shares its recipe with
+# install and must be listed as well, or a file of that name would make the
+# goal appear up to date.
+.PHONY: all clean install install-strip distclean maintainer-clean README
+-include Makefile.local
diff --git a/README b/README

new file mode 100644 (file)

index 0000000..52a1fd7
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Run "make README".
diff --git a/config.mak.in b/config.mak.in

new file mode 100644 (file)

index 0000000..592d9e6
--- /dev/null
+++ b/config.mak.in
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+prefix := @prefix@
+exec_prefix := @exec_prefix@
+
+# These two use prefix and exec_prefix
+sbindir := @sbindir@
+datarootdir := @datarootdir@
+
+LOPSUBGEN := @LOPSUBGEN@
+M4 := @M4@
diff --git a/configure b/configure

new file mode 100755 (executable)

index 0000000..d59262d
--- /dev/null
+++ b/configure
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+
+set -e
+
+mkdir -p build
+cd build
+autoconf ../configure.ac > configure.sh
+chmod 755 configure.sh
+ln -f ../config.mak.in
+autoheader ../configure.ac
+sh configure.sh "$@"
diff --git a/configure.ac b/configure.ac

new file mode 100644 (file)

index 0000000..00e0f09
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+AC_PREREQ([2.61])
+# only for configure -h, see Makefile
+AC_INIT([software], [packages])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([config.mak])
+AC_USE_SYSTEM_EXTENSIONS
+AC_PROG_CC
+AC_PROG_CPP
+
+AC_DEFUN([LOPSUB_NOT_FOUND], [
+The lopsub library is required to build this software, but the checks
+indicate it is not installed on your system.  Run the following
+command to download a copy.
+       git clone git://git.tuebingen.mpg.de/lopsub.git
+Install the library, then run this configure script again.
+
+If you installed lopsub at a non-standard location, make sure to set
+PATH, CPPFLAGS and LDFLAGS accordingly. For example:
+
+       pfx=/prefix/where/lopsub/is/installed
+       export PATH=\$pfx/bin:\$PATH
+       export CPPFLAGS=-I\$pfx/include
+       export LDFLAGS=-L\$pfx/lib
+])
+
+# REQUIRE_EXECUTABLE(program, message): look up the program in PATH and store
+# its full path in the output variable named like the program in upper case.
+# Abort with the given message if the program was not found.
+AC_DEFUN([REQUIRE_EXECUTABLE], [
+       AC_PATH_PROG(m4_toupper([$1]), [$1])
+       test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$2])
+])
+REQUIRE_EXECUTABLE([lopsubgen], [LOPSUB_NOT_FOUND])
+REQUIRE_EXECUTABLE([m4], [m4 is required to build this package])
+
+HAVE_LOPSUB=yes
+AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no])
+AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no])
+if test $HAVE_LOPSUB = no; then AC_MSG_ERROR([LOPSUB_NOT_FOUND()]); fi
+AC_OUTPUT
diff --git a/index.html.m4 b/index.html.m4

new file mode 100644 (file)

index 0000000..47e8c79
--- /dev/null
+++ b/index.html.m4
@@ -0,0 +1,113 @@
+dnl SPDX-License-Identifier: GPL-2.0+
+define(`SVG', `dnl
+<svg xmlns="http://www.w3.org/2000/svg" height="50" width="100">
+       <path stroke-width="3" stroke="black" fill="none"
+               d="
+                       M 5 23
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-12 l 6,0 l 0,12
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-18 l 6,0 l 0,18
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+                       l 0,-12 l 6,0 l 0,12
+                       l 6,0
+                       l 0,-6 l 6,0 l 0,6
+                       l 6,0
+               "
+       />
+       <path stroke-width="4" stroke="blue" fill="none"
+               d="
+                       M 10 46
+                       l 0,-15 l 6,0 l 0,10 l 6,0 l 0,-10 l 6,0 l 0,15
+                       m 6,0
+                       l 0,-13 l 0,-4 m 0,15
+                       m 5,0
+                       l 9,0 l 0,-6 l -8,0 l 0,-7 l 9,0 m 0,15
+                       m 5,0
+                       l 0,-15 l 6,0 l 0,10 l 6,0 l 0,-10 l 6,0 l 0,15
+                       m 6,-2
+                       l 0,-13 l 10,0 l 0,13 l -12,0 m 12,0 l 4,0
+               "
+       />
+</svg>
+')dnl
+ifelse(MODE(), `svg', `SVG() m4exit')
+
+<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
+'http://www.w3.org/TR/html4/loose.dtd'>
+
+<html>
+       <head>
+               <meta
+                       http-equiv='Content-Type'
+                       content='text/html; charset=utf-8'
+               >
+               <title>PACKAGE()</title>
+               <style type='text/css'>
+                       body {
+                               text-align: justify;
+                               padding: 0px 30px 0px 30px;
+                               font-size: 130%;
+                       }
+                       a {
+                               color: #01c;
+                       }
+                       pre,code {
+                               font-size: 110%;
+                       }
+               </style>
+       </head>
+       <body>
+               <table width="100%">
+                       <tr>
+                               <td>
+                                       <h2 align="left">
+                                                PACKAGE() - SLOGAN()
+                                       </h2>
+                               </td>
+                               <td align="right"> SVG() </td>
+                       </tr>
+               </table>
+               <p> DESCRIPTION1() </p>
+               <p> DESCRIPTION2() </p>
+               <p> DESCRIPTION3() </p>
+
+               <h3> Installation </h3>
+
+               <p> PACKAGE() is easy to install and easy to configure. To build from
+               source, a number of dependencies must be installed. The following
+               should work on Debian/Ubuntu: </p>
+
+               <pre>
+       sudo apt-get install gcc git autoconf m4 make liblopsub-dev
+       git clone CLONE_URL()
+       cd misma
+       ./configure &amp;&amp; make &amp;&amp; sudo make install
+               </pre>
+
+               <p> Alternatively, download this pre-compiled <a
+               href="PACKAGE()">static binary</a> for x86, which should work fine
+               on all Linux distributions. </p>
+
+               Run <code>PACKAGE() help</code> to display the subcommands and
+               <code>man PACKAGE()</code> for the manual page. The examples included
+               in the manual illustrate how to create thin logical volumes and how
+               to snapshot them with PACKAGE().
+
+               <h3> Resources </h3> <ul>
+                       <li> Clone `URL': <code>CLONE_URL()</code> </li>
+                       <li> <a href="GITWEB_URL()">Gitweb</a> </li>
+                       <li> <a href="PACKAGE().8.html">manual page</a> </li>
+                       <li> The author's <a href="HOME_URL()">home page</a> </li>
+                       <li> Send feedback to <a href="mailto:EMAIL()">AUTHOR()</a> </li>
+               </ul>
+
+       </body>
+</html>
diff --git a/misma.c b/misma.c

new file mode 100644 (file)

index 0000000..fc9ab3d
--- /dev/null
+++ b/misma.c
@@ -0,0 +1,1543 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include "misma.h"
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <lopsub.h>
+#include <sys/mman.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/ioctl.h>
+
+#include "misma.lsg.h"
+
+/* The kinds of time intervals. Used as index into the interval[] array. */
+enum interval_type {
+       IT_CREATE,
+       IT_TRIM,
+       IT_MAX_AGE,
+       NUM_INTERVAL_TYPES
+};
+
+/*
+ * Settings which exist once globally and once per volume group, per thin
+ * pool and per origin.
+ */
+struct snapshot_config {
+       struct percentage_pair thresholds;
+       /* in seconds, see set_interval() */
+       unsigned interval[NUM_INTERVAL_TYPES];
+};
+/*
+ * The global settings: thresholds of 95 for data and metadata, a create
+ * interval of six hours, a trim interval of zero and a maximal age of 365
+ * days.
+ */
+static struct snapshot_config global_config = {
+       .thresholds = {.data = 95, .meta = 95},
+       .interval = {
+               [IT_CREATE] = 6 * 3600,
+               [IT_TRIM] = 0,
+               [IT_MAX_AGE] = 86400 * 365
+       }
+};
+
+/* Used as index into the last_event[] array of struct origin. */
+enum event_type {ET_CREATE, ET_CHECK, ET_TRIM, NUM_EVENT_TYPES};
+
+/* A volume group is identified by its index into the volume_group array. */
+struct volume_group {
+       char *name; /* allocated copy, see insert_vg() */
+       struct snapshot_config config; /* settings at vg scope */
+};
+static unsigned num_vgs;
+static struct volume_group *volume_group; /* num_vgs elements */
+
+/* Map a volume group ID to the name of the volume group. No range check. */
+static const char *vgname(unsigned vgid)
+{
+       const struct volume_group *vg = volume_group + vgid;
+
+       return vg->name;
+}
+
+/*
+ * Find a volume group by name. The array is scanned linearly. Returns the ID
+ * of the volume group, or ~0U if no volume group of this name is known.
+ */
+static unsigned get_vgid(const char *name)
+{
+       unsigned vgid = 0;
+
+       while (vgid < num_vgs) {
+               if (strcmp(name, volume_group[vgid].name) == 0)
+                       return vgid;
+               vgid++;
+       }
+       return ~0U;
+}
+
+/*
+ * Register a volume group unless one of this name exists already. In either
+ * case the ID of the volume group is returned.
+ */
+static unsigned insert_vg(const char *name)
+{
+       unsigned vgid = get_vgid(name);
+       struct volume_group *new_vg;
+
+       if (vgid != ~0U)
+               return vgid;
+       vgid = num_vgs;
+       INFO_LOG("vg #%u: %s\n", vgid, name);
+       num_vgs = vgid + 1;
+       volume_group = xrealloc(volume_group,
+               num_vgs * sizeof(*volume_group));
+       /* the new element is the last one, start with all fields zero */
+       new_vg = &volume_group[vgid];
+       memset(new_vg, 0, sizeof(*new_vg));
+       new_vg->name = xstrdup(name);
+       return vgid;
+}
+
+/* A thin pool is identified by its index into the thin_pool array. */
+struct thin_pool {
+       char *name; /* allocated copy, see insert_pool() */
+       unsigned vgid; /* the volume group the pool belongs to */
+       struct snapshot_config config; /* settings at pool scope */
+       struct percentage_pair utilization;
+       enum lvm_scope threshold_scope;
+};
+static unsigned num_pools;
+static struct thin_pool *thin_pool; /* num_pools elements */
+
+/*
+ * Find a thin pool by pool name and name of its volume group. Returns the
+ * pool ID, or ~0U if there is no such pool.
+ */
+static unsigned get_poolid(const char *name, const char *vg_name)
+{
+       unsigned poolid;
+
+       for (poolid = 0; poolid < num_pools; poolid++) {
+               const struct thin_pool *tp = &thin_pool[poolid];
+
+               if (strcmp(name, tp->name) != 0)
+                       continue;
+               if (strcmp(vg_name, vgname(tp->vgid)) != 0)
+                       continue;
+               return poolid;
+       }
+       return ~0U;
+}
+
+/*
+ * Register a thin pool unless it exists already and return its ID. The
+ * volume group of the pool must have been inserted before. If it is
+ * unknown, the function does not return.
+ */
+static unsigned insert_pool(const char *name, const char *vgname)
+{
+       unsigned poolid = get_poolid(name, vgname);
+       struct thin_pool *tp;
+
+       if (poolid != ~0U)
+               return poolid;
+       poolid = num_pools;
+       INFO_LOG("pool #%u: %s/%s\n", poolid, vgname, name);
+       num_pools = poolid + 1;
+       thin_pool = xrealloc(thin_pool, num_pools * sizeof(*thin_pool));
+       /* initialize the new last element */
+       tp = &thin_pool[poolid];
+       memset(tp, 0, sizeof(*tp));
+       tp->name = xstrdup(name);
+       tp->vgid = get_vgid(vgname);
+       if (tp->vgid == ~0U)
+               die("invalid vg: %s", vgname);
+       return poolid;
+}
+
+/* One slot of an origin. */
+struct snapshot {
+       unsigned seq; /* zero means: slot is unused, see slot_is_used() */
+       uint64_t epoch; /* presumably the creation time -- TODO confirm */
+};
+
+/* An origin is identified by its index into the origin array. */
+struct origin {
+       char *name; /* allocated copy, see insert_origin() */
+       unsigned vgid; /* index into volume_group[] */
+       unsigned poolid; /* index into thin_pool[] */
+       struct snapshot_config config; /* settings at origin scope */
+       enum lvm_scope iscope[NUM_INTERVAL_TYPES]; /* interval scopes */
+       uint64_t last_event[NUM_EVENT_TYPES]; /* epochs */
+       unsigned last_seq;
+       unsigned num_slots; /* number of elements of the snapshot array */
+       struct snapshot *snapshot;
+};
+static unsigned num_origins;
+static struct origin *origin; /* num_origins elements */
+/* Iterate over all origin IDs. The caller provides the loop variable. */
+#define FOR_EACH_ORIGIN(_n) for (_n = 0; _n < num_origins; _n++)
+
+static unsigned check_seconds = 60;
+
+/*
+ * Return the length of the given interval for an origin. The interval scope
+ * of the origin tells which of the four configurations applies.
+ */
+static unsigned interval_length(enum interval_type it, const struct origin *o)
+{
+       const struct snapshot_config *cfg;
+
+       switch (o->iscope[it]) {
+       case LS_GLOBAL:
+               cfg = &global_config;
+               break;
+       case LS_VG:
+               cfg = &volume_group[o->vgid].config;
+               break;
+       case LS_POOL:
+               cfg = &thin_pool[o->poolid].config;
+               break;
+       case LS_ORIGIN:
+               cfg = &o->config;
+               break;
+       default:
+               assert(0);
+       }
+       return cfg->interval[it];
+}
+
+/*
+ * Find an origin by its name and the name of its volume group. Returns the
+ * origin ID, or ~0U if there is no such origin.
+ */
+static unsigned get_oid(const char *name, const char *vg_name)
+{
+       unsigned oid;
+
+       FOR_EACH_ORIGIN(oid) {
+               const struct origin *o = &origin[oid];
+
+               if (strcmp(name, o->name) || strcmp(vg_name, vgname(o->vgid)))
+                       continue;
+               return oid;
+       }
+       return ~0U;
+}
+
+/*
+ * Append a new origin and return its ID. The origin must not exist yet, and
+ * both its volume group and its pool must have been inserted already. All
+ * three conditions are checked by assertions only.
+ */
+static unsigned insert_origin(const char *name, const char *vgname,
+               const char *poolname)
+{
+       unsigned existing = get_oid(name, vgname);
+       struct origin *new_origin;
+
+       assert(existing == ~0U);
+       INFO_LOG("origin #%u: %s/%s, pool: %s\n", num_origins, vgname, name,
+               poolname);
+       origin = xrealloc(origin, ++num_origins * sizeof(*origin));
+       /* initialize the new last element */
+       new_origin = &origin[num_origins - 1];
+       memset(new_origin, 0, sizeof(*new_origin));
+       new_origin->name = xstrdup(name);
+       new_origin->vgid = get_vgid(vgname);
+       assert(new_origin->vgid != ~0U);
+       new_origin->poolid = get_poolid(poolname, vgname);
+       assert(new_origin->poolid != ~0U);
+       return num_origins - 1;
+}
+
+/* Something that happens at a certain time for a certain origin. */
+struct event {
+       enum event_type type;
+       uint64_t epoch;
+       struct origin *origin;
+};
+
+/*
+ * Comparison function for arrays of events. Events with larger epochs
+ * compare smaller, so sorting yields decreasing epochs.
+ */
+static int event_compare(const void *d1, const void *d2)
+{
+       const struct event *ev1 = d1;
+       const struct event *ev2 = d2;
+
+       if (ev1->epoch == ev2->epoch)
+               return 0;
+       return ev1->epoch < ev2->epoch? 1 : -1;
+}
+
+static char *config_file;
+
+/*
+ * Iterate over the slot numbers of an origin from the highest to zero. The
+ * macro declares the unsigned loop variable. The loop ends when the variable
+ * wraps around to -1U, so no iteration takes place if there are no slots.
+ */
+#define FOR_EACH_SLOT_REVERSE(_j, _o) for ( \
+       unsigned _j = _o->num_slots - 1; _j != -1U; _j--)
+
+/* Messages below this level are not printed, see misma_log(). */
+static unsigned loglevel_arg_val = LL_WARNING;
+
+/* lopsub */
+static const struct lls_command *subcmd;
+static struct lls_parse_result *lpr, *sublpr;
+/* The lopsub command structure of the given command of the misma suite. */
+#define CMD_PTR(_cname) lls_cmd(LSG_MISMA_CMD_ ## _cname, misma_suite)
+/*
+ * The parse result of an option. Options of the MISMA command are looked up
+ * in lpr, options of all other commands in sublpr.
+ */
+#define OPT_RESULT(_cname, _oname) (lls_opt_result(\
+       LSG_MISMA_ ## _cname ## _OPT_ ## _oname, \
+       (CMD_PTR(_cname) == CMD_PTR(MISMA))? lpr : sublpr))
+/* How many times the option was given. */
+#define OPT_GIVEN(_cname, _oname) (lls_opt_given(OPT_RESULT(_cname, _oname)))
+/* The first value of an option which takes an unsigned 32 bit integer. */
+#define OPT_UINT32_VAL(_cname, _oname) (lls_uint32_val(0, \
+               OPT_RESULT(_cname, _oname)))
+/* The value with index _n of an option which takes a string. */
+#define OPT_STRING_VAL_N(_n, _cname, _oname) (lls_string_val(_n, \
+       OPT_RESULT(_cname, _oname)))
+/* Like OPT_STRING_VAL_N(), for the first value. */
+#define OPT_STRING_VAL(_cname, _oname) (OPT_STRING_VAL_N(0, _cname, _oname))
+
+/* Each subcommand has a handler which takes no arguments. */
+struct misma_user_data {bool (*handler)(void);};
+/*
+ * Define the user data structure of a subcommand such that its handler
+ * points to the function com_<cmd>().
+ */
+#define EXPORT_CMD_HANDLER(_cmd) const struct misma_user_data \
+       lsg_misma_com_ ## _cmd ## _user_data = { \
+               .handler = com_ ## _cmd \
+       };
+
+/*
+ * Write a message to stderr unless its severity is below the configured log
+ * level. If the run subcommand is executing, the message is prefixed with a
+ * time stamp. The time stamp is formatted into a buffer on the stack.
+ */
+void misma_log(int ll, const char* fmt,...)
+{
+       va_list ap;
+
+       if (ll < loglevel_arg_val)
+               return;
+       if (subcmd == CMD_PTR(RUN)) {
+               char stamp[255] = "";
+               time_t now;
+
+               time(&now);
+               strftime(stamp, sizeof(stamp), "%b %d %H:%M:%S",
+                       localtime(&now));
+               fprintf(stderr, "%s ", stamp);
+       }
+       va_start(ap, fmt);
+       vfprintf(stderr, fmt, ap);
+       va_end(ap);
+}
+/* Shell command to run on fatal errors, NULL if there is none. */
+static const char *exit_hook;
+
+/*
+ * Terminate with a failure status. If an exit hook is set, it is first run
+ * through /bin/sh -c with the given string appended in single quotes.
+ */
+__attribute__ ((noreturn))
+static void run_exit_hook_and_die(const char *str)
+{
+       const char *hook = exit_hook;
+
+       if (hook) {
+               char *argv[] = {"/bin/sh", "-c", NULL, NULL};
+               /*
+                * Clear the hook before it runs. If a helper fails and calls
+                * die() or die_errno(), we would end up here again and
+                * recurse until the stack is exhausted.
+                */
+               exit_hook = NULL;
+               argv[2] = msg("%s '%s'", hook, str);
+               xexec(argv, NULL);
+       }
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Log the formatted message with emergency severity, run the exit hook with
+ * the message as its argument and terminate. Does not return.
+ */
+void die(const char *fmt, ...)
+{
+       va_list ap;
+       char *text;
+       int len;
+
+       va_start(ap, fmt);
+       len = vasprintf(&text, fmt, ap);
+       va_end(ap);
+       if (len >= 0) {
+               misma_log(LL_EMERG, "%s\n", text);
+               run_exit_hook_and_die(text);
+       }
+       /* could not format the message: give up without running the hook */
+       EMERG_LOG("OOM\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Like die(), but the logged message also contains the description of the
+ * value errno had on entry. The exit hook receives the message without that
+ * description.
+ */
+void die_errno(const char *fmt, ...)
+{
+       const int err = errno; /* the calls below might change errno */
+       va_list ap;
+       char *text;
+       int len;
+
+       va_start(ap, fmt);
+       len = vasprintf(&text, fmt, ap);
+       va_end(ap);
+       if (len >= 0) {
+               misma_log(LL_EMERG, "%s: %s\n", text, strerror(err));
+               run_exit_hook_and_die(text);
+       }
+       /* could not format the message: give up without running the hook */
+       EMERG_LOG("OOM\n");
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Find first zero: return the index of the least significant bit of v which
+ * is not set. At least one bit of v must be zero.
+ */
+__attribute__ ((const))
+static uint32_t ffz(uint32_t v)
+{
+       uint32_t width, pos = 0;
+
+       assert(v != (uint32_t)-1);
+       /* binary search: skip the lower bits whenever all of them are set */
+       for (width = 16; width > 0; width /= 2) {
+               const uint32_t ones = ((uint32_t)1 << width) - 1;
+
+               if ((v & ones) != ones)
+                       continue;
+               pos += width;
+               v >>= width;
+       }
+       return pos;
+}
+
+/* A slot is in use if and only if its sequence number is non-zero. */
+static bool slot_is_used(unsigned slot, const struct origin *o)
+{
+       const struct snapshot *snap = o->snapshot + slot;
+
+       return snap->seq > 0;
+}
+
+/* Reset the sequence number of the slot to zero, which means "unused". */
+static void mark_slot_unused(unsigned slot, struct origin *o)
+{
+       struct snapshot *snap = o->snapshot + slot;
+
+       snap->seq = 0;
+}
+
+/*
+ * Return the slot to use for the snapshot with the given sequence number.
+ *
+ * This is the highest numbered unused slot if there is one. If all slots
+ * are used, the slot is the index of the lowest zero bit of the sequence
+ * number modulo 2^num_slots - 1.
+ *
+ * NOTE(review): this assumes 0 < num_slots < 32. Zero slots would lead to a
+ * division by zero below -- confirm that the callers rule this out.
+ */
+static unsigned get_slot(unsigned seq, const struct origin *o)
+{
+       unsigned mod;
+       FOR_EACH_SLOT_REVERSE(sl, o)
+               if (!slot_is_used(sl, o))
+                       return sl;
+       /*
+        * All slots used. Shift an unsigned one: with 31 slots, 1 << 31 does
+        * not fit into a signed int.
+        */
+       mod = (1U << o->num_slots) - 1;
+       return ffz(seq % mod);
+}
+
+/*
+ * Remove the snapshot stored in the given slot by running lvremove, and mark
+ * the slot unused on success. In dry-run mode only a message is printed and
+ * the slot is left alone.
+ *
+ * We pass --autobackup n because otherwise /etc/lvm/archive would fill up
+ * with lots of useless backup configurations.
+ *
+ * Returns true on success and in dry-run mode.
+ */
+static bool remove_snapshot(unsigned sl, struct origin *o, bool dry_run)
+{
+       char *lv = msg("%s/misma-%s.%u", vgname(o->vgid), o->name,
+               o->snapshot[sl].seq);
+       char *argv[] = {"lvremove", "--yes", "--quiet", "--quiet",
+               "--autobackup", "n", lv, NULL};
+       bool ok = true;
+
+       if (dry_run)
+               printf("dry-run: would remove snapshot %s\n", lv);
+       else {
+               NOTICE_LOG("removing snapshot %s\n", lv);
+               ok = xexec(argv, NULL);
+       }
+       free(lv);
+       if (ok && !dry_run)
+               mark_slot_unused(sl, o);
+       return ok;
+}
+
+/*
+ * Comparison function for sorting the slots of an origin, which is passed
+ * as the data pointer.
+ *
+ * Unused slots compare smaller than used slots, and used slots are ordered
+ * by decreasing sequence number. Two unused slots compare equal so that the
+ * result does not depend on the order of the arguments, as sorting
+ * functions require.
+ */
+static int slot_compare(const void *a, const void *b, void *data)
+{
+       const struct snapshot *s1 = a, *s2 = b;
+       struct origin *o = data;
+       bool used1 = slot_is_used(s1 - o->snapshot, o);
+       bool used2 = slot_is_used(s2 - o->snapshot, o);
+
+       if (!used1 || !used2) /* -1: only s1 unused, 1: only s2, 0: both */
+               return (int)used1 - (int)used2;
+       if (s1->seq < s2->seq)
+               return 1;
+       if (s1->seq > s2->seq)
+               return -1;
+       return 0;
+}
+
+/* Sort the snapshot array of the origin in place, see slot_compare(). */
+static void sort_slots(struct origin *o)
+{
+       const size_t width = sizeof(*o->snapshot);
+
+       qsort_r(o->snapshot, o->num_slots, width, slot_compare, o);
+}
+
+/*
+ * sleazy (adj.): 1640s, "downy, fuzzy," later "flimsy, unsubstantial" (1660s).
+ *
+ * A sleazy snapshot is one whose distance (with respect to creation time) to
+ * its sibling snapshots is minimal.
+ *
+ * The function sorts the slots, computes a score for each existing snapshot
+ * and removes the snapshot with the lowest score. If several snapshots share
+ * the lowest score, the one which comes first in the sorted array is chosen.
+ *
+ * Returns false if there are no snapshots or if the removal failed, true
+ * otherwise. In dry-run mode nothing is removed.
+ */
+static bool remove_sleazy_snapshot(struct origin *o, bool dry_run)
+{
+       unsigned sl, victim = 0;
+       uint64_t score = 0;
+       bool have_victim = false;
+       struct snapshot *prev = NULL, *next = NULL;
+
+       sort_slots(o);
+       /* the unused slots come first, skip them */
+       for (sl = 0; sl < o->num_slots; sl++)
+               if (slot_is_used(sl, o))
+                       break;
+       /* the remaining slots are ordered by decreasing sequence number */
+       for (; sl < o->num_slots; prev = o->snapshot + sl, sl++) {
+               uint64_t dist;
+               struct snapshot *s = o->snapshot + sl;
+
+               assert(slot_is_used(sl, o));
+               next = sl == o->num_slots - 1? NULL : s + 1;
+               /*
+                * A snapshot with two neighbors is scored by the distance
+                * between the neighbors. The first and the last snapshot
+                * have only one neighbor and are scored by ten times the
+                * distance to it.
+                */
+               if (!prev && !next)
+                       dist = 1;
+               else if (!prev)
+                       dist = 10 * (s->epoch - next->epoch);
+               else if (!next)
+                       dist = 10 * (prev->epoch - s->epoch);
+               else
+                       dist = prev->epoch - next->epoch;
+               DEBUG_LOG("seq %u, slot %u, epoch %" PRIu64 ", score %" PRIu64"\n",
+                       s->seq, sl, s->epoch, dist);
+               if (!have_victim || dist < score) {
+                       have_victim = true;
+                       victim = sl;
+                       score = dist;
+               }
+       }
+       if (!have_victim) {
+               INFO_LOG("no snapshots\n");
+               return false;
+       }
+       NOTICE_LOG("victim: seq %u, slot %u, score %" PRIu64 "\n",
+               o->snapshot[victim].seq, victim, score);
+       if (!remove_snapshot(victim, o, dry_run))
+               return false;
+       /* sort again, the slot of the victim may have become unused */
+       sort_slots(o);
+       return true;
+}
+
+/*
+ * Store the number of seconds of the time argument as the length of the
+ * given interval at the scope of the lvmspec of the argument.
+ *
+ * For the global scope this only updates the global configuration. For the
+ * other scopes the value is stored in the configuration of the volume
+ * group, pool or origin named by the lvmspec, and all matching origins which
+ * currently use a wider scope for this interval are switched to the new
+ * scope. The function does not return if the lvmspec refers to an unknown
+ * volume group, pool or origin.
+ */
+static void set_interval(enum interval_type it, const struct time_arg *ta)
+{
+       enum lvm_scope scope = ta->lvmspec.scope;
+       unsigned vgid, poolid, oid, n;
+
+       if (scope == LS_GLOBAL) {
+               NOTICE_LOG("default interval #%u: %u seconds\n", it,
+                       ta->seconds);
+               global_config.interval[it] = ta->seconds;
+               return;
+       }
+       /* all other scopes need a valid volume group */
+       vgid = get_vgid(ta->lvmspec.vg);
+       if (vgid == ~0U)
+               die("invalid vg in lvmspec: %s", ta->lvmspec.vg);
+       /* poolid and oid are only set (and used below) for their scope */
+       switch (scope) {
+       case LS_VG:
+               volume_group[vgid].config.interval[it] = ta->seconds;
+               break;
+       case LS_POOL:
+               poolid = get_poolid(ta->lvmspec.pool, vgname(vgid));
+               if (poolid == ~0U)
+                       die("invalid pool in lvmspec: %s", ta->lvmspec.pool);
+               thin_pool[poolid].config.interval[it] = ta->seconds;
+               break;
+       case LS_ORIGIN:
+               oid = get_oid(ta->lvmspec.tlv, vgname(vgid));
+               if (oid == ~0U)
+                       die("invalid tlv in lvmspec: %s", ta->lvmspec.tlv);
+               origin[oid].config.interval[it] = ta->seconds;
+               break;
+       default:
+               assert(0);
+       }
+       /*
+        * Narrow the scope of all matching origins for which it is currently
+        * set to a wider scope.
+        */
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               if (o->iscope[it] >= scope)
+                       continue; /* already set to more narrow scope */
+               /* skip the origins to which the lvmspec does not apply */
+               switch (scope) {
+               case LS_ORIGIN:
+                       if (n != oid)
+                               continue;
+                       break;
+               case LS_POOL:
+                       if (poolid != o->poolid || vgid != o->vgid)
+                               continue;
+                       break;
+               case LS_VG:
+                       if (vgid != o->vgid)
+                               continue;
+                       break;
+               default:
+                       assert(0);
+               }
+               NOTICE_LOG("interval #%u for %s/%s: %u seconds\n", it,
+                       vgname(o->vgid), o->name, ta->seconds);
+               o->iscope[it] = scope;
+       }
+}
+
+/* The fields of one line of lvs output, see parse_lvs_line(). */
+struct lv_info {
+       char *vg, *lv, *pool, *origin;
+       uint64_t time;
+};
+
+/* Release the four strings of the structure, but not the structure itself. */
+static void free_lv_info(struct lv_info *lv)
+{
+       char *strings[] = {lv->vg, lv->lv, lv->pool, lv->origin};
+       unsigned n;
+
+       for (n = 0; n < sizeof(strings) / sizeof(strings[0]); n++)
+               free(strings[n]);
+}
+
+/*
+ * Split one line of lvs output into its five comma-separated fields: volume
+ * group, logical volume, pool, origin and time.
+ *
+ * The four names are stored as allocated copies in the result structure and
+ * should be released with free_lv_info(). The name of the volume group must
+ * not be empty. The line itself is not modified.
+ *
+ * NOTE(review): the first two characters of the line are skipped,
+ * presumably because lvs indents its output -- confirm. Lines shorter than
+ * that are not handled.
+ */
+static void parse_lvs_line(const char *line, struct lv_info *result)
+{
+       char *tmp = xstrdup(line), *p = tmp + 2, *comma;
+
+       comma = strchr(p, ',');
+       assert(comma && comma != p);
+       *comma = '\0';
+       result->vg = xstrdup(p);
+       p = comma + 1;
+       comma = strchr(p, ',');
+       assert(comma);
+       *comma = '\0';
+       result->lv = xstrdup(p);
+       p = comma + 1;
+       comma = strchr(p, ',');
+       assert(comma);
+       *comma = '\0';
+       result->pool = xstrdup(p);
+       p = comma + 1;
+       comma = strchr(p, ',');
+       assert(comma);
+       *comma = '\0';
+       result->origin = xstrdup(p);
+       p = comma + 1;
+       /*
+        * The conversion must not be performed inside assert(): it would
+        * not be executed at all if NDEBUG is defined, leaving the time
+        * field unset. SCNu64 is the conversion specifier for scanf.
+        */
+       if (sscanf(p, "%" SCNu64, &result->time) != 1)
+               die("could not parse time in lvs output: %s", line);
+       free(tmp);
+}
+
+/*
+ * Run lvs to discover the origins given via --origin and their snapshots.
+ *
+ * The select expression matches each given vg/tlv pair plus all LVs of the
+ * same volume group which have that tlv as their origin and whose name is of
+ * the form misma-<tlv>.<number>. The output is walked three times: first to
+ * insert volume groups and pools, then to insert the origins, and finally to
+ * fill the snapshot array of each origin.
+ *
+ * Dies if --origin was not given or is malformed, if lvs fails, if an origin
+ * is no thin LV or does not exist, or if a snapshot name cannot be parsed.
+ */
+static void init_origins(void)
+{
+       unsigned n, oid;
+       char *argv[] = {
+               "lvs",
+               "--select", NULL, /* set to select_string below */
+               "--noheading",
+               "--separator", ",",
+               "--readonly",
+               "--unquoted",
+               "-o", "vgname,lvname,pool_lv,origin,lvtime",
+               "-O", "-lv_time",
+               "--config", "report/time_format=%s",
+               NULL
+       };
+       char *buf, *tmp, *line, *select_string = NULL;
+       struct line_iter liter;
+       struct lv_info lv;
+
+       if (OPT_GIVEN(MISMA, ORIGIN) == 0)
+               die("--origin not given");
+
+       /* create argument to --select */
+       for (n = 0; n < OPT_GIVEN(MISMA, ORIGIN); n++) {
+               char *tmp2, *slash;
+               const char *arg = OPT_STRING_VAL_N(n, MISMA, ORIGIN);
+
+               tmp = xstrdup(arg);
+               slash = strchr(tmp, '/');
+               if (!slash || slash == tmp || !slash[1])
+                       die("--origin arg must be of the form vg/tlv");
+               *slash = '\0'; /* tmp is the vg, slash + 1 the tlv */
+               tmp2 = msg("%s%s (vg_name=%s && (lv_name=%s ||"
+                       "(origin=%s && lv_name =~ misma-%s.[0-9]+)))",
+                       select_string? select_string : "",
+                       select_string? " || " : "" ,
+                       tmp, slash + 1, slash + 1, slash + 1
+               );
+               free(tmp);
+               free(select_string);
+               select_string = tmp2;
+       }
+       argv[2] = select_string;
+       if (!xexec(argv, &buf))
+               die("lvs failure");
+       /* The expression is only needed for the lvs invocation. */
+       free(select_string);
+       /*
+        * Each pass iterates over a private copy of the output.
+        * NOTE(review): presumably because line_iter_get() modifies the
+        * buffer it walks -- confirm in the line iterator implementation.
+        */
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* insert vgs and pools */
+       while ((line = line_iter_get(&liter))) {
+               parse_lvs_line(line, &lv);
+               DEBUG_LOG("vg: %s, lv: %s, pool: %s, origin: %s, "
+                       "time: %" PRIu64"\n",
+                       lv.vg, lv.lv, lv.pool, lv.origin, lv.time);
+               if (lv.origin[0] == '\0') { /* origin */
+                       insert_vg(lv.vg);
+                       if (lv.pool[0] == '\0')
+                               die("%s/%s is no thin LV", lv.vg, lv.lv);
+                       insert_pool(lv.pool, lv.vg);
+               }
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* insert origins */
+       while ((line = line_iter_get(&liter))) {
+               parse_lvs_line(line, &lv);
+               if (lv.origin[0] == '\0')
+                       insert_origin(lv.lv, lv.vg, lv.pool);
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       /* check that all given origins exist */
+       for (n = 0; n < OPT_GIVEN(MISMA, ORIGIN); n++) {
+               const char *arg = OPT_STRING_VAL_N(n, MISMA, ORIGIN);
+               char *slash;
+
+               tmp = xstrdup(arg);
+               /* the form vg/tlv was already verified above */
+               slash = strchr(tmp, '/');
+               *slash = '\0';
+               oid = get_oid(slash + 1, tmp);
+               free(tmp);
+               if (oid == ~0U)
+                       die("origin %s does not exist", arg);
+       }
+       tmp = xstrdup(buf);
+       line_iter_init(&liter, tmp);
+       /* allocate and init snapshot arrays */
+       while ((line = line_iter_get(&liter))) {
+               char *fmt;
+               struct snapshot *s;
+               struct origin *o;
+
+               parse_lvs_line(line, &lv);
+               if (lv.origin[0] == '\0') { /* no snapshot */
+                       free_lv_info(&lv);
+                       continue;
+               }
+               oid = get_oid(lv.origin, lv.vg);
+               assert(oid != ~0U);
+               o = origin + oid;
+               /* append one slot to the snapshot array of the origin */
+               o->num_slots++;
+               o->snapshot = xrealloc(o->snapshot, o->num_slots
+                       * sizeof(struct snapshot));
+               s = o->snapshot + o->num_slots - 1;
+               /* the sequence number is the suffix of the snapshot name */
+               fmt = msg("misma-%s.%%u", lv.origin);
+               if (sscanf(lv.lv, fmt, &s->seq) != 1)
+                       die("parse error: %s", lv.lv);
+               free(fmt);
+               s->epoch = lv.time;
+               /* keep track of the highest sequence number and epoch */
+               if (s->seq > o->last_seq)
+                       o->last_seq = s->seq;
+               if (s->epoch > o->last_event[ET_CREATE])
+                       o->last_event[ET_CREATE] = s->epoch;
+               free_lv_info(&lv);
+       }
+       free(tmp);
+       /* all three passes are done, so the lvs output can go as well */
+       free(buf);
+}
+
+/*
+ * Log the text for a lopsub error code and terminate.
+ *
+ * The error code is negated before it is passed to lls_strerror(). If an
+ * error context is available, it is printed in front of the error text. The
+ * context string is freed and the pointer is reset before die() is called.
+ */
+static void die_lopsub(int lopsub_ret, char **errctx)
+{
+       const char *text = lls_strerror(-lopsub_ret);
+       char *ctx = *errctx;
+
+       if (!ctx)
+               ERROR_LOG("%s\n", text);
+       else
+               ERROR_LOG("%s: %s\n", ctx, text);
+       free(ctx);
+       *errctx = NULL;
+       die("lopsub error");
+}
+
+/*
+ * Parse the given arguments and merge in the options of the config file.
+ *
+ * Unless config_file is already set, it is initialized from the CONFIG_FILE
+ * option if that was given and to $HOME/.mismarc otherwise. A config file
+ * which does not exist is only an error if the option was given. An existing
+ * non-empty file is mapped, converted to an argument vector and parsed. The
+ * two parse results are then merged and *lprp is replaced by the merged
+ * result.
+ *
+ * All errors are fatal.
+ */
+static void parse_options(int argc, char **argv, const struct lls_command *cmd,
+               struct lls_parse_result **lprp)
+{
+       struct lls_parse_result *cf_lpr, *merged_lpr;
+       char **cf_argv, *errctx = NULL;
+       const char *subcmd_name = NULL;
+       struct stat statbuf;
+       size_t sz;
+       void *map;
+       int ret, fd;
+
+       ret = lls_parse(argc, argv, cmd, lprp, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       if (!config_file) {
+               if (!OPT_GIVEN(MISMA, CONFIG_FILE)) {
+                       const char *home = getenv("HOME");
+                       if (!home || !*home)
+                               die("fatal: HOME is unset or empty");
+                       config_file = msg("%s/.mismarc", home);
+               } else
+                       config_file = xstrdup(OPT_STRING_VAL(MISMA,
+                               CONFIG_FILE));
+       }
+       fd = open(config_file, O_RDONLY);
+       if (fd < 0) {
+               if (errno != ENOENT || OPT_GIVEN(MISMA, CONFIG_FILE))
+                       die_errno("can not open config file %s", config_file);
+               /* no config file -- nothing to do */
+               return;
+       }
+       if (fstat(fd, &statbuf) < 0)
+               die_errno("failed to stat config file %s", config_file);
+       sz = statbuf.st_size;
+       if (sz == 0) /* config file is empty -- nothing to do */
+               goto out;
+       map = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (map == MAP_FAILED)
+               die_errno("failed to mmap config file %s", config_file);
+       /* no subcommand name is passed for the MISMA command itself */
+       if (cmd != CMD_PTR(MISMA))
+               subcmd_name = lls_command_name(cmd);
+       ret = lls_convert_config(map, sz, subcmd_name, &cf_argv, &errctx);
+       munmap(map, sz);
+       if (ret < 0) {
+               ERROR_LOG("failed to convert config file %s\n", config_file);
+               die_lopsub(ret, &errctx);
+       }
+       /* on success the conversion returned the argument count */
+       ret = lls_parse(ret, cf_argv, cmd, &cf_lpr, &errctx);
+       lls_free_argv(cf_argv);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       /* command line options override config file options */
+       ret = lls_merge(*lprp, cf_lpr, cmd, &merged_lpr, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       lls_free_parse_result(cf_lpr, cmd);
+       lls_free_parse_result(*lprp, cmd);
+       *lprp = merged_lpr;
+out:
+       close(fd);
+}
+
+/*
+ * Run lvs to obtain the data and metadata utilization of all thin pools.
+ *
+ * Each output line is expected to consist of the vg name, the lv name and
+ * the two percentages. The percentages are rounded by adding 0.5 and stored
+ * in the utilization field of the pool which matches the two names.
+ *
+ * Dies if lvs fails or if a line of its output cannot be parsed.
+ */
+static void get_utilization(void)
+{
+       char *select_string = NULL, *buf, *line;
+       unsigned n;
+       char *argv[] = {
+               "lvs",
+               "--select", NULL,
+               "--noheading", "--unquoted",
+               "-o", "vgname,lvname,data_percent,metadata_percent",
+               NULL
+       };
+       struct line_iter liter;
+
+       /* one "(vg_name = .. && lv_name = ..)" term per pool, joined by || */
+       for (n = 0; n < num_pools; n++) {
+               const struct thin_pool *pool = thin_pool + n;
+               char *tmp = msg("%s%s (vg_name = %s && lv_name = %s)",
+                       (n == 0)? "" : select_string, (n == 0)? "" : "||",
+                       vgname(pool->vgid), pool->name);
+               free(select_string);
+               select_string = tmp;
+       }
+       argv[2] = select_string;
+       if (!xexec(argv, &buf))
+               die("lvs failure");
+       free(select_string);
+       line_iter_init(&liter, buf);
+       while ((line = line_iter_get(&liter))) {
+               struct percentage_pair *u;
+               struct thin_pool *pool;
+               unsigned poolid;
+               float data, meta;
+               /*
+                * A %s conversion may store up to strlen(line) characters
+                * plus the terminating null byte, hence the extra byte.
+                */
+               size_t size = strlen(line) + 1;
+               char *vg = xmalloc(size), *lv = xmalloc(size);
+
+               if (sscanf(line, "%s %s %f %f", vg, lv, &data, &meta) != 4)
+                       die("cannot parse lvs line: %s", line);
+               poolid = get_poolid(lv, vg);
+               free(vg);
+               free(lv);
+               assert(poolid != ~0U);
+               pool = thin_pool + poolid;
+               u = &pool->utilization;
+               u->data = data + 0.5;
+               u->meta = meta + 0.5;
+               INFO_LOG("pool %s/%s utilization: %u/%u\n",
+                       vgname(pool->vgid), pool->name, u->data, u->meta);
+       }
+       free(buf);
+}
+
+/*
+ * Check whether the utilization of a pool exceeds its thresholds.
+ *
+ * The thresholds are taken from the global, the volume group or the pool
+ * configuration, depending on the threshold scope of the pool. The pool is
+ * full if either the data or the metadata percentage is greater than the
+ * corresponding threshold. In this case a notice and a warning are logged.
+ */
+static bool pool_is_full(const struct thin_pool *pool)
+{
+       const struct percentage_pair *used = &pool->utilization, *limit;
+
+       switch (pool->threshold_scope) {
+       case LS_GLOBAL:
+               limit = &global_config.thresholds;
+               break;
+       case LS_VG:
+               limit = &volume_group[pool->vgid].config.thresholds;
+               break;
+       default:
+               limit = &pool->config.thresholds;
+       }
+       if (used->data <= limit->data && used->meta <= limit->meta)
+               return false;
+       NOTICE_LOG("pool %s/%s utilization: %u/%u, threshold: %u/%u\n",
+               vgname(pool->vgid), pool->name,
+               used->data, used->meta, limit->data, limit->meta);
+       WARNING_LOG("pool %s/%s exceeds utilization thresholds\n",
+               vgname(pool->vgid), pool->name);
+       return true;
+}
+
+/*
+ * Remove snapshots until no pool exceeds its thresholds any more.
+ *
+ * Each round refreshes the utilization of all pools and asks every origin of
+ * each full pool to remove one snapshot. The rounds stop when no pool is
+ * full, or when a round found a full pool but could not remove anything.
+ */
+static void check_utilization(void)
+{
+       for (;;) {
+               bool full = false, removed = false;
+               unsigned n, m;
+
+               get_utilization();
+               for (n = 0; n < num_pools; n++) {
+                       if (!pool_is_full(thin_pool + n))
+                               continue;
+                       full = true;
+                       FOR_EACH_ORIGIN(m) {
+                               struct origin *o = origin + m;
+                               if (o->poolid != n)
+                                       continue;
+                               if (remove_sleazy_snapshot(o, false))
+                                       removed = true;
+                       }
+               }
+               if (!full)
+                       return;
+               if (!removed)
+                       break;
+       }
+       INFO_LOG("full pool found, but nothing to remove\n");
+}
+
+/*
+ * Create the next snapshot of an origin by running lvcreate.
+ *
+ * The snapshot is named misma-<origin>.<seq>, where seq is one more than the
+ * last sequence number of the origin. In dry-run mode only a message is
+ * printed. The origin structure is not modified. Always returns true because
+ * a failure of lvcreate is fatal.
+ */
+static bool create_snapshot(struct origin *o, bool dry_run)
+{
+       unsigned seq = 1 + o->last_seq;
+       char *snap = msg("misma-%s.%u", o->name, seq);
+       char *path = msg("%s/%s", vgname(o->vgid), o->name);
+       char *argv[] = {
+               "lvcreate", "--type", "thin", "--quiet", "--quiet", "-s",
+               "--autobackup", "n", "-n", snap, path, NULL
+       };
+
+       if (!dry_run) {
+               NOTICE_LOG("creating snapshot %s/%s\n", vgname(o->vgid), snap);
+               if (!xexec(argv, NULL))
+                       die("could not create snapshot");
+       } else
+               printf("dry-run: would create snapshot #%u of origin %s\n",
+                       seq, path);
+       free(snap);
+       free(path);
+       return true;
+}
+
+static void signal_handler(int signo) /* installed by com_run() for SIGINT, SIGTERM and SIGHUP */
+{
+       die("caught signal %d, terminating", signo); /* NOTE(review): die() is defined elsewhere; confirm it is safe to call from a signal handler */
+}
+
+#ifndef FITRIM
+/* Fallback definitions in case the included headers do not define FITRIM. */
+struct fstrim_range {uint64_t start; uint64_t len; uint64_t minlen;};
+#define FITRIM _IOWR('X', 121, struct fstrim_range)
+#endif
+/*
+ * Run the FITRIM ioctl on the filesystem which is mounted from an origin.
+ *
+ * The device numbers of /dev/<vg>/<origin> are compared against the third
+ * field of each line of /proc/self/mountinfo, and the sixth field of the
+ * first matching line is taken as the mountpoint. In dry-run mode the
+ * mountpoint is printed instead of being trimmed.
+ *
+ * Returns false after logging a warning if the device or the mountpoint
+ * cannot be determined, or if the ioctl fails.
+ */
+static bool trim_filesystem(struct origin *o, bool dry_run)
+{
+       struct stat sb;
+       char *dev;
+       unsigned majo, mino;
+       int fd;
+       char *buf;
+       struct line_iter liter;
+       char *line, *mp = NULL;
+       struct fstrim_range range = {.len = ULLONG_MAX};
+
+       dev = msg("/dev/%s/%s", vgname(o->vgid), o->name);
+       if (stat(dev, &sb) < 0) {
+               WARNING_LOG("stat(%s): %m\n", dev);
+               free(dev);
+               return false;
+       }
+       if ((sb.st_mode & S_IFMT) != S_IFBLK) {
+               WARNING_LOG("not a block device: %s\n", dev);
+               free(dev);
+               return false;
+       }
+       free(dev);
+       majo = major(sb.st_rdev);
+       mino = minor(sb.st_rdev);
+       fd = open("/proc/self/mountinfo", O_RDONLY);
+       if (fd < 0) {
+               WARNING_LOG("open(/proc/self/mountinfo): %m\n");
+               return false;
+       }
+       if (!fd2buf(fd, &buf)) {
+               WARNING_LOG("fd2buf error\n");
+               close(fd);
+               return false;
+       }
+       close(fd);
+       line_iter_init(&liter, buf);
+       /* 13 15 0:5 / /proc */
+       while ((line = line_iter_get(&liter))) {
+               unsigned id, parent, mmajo, mmino;
+               size_t len = strlen(line);
+               char *mountroot = xmalloc(len), *target = xmalloc(len);
+
+               if (sscanf(line, "%u %u %u:%u %s %s", &id, &parent, &mmajo,
+                               &mmino, mountroot, target) != 6) {
+                       WARNING_LOG("parse mountinfo line: %s\n", line);
+                       free(mountroot);
+                       free(target);
+                       /*
+                        * The buffer must be released on this path, too.
+                        * Trimming runs periodically in the run loop, so a
+                        * leak here would accumulate.
+                        */
+                       free(buf);
+                       return false;
+               }
+               free(mountroot);
+               if (mmajo == majo && mmino == mino) {
+                       /* target is a copy, so it survives free(buf) */
+                       mp = target;
+                       break;
+               }
+               free(target);
+       }
+       free(buf);
+       if (!mp) {
+               WARNING_LOG("unable to find mountpoint of origin\n");
+               return false;
+       }
+       /*
+        * NOTE(review): mountinfo is documented to escape some characters in
+        * paths, for example a space as \040. Such a mountpoint is used
+        * verbatim below -- confirm whether this needs to be handled.
+        */
+       if (dry_run) {
+               printf("%s\n", mp);
+               free(mp);
+               return true;
+       }
+       fd = open(mp, O_RDONLY);
+       if (fd < 0) {
+               WARNING_LOG("open(%s): %m\n", mp);
+               free(mp);
+               return false;
+       }
+       /* start and minlen are zero, len covers everything */
+       if (ioctl(fd, FITRIM, &range)) {
+               WARNING_LOG("ioctl(FITRIM, %s): %m\n", mp);
+               close(fd);
+               free(mp);
+               return false;
+       }
+       close(fd);
+       NOTICE_LOG("trimmed %s\n", mp);
+       free(mp);
+       return true;
+}
+
+/*
+ * Store the thresholds of a threshold argument at the scope of its lvmspec.
+ *
+ * A global spec only updates the global configuration. Otherwise the
+ * thresholds are stored in the configuration of the volume group or of the
+ * pool, and each pool which is covered by the spec and whose threshold scope
+ * is currently wider is switched to the scope of the spec.
+ *
+ * Dies if the volume group or the pool of the spec is unknown.
+ */
+static void set_threshold(const struct threshold_arg *ta)
+{
+       const struct lvmspec *spec = &ta->lvmspec;
+       unsigned n, vgid, poolid = 0;
+
+       if (spec->scope == LS_GLOBAL) {
+               global_config.thresholds = ta->threshold;
+               return;
+       }
+       vgid = get_vgid(spec->vg);
+       if (vgid == ~0U)
+               die("invalid vg in lvmspec: %s", spec->vg);
+       if (spec->scope != LS_VG) {
+               assert(spec->scope == LS_POOL);
+               poolid = get_poolid(spec->pool, vgname(vgid));
+               if (poolid == ~0U)
+                       die("invalid pool in lvmspec: %s", spec->pool);
+               thin_pool[poolid].config.thresholds = ta->threshold;
+       } else
+               volume_group[vgid].config.thresholds = ta->threshold;
+       /* narrow the scope of the pools which are covered by the spec */
+       for (n = 0; n < num_pools; n++) {
+               struct thin_pool *p = thin_pool + n;
+               bool covered = p->vgid == vgid
+                       && (spec->scope != LS_POOL || n == poolid);
+
+               /* leave pools alone which already have a narrower scope */
+               if (!covered || p->threshold_scope >= spec->scope)
+                       continue;
+               NOTICE_LOG("threshold for pool %s/%s: %u/%u\n",
+                       vgname(vgid), p->name, ta->threshold.data,
+                       ta->threshold.meta);
+               p->threshold_scope = spec->scope;
+       }
+}
+
+/*
+ * Log an event at debug level. Events without origin are logged as
+ * utilization events, all others with origin name and event type.
+ */
+static void log_event(const void *d)
+{
+       const struct event *ev = d;
+       const struct origin *o = ev->origin;
+
+       if (!o) {
+               DEBUG_LOG("(utilization): %" PRIu64 "\n", ev->epoch);
+               return;
+       }
+       DEBUG_LOG("(%s,%u): %" PRIu64 "\n", o->name, ev->type, ev->epoch);
+}
+
+static unsigned check_run_options(void) /* returns the number of events com_run() allocates */
+{
+       struct time_arg ta;
+       const char *arg;
+       unsigned n, num_events = 0;
+
+       for (n = 0; n < OPT_GIVEN(RUN, THRESHOLD); n++) {
+               struct threshold_arg tha;
+               arg = OPT_STRING_VAL_N(n, RUN, THRESHOLD);
+               parse_threshold_arg(arg,"--threshold", &tha);
+               set_threshold(&tha);
+               free_lvmspec(&tha.lvmspec);
+       }
+       if (OPT_GIVEN(RUN, CHECK_INTERVAL)) {
+               arg = OPT_STRING_VAL(RUN, CHECK_INTERVAL);
+               check_seconds = parse_timespec(arg, "check-interval");
+               check_range(check_seconds, 10, 86400, "check-interval"); /* ten seconds to one day */
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, TRIM_INTERVAL); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, TRIM_INTERVAL);
+               parse_time_arg(arg, "--trim-interval", &ta);
+               if (ta.seconds > 0) /* zero skips the range check; com_run() creates no trim event for a zero interval */
+                       check_range(ta.seconds, 60, ~0U, "trim-interval");
+               set_interval(IT_TRIM, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, CREATE_INTERVAL); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, CREATE_INTERVAL);
+               parse_time_arg(arg, "--create-interval", &ta);
+               check_range(ta.seconds, 60, 86400 * 365, "create-interval"); /* one minute to 365 days */
+               set_interval(IT_CREATE, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       for (n = 0; n < OPT_GIVEN(RUN, MAX_AGE); n++) {
+               arg = OPT_STRING_VAL_N(n, RUN, MAX_AGE);
+               parse_time_arg(arg, "--max-age", &ta);
+               check_range(ta.seconds, 86400, 86400 * 20 * 365, "max-age"); /* one day to 20 * 365 days */
+               set_interval(IT_MAX_AGE, &ta);
+               free_lvmspec(&ta.lvmspec);
+       }
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               uint32_t ma, cr, max_slots; /* max age, create interval */
+
+               INFO_LOG("found %u snapshots of origin %s/%s\n",
+                       o->num_slots, vgname(o->vgid), o->name);
+               /* set number of slots */
+               ma = interval_length(IT_MAX_AGE, o);
+               cr = interval_length(IT_CREATE, o);
+               if (ma / 3 < cr) /* max age must be at least three create intervals */
+                       die("%s/%s: max-age/create ratio too small",
+                               vgname(o->vgid), o->name);
+               max_slots = 1 + ceil(log2((double)ma / cr + 1)); /* grows with the logarithm of the ratio */
+               assert(max_slots > 2);
+               assert(max_slots < 30);
+               if (o->num_slots > max_slots) /* more snapshots exist than there are slots */
+                       die("%s/%s: too many snapshots", vgname(o->vgid),
+                               o->name);
+               if (o->num_slots < max_slots) { /* extend the array and zero the added slots */
+                       unsigned diff = max_slots - o->num_slots;
+                       o->snapshot = xrealloc(o->snapshot, max_slots
+                               * sizeof(struct snapshot));
+                       memset(o->snapshot + o->num_slots, 0,
+                               diff * sizeof(struct snapshot));
+                       o->num_slots = max_slots;
+               }
+               INFO_LOG("%s/%s: using %u slots\n", vgname(o->vgid), o->name,
+                       o->num_slots);
+               if (interval_length(IT_TRIM, o) > 0) /* one trim event per origin with a non-zero trim interval */
+                       num_events++;
+       }
+       return num_events + 1 + num_origins; /* plus the check event and one create event per origin */
+}
+
+/*
+ * Create the next snapshot of an origin and record it in its slot.
+ *
+ * Nothing is created while the pool of the origin exceeds its thresholds.
+ * Otherwise the slot for the next sequence number is looked up, and the
+ * snapshot which occupies this slot, if any, is removed. The new snapshot is
+ * then created and stored in the slot together with the time which was taken
+ * just before the creation.
+ *
+ * Dies if the slot cannot be freed.
+ */
+static void dispatch_create_event(struct origin *o)
+{
+       unsigned seq, sl;
+       const struct thin_pool *pool;
+       uint64_t now;
+
+       pool = thin_pool + o->poolid;
+       if (pool_is_full(pool)) {
+               WARNING_LOG("%s/%s: creation suspended\n", vgname(o->vgid),
+                       o->name);
+               return;
+       }
+       seq = o->last_seq + 1;
+       sl = get_slot(seq, o);
+       /* no trailing newline, like all other messages passed to die() */
+       if (slot_is_used(sl, o) && !remove_snapshot(sl, o, false))
+               die("%s/%s: unable to free slot", vgname(o->vgid), o->name);
+       now = time(NULL);
+       create_snapshot(o, false);
+       o->snapshot[sl].seq = seq;
+       o->snapshot[sl].epoch = now;
+       o->last_seq = seq;
+       o->last_event[ET_CREATE] = now;
+}
+
+/*
+ * Set LVM_ERR_FD to the number of a descriptor which is open on /dev/null.
+ *
+ * The descriptor is returned and never closed here. This leak is OK as long
+ * as the function is called only once.
+ */
+static int silence_lvm(void)
+{
+       int null_fd = open("/dev/null", O_RDWR);
+       char *str;
+
+       if (null_fd < 0)
+               die_errno("open(/dev/null)");
+       str = msg("%d", null_fd);
+       setenv("LVM_ERR_FD", str, 1 /* overwrite */);
+       free(str);
+       return null_fd;
+}
+
+__attribute__ ((noreturn)) /* the event loop below has no exit path */
+static bool com_run(void) /* handler of the run subcommand */
+{
+       int fd = -1;
+       unsigned n, num_events;
+       struct event **ep;
+       struct event **event; /* At most 2 * num_origins + 1 */
+       struct heap *event_heap;
+       uint64_t now = time(NULL);
+
+       num_events = check_run_options(); /* also applies the interval and threshold options */
+       event = xmalloc(num_events * sizeof(struct event *));
+       ep = event;
+       (*ep) = xmalloc(sizeof(struct event));
+       (*ep)->type = ET_CHECK;
+       (*ep)->origin = NULL; /* the check event belongs to no origin */
+       (*ep)->epoch = 0; /* epoch zero is never in the future, so the first check is due at once */
+       log_event(*ep);
+       ep++;
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               (*ep) = xmalloc(sizeof(struct event));
+               (*ep)->type = ET_CREATE;
+               (*ep)->origin = o;
+               (*ep)->epoch = o->last_event[ET_CREATE] /* one create interval after the last creation */
+                       + interval_length(IT_CREATE, o);
+               log_event(*ep);
+               ep++;
+               if (interval_length(IT_TRIM, o) == 0) /* no trim event for this origin */
+                       continue;
+               (*ep) = xmalloc(sizeof(struct event));
+               (*ep)->type = ET_TRIM;
+               (*ep)->origin = o;
+               (*ep)->epoch = now + interval_length(IT_TRIM, o); /* first trim is one interval from now */
+               log_event(*ep);
+               ep++;
+       }
+       event_heap = heap_init(&event, num_events, event_compare);
+       if (get_misma_pid(config_file) > 0)
+               die("already running");
+       if (OPT_GIVEN(RUN, DAEMON))
+               fd = daemonize(OPT_STRING_VAL(RUN, LOGFILE)); /* NOTE(review): purpose of the returned fd is not visible here, see the write below */
+       if (!misma_lock(config_file))
+               die("already running");
+       if (signal(SIGINT, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGINT");
+       if (signal(SIGTERM, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGTERM");
+       if (signal(SIGHUP, &signal_handler) == SIG_ERR)
+               die_errno("signal handler for SIGHUP");
+       if (fd >= 0) { /* only in daemon mode */
+               if (write(fd, "\0", 1) < 0) /* presumably tells the parent that startup succeeded -- confirm in daemonize() */
+                       die_errno("write");
+               close(fd);
+       }
+       exit_hook = OPT_STRING_VAL(RUN, EXIT_HOOK);
+       if (OPT_GIVEN(RUN, SUPPRESS_LVM_WARNINGS))
+               silence_lvm();
+       for (;;) {
+               struct event *e = heap_min(event_heap); /* peek at the next event without removing it */
+               struct origin *o;
+
+               now = time(NULL);
+               if (e->epoch > now) { /* not yet due: sleep, then look again */
+                       INFO_LOG("sleeping %" PRIu64 " seconds\n",
+                               e->epoch - now);
+                       sleep(e->epoch - now);
+                       continue;
+               }
+               e = heap_extract_min(event_heap); /* due: handle it and reinsert it with a new epoch */
+               o = e->origin;
+               switch (e->type) {
+               case ET_CHECK:
+                       INFO_LOG("next event: check\n");
+                       check_utilization();
+                       now = time(NULL); /* only this case takes the time again after the work */
+                       e->epoch = now + check_seconds;
+                       break;
+               case ET_TRIM:
+                       INFO_LOG("next event: trim %s/%s\n",
+                               vgname(o->vgid), o->name);
+                       trim_filesystem(o, false /* dry-run */); /* the result is ignored */
+                       e->origin->last_event[ET_TRIM] = now;
+                       e->epoch = now + interval_length(IT_TRIM, o);
+                       break;
+               case ET_CREATE:
+                       INFO_LOG("next event: create %s/%s\n", vgname(o->vgid),
+                               o->name);
+                       dispatch_create_event(o);
+                       e->epoch = now + interval_length(IT_CREATE, o);
+                       break;
+               default: assert(0);
+               }
+               heap_insert(e, event_heap);
+               heap_dump(event_heap, log_event);
+               sleep(3); /* NOTE(review): the reason for this fixed pause is not evident from the code */
+       }
+}
+EXPORT_CMD_HANDLER(run);
+
+/*
+ * Format a time span, given in seconds, as a number and a unit.
+ *
+ * The largest unit is chosen of which the span contains more than two, and
+ * the number is rounded down. A year counts as 365 days, a month as 30 days.
+ * Spans of at most two minutes, including negative ones, are printed in
+ * seconds. The unit names are padded to the width of "minutes".
+ *
+ * No output is longer than 28 characters, so a buffer of 32 bytes suffices.
+ */
+static void seconds_to_human(int64_t diff, char *buf)
+{
+       static const struct {
+               int64_t seconds;
+               const char *name;
+       } unit[] = {
+               {86400 * 365, "years  "},
+               {86400 * 30, "months "},
+               {86400 * 7, "weeks  "},
+               {86400, "days   "},
+               {3600, "hours  "},
+               {60, "minutes"},
+       };
+
+       for (size_t n = 0; n < sizeof(unit) / sizeof(*unit); n++) {
+               if (diff <= 2 * unit[n].seconds)
+                       continue;
+               sprintf(buf, "%3" PRId64 " %s", diff / unit[n].seconds,
+                       unit[n].name);
+               return;
+       }
+       sprintf(buf, "%3" PRId64 " second%s", diff, diff == 1? "" : "s");
+}
+
+/*
+ * Check whether an origin is covered by an lvmspec.
+ *
+ * A global spec covers every origin. All other specs require the volume
+ * group to match. On top of that, a spec of origin scope compares the tlv
+ * against the name of the origin, and a spec of pool scope compares the pool
+ * against the name of the pool of the origin.
+ */
+static bool origin_matches_lvmspec(const struct origin *o,
+               const struct lvmspec *spec)
+{
+       if (spec->scope == LS_GLOBAL)
+               return true;
+       if (strcmp(spec->vg, vgname(o->vgid)) != 0)
+               return false;
+       switch (spec->scope) {
+       case LS_VG:
+               return true;
+       case LS_ORIGIN:
+               return strcmp(spec->tlv, o->name) == 0;
+       default: /* pool scope */
+               return strcmp(spec->pool, thin_pool[o->poolid].name) == 0;
+       }
+}
+
+/*
+ * Call a function for each origin which is selected by the subcommand.
+ *
+ * Each non-option argument of the subcommand is parsed as an lvmspec. An
+ * origin is selected if it matches at least one spec, or unconditionally if
+ * no arguments were given. The return value of func is ignored.
+ *
+ * Returns true if func was called at least once. If arguments were given but
+ * no origin matched, a message is printed.
+ */
+static bool for_each_matching_origin(bool (*func)(struct origin *, bool),
+               bool dry_run)
+{
+       unsigned k, n, num_args = lls_num_inputs(sublpr);
+       struct lvmspec *spec = NULL; /* STFU gcc-12.3.0 */
+       bool match = false;
+
+       if (num_args > 0)
+               spec = xmalloc(num_args * sizeof(*spec));
+       for (k = 0; k < num_args; k++)
+               parse_lvmspec(lls_input(k, sublpr), "create/rm", spec + k);
+       FOR_EACH_ORIGIN(n) {
+               struct origin *o = origin + n;
+               /* k stops at the index of the first matching spec */
+               for (k = 0; k < num_args; k++)
+                       if (origin_matches_lvmspec(o, spec + k))
+                               break;
+               if (num_args == 0 || k < num_args) {
+                       func(o, dry_run);
+                       match = true;
+               }
+       }
+       /*
+        * Release each parsed spec before the array goes away, like
+        * check_run_options() does for the specs it parses.
+        */
+       for (k = 0; k < num_args; k++)
+               free_lvmspec(spec + k);
+       free(spec);
+       if (!match && num_args > 0)
+               printf("no matches\n");
+       return match;
+}
+
+/*
+ * Print one line per snapshot of an origin.
+ *
+ * The short format starts with a vg/origin header line and shows the
+ * sequence number and the age of each snapshot. The long format has no
+ * header and shows the device path, the local creation time and the age.
+ * Always returns true.
+ */
+static bool list_snapshots(struct origin *o, bool l_given)
+{
+       if (!l_given)
+               printf("%s/%s:\n", vgname(o->vgid), o->name);
+       FOR_EACH_SLOT_REVERSE(sl, o) {
+               const struct snapshot *snap = &o->snapshot[sl];
+               char text[32]; /* first the date, then the age */
+               time_t when;
+
+               assert(slot_is_used(sl, o));
+               if (!l_given)
+                       printf("%8u ", snap->seq);
+               else {
+                       printf("/dev/%s/misma-%s.%u\t", vgname(o->vgid),
+                               o->name, snap->seq);
+                       when = snap->epoch;
+                       strftime(text, sizeof(text), "%F %R",
+                               localtime(&when));
+                       printf("%s", text);
+               }
+               seconds_to_human(time(NULL) - snap->epoch, text);
+               printf("  %s\n", text);
+       }
+       return true;
+}
+
+/* Handler of the ls subcommand: list the snapshots of all matching origins. */
+static bool com_ls(void)
+{
+       bool long_format = OPT_GIVEN(LS, LONG);
+
+       return for_each_matching_origin(list_snapshots, long_format);
+}
+EXPORT_CMD_HANDLER(ls);
+
+/*
+ * Handler of the create subcommand: take the lock, then create one snapshot
+ * of each matching origin.
+ */
+static bool com_create(void)
+{
+       bool dry_run;
+
+       if (!misma_lock(config_file))
+               die("already running");
+       dry_run = OPT_GIVEN(CREATE, DRY_RUN);
+       return for_each_matching_origin(create_snapshot, dry_run);
+}
+EXPORT_CMD_HANDLER(create);
+
+/*
+ * Handler of the rm subcommand: take the lock, then call
+ * remove_sleazy_snapshot() for each matching origin.
+ */
+static bool com_rm(void)
+{
+       bool dry_run;
+
+       if (!misma_lock(config_file))
+               die("already running");
+       dry_run = OPT_GIVEN(RM, DRY_RUN);
+       return for_each_matching_origin(remove_sleazy_snapshot, dry_run);
+}
+EXPORT_CMD_HANDLER(rm);
+
+/*
+ * Handler of the kill subcommand: send a signal to the misma run process.
+ *
+ * Without the WAIT option the handler returns true as soon as the signal
+ * was sent. Otherwise the process is probed with signal zero after sleeping
+ * 32, 64, ..., 4096 milliseconds. The result is true if a probe fails with
+ * ESRCH, and false if a probe fails for another reason, if nanosleep()
+ * fails, or if the process still exists after the last probe.
+ */
+static bool com_kill(void)
+{
+       unsigned ms, sig = OPT_UINT32_VAL(KILL, SIGNAL);
+       pid_t pid = get_misma_pid(config_file);
+
+       if (pid == 0)
+               die("no misma run process to send signal to");
+       NOTICE_LOG("sending signal %u to pid %d\n", sig, pid);
+       if (kill(pid, sig) < 0)
+               die_errno("kill");
+       if (!OPT_GIVEN(KILL, WAIT))
+               return true;
+       for (ms = 32; ms < 5000; ms *= 2) {
+               struct timespec ts = {0};
+
+               ts.tv_sec = ms / 1000;
+               ts.tv_nsec = (ms % 1000) * 1000 * 1000;
+               if (nanosleep(&ts, NULL) < 0)
+                       return false;
+               if (kill(pid, 0) < 0)
+                       return errno == ESRCH;
+       }
+       return false;
+}
+EXPORT_CMD_HANDLER(kill);
+
+/*
+ * NULL-terminated array of subcommand names. The helper macro turns its
+ * argument into a string literal and is only defined while the list is
+ * expanded.
+ */
+#define LSG_MISMA_CMD(_name) #_name
+static const char * const subcommand_names[] = {LSG_MISMA_SUBCOMMANDS NULL};
+#undef LSG_MISMA_CMD
+
+/*
+ * Print the available subcommands.
+ *
+ * The verbose format shows one subcommand per line together with its
+ * purpose. The terse format shows a comma-separated list of names on
+ * tab-indented lines, where a new line is started before a name once the
+ * current line holds more than 70 columns.
+ */
+static void show_subcommand_summary(bool verbose)
+{
+       const struct lls_command *cmd;
+       unsigned col;
+       int i;
+
+       printf("Available subcommands:\n");
+       if (verbose) {
+               /* index zero is skipped */
+               for (i = 1; (cmd = lls_cmd(i, misma_suite)) != NULL; i++)
+                       printf("%-12s%s\n", lls_command_name(cmd),
+                               lls_purpose(cmd));
+               return;
+       }
+       col = 8; /* the tab counts as eight columns */
+       printf("\t");
+       for (i = 0; i < LSG_NUM_MISMA_SUBCOMMANDS; i++) {
+               if (i > 0)
+                       col += printf(", ");
+               if (col > 70) {
+                       printf("\n\t");
+                       col = 8;
+               }
+               col += printf("%s", subcommand_names[i]);
+       }
+       printf("\n");
+}
+
+/*
+ * Handler of the trim subcommand: take the lock, then trim the filesystem
+ * of each matching origin.
+ */
+static bool com_trim(void)
+{
+       bool dry_run;
+
+       if (!misma_lock(config_file))
+               die("already running");
+       dry_run = OPT_GIVEN(TRIM, DRY_RUN);
+       return for_each_matching_origin(trim_filesystem, dry_run);
+}
+EXPORT_CMD_HANDLER(trim);
+
+/*
+ * Handler of the help subcommand.
+ *
+ * Without argument the subcommand summary is shown. With one argument the
+ * help text of the named subcommand is printed. The LONG option selects the
+ * verbose summary and the long help text, respectively. More than one
+ * argument or an unknown subcommand name is fatal.
+ */
+static bool com_help(void)
+{
+       const struct lls_command *cmd;
+       char *errctx, *text;
+       int ret;
+
+       ret = lls_check_arg_count(sublpr, 0, 1, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       if (lls_num_inputs(sublpr) == 0) {
+               show_subcommand_summary(OPT_GIVEN(HELP, LONG));
+               return true;
+       }
+       ret = lls_lookup_subcmd(lls_input(0, sublpr), misma_suite, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       cmd = lls_cmd(ret, misma_suite);
+       if (!OPT_GIVEN(HELP, LONG))
+               text = lls_short_help(cmd);
+       else
+               text = lls_long_help(cmd);
+       printf("%s\n", text);
+       free(text);
+       return true;
+}
+EXPORT_CMD_HANDLER(help);
+
+/*
+ * Handler of the configtest subcommand. There is nothing left to check
+ * because main() parses all options before it calls a handler.
+ */
+static bool com_configtest(void)
+{
+       puts("Syntax Ok");
+       return true;
+}
+EXPORT_CMD_HANDLER(configtest);
+
+/*
+ * Handler of the utilization subcommand: refresh the utilization of all
+ * pools and print the data and metadata percentages, one pool per line.
+ */
+static bool com_utilization(void)
+{
+       const struct thin_pool *p, *end;
+
+       get_utilization();
+       end = thin_pool + num_pools;
+       for (p = thin_pool; p < end; p++)
+               printf("%s/%s: %u%%/%u%%\n", vgname(p->vgid), p->name,
+                       p->utilization.data, p->utilization.meta);
+       return true;
+}
+EXPORT_CMD_HANDLER(utilization);
+
+const char *GET_VERSION(void);
+/*
+ * Deal with the VERSION, DETAILED_HELP and HELP options of the main command.
+ *
+ * If one of them was given, the version text or the help text is printed
+ * and the process exits successfully. VERSION takes precedence over the
+ * help options, and DETAILED_HELP over HELP. Otherwise the function returns
+ * without printing anything.
+ */
+static void handle_version_and_help(void)
+{
+       bool detailed;
+       char *text;
+
+       if (OPT_GIVEN(MISMA, VERSION)) {
+               printf(PACKAGE " %s\n"
+                       "Copyright (C) " COPYRIGHT_YEAR " " AUTHOR ".\n"
+                       "License: " LICENSE ": <" LICENSE_URL ">.\n"
+                       "This is free software: you are free to change and redistribute it.\n"
+                       "There is NO WARRANTY, to the extent permitted by law.\n"
+                       "\n"
+                       "Web page: " URL "\n"
+                       "Clone URL: " CLONE_URL "\n"
+                       "Gitweb: " GITWEB_URL "\n"
+                       "Author's Home Page: " HOME_URL "\n"
+                       "Send feedback to: " AUTHOR " <" EMAIL ">\n"
+                       ,
+                       GET_VERSION()
+               );
+               exit(EXIT_SUCCESS);
+       }
+       detailed = OPT_GIVEN(MISMA, DETAILED_HELP);
+       if (!detailed && !OPT_GIVEN(MISMA, HELP))
+               return;
+       if (detailed)
+               text = lls_long_help(CMD_PTR(MISMA));
+       else
+               text = lls_short_help(CMD_PTR(MISMA));
+       printf("%s\n", text);
+       free(text);
+       exit(EXIT_SUCCESS);
+}
+
+/*
+ * Parse the options of the main command, look up the subcommand, parse its
+ * options and call its handler. The exit status reflects the return value
+ * of the handler. Without subcommand the verbose summary is printed.
+ */
+int main(int argc, char **argv)
+{
+       const struct misma_user_data *ud;
+       char **sub_argv, *errctx;
+       unsigned num_inputs;
+       int ret;
+
+       valid_fd012();
+       parse_options(argc, argv, CMD_PTR(MISMA), &lpr);
+       loglevel_arg_val = OPT_UINT32_VAL(MISMA, LOGLEVEL);
+       handle_version_and_help();
+       num_inputs = lls_num_inputs(lpr);
+       if (num_inputs == 0) {
+               show_subcommand_summary(true /* verbose */);
+               exit(EXIT_SUCCESS);
+       }
+       /* the last num_inputs words are the subcommand and its arguments */
+       sub_argv = argv + argc - num_inputs;
+       ret = lls_lookup_subcmd(sub_argv[0], misma_suite, &errctx);
+       if (ret < 0)
+               die_lopsub(ret, &errctx);
+       subcmd = lls_cmd(ret, misma_suite);
+       parse_options(num_inputs, sub_argv, subcmd, &sublpr);
+       /* all subcommands but help operate on the configured origins */
+       if (subcmd != CMD_PTR(HELP))
+               init_origins();
+       ud = lls_user_data(subcmd);
+       if (!ud->handler())
+               exit(EXIT_FAILURE);
+       exit(EXIT_SUCCESS);
+}
diff --git a/misma.h b/misma.h

new file mode 100644 (file)

index 0000000..fceab34
--- /dev/null
+++ b/misma.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <time.h>
+
+#include "config.h"
+
+/*
+ * Allocation helpers, implemented in util.c. None of them returns NULL:
+ * the program is terminated if the underlying allocation fails.
+ */
+
+/* Like realloc(3). The size must be nonzero. */
+__attribute__ ((warn_unused_result))
+void *xrealloc(void *p, size_t size);
+
+/* Like malloc(3), implemented as xrealloc() of a NULL pointer. */
+__attribute__ ((warn_unused_result))
+void *xmalloc(size_t size);
+
+/* Like xmalloc(), but the returned memory is filled with zeros. */
+__attribute__ ((warn_unused_result))
+void *xzmalloc(size_t size);
+
+/*
+ * Duplicate a string. A NULL pointer is treated like the empty string.
+ * NOTE(review): the result is a string, so char * would be the natural
+ * return type. Changing it requires adjusting the definition in util.c too.
+ */
+void *xstrdup(const char *s);
+
+/*
+ * Format a string printf-style into a buffer obtained from xmalloc() and
+ * return that buffer.
+ */
+__attribute__ ((format (printf, 1, 2))) __attribute__ ((warn_unused_result))
+char *msg(const char *fmt, ...);
+
+/*
+ * NOTE(review): judging by the names, the two functions below report an
+ * empty or out-of-range argument to option opt and terminate - confirm
+ * against the definitions.
+ */
+__attribute__ ((noreturn))
+void die_empty_arg(const char *opt);
+
+__attribute__ ((noreturn))
+void die_range(const char *opt);
+
+/* Presumably dies via die_range() unless min <= val <= max - TODO confirm. */
+void check_range(uint32_t val, uint32_t min, uint32_t max, const char *opt);
+/*
+ * NOTE(review): presumably runs the command argv and stores its output
+ * in *buf - confirm against the definition.
+ */
+bool xexec(char * const argv[], char **buf);
+
+/*
+ * How wide the scope of an lvm specifier is.
+ *
+ * NOTE(review): this looks like the four specifier forms described in the
+ * help text of --create-interval: <global>, <vg>, <vg|pool> and <vg/tlv>,
+ * ordered from widest to narrowest - confirm against parse_lvmspec().
+ */
+enum lvm_scope {
+       LS_GLOBAL, LS_VG, LS_POOL, LS_ORIGIN
+};
+/* A parsed lvm specifier. */
+struct lvmspec {
+       enum lvm_scope scope;
+       /* Presumably only the names relevant for the scope are set - verify. */
+       char *vg, *pool, *tlv;
+};
+/*
+ * Parse arg into *result. NOTE(review): context is presumably only used
+ * for error messages - confirm.
+ */
+void parse_lvmspec(const char *arg, const char *context,
+               struct lvmspec *result);
+/* Counterpart of parse_lvmspec(). */
+void free_lvmspec(struct lvmspec *spec);
+/* One percentage for the data LV and one for the metadata LV of a pool. */
+struct percentage_pair {
+       uint8_t data, meta;
+};
+/*
+ * Parsed form of an argument of type [lvmspec:]data_threshold,meta_threshold
+ * (see the --threshold option in misma.suite.m4).
+ */
+struct threshold_arg {
+       struct lvmspec lvmspec;
+       struct percentage_pair threshold;
+};
+void parse_threshold_arg(const char *arg, const char *context,
+               struct threshold_arg *result);
+/*
+ * Parsed form of an argument of type [lvmspec:]timespec (see, for example,
+ * the --create-interval option in misma.suite.m4).
+ */
+struct time_arg {
+       struct lvmspec lvmspec;
+       uint32_t seconds;
+};
+/* Presumably returns the number of seconds spec corresponds to - confirm. */
+unsigned parse_timespec(const char *spec, const char *context);
+void parse_time_arg(const char *arg, const char *context,
+                struct time_arg *result);
+
+/*
+ * Called first thing in main(). NOTE(review): presumably makes sure that
+ * the file descriptors 0, 1 and 2 are open - confirm.
+ */
+void valid_fd012(void);
+/* NOTE(review): presumably detaches and logs to logfile - confirm. */
+int daemonize(const char *logfile);
+/*
+ * NOTE(review): locking and pid lookup keyed by string; util.c includes
+ * <sys/sem.h>, so this is possibly semaphore based - confirm.
+ */
+bool misma_lock(const char *string);
+pid_t get_misma_pid(const char *string);
+/* State for iterating over the lines of a text buffer. */
+struct line_iter {
+       char *base;
+       char *line;
+};
+void line_iter_init(struct line_iter *liter, char *text);
+char *line_iter_get(struct line_iter *liter);
+/*
+ * Read from fd until end of file into a NUL-terminated buffer which is
+ * allocated with xmalloc() and stored in *buf. Returns false if read(2)
+ * fails with an error other than EAGAIN or EINTR, true otherwise.
+ */
+bool fd2buf(int fd, char **buf);
+
+/*
+ * LOGLEVELS is not defined in this header. NOTE(review): it presumably
+ * comes from config.h and expands to the comma separated LL_* names of the
+ * LOGLEVELS variable of the Makefile - confirm.
+ */
+enum loglevels {LOGLEVELS, NUM_LOGLEVELS};
+
+/* Log a printf-style message with severity ll. */
+__attribute__ ((format (printf, 2, 3)))
+void misma_log(int ll, const char* fmt,...);
+
+/*
+ * One convenience macro per severity. Each prefixes the message with the
+ * name of the calling function. Both __FUNCTION__ and the ", ## __VA_ARGS__"
+ * construct are GNU extensions.
+ */
+#define DEBUG_LOG(f,...) misma_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define INFO_LOG(f,...) misma_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define NOTICE_LOG(f,...) misma_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define WARNING_LOG(f,...) misma_log(LL_WARNING, "%s: " f, __FUNCTION__, ##  __VA_ARGS__)
+#define ERROR_LOG(f,...) misma_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define CRIT_LOG(f,...) misma_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+#define EMERG_LOG(f,...) misma_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__)
+
+/*
+ * Fatal error helpers which take a printf-style message and never return.
+ * NOTE(review): die_errno() presumably appends the description of the
+ * current errno value - confirm against the definitions.
+ */
+__attribute__ ((noreturn))
+__attribute__ ((format (printf, 1, 2)))
+void die(const char *fmt, ...);
+
+__attribute__ ((noreturn))
+__attribute__ ((format (printf, 1, 2)))
+void die_errno(const char *fmt, ...);
+
+/*
+ * Heap of opaque element pointers; struct heap itself is opaque to users
+ * of this header.
+ *
+ * NOTE(review): judging by heap_min() and heap_extract_min() this is a
+ * min-heap with respect to the compare function passed to heap_init(),
+ * built on top of the caller-supplied array - confirm against the
+ * implementation.
+ */
+struct heap;
+struct heap *heap_init(void *array, unsigned num_elements,
+       int (*compare)(const void *data1, const void *data2));
+unsigned heap_num_elements(const struct heap *h);
+void heap_insert(void *new_element, struct heap *h);
+void *heap_min(const struct heap *h);
+void *heap_extract_min(struct heap *h);
+/* Calls dumper on elements of the heap (order not visible here). */
+void heap_dump(const struct heap *h, void (*dumper)(const void *));
diff --git a/misma.suite.m4 b/misma.suite.m4

new file mode 100644 (file)

index 0000000..35c702a
--- /dev/null
+++ b/misma.suite.m4
@@ -0,0 +1,614 @@
+# SPDX-License-Identifier: GPL-2.0+
+[suite misma]
+       caption = Subcommands
+       mansect = 8
+       manual_title = System Manager's Manual
+[supercommand misma]
+       [description]
+               DESCRIPTION1()
+
+               DESCRIPTION2()
+
+               DESCRIPTION3()
+       [/description]
+       synopsis = [global-options...] [--] [<subcommand> [subcommand-options...]]
+       purpose = SLOGAN()
+
+       [option title-text]
+               summary = General options
+               flag ignored
+       [option help]
+               summary = print help and exit
+               short_opt = h
+       [option detailed-help]
+               summary = print help, including all details, and exit
+       [option version]
+               summary = print version and exit
+               short_opt = V
+       [option loglevel]
+               summary = control amount of logging
+               short_opt = l
+               arg_info = required_arg
+               arg_type = string
+               typestr = severity
+               values = {
+                       LSGLL_DEBUG = "debug",
+                       LSGLL_INFO = "info",
+                       LSGLL_NOTICE = "notice",
+                       LSGLL_WARNING = "warning",
+                       LSGLL_ERROR = "error",
+                       LSGLL_CRIT = "crit",
+                       LSGLL_EMERG = "emerg"
+               }
+               default_val = warning
+               [help]
+                       Log only messages with severity greater than or equal to the given
+                       value. Possible values:
+
+                       debug: produces really noisy output.
+                       info: still noisy, but won't fill up the disk quickly.
+                       notice: indicates normal, but significant event.
+                       warning: unexpected events that can be handled.
+                       error: unhandled error condition.
+                       crit: system might be unreliable.
+                       emerg: last message before exit.
+               [/help]
+       [option config-file]
+               short_opt = c
+               summary = use alternative config file (default: ~/.mismarc)
+               typestr = path
+               arg_info = required_arg
+               arg_type = string
+               [help]
+                       Options may be given at the command line or in the configuration
+                       file. As usual, if an option is given both at the command line and
+                       in the configuration file, the command line option takes precedence.
+
+                       The config file may contain global options as well as options for
+                       any subcommand, but subcommand specific options must be placed in a
+                       separate section. See the Examples section of the man page.
+               [/help]
+
+       [option title-text]
+               summary = LVM options
+               flag ignored
+       [option origin]
+               summary = the VG and the thin LV to snapshot
+               typestr = vg/tlv
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               [help]
+                       The named volume group must exist and it must contain the named thin
+                       logical volume. This option may be given multiple times where each
+                       instance corresponds to one origin to snapshot.
+               [/help]
+[introduction]
+       Misma supports the subcommands described below. If no subcommand is
+       given, the list of available subcommands is shown and the program
+       terminates successfully without performing any further action.
+[/introduction]
+
+[subcommand run]
+       purpose = create and prune snapshots, discard unused blocks
+       [description]
+               This is the main mode of operation. Snapshots are created and pruned
+               periodically, the thin pool utilization is monitored and filesystem
+               trims are scheduled as configured. The subcommand terminates only on
+               fatal errors or after a terminating signal was received.
+       [/description]
+       [option daemon]
+               short_opt = d
+               summary = run as background daemon
+               [help]
+                       If this option is given, the process detaches from the console and
+                       continues to run in the background.
+               [/help]
+       [option logfile]
+               short_opt = l
+               summary = where to write log output
+               arg_info = required_arg
+               arg_type = string
+               typestr = path
+               default_val = /dev/null
+               [help]
+                       This option is only honored if --daemon is given, in which case
+                       log messages go to the given file. Otherwise the option is silently
+                       ignored and log output is written to stderr.
+               [/help]
+       [option create-interval]
+               summary = Time span between two subsequent snapshots
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 6h
+               [help]
+                       The lvm specifier determines to which origins this instance of the
+                       option applies. If no specifier is given, the option applies to all
+                       origins. Otherwise the specifier may be in one of the following forms:
+                       <vg>: applies to all origins in VG vg, <vg|pool>: applies to all
+                       origins in thin pool <pool> of VG vg, or <vg/tlv>: applies to origin
+                       tlv of vg only. If more than one specifier matches a particular origin,
+                       the narrowest scoped one applies. The order of precedence is therefore
+                       <vg/tlv>, <vg|pool>, <vg>, <global>.
+
+                       The time specifier is an unsigned integer which is followed by a time
+                       unit, a single character of the set {s,m,h,d,y} for seconds, minutes,
+                       hours, days, and years.
+               [/help]
+       [option max-age]
+               summary = age of the oldest snapshot to keep
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 1y
+               [help]
+                       See --create-interval for the format of the lvm and time specifiers.
+               [/help]
+       [option check-interval]
+               summary = the time period between two utilization checks
+               typestr = timespec
+               arg_info = required_arg
+               arg_type = string
+               default_val = 1m
+               [help]
+                       The utilization of all thin pools which contain at least one thin
+                       logical volume specified as an argument to --origin are checked
+                       periodically. See --create-interval for the format of the time
+                       specifier.
+               [/help]
+       [option threshold]
+               summary = high watermarks for snapshot removal (1-99)
+               typestr = [lvmspec:]data_threshold,meta_threshold
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 95,95
+               [help]
+                       The threshold part of the argument is a comma-separated pair of
+                       percentages between 1 and 99, inclusively. If the percentage of used
+                       space in the data/metadata logical volume of the thin pool exceeds
+                       the corresponding threshold value, forced snapshot removal kicks in
+                       to bring back the utilization below the thresholds.
+
+                       The format of the lvm specifier is described in the help text of
+                       --create-interval. However, since the utilization is a property
+                       of the pool, arguments of the form <vg/tlv> make no sense and are
+                       therefore rejected.
+               [/help]
+       [option trim-interval]
+               summary = discard unused blocks periodically
+               typestr = [lvmspec:]timespec
+               arg_info = required_arg
+               arg_type = string
+               flag multiple
+               default_val = 0
+               [help]
+                       The argument specifies the duration between two successive trims. The
+                       default value of zero deactivates this feature.
+
+                       Trimming is performed in the same way as for the trim subcommand.
+                       Errors related to trimming are logged but are otherwise ignored.
+
+                       See --create-interval for the format of the specifiers.
+               [/help]
+       [option exit-hook]
+               summary = command to be executed before exit
+               typestr = command
+               arg_info = required_arg
+               arg_type = string
+               default_val = true
+               [help]
+                       One possible application for this hook is to inform the system manager
+                       that no more snapshots are going to be created.
+
+                       A (quoted) string which describes the error that caused the termination is
+                       appended to the given command and the resulting string is passed as a single
+                       argument to /bin/sh -c.
+               [/help]
+       [option suppress-lvm-warnings]
+               summary = quieten lvcreate(8) and lvremove(8)
+               [help]
+                       Suppress the warnings printed by lvcreate(8) and lvremove(8).
+               [/help]
+[subcommand create]
+       purpose = create a snapshot of each matching origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               This creates one snapshot of each origin which matches the given lvm
+               specifier, ignoring creation intervals, maximal age and utilization
+               thresholds. If no specifiers are given, all origins are regarded as
+               matching so that one snapshot of each configured origin is created.
+
+               The subcommand fails if another "run", "create", or "rm" command
+               is currently running.
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = just print which snapshot would be created
+[subcommand rm]
+       purpose = remove one snapshot of each matching origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               The remarks stated in the description of the "create" subcommand apply
+               for this subcommand as well.
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = just print which snapshot would get removed
+[subcommand ls]
+       purpose = print the snapshot list of each origin
+       non-opts-name = [<lvmspec>]...
+       [description]
+               The list is sorted by snapshot creation date.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = use long listing format
+               [help]
+                       The default output mode lists only the sequence number and the age
+                       of each snapshot as human readable text. This option adds additional
+                       output.
+               [/help]
+[subcommand kill]
+       purpose = signal another misma process
+       [description]
+               This sends a signal to the misma "run" process.
+       [/description]
+       [option signal]
+               short_opt = s
+               summary = send the given signal rather than SIGTERM
+               typestr = signal_number
+               arg_info = required_arg
+               arg_type = uint32
+               default_val = 15
+               [help]
+                       The standard Unix semantics apply if the specified signal number
+                       is zero. That is, no signal is actually sent, and the subcommand
+                       exits successfully only if a misma "run" process exists.
+               [/help]
+       [option wait]
+               short_opt = w
+               summary = wait until the signalled process has terminated
+               [help]
+                       This option is handy for system shutdown scripts which would like
+                       to terminate the misma daemon process.
+
+                       Without --wait the misma process which executes the kill subcommand
+                       exits right after the kill(2) system call returns. At this point the
+                       signalled process might still be alive (even if SIGKILL was sent).
+                       If --wait is given, the process waits until the signalled process
+                       has terminated or the timeout expires.
+
+                       If --wait is not given, the kill subcommand exits successfully if
+                       and only if the signal was sent (i.e., if there exists another misma
+                       process to receive the signal). With --wait it exits successfully
+                       if, additionally, the signalled process has terminated before the
+                       timeout expires.
+
+                       It only makes sense to use this option for signals which terminate
+                       the misma process.
+               [/help]
+[subcommand trim]
+       purpose = discard unused blocks of origin LVs
+       non-opts-name = [<lvmspec>]...
+       [description]
+               Each matching origin LV is expected to contain a mounted and writable
+               filesystem. The subcommand is equivalent to running fstrim(8) on
+               the mountpoints of these filesystems. The full block range of each
+               origin LV is taken into account and the default minimal block size for
+               discards is used. This corresponds to the default values of fstrim(8).
+       [/description]
+       [option dry-run]
+               short_opt = n
+               summary = print the mount points, but do not trim
+               [help]
+                       In dry-run mode the mount points are determined as usual, but the
+                       command exits without starting any trim operation.
+               [/help]
+[subcommand help]
+       purpose = list available subcommands or print subcommand-specific help
+       non-opts-name = [subcommand]
+       [description]
+               Without any arguments, help prints the list of available
+               subcommands. When called with a subcommand name argument, it prints
+               the help text of the given subcommand.
+       [/description]
+       [option long]
+               short_opt = l
+               summary = show the long help text
+               [help]
+                       If the optional argument is supplied, the long help text contains the
+                       synopsis, the purpose and the description of the specified subcommand,
+                       followed by the option list including summary and help text of each
+                       option. Without --long, the short help is shown instead. This omits
+                       the description of the subcommand and the option help.
+
+                       If no subcommand is supplied but --long is given, the list contains the
+                       purpose of each subcommand.
+               [/help]
+[subcommand utilization]
+       purpose = show thin pool utilization
+       [description]
+               This prints the percentage of used blocks in the data and metadata
+               logical volumes of each pool.
+       [/description]
+[subcommand configtest]
+       purpose = run a configuration file syntax test
+       [description]
+               This subcommand checks the command line options and the configuration
+               file for syntactic correctness. It either reports "Syntax Ok" and
+               exits successfully or prints information about the first syntax error
+               detected and terminates with exit code 1.
+       [/description]
+
+[section Notes]
+.SS Naming
+       Snapshots created by misma are named
+       .IR misma-origin.seq ,
+       where
+       .I origin
+       is the name of the thin logical volume (i.e., the second component
+       of the argument to
+       .IR --origin )
+       and
+       .I seq
+       is a sequence number.
+.SS Snapshot Replacement Strategy
+       Assume that the arguments
+       to
+       .I --create-interval
+       and
+       .I --max-age
+       correspond to
+       .I d
+       minutes and
+       .I m
+       days, respectively. These two quantities determine the length
+       .I n
+       of a sequence of snapshots such that
+       .IP \(bu 2
+               the first two snapshots are
+       .I d
+       minutes apart,
+       .IP \(bu 2
+               the difference of the creation times between two consecutive snapshots
+               doubles at each step,
+       .IP \(bu 2
+               the first and the last snapshot are at least
+       .I m
+       days apart.
+       .P
+       At startup,
+       .B misma
+       maps each existing snapshot to a slot in an array
+       of length
+       .IR n .
+       When a new snapshot has to be created and not all slots are mapped
+       yet, the new snapshot is mapped to an unmapped slot. If all slots
+       are mapped, an existing snapshot is removed first and its slot is
+       reused. The slot number of the snapshot to be replaced is computed as
+       .B ffz(seq % (2^n - 1)),
+       where
+       .I seq
+       is the sequence number of the new snapshot, and
+       .B ffz(x)
+       is the first zero in
+       the binary representation of
+       .IR x .
+       By properties of the
+       .B ffz()
+       function, the frequency at which a slot gets reused halves at each
+       step: the snapshot in slot 0 gets reused (roughly) every second time,
+       the snapshot in slot one every fourth time, and so on.
+.SS Forced Snapshot Removal
+       In addition to the normal snapshot removal which takes place when a
+       slot gets reused as described above, snapshots are
+       .I force-removed
+       when the utilization of a thin pool exceeds its configured
+       thresholds. One snapshot is removed from each affected origin until
+       the utilization drops below the thresholds. If the utilization still
+       exceeds the thresholds after all snapshots have been removed, snapshot
+       creation is suspended.
+       .P
+       Forced removal
+       reliably prevents data and metadata exhaustion if the pool is
+       not overbooked. That is, if the sum of the (virtual) sizes of the
+       non-snapshot logical volumes is smaller than the pool size.
+.SS Trimming
+       The trim operation instructs a mounted filesystem to identify blocks
+       which are currently not in use and to pass this information to the
+       underlying block device driver. For a configured misma origin, this
+       driver is
+       .BR dm-thin ,
+       which keeps track of the used and unused blocks of each thin pool.
+       The blocks which are freed by the trim operation become available
+       for subsequent snapshots.
+
+       A one-shot trim operation is started by invoking the
+       .B trim
+       subcommand while periodic trims may be configured via the
+       .I --trim-interval
+       option of the
+       .B run
+       subcommand.
+
+       Trimming is implemented by issuing the
+       .I FITRIM
+       ioctl on the mount point, which is identical to how the
+       .BR fstrim (8)
+       command works. The mount point is determined from the major and minor
+       device numbers of the block special of the origin by parsing
+       .IR /proc/self/mountinfo .
+.SS Activating and Mounting Snapshots
+       Since thin provisioned snapshots have the
+       .I activation-skip
+       flag set, one must first
+       .I activate
+       the snapshot logical volume to create the corresponding device node.
+
+       Moreover, the XFS filesystem driver refuses to mount a block device
+       which contains a UUID that is identical to the UUID of an already
+       mounted filesystem. To mount a snapshot of an XFS filesystem, one
+       must therefore tell XFS to skip the UUID check.
+
+       See the examples below for suitable command line options for
+       .BR lvchange (8)
+       and
+       .BR mount (8).
+
+       Since logical volumes which contain a mounted filesystem cannot be
+       removed, a thin pool which is not overbooked may still run out of
+       space when one of its snapshot logical volumes is still mounted. It
+       is therefore good practice to activate and mount snapshots only for
+       as long as necessary.
+[/section]
+
+[section Examples]
+       .IP \(bu 2
+       Create a 1T large thin pool named
+       .I tp
+       in the volume group
+       .IR vg :
+
+       .RS 6
+       .EX
+               .B lvcreate \-\-type thin\-pool \-L 1T \-\-poolmetadatasize 16G \-n tp vg
+       .EE
+       .RE
+       .IP \(bu 2
+       Create the thin logical volume
+       .I tlv
+       of virtual size 100G in the thin pool
+       .IR tp :
+
+       .RS 6
+       .EX
+               .B lvcreate \-\-thin \-n tlv \-\-virtualsize 100G \-\-thinpool vg/tp
+       .EE
+       .RE
+       .IP \(bu 2
+       Run
+       .B misma
+       to create snapshots of the logical volume
+       .IR tlv ,
+       using default values:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv run
+       .EE
+       .RE
+       .IP \(bu 2
+       Same as before, but run
+       .B misma
+       as a background daemon to create a snapshot every hour:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv \-\- run \-d \-\-create\-interval 1h
+       .EE
+       .RE
+       .IP \(bu 2
+       List all snapshots created so far:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv \-\- ls \-l
+       .EE
+       .RE
+       .IP \(bu 2
+       Run
+       .B lvs
+       to print similar information:
+
+       .RS 6
+       .EX
+               .B vg=vg; o=tlv
+               .B lvs -o 'lv_path,lv_attr,lv_time,origin' \[rs]
+               .B \~ \-S \[dq]vg_name = $vg && origin = $o\[dq] \[rs]
+               .B \~ \-\-config \[dq]report/time_format='%F %R'\[dq]
+       .EE
+       .RE
+       .IP \(bu 2
+       Activate snapshot number 42:
+
+       .RS 6
+       .EX
+               .B lvchange \-\-ignoreactivationskip \-\-activate y vg/misma-tlv.42
+       .EE
+       .RE
+       .IP \(bu 2
+       Mount an active snapshot which contains an XFS filesystem:
+
+       .RS 6
+       .EX
+               .B mount /dev/vg/misma-tlv.42 \-o nouuid /mnt
+       .EE
+       .RE
+       .IP \(bu 2
+       Terminate the
+       .B misma
+       daemon process:
+
+       .RS 6
+       .EX
+               .B misma \-\-origin vg/tlv kill
+       .EE
+       .RE
+       .IP \(bu 2
+       A simple config file:
+
+       .RS 6
+       .EX
+               # global options
+               origin vg/tlv
+               loglevel info
+               # an option for the "run" subcommand
+               [run]
+                   logfile /var/log/misma.log
+       .EE
+       .RE
+
+[/section]
+
+[section copyright]
+       Written by AUTHOR()
+       .br
+       Copyright (C) COPYRIGHT_YEAR() AUTHOR()
+       .br
+       License: LICENSE()
+       .br
+       This is free software: you are free to change and redistribute it.
+       .br
+       There is NO WARRANTY, to the extent permitted by law.
+       .P
+       Web page:
+       .UR URL()
+       .UE
+       .br
+       Git clone `URL':
+       .UR CLONE_URL()
+       .UE
+       .br
+       Gitweb:
+       .UR GITWEB_URL()
+       .UE
+       .br
+       Author's home page:
+       .UR HOME_URL()
+       .UE
+       .br
+       Report bugs to
+       .MT EMAIL()
+       AUTHOR()
+       .ME
+[/section]
+[section see also]
+       .BR lvm (8),
+       .BR fstrim (8),
+       .BR lvmthin (7),
+       .BR dss (1)
+[/section]
diff --git a/util.c b/util.c

new file mode 100644 (file)

index 0000000..10972da
--- /dev/null
+++ b/util.c
@@ -0,0 +1,562 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include "misma.h"
+
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+/*
+ * Resize the allocation at p to size bytes and abort if that fails.
+ *
+ * A size of zero is rejected up front. The call to realloc() is deliberately
+ * kept outside of assert(): the argument of assert() is not evaluated when
+ * the file is compiled with NDEBUG, so an allocation performed inside the
+ * assertion would silently vanish in such builds.
+ *
+ * Returns the (possibly moved) allocation.
+ */
+void *xrealloc(void *p, size_t size)
+{
+       void *q;
+
+       assert(size > 0);
+       q = realloc(p, size);
+       assert(q);
+       return q;
+}
+
+/*
+ * Allocate size bytes of uninitialized memory. This is xrealloc() applied to
+ * a NULL pointer, so size checking and failure handling happen there.
+ */
+void *xmalloc(size_t size)
+{
+       void *mem = xrealloc(NULL, size);
+
+       return mem;
+}
+
+/*
+ * Allocate size bytes through xrealloc() and fill them with zeros.
+ *
+ * memset() returns its first argument, so the freshly allocated buffer is
+ * what gets handed back to the caller.
+ */
+void *xzmalloc(size_t size)
+{
+       return memset(xrealloc(NULL, size), 0, size);
+}
+
+/*
+ * Return a heap-allocated copy of s. A NULL pointer is treated like the
+ * empty string. The result of strdup() is checked with assert().
+ */
+void *xstrdup(const char *s)
+{
+       const char *src = "";
+       char *copy;
+
+       if (s)
+               src = s;
+       copy = strdup(src);
+       assert(copy);
+       return copy;
+}
+
+/*
+ * Format a message into a newly allocated string (printf-style).
+ *
+ * The first attempt uses a 100 byte buffer. If the output does not fit,
+ * vsnprintf() reports the length it would have needed and the buffer is
+ * resized to exactly that length plus the terminating NUL byte.
+ *
+ * The return value of vsnprintf() is a signed int and negative on errors.
+ * It is checked explicitly before being compared with the unsigned buffer
+ * size, rather than relying on the implicit conversion to a huge unsigned
+ * value.
+ *
+ * Returns the formatted string, which was obtained from xmalloc() or
+ * xrealloc().
+ */
+char *msg(const char *fmt, ...)
+{
+       size_t size = 100;
+       char *m = xmalloc(size);
+
+       while (1) {
+               int n;
+               va_list ap;
+
+               /* Try to print in the allocated space. */
+               va_start(ap, fmt);
+               n = vsnprintf(m, size, fmt, ap);
+               va_end(ap);
+               /* A negative value indicates an output error. */
+               assert(n >= 0);
+               /* If that worked, return the string. */
+               if ((size_t)n < size)
+                       return m;
+               /* Else try again with more space. */
+               size = (size_t)n + 1; /* precisely what is needed */
+               m = xrealloc(m, size);
+       }
+}
+
+/*
+ * Read from fd until end of file and store the data as a NUL-terminated
+ * string in a newly allocated buffer.
+ *
+ * fd:  file descriptor to read from.
+ * buf: result pointer; *buf is set to the allocated buffer in all cases.
+ *
+ * The buffer starts out with 100 bytes and is doubled whenever it is full.
+ * One byte is always kept in reserve for the terminating NUL. Reads which
+ * fail with EAGAIN or EINTR are retried.
+ *
+ * Returns true on end of file, false on any other read error. In the error
+ * case the data received so far is NUL-terminated as well, so *buf is a
+ * valid string no matter what is returned.
+ */
+bool fd2buf(int fd, char **buf)
+{
+       ssize_t ret, nread = 0, sz = 100;
+
+       *buf = xmalloc(sz);
+       for (;;) {
+               ret = read(fd, *buf + nread, sz - nread - 1);
+               if (ret < 0) {
+                       if (errno == EAGAIN || errno == EINTR)
+                               continue;
+                       ERROR_LOG("read error: %s\n", strerror(errno));
+                       /* never hand out an unterminated buffer */
+                       (*buf)[nread] = '\0';
+                       return false;
+               }
+               if (ret == 0) {
+                       (*buf)[nread] = '\0';
+                       return true;
+               }
+               nread += ret;
+               if (nread >= sz - 1) {
+                       sz *= 2;
+                       *buf = xrealloc(*buf, sz);
+               }
+       }
+}
+
+/*
+ * Run a command in a child process and wait for it to terminate.
+ *
+ * argv: NULL-terminated argument vector, passed to execvp().
+ * buf:  if non-NULL, the standard output of the child is redirected to a
+ *       pipe and collected with fd2buf() into *buf.
+ *
+ * Failures of pipe(), fork(), waitpid() and, in the child, dup2() are
+ * reported through die_errno(). A waitpid() call which was interrupted by a
+ * signal is restarted, like the interrupted read() in fd2buf().
+ *
+ * Returns true only if the output could be read (when requested) and the
+ * child exited normally with status EXIT_SUCCESS.
+ */
+bool xexec(char * const argv[], char **buf)
+{
+       pid_t pid;
+       int pipefd[2] = {-1, -1};
+       unsigned n;
+
+       for (n = 0; argv[n]; n++)
+               DEBUG_LOG("argv[%u]=%s\n", n, argv[n]);
+       if (buf) {
+               if (pipe(pipefd) < 0)
+                       die_errno("pipe");
+       }
+       if ((pid = fork()) < 0)
+               die_errno("fork");
+       if (pid > 0) { /* parent */
+               int wstatus;
+               bool success = true;
+               if (buf) {
+                       /* close the write end so that read() sees EOF */
+                       close(pipefd[1]);
+                       success = fd2buf(pipefd[0], buf);
+                       close(pipefd[0]);
+               }
+               /* always reap the child, even if reading failed */
+               while (waitpid(pid, &wstatus, 0) < 0) {
+                       if (errno != EINTR)
+                               die_errno("waitpid");
+               }
+               if (!success)
+                       return false;
+               if (!WIFEXITED(wstatus))
+                       return false;
+               return WEXITSTATUS(wstatus) == EXIT_SUCCESS;
+       }
+       /* child */
+       if (pipefd[0] >= 0)
+               close(pipefd[0]);
+       if (pipefd[1] >= 0 && pipefd[1] != STDOUT_FILENO) {
+               if (dup2(pipefd[1], STDOUT_FILENO) < 0)
+                       die_errno("dup2()");
+               close(pipefd[1]);
+       }
+       execvp(argv[0], argv);
+       /* only reached if execvp() failed */
+       EMERG_LOG("execvp error: %s\n", strerror(errno));
+       _exit(EXIT_FAILURE);
+}
+
+/*
+ * Report through die() that the argument to the option named opt (given
+ * without the leading dashes) must not be empty.
+ */
+void die_empty_arg(const char *opt)
+{
+       die("argument to --%s must not be empty", opt);
+}
+
+/*
+ * Report through die() that the argument to the option named opt (given
+ * without the leading dashes) is out of range.
+ */
+void die_range(const char *opt)
+{
+       die("argument to --%s is out of range", opt);
+}
+
+/*
+ * Make sure that val lies in the closed interval [min, max]. If it does
+ * not, die_range() is called with the option name opt.
+ */
+void check_range(uint32_t val, uint32_t min, uint32_t max, const char *opt)
+{
+       if (min <= val && val <= max)
+               return;
+       die_range(opt);
+}
+
+/*
+ * Convert the decimal string str to an unsigned 32 bit integer.
+ *
+ * opt is the name of the option the string belongs to. It is only used for
+ * the error messages. The checks are performed in this order: range errors,
+ * missing digits, trailing garbage.
+ */
+static uint32_t atou32(const char *str, const char *opt)
+{
+       const long long limit = (uint32_t)-1;
+       char *end;
+       long long num;
+
+       errno = 0; /* so that a failed conversion can be told from success */
+       num = strtoll(str, &end, 10);
+       if ((errno == ERANGE && (num == LLONG_MAX || num == LLONG_MIN))
+                       || num < 0 || num > limit)
+               die_range(opt);
+       /*
+        * Without any digits strtoll() stores str in end. An implementation
+        * is also allowed to set errno and return zero in this case.
+        */
+       if (end == str || (errno != 0 && num == 0))
+               die_empty_arg(opt);
+       if (*end != '\0') /* something follows the number */
+               die("--%s: trailing characters after number", opt);
+       return num;
+}
+
+/*
+ * Split an argument of the form "prefix:suffix" at the first colon.
+ *
+ * On return *prefix and *suffix point to newly allocated strings. If arg
+ * contains no colon, *prefix is set to NULL and *suffix is a copy of the
+ * whole argument. An empty argument, an empty prefix and an empty suffix
+ * are all rejected, with context being part of the error message.
+ */
+static void split_arg(const char *arg, const char *context,
+               char **prefix, char **suffix)
+{
+       char *copy = xstrdup(arg), *sep;
+
+       if (copy[0] == '\0')
+               die_empty_arg(context);
+       sep = strchr(copy, ':');
+       if (sep == NULL) {
+               /* no prefix: hand the copy itself over to the caller */
+               *suffix = copy;
+               *prefix = NULL;
+               return;
+       }
+       if (sep == copy || sep[1] == '\0')
+               die("%s: invalid argument", context);
+       *sep = '\0';
+       *prefix = xstrdup(copy);
+       *suffix = xstrdup(sep + 1);
+       free(copy);
+}
+
+/*
+ * Parse an lvmspec string and store the outcome in result.
+ *
+ * Three forms are recognized, checked in this order:
+ *
+ *     vg/tlv   scope LS_ORIGIN, sets result->vg and result->tlv
+ *     vg|pool  scope LS_POOL, sets result->vg and result->pool
+ *     vg       scope LS_VG, sets result->vg only
+ *
+ * All strings stored in result are newly allocated. An empty string on
+ * either side of the separator is rejected, with context being part of the
+ * error message.
+ */
+void parse_lvmspec(const char *arg, const char *context,
+               struct lvmspec *result)
+{
+       char *copy = xstrdup(arg);
+       char *sep = strchr(copy, '/');
+
+       if (sep != NULL) {
+               if (sep == copy || sep[1] == '\0')
+                       die("%s: invalid argument", context);
+               result->scope = LS_ORIGIN;
+               result->tlv = xstrdup(sep + 1);
+               *sep = '\0'; /* what is left in copy is the VG name */
+       } else if ((sep = strchr(copy, '|')) != NULL) {
+               if (sep == copy || sep[1] == '\0')
+                       die("%s: invalid argument", context);
+               result->scope = LS_POOL;
+               result->pool = xstrdup(sep + 1);
+               *sep = '\0';
+       } else
+               result->scope = LS_VG;
+       result->vg = xstrdup(copy);
+       free(copy);
+}
+
+/*
+ * Release the strings referenced by spec. Which members are freed depends
+ * on the scope: nothing for LS_GLOBAL, otherwise the VG name plus the pool
+ * name (LS_POOL) or the tlv name (LS_ORIGIN). The structure itself is not
+ * freed.
+ */
+void free_lvmspec(struct lvmspec *spec)
+{
+       if (spec->scope != LS_GLOBAL) {
+               free(spec->vg);
+               if (spec->scope == LS_POOL)
+                       free(spec->pool);
+               else if (spec->scope == LS_ORIGIN)
+                       free(spec->tlv);
+       }
+}
+
+/*
+ * Parse an argument of the form "[lvmspec:]data,meta".
+ *
+ * Without the lvmspec prefix the scope of the result is LS_GLOBAL. A prefix
+ * which parses to scope LS_ORIGIN is rejected. Both numbers must lie
+ * between 1 and 99 (inclusive). They are stored in result->threshold.data
+ * and result->threshold.meta, respectively.
+ */
+void parse_threshold_arg(const char *arg, const char *context,
+               struct threshold_arg *result)
+{
+       char *scope_part, *values, *sep;
+       uint32_t num;
+
+       split_arg(arg, context, &scope_part, &values);
+       if (!scope_part)
+               result->lvmspec.scope = LS_GLOBAL;
+       else {
+               parse_lvmspec(scope_part, context, &result->lvmspec);
+               if (result->lvmspec.scope == LS_ORIGIN)
+                       die("invalid scope for threshold lvmspec");
+       }
+       free(scope_part);
+       sep = strchr(values, ',');
+       if (sep == NULL)
+               die("%s: invalid argument", context);
+       *sep = '\0'; /* values now holds the first number only */
+       num = atou32(values, context);
+       check_range(num, 1, 99, context);
+       result->threshold.data = num;
+       num = atou32(sep + 1, context);
+       check_range(num, 1, 99, context);
+       result->threshold.meta = num;
+       free(values);
+}
+
+/*
+ * Convert a time specification to a number of seconds.
+ *
+ * A time specification is a decimal number which is immediately followed by
+ * exactly one unit character: s (seconds), m (minutes), h (hours), d (days)
+ * or y (years of 365 days). context is only used for error messages.
+ *
+ * The product is computed in 64 bits and must fit into 32 bits.
+ */
+unsigned parse_timespec(const char *spec, const char *context)
+{
+       char *p, *tmp = xstrdup(spec);
+       uint64_t val, multiplier;
+
+       /*
+        * The argument of isdigit() must be representable as an unsigned
+        * char (or be EOF), so plain char values need to be cast.
+        */
+       for (p = tmp; isdigit((unsigned char)*p); p++)
+               ;
+       if (*p == '\0')
+               die("%s: timespec lacks trailing time unit", context);
+       switch (*p) {
+       case 's': multiplier = 1; break;
+       case 'm': multiplier = 60; break;
+       case 'h': multiplier = 3600; break;
+       case 'd': multiplier = 86400; break;
+       case 'y': multiplier = 365 * 86400; break;
+       default:
+               die("%s: invalid time unit in timespec argument", context);
+       }
+       /* cut off the unit so that only the digits remain in tmp */
+       *p = '\0';
+       if (p[1])
+               die("%s: trailing characters after time unit", context);
+       val = atou32(tmp, context) * multiplier;
+       free(tmp);
+       if (val > (uint32_t)-1)
+               die_range(context);
+       return val;
+}
+
+/*
+ * Parse an argument of the form "[lvmspec:]timespec".
+ *
+ * Without the lvmspec prefix the scope of the result is LS_GLOBAL. The
+ * timespec is converted with parse_timespec() and stored in
+ * result->seconds.
+ */
+void parse_time_arg(const char *arg, const char *context,
+                struct time_arg *result)
+{
+       char *scope_part, *timespec;
+
+       split_arg(arg, context, &scope_part, &timespec);
+       if (!scope_part)
+               result->lvmspec.scope = LS_GLOBAL;
+       else
+               parse_lvmspec(scope_part, context, &result->lvmspec);
+       free(scope_part);
+       result->seconds = parse_timespec(timespec, context);
+       free(timespec);
+}
+
+/*
+ * Prepare liter for iterating over the lines of text. Both the base pointer
+ * and the current position are set to the start of the text.
+ */
+void line_iter_init(struct line_iter *liter, char *text)
+{
+       liter->base = text;
+       liter->line = text;
+}
+
+/*
+ * Return the next line of the text, or NULL if there is nothing left.
+ *
+ * The text is modified in place: the newline character which ends the
+ * returned line is overwritten with a NUL byte. A last line without a
+ * trailing newline is returned as is.
+ */
+char *line_iter_get(struct line_iter *liter)
+{
+       char *cur = liter->line, *nl;
+
+       if (cur == NULL || *cur == '\0')
+               return NULL;
+       nl = strchr(cur, '\n');
+       if (nl == NULL)
+               liter->line = NULL; /* this was the last line */
+       else {
+               *nl = '\0';
+               liter->line = nl + 1;
+       }
+       return cur;
+}
+
+/*
+ * Ensure that file descriptors 0, 1, and 2 are valid.
+ *
+ * /dev/null is opened repeatedly until open() returns a descriptor greater
+ * than two. That descriptor is closed again while those with smaller
+ * numbers, if any, are left open.
+ */
+void valid_fd012(void)
+{
+       int fd;
+
+       do {
+               fd = open("/dev/null", O_RDWR);
+               if (fd < 0)
+                       die_errno("open");
+       } while (fd <= 2);
+       close(fd);
+}
+
+/*
+ * Detach from the calling process and redirect the standard streams.
+ *
+ * The process forks. The parent blocks in read() on a pipe and exits
+ * successfully as soon as it has received one byte from the child. If the
+ * read returns end of file or an error instead, the parent dies with an
+ * error message.
+ *
+ * The child becomes a session leader, connects stdin to /dev/null and both
+ * stdout and stderr to the log file (opened in append mode, created if
+ * necessary), and changes its working directory to the root directory. If
+ * logfile is NULL, /dev/null is used instead.
+ *
+ * Returns, in the child only, the write end of the pipe.
+ * NOTE(review): presumably the caller writes a byte to this descriptor once
+ * its initialization has succeeded -- confirm against the caller.
+ */
+int daemonize(const char *logfile)
+{
+       pid_t pid;
+       int nullfd, logfd, pipefd[2];
+
+       if (pipe(pipefd) < 0)
+               die_errno("pipe");
+       if ((pid = fork()) < 0)
+               die_errno("fork");
+       if (pid) { /* parent exits after reading from the pipe */
+               char c;
+               /* without this, read() could never return end of file */
+               close(pipefd[1]);
+               if (read(pipefd[0], &c, 1) <= 0)
+                       die("child terminated unsuccessfully");
+               exit(EXIT_SUCCESS);
+       }
+       close(pipefd[0]);
+       /* become session leader */
+       if (setsid() < 0)
+               die_errno("setsid");
+       if ((nullfd = open("/dev/null", O_RDWR)) < 0)
+               die_errno("open /dev/null");
+       logfile = logfile? logfile : "/dev/null";
+       if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT, 0666)) < 0)
+               die_errno("open %s", logfile);
+       /* this message is logged before stdout and stderr are redirected */
+       INFO_LOG("subsequent log messages go to %s\n", logfile);
+       if (dup2(nullfd, STDIN_FILENO) < 0)
+               die_errno("dup2");
+       close(nullfd);
+       if (dup2(logfd, STDOUT_FILENO) < 0)
+               die_errno("dup2");
+       if (dup2(logfd, STDERR_FILENO) < 0)
+               die_errno("dup2");
+       close(logfd);
+       /* reopen any of the descriptors 0, 1, 2 which is closed by now */
+       valid_fd012();
+       if (chdir("/") < 0)
+               die_errno("chdir");
+       return pipefd[1];
+}
+
+/*
+ * Map a string to an integer which is suitable as a System V IPC key.
+ *
+ * Four rounds are performed. Each round seeds the hash value with the first
+ * character plus the round number, mixes in the remaining characters and
+ * contributes the low eight bits of the outcome to the result. The result
+ * is finally shifted right by one bit, which clears its most significant
+ * bit.
+ *
+ * For the empty string the inner loop must not start at index one because
+ * that position is already beyond the terminating NUL byte. In this case
+ * the loop starts at index zero and terminates immediately.
+ */
+static int super_dull_hash(const char *input)
+{
+       const uint8_t *x = (typeof(x))input;
+       const unsigned p1 = 16777619, p2 = 2971215073;
+       unsigned n, m, h, result = 0;
+
+       for (n = 0; n < 4; n++) {
+               h = p1 * (x[0] + n);
+               for (m = x[0]? 1 : 0; x[m] != 0; m++)
+                       h = p2 * (h ^ x[m]);
+               result = (result << 8) | (h % 256);
+       }
+       return result >> 1;
+}
+
+/**
+ * We use a semaphore set with two semaphores. The first semaphore is modified
+ * in both misma_lock() and get_misma_pid() while the second one is modified
+ * only in misma_lock(). This allows us to obtain the PID of the running misma
+ * process by querying the PID that last performed an operation on the second
+ * semaphore. This is achieved by passing GETPID as the control operation to
+ * semctl().
+ */
+
+/*
+ * Try to acquire the lock which is identified by string.
+ *
+ * The key of the semaphore set is derived from string by means of
+ * super_dull_hash(). The set consists of two semaphores and is created if
+ * it does not exist yet. A single semop() call then performs, for each of
+ * the two semaphores, a wait-for-zero operation followed by an increment.
+ * All four operations carry the SEM_UNDO and IPC_NOWAIT flags.
+ *
+ * Returns true if both semget() and semop() succeeded, false otherwise.
+ */
+bool misma_lock(const char *string)
+{
+       struct sembuf sops[4] = {
+               {.sem_num = 0, .sem_op = 0, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 1, .sem_op = 0, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+               {.sem_num = 1, .sem_op = 1, .sem_flg = SEM_UNDO | IPC_NOWAIT},
+       };
+       key_t key = super_dull_hash(string);
+       int semid = semget(key, 2, IPC_CREAT | 0600);
+
+       if (semid < 0)
+               return false;
+       DEBUG_LOG("key: 0x%0x, semid: %d\n", (unsigned)key, semid);
+       return semop(semid, sops, 4) >= 0;
+}
+
+/*
+ * Look up the PID of the process which holds the lock identified by string.
+ *
+ * A non-blocking wait-for-zero operation is attempted on the first
+ * semaphore of the set. If the set does not exist or the operation
+ * succeeds, zero is returned. Otherwise the result of GETPID on the second
+ * semaphore is returned, or zero if semctl() failed.
+ */
+pid_t get_misma_pid(const char *string)
+{
+       struct sembuf probe = {
+               .sem_num = 0,
+               .sem_op = 0,
+               .sem_flg = SEM_UNDO | IPC_NOWAIT
+       };
+       key_t key = super_dull_hash(string);
+       int pid, semid = semget(key, 2, 0);
+
+       if (semid < 0)
+               return 0;
+       DEBUG_LOG("key: 0x%0x, semid: %d\n", (unsigned)key, semid);
+       if (semop(semid, &probe, 1) >= 0)
+               return 0;
+       pid = semctl(semid, 1, GETPID);
+       return pid < 0? 0 : pid;
+}
+
+/*
+ * Simplistic min-heap implementation (see e.g. Cormen et al. Chapter 6)
+ *
+ * The heap does not own a private copy of the element array. It stores the
+ * address of the pointer to the array instead, and heap_insert() and
+ * heap_extract_min() resize the array through that address.
+ */
+struct heap {
+       void ***aa; /* array address */
+       unsigned n; /* num elements */
+       /*
+        * Ordering callback. heapify() and heap_insert() move an element
+        * towards the root when compare(element, other) is positive.
+        */
+       int (*compare)(const void *data1, const void *data2);
+};
+
+/* Index of the parent of the node at idx (zero-based array layout). */
+static unsigned heap_parent(unsigned idx)
+{
+       unsigned one_based = idx + 1;
+
+       return one_based / 2 - 1;
+}
+
+/* Index of the left child of the node at idx (zero-based array layout). */
+static unsigned heap_left(unsigned idx)
+{
+       return 2 * idx + 1;
+}
+
+/* Index of the right child of the node at idx (zero-based array layout). */
+static unsigned heap_right(unsigned idx)
+{
+       return 2 * idx + 2;
+}
+
+/*
+ * Restore the heap property for the subtree rooted at idx, assuming that
+ * both child subtrees already satisfy it.
+ *
+ * Among the node and its children the one which the compare callback ranks
+ * highest is determined. If that is one of the children, it is swapped with
+ * the node and the procedure continues one level further down.
+ */
+static void heapify(struct heap *h, unsigned idx)
+{
+       void **array = *(h->aa);
+
+       assert(idx < h->n);
+       for (;;) {
+               unsigned l = heap_left(idx), r = heap_right(idx);
+               unsigned top = idx;
+               void *tmp;
+
+               if (l < h->n && h->compare(array[l], array[top]) > 0)
+                       top = l;
+               if (r < h->n && h->compare(array[r], array[top]) > 0)
+                       top = r;
+               if (top == idx) /* heap property holds, we are done */
+                       return;
+               tmp = array[idx];
+               array[idx] = array[top];
+               array[top] = tmp;
+               idx = top;
+       }
+}
+
+/*
+ * Turn an existing array of pointers into a heap.
+ *
+ * aa:           address of the pointer to the array, stored as is.
+ * num_elements: number of pointers in the array.
+ * compare:      ordering callback.
+ *
+ * All nodes which can have children are processed bottom-up by heapify().
+ * Returns a newly allocated heap structure.
+ */
+struct heap *heap_init(void *aa, unsigned num_elements,
+       int (*compare)(const void *data1, const void *data2))
+{
+       struct heap *h = xmalloc(sizeof(*h));
+
+       INFO_LOG("creating heap with %u elements\n", num_elements);
+       h->compare = compare;
+       h->n = num_elements;
+       h->aa = aa;
+       /* j is one larger than the index being processed */
+       for (unsigned j = num_elements / 2; j > 0; j--)
+               heapify(h, j - 1);
+       return h;
+}
+
+/* Return the element at the root of the heap, which must not be empty. */
+void *heap_min(const struct heap *h)
+{
+       void **array;
+
+       assert(h->n > 0);
+       array = *(h->aa);
+       return array[0];
+}
+
+/* Return the number of elements which are currently stored in the heap. */
+unsigned heap_num_elements(const struct heap *h)
+{
+       return h->n;
+}
+
+/*
+ * Remove the element at the root of the heap and return it.
+ *
+ * The heap must not be empty. The last element of the array takes the place
+ * of the root, the array is shrunk by one slot and the heap property is
+ * restored with heapify().
+ *
+ * If the extracted element was the only one, there is nothing to move and
+ * nothing to reorder. The array is left alone in this case because
+ * xrealloc() rejects a size of zero and heapify() insists on a valid index.
+ * A subsequent heap_insert() resizes the array as usual.
+ */
+void *heap_extract_min(struct heap *h)
+{
+       void *smallest = heap_min(h);
+       void **array = *(h->aa);
+
+       h->n--;
+       if (h->n == 0)
+               return smallest;
+       array[0] = array[h->n];
+       *(h->aa) = xrealloc(array, h->n * sizeof(void *));
+       heapify(h, 0);
+       return smallest;
+}
+
+/*
+ * Add new_element to the heap.
+ *
+ * The array is enlarged by one slot and the new element is stored in the
+ * last position. From there it is swapped with its parent for as long as
+ * the compare callback returns a positive value for the pair.
+ */
+void heap_insert(void *new_element, struct heap *h)
+{
+       unsigned j = h->n; /* index of the new slot */
+       void **array = xrealloc(*(h->aa), (j + 1) * sizeof(void *));
+
+       *(h->aa) = array;
+       h->n = j + 1;
+       array[j] = new_element;
+       while (j > 0) {
+               unsigned parent = heap_parent(j);
+               void *tmp;
+
+               if (h->compare(array[j], array[parent]) <= 0)
+                       break;
+               tmp = array[parent];
+               array[parent] = array[j];
+               array[j] = tmp;
+               j = parent;
+       }
+}
+
+/*
+ * Call dumper for each element of the heap, in the order in which the
+ * elements are stored in the array.
+ */
+void heap_dump(const struct heap *h, void (*dumper)(const void *))
+{
+       void **items = *(h->aa);
+       unsigned j = 0;
+
+       while (j < h->n)
+               dumper(items[j++]);
+}
diff --git a/version-gen.sh b/version-gen.sh

new file mode 100755 (executable)

index 0000000..29134b3
--- /dev/null
+++ b/version-gen.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+
+package="$1"
+version_file="$2"
+
+ver='unnamed_version'
+# First try git, then gitweb, then default.
+if [ -e '.git' -o -e '../.git' ]; then
+       git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null)
+       [ -z "$git_ver" ] && git_ver="$ver"
+       # update stat information in index to match working tree
+       git update-index -q --refresh > /dev/null
+       # if there are differences (exit code 1), the working tree is dirty
+       git diff-index --quiet HEAD || git_ver=$git_ver-dirty
+       ver=$git_ver
+elif [ "${PWD%%-*}" = $package- ]; then
+       ver=${PWD##*/$package-}
+fi
+ver=${ver#v}
+
+echo "$ver"
+[ -z "${version_file}" ] && exit 0
+# update version file if necessary
+content="const char *${package}_version(void) {return \"$ver\";};"
+[ -r "$version_file" ] && echo "$content" | cmp -s - $version_file && exit 0
+echo "$content" > $version_file
author	Andre Noll <maan@tuebingen.mpg.de>
	Sun, 14 Jan 2024 22:55:26 +0000 (23:55 +0100)
committer	Andre Noll <maan@tuebingen.mpg.de>
	Sun, 14 Jan 2024 22:56:01 +0000 (23:56 +0100)
.gitignore	[new file with mode: 0644]	patch \| blob
Makefile	[new file with mode: 0644]	patch \| blob
README	[new file with mode: 0644]	patch \| blob
config.mak.in	[new file with mode: 0644]	patch \| blob
configure	[new file with mode: 0755]	patch \| blob
configure.ac	[new file with mode: 0644]	patch \| blob
index.html.m4	[new file with mode: 0644]	patch \| blob
misma.c	[new file with mode: 0644]	patch \| blob
misma.h	[new file with mode: 0644]	patch \| blob
misma.suite.m4	[new file with mode: 0644]	patch \| blob
util.c	[new file with mode: 0644]	patch \| blob
version-gen.sh	[new file with mode: 0755]	patch \| blob