From b95de7e4b629c3ffb90ae09b53d0178285e73629 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 11 Jan 2018 23:40:23 +0100 Subject: [PATCH] initial --- INSTALL | 10 ++ Makefile | 119 +++++++++++++++ ast.c | 395 +++++++++++++++++++++++++++++++++++++++++++++++++ autogen.sh | 5 + config.mak.in | 16 ++ configure.ac | 32 ++++ err.h | 45 ++++++ list.h | 219 +++++++++++++++++++++++++++ logo.svg | 30 ++++ tf.h | 148 ++++++++++++++++++ tfortune.c | 252 +++++++++++++++++++++++++++++++ tfortune.suite | 103 +++++++++++++ txp.lex | 111 ++++++++++++++ txp.y | 130 ++++++++++++++++ util.c | 144 ++++++++++++++++++ version-gen.sh | 27 ++++ 16 files changed, 1786 insertions(+) create mode 100644 INSTALL create mode 100644 Makefile create mode 100644 ast.c create mode 100755 autogen.sh create mode 100644 config.mak.in create mode 100644 configure.ac create mode 100644 err.h create mode 100644 list.h create mode 100644 logo.svg create mode 100644 tf.h create mode 100644 tfortune.c create mode 100644 tfortune.suite create mode 100644 txp.lex create mode 100644 txp.y create mode 100644 util.c create mode 100755 version-gen.sh diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..87528ee --- /dev/null +++ b/INSTALL @@ -0,0 +1,10 @@ +Dependencies: autoconf, gnu make, flex, bison, gcc or clang, lopsub + +Run + + ./autogen.sh && ./configure && make && sudo make install + +to build and install this software. + +The configure script checks if all required dependencies are installed +and prints an error message if one of them is missing. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..eb561ac --- /dev/null +++ b/Makefile @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: GPL-2.0 + +.SUFFIXES: +MAKEFLAGS += -Rr +.ONESHELL: +.SHELLFLAGS := -ec + +RM := rm -f +MKDIR_P := mkdir -p + +ifeq ("$(origin CC)", "default") + CC := cc +endif +ifeq ("$(origin V)", "command line") + SAY = +else + SAY = @echo '$(strip $(1))' +endif + +COPYRIGHT_YEAR := 2018 +LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG +GIT_VERSION := $(shell ./version-gen.sh) +cc_version := $(shell $(CC) --version | head -n 1) +build_date := $(shell date) +uname_rs := $(shell uname -rs) + +all := tfortune tfortune.1 +all: $(all) + +deps := txp.bison.d txp.flex.d ast.d tfortune.d util.d txp.flex.d \ + tfortune.lsg.d version.d + +ifeq ($(findstring clean, $(MAKECMDGOALS)),) +-include $(deps) +include config.mak +endif + +.PRECIOUS: %.flex.c %.bison.c %.bison.h %.lsg.h %.lsg.c %.lsg.h + +# created by version-gen.sh +version.c: + +%.lsg.c: %.suite + $(call SAY, LSGC $<) + $(LOPSUBGEN) --gen-c < $< + +%.lsg.h: %.suite + $(call SAY, LSGH $<) + $(LOPSUBGEN) --gen-header < $< + +%.1: %.suite + $(call SAY, LSGM $<) + $(LOPSUBGEN) --gen-man=$@ --version-string $(GIT_VERSION) < $< + +%.flex.c: %.lex + $(call SAY, FLEX $<) + $(FLEX) -o $@ $< + +%.bison.c %.bison.h: %.y + $(call SAY, BISON $<) + $(BISON) --defines=$(notdir $(<:.y=.bison.h)) \ + --output=$(notdir $(<:.y=.bison.c)) $< + +TF_CPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"' +TF_CPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)' +TF_CPPFLAGS += -DBUILD_DATE='"$(build_date)"' +TF_CPPFLAGS += -DCC_VERSION='"$(cc_version)"' +TF_CPPFLAGS += -DUNAME_RS='"$(uname_rs)"' +TF_CPPFLAGS += -I/usr/local/include + +TF_CFLAGS += -g +TF_CFLAGS += -O2 +TF_CFLAGS += -Wall +TF_CFLAGS += -Wundef -W -Wuninitialized +TF_CFLAGS += -Wchar-subscripts +TF_CFLAGS += -Werror-implicit-function-declaration +TF_CFLAGS += -Wmissing-noreturn +TF_CFLAGS += -Wbad-function-cast +TF_CFLAGS += 
-Wredundant-decls +TF_CFLAGS += -Wdeclaration-after-statement +TF_CFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute + +%.flex.o: TF_CFLAGS += -Wno-all + +%.o: %.c tfortune.lsg.h txp.bison.h + $(call SAY, CC $<) + $(CC) \ + -o $@ -c -MMD -MF $(*F).d \ + -MT $@ $(TF_CPPFLAGS) $(CPPFLAGS) $(TF_CFLAGS) $(CFLAGS) $< + +TF_LDFLAGS=-llopsub +tfortune: $(deps:.d=.o) + $(call SAY, LD $@) + $(CC) $^ -o $@ $(TF_LDFLAGS) $(LDFLAGS) + +.PHONY: all mostlyclean clean install install-strip + +mostlyclean: + $(RM) tfortune *.o *.d +clean: mostlyclean + $(RM) *.lsg.* *.flex.* *.bison.* *.1 version.c +distclean: clean + $(RM) config.mak config.status config.log config.h configure config.h.in + $(RM) -r autom4te.cache +maintainer-clean: distclean + git clean -dfqx > /dev/null 2>&1 + +mandir := $(datarootdir)/man/man1 +INSTALL ?= install +INSTALL_PROGRAM ?= $(INSTALL) -m 755 +INSTALL_DATA ?= $(INSTALL) -m 644 +ifneq ($(findstring strip, $(MAKECMDGOALS)),) + strip_option := -s +endif + +install install-strip: all + $(MKDIR_P) $(DESTDIR)$(bindir) $(DESTDIR)$(mandir) + $(INSTALL_PROGRAM) $(strip_option) tfortune $(DESTDIR)$(bindir) + $(INSTALL_DATA) tfortune.1 $(DESTDIR)$(mandir) diff --git a/ast.c b/ast.c new file mode 100644 index 0000000..d7ce1c0 --- /dev/null +++ b/ast.c @@ -0,0 +1,395 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "tf.h" +#include "txp.bison.h" + +enum semantic_types { + ST_STRVAL, + ST_INTVAL, + ST_BOOLVAL, + ST_REGEX_PATTERN, +}; + +struct txp_context { + /* global context */ + char *errmsg; + struct txp_ast_node *ast; + /* per tag expression context */ + unsigned num_lines; + unsigned num_tags; + char **tags; +}; + +/* + * Set the error bit in the parser context and log a message. + * + * This is called if the lexer or the parser detect an error. Only the first + * error is logged (with a severity of "warn"). + */ +__attribute__ ((format (printf, 3, 4))) +void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...) 
+{ + va_list ap; + char *tmp; + + if (ctx->errmsg) /* we already printed an error message */ + return; + va_start(ap, fmt); + xvasprintf(&tmp, fmt, ap); + va_end(ap); + xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); + free(tmp); + WARNING_LOG("%s\n", ctx->errmsg); +} + +/* + * Parse a (generalized) string literal. + * + * This function turns the generalized C99 string literal given by src into a C + * string. For example, the string literal "xyz\n" is transformed into an + * array containing the three characters 'x', 'y' and 'z', followed by a + * newline character and the terminating zero byte. The function allows one to + * specify different quote characters so that, for example, regular expression + * patterns enclosed in '/' can be parsed as well. To parse a proper string + * literal, one has to pass two double quotes as the second argument. + * + * The function strips off the opening and closing quote characters, replaces + * double backslashes by single backslashes and handles the usual escapes like + * \n and \". + * + * The caller must make sure that the input is well-formed. The function simply + * aborts if the input is not a valid C99 string literal (modulo the quote + * characters). + * + * The return value is the offset of the first character after the closing + * quote. For proper string literals this will be the terminating zero byte of + * the input string, for regular expression patterns it is the beginning of the + * flags which modify the matching behaviour. 
+ */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result) +{ + size_t n, len = strlen(src); + char *dst, *p; + bool backslash; + + assert(len >= 2); + assert(src[0] == quote_chars[0]); + p = dst = xmalloc(len - 1); + backslash = false; + for (n = 1;; n++) { + char c; + assert(n < len); + c = src[n]; + if (!backslash) { + if (c == '\\') { + backslash = true; + continue; + } + if (c == quote_chars[1]) + break; + *p++ = c; + continue; + } + if (c == quote_chars[1]) + *p++ = quote_chars[1]; + else switch (c) { + case '\\': *p++ = '\\'; break; + case 'a': *p++ = '\a'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + case 'v': *p++ = '\v'; break; + default: assert(false); + } + backslash = false; + } + assert(src[n] == quote_chars[1]); + *p = '\0'; + *result = dst; + return n + 1; +} + +/* + * Parse and compile an extended regular expression pattern, including flags. + * + * A regex pattern is identical to a C99 string literal except (a) it is + * enclosed in '/' characters rather than double quotes, (b) double quote + * characters which are part of the pattern do not need to be quoted with + * backslashes, but slashes must be quoted in this way, and (c) the closing + * slash may be followed by one or more flag characters which modify the + * matching behaviour. + * + * The only flags which are currently supported are 'i' to ignore case in match + * (REG_ICASE) and 'n' to change the handling of newline characters + * (REG_NEWLINE). + * + * This function calls parse_quoted_string(), hence it aborts if the input + * string is malformed. However, errors from regcomp(3) are returned without + * aborting the process. The rationale behind this difference is that passing a + * malformed string must be considered an implementation bug because malformed + * strings should be rejected earlier by the lexer. 
+ */ +int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result) +{ + int ret; + char *pat; + unsigned n = parse_quoted_string(src, "//", &pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'i': result->flags |= REG_ICASE; break; + case 'n': result->flags |= REG_NEWLINE; break; + default: assert(false); + } + } + ret = xregcomp(&result->preg, pat, result->flags); + free(pat); + return ret; +} + +static struct txp_ast_node *ast_node_raw(int id) +{ + struct txp_ast_node *node = xmalloc(sizeof(*node)); + node->id = id; + return node; +} + +/* This is non-static because it is also called from the lexer. */ +struct txp_ast_node *txp_new_ast_leaf_node(int id) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 0; + return node; +} + +struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 1; + node->children = xmalloc(sizeof(struct txp_ast_node *)); + node->children[0] = child; + return node; +} + +struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left, + struct txp_ast_node *right) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 2; + node->children = xmalloc(2 * sizeof(struct txp_ast_node *)); + node->children[0] = left; + node->children[1] = right; + return node; +} + +void txp_free_ast(struct txp_ast_node *root) +{ + if (!root) + return; + if (root->num_children > 0) { + int i; + for (i = 0; i < root->num_children; i++) + txp_free_ast(root->children[i]); + free(root->children); + } else { + union txp_semantic_value *sv = &root->sv; + switch (root->id) { + case STRING_LITERAL: + free(sv->strval); + break; + case REGEX_PATTERN: + regfree(&sv->re_pattern.preg); + break; + } + } + free(root); +} + +static int eval_node(struct txp_ast_node *node, struct txp_context *ctx, + union txp_semantic_value *result); + +static void eval_binary_op(struct txp_ast_node *node, 
struct txp_context *ctx, + union txp_semantic_value *v1, union txp_semantic_value *v2) +{ + eval_node(node->children[0], ctx, v1); + eval_node(node->children[1], ctx, v2); +} + +static int eval_node(struct txp_ast_node *node, struct txp_context *ctx, + union txp_semantic_value *result) +{ + int ret; + union txp_semantic_value v1, v2; + + switch (node->id) { + /* strings */ + case STRING_LITERAL: + result->strval = node->sv.strval; + return ST_STRVAL; + /* integers */ + case NUM: + result->intval = node->sv.intval; + return ST_INTVAL; + case '+': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval + v2.intval; + return ST_INTVAL; + case '-': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval - v2.intval; + return ST_INTVAL; + case '*': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval * v2.intval; + return ST_INTVAL; + case '/': + eval_binary_op(node, ctx, &v1, &v2); + if (v2.intval == 0) { + static bool warned; + if (!warned) + ERROR_LOG("division by zero\n"); + warned = true; + result->intval = 0; + } else + result->intval = v1.intval / v2.intval; + return ST_INTVAL; + case NEG: + eval_node(node->children[0], ctx, &v1); + result->intval = -v1.intval; + return ST_INTVAL; + case NUM_LINES: + result->intval = ctx->num_lines; + return ST_INTVAL; + /* bools */ + case TRUE: + result->boolval = true; + return ST_BOOLVAL; + case FALSE: + result->boolval = false; + return ST_BOOLVAL; + case OR: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval || v2.boolval; + return ST_BOOLVAL; + case AND: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval && v2.boolval; + return ST_BOOLVAL; + case NOT: + eval_node(node->children[0], ctx, &v1); + result->boolval = !v1.boolval; + return ST_BOOLVAL; + case EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = !strcmp(v1.strval, v2.strval); + else + result->boolval = 
v1.intval == v2.intval; + return ST_BOOLVAL; + case NOT_EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval != v2.intval; + return ST_BOOLVAL; + case '<': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval < v2.intval; + return ST_BOOLVAL; + case '>': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval > v2.intval; + return ST_BOOLVAL; + case LESS_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval <= v2.intval; + return ST_BOOLVAL; + case GREATER_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval >= v2.intval; + return ST_BOOLVAL; + case REGEX_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = regexec(&v2.re_pattern.preg, v1.strval, + 0, NULL, 0) == 0; + return ST_BOOLVAL; + case REGEX_PATTERN: + result->re_pattern = node->sv.re_pattern; + return ST_REGEX_PATTERN; + default: + EMERG_LOG("bug: invalid node id %d\n", node->id); + exit(EXIT_FAILURE); + } +} + +bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx) +{ + union txp_semantic_value v; + int ret = eval_node(root, ctx, &v); + + if (ret == ST_INTVAL) + return v.intval != 0; + if (ret == ST_STRVAL) + return v.strval[0] != 0; + /* Only ST_REGEX_PATTERN is left besides ST_BOOLVAL; it has no truth value. */ + assert(ret == ST_BOOLVAL); + return ret == ST_BOOLVAL && v.boolval; +} + +int txp_yylex_init(txp_yyscan_t *yyscanner); +struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len, + txp_yyscan_t yyscanner); +void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner); +int txp_yylex_destroy(txp_yyscan_t yyscanner); +void txp_yyset_lineno(int lineno, txp_yyscan_t scanner); + +/* + * Initialize the tag expression parser. + * + * This allocates and sets up the internal structures of the tag expression + * parser and creates an abstract syntax tree from the given tag + * expression. 
It must be called before txp_eval_ast() can be called. + * + * The context pointer returned by this function may be passed to txp_eval_ast() + * to determine whether an epigram is admissible. + * + * The error message pointer may be NULL in which case no error message is + * returned. Otherwise, the caller must free the returned string. + */ +int txp_init(const char *definition, int nbytes, struct txp_context **result, + char **errmsg) +{ + int ret; + txp_yyscan_t scanner; + struct txp_context *ctx; + struct yy_buffer_state *buffer_state; + + ctx = xcalloc(sizeof(*ctx)); + ret = txp_yylex_init(&scanner); + assert(ret == 0); + buffer_state = txp_yy_scan_bytes(definition, nbytes, scanner); + txp_yyset_lineno(1, scanner); + NOTICE_LOG("creating abstract syntax tree from tag expression\n"); + ret = txp_yyparse(ctx, &ctx->ast, scanner); + txp_yy_delete_buffer(buffer_state, scanner); + txp_yylex_destroy(scanner); + if (ctx->errmsg) { /* parse error */ + if (errmsg) + *errmsg = ctx->errmsg; + else + free(ctx->errmsg); + free(ctx); + return -E_TXP; + } + if (errmsg) + *errmsg = NULL; + *result = ctx; + return 1; +} diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..72b2fab --- /dev/null +++ b/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# SPDX-License-Identifier: GPL-2.0 + +autoheader && autoconf diff --git a/config.mak.in b/config.mak.in new file mode 100644 index 0000000..20ddfe6 --- /dev/null +++ b/config.mak.in @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +prefix := @prefix@ +exec_prefix := @exec_prefix@ + +# These two use prefix and exec_prefix +bindir := @bindir@ +datarootdir := @datarootdir@ + +PACKAGE_TARNAME := @PACKAGE_TARNAME@ +PACKAGE_VERSION := @PACKAGE_VERSION@ + +FLEX := @FLEX@ +BISON := @BISON@ +M4 := @M4@ +LOPSUBGEN := @LOPSUBGEN@ diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..845315f --- /dev/null +++ b/configure.ac @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 + +AC_PREREQ([2.61]) + 
+AC_INIT([tfortune], [m4_esyscmd_s(./version-gen.sh)], + [maan@tuebingen.mpg.de], [], [http://people.tuebingen.mpg.de/maan/tfortune/]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([config.mak]) +AC_USE_SYSTEM_EXTENSIONS +AC_PROG_CC +AC_PROG_CPP + +AC_DEFUN([REQUIRE_EXECUTABLE], [ + AC_PATH_PROG(m4_toupper([$1]), [$1]) + test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$1 is required]) +]) +REQUIRE_EXECUTABLE([flex]) +REQUIRE_EXECUTABLE([bison]) +REQUIRE_EXECUTABLE([lopsubgen]) + +HAVE_LOPSUB=yes +AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no]) +AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no]) +if test $HAVE_LOPSUB = no; then AC_MSG_ERROR([ + The lopsub library is required to build this software, but + the above checks indicate it is not installed on your system. + Run the following command to download a copy. + git clone git://git.tuebingen.mpg.de/lopsub.git + Install the library, then run this configure script again. +]) +fi +AC_OUTPUT diff --git a/err.h b/err.h new file mode 100644 index 0000000..897deee --- /dev/null +++ b/err.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define TF_ERRORS \ + TF_ERROR(SUCCESS, "success"), \ + TF_ERROR(ATOI_OVERFLOW, "value too large"), \ + TF_ERROR(ATOI_NO_DIGITS, "no digits found in string"), \ + TF_ERROR(ATOI_JUNK_AT_END, "further characters after number"), \ + TF_ERROR(TXP, "tag expression parse error"), \ + TF_ERROR(REGEX, "regular expression error"), \ + TF_ERROR(LOPSUB, "lopsub error"), \ + +/* + * This is temporarily defined to expand to its first argument (prefixed by + * 'E_') and gets later redefined to expand to the error text only + */ +#define TF_ERROR(err, msg) E_ ## err +enum tf_error_codes {TF_ERRORS}; +#undef TF_ERROR +#define TF_ERROR(err, msg) msg +#define DEFINE_TF_ERRLIST char *tf_errlist[] = {TF_ERRORS} + +extern char *tf_errlist[]; + +/** + * This bit indicates whether a number is considered a system error number + * If yes, the system errno is just the result of clearing this bit 
from + * the given number. + */ +#define SYSTEM_ERROR_BIT 30 + +/** Check whether the system error bit is set. */ +#define IS_SYSTEM_ERROR(num) (!!((num) & (1 << SYSTEM_ERROR_BIT))) + +/** Set the system error bit for the given number. */ +#define ERRNO_TO_TF_ERROR(num) ((num) | (1 << SYSTEM_ERROR_BIT)) + +static inline char *tf_strerror(int num) +{ + assert(num > 0); + if (IS_SYSTEM_ERROR(num)) + return strerror((num) & ((1 << SYSTEM_ERROR_BIT) - 1)); + else + return tf_errlist[num]; +} + diff --git a/list.h b/list.h new file mode 100644 index 0000000..ab52253 --- /dev/null +++ b/list.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copied from the Linux kernel source tree, version 2.6.13. + * + * Licensed under the GPL v2 as per the whole kernel source tree. + * + */ + +/** \file list.h doubly linked list implementation */ + +#include /* offsetof */ + +/** get the struct this entry is embedded in */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/** + * Non-NULL pointers that will result in page faults under normal + * circumstances, used to verify that nobody uses non-initialized list entries. + * Used for poisoning the \a next pointer of struct list_head. + */ +#define LIST_POISON1 ((void *) 0x00100100) +/** Non-null pointer, used for poisoning the \a prev pointer of struct + * list_head + */ +#define LIST_POISON2 ((void *) 0x00200200) + +/** Simple doubly linked list implementation. */ +struct list_head { + /** pointer to the next list entry */ + struct list_head *next; + /** pointer to the previous list entry */ + struct list_head *prev; +}; + +/** Define an initialized list head. 
*/ +#define INITIALIZED_LIST_HEAD(name) struct list_head name = { &(name), &(name) } + + +/** must be called before using any other list functions */ +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + + +/* + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * Delete entry from list. + * + * \param entry the element to delete from the list. 
+ * + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** + * delete from one list and add as another's head + * + * \param list: the entry to move + * \param head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + _list_add(list, head); +} + +/** + * test whether a list is empty + * + * \param head the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * get the struct for this entry + * + * \param ptr the &struct list_head pointer. + * \param type the type of the struct this is embedded in. + * \param member the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * iterate over list of given type + * + * \param pos the type * to use as a loop counter. + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * iterate over list of given type safe against removal of list entry + * + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. 
+ */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * iterate backwards over list of given type safe against removal of list entry + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +/** + * Get the first element from a list + * \param ptr the list head to take the element from. + * \param type The type of the struct this is embedded in. + * \param member The name of the list_struct within the struct. + * + * Note that list is expected to be not empty. + */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/** + * Test whether a list has just one entry. + * + * \param head The list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + diff --git a/logo.svg b/logo.svg new file mode 100644 index 0000000..f64d714 --- /dev/null +++ b/logo.svg @@ -0,0 +1,30 @@ +<-- SPDX-License-Identifier: GPL-2.0 --> + + + + + + + + + diff --git a/tf.h b/tf.h new file mode 100644 index 0000000..4c01764 --- /dev/null +++ b/tf.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "err.h" + +/* Opaque, only known to ast.c. 
Passed to the generated txp_yyparse(). */ +struct txp_context; + +/* + * Since we use a reentrant lexer, all functions generated by flex(1) + * receive an additional argument of this type. + */ +typedef void *txp_yyscan_t; + +/* Parsed regex pattern. */ +struct txp_re_pattern { + regex_t preg; /* Pre-compiled regex. */ + unsigned flags; /* Subset of the cflags described in regex(3). */ +}; + +/* + * The possible values of a node in the abstract syntax tree (AST). + * + * Constant semantic values (string literals, numeric constants and regex + * patterns which are part of the tag expression) are determined during + * txp_init() while values which depend on the epigram (tags, number of lines, + * etc.) are determined during txp_eval_ast(). + * + * This union, and the txp_ast_node structure below are used extensively in + * txp.y. However, both need to be public because the lexer must be able to + * create AST nodes for the constant semantic values. + */ +union txp_semantic_value { + bool boolval; /* Comparators, =~ and =|. */ + char *strval; /* String literals, tags, path. */ + int64_t intval; /* Numeric constants, number of lines, etc. */ + struct txp_re_pattern re_pattern; /* Right-hand side operand of =~. */ +}; + +/* + * A node is either interior or a leaf node. Interior nodes have at least one + * child while leaf nodes have a semantic value and no children. + * + * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string + * literal) and no children, (b) NEG (unary minus) has no semantic value but + * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL + * has no semantic value and two children (the two numeric expressions being + * compared). + */ +struct txp_ast_node { + /* Corresponds to a token type, for example LESS_OR_EQUAL. */ + int id; + union { + /* Pointers to the child nodes (interior nodes only). */ + struct txp_ast_node **children; + /* Leaf nodes only. 
*/ + union txp_semantic_value sv; + }; + /* + * The number of children is implicitly given by the id, but we include + * it here to avoid having to maintain a lookup table. The AST is + * usually small, so we can afford to waste a byte per node. + */ + uint8_t num_children; +}; + +enum loglevels {LOGLEVELS, NUM_LOGLEVELS}; +#define DEBUG_LOG(f,...) txp_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define INFO_LOG(f,...) txp_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define NOTICE_LOG(f,...) txp_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define WARNING_LOG(f,...) txp_log(LL_WARNING, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define ERROR_LOG(f,...) txp_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define CRIT_LOG(f,...) txp_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define EMERG_LOG(f,...) txp_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) + +/* tfortune.c */ + +void txp_log(int ll, const char* fmt,...); + +/* Called from both the lexer and the parser. */ +__attribute__ ((format (printf, 3, 4))) +void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...); + +/* Helper functions for the lexer. */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result); +int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result); + +/* ast.c */ +struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child); +struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left, + struct txp_ast_node *right); + +/* + * Allocate a new leaf node for the abstract syntax tree. + * + * This returns a pointer to a node whose ->num_children field is initialized + * to zero. The ->id field is initialized with the given id. The caller is + * expected to initialize the ->sv field. + */ +struct txp_ast_node *txp_new_ast_leaf_node(int id); + +/* + * Evaluate an abstract syntax tree, starting at the root node. 
+ * + * The root node argument should be the pointer that was returned from an + * earlier call to txp_init() via the context pointer. The context contains the + * information about the epigram. + * + * Returns true if the AST evaluates to true, a non-empty string, or a non-zero + * number, false otherwise. + */ +bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx); + +/* + * Deallocate an abstract syntax tree. + * + * This frees the memory occupied by the nodes of the AST, the child pointers + * of the internal nodes and the (constant) semantic values of the leaf nodes + * (string literals and pre-compiled regular expressions). + */ +void txp_free_ast(struct txp_ast_node *root); + + +/* util.c */ +int atoi64(const char *str, int64_t *value); +unsigned xvasprintf(char **result, const char *fmt, va_list ap); +unsigned xasprintf(char **result, const char *fmt, ...); +void *xmalloc(size_t size); +void *xcalloc(size_t size); +int xregcomp(regex_t *preg, const char *regex, int cflags); + +/* txp.c */ + +/* txp.y. 
*/ + diff --git a/tfortune.c b/tfortune.c new file mode 100644 index 0000000..7447059 --- /dev/null +++ b/tfortune.c @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tf.h" +#include "tfortune.lsg.h" + +#define TF_SEP "---- " + +struct tf_map { + void *map; + off_t len; +}; + +struct tf_cookie { + unsigned input_num; + off_t offset, len; +}; + +static void mmap_file(const char *path, struct tf_map *map) +{ + int fd, ret; + + ret = open(path, 0); + if (ret < 0) { + perror("open"); + exit(EXIT_FAILURE); + } + fd = ret; + map->len = lseek(fd, 0, SEEK_END); + if (map->len == (off_t)-1) { + perror("lseek"); + exit(EXIT_FAILURE); + } + map->map = mmap(NULL, map->len, PROT_READ, MAP_PRIVATE, fd, 0); + if (map->map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + ret = close(fd); + assert(ret >= 0); +} + +static bool tag_given(const char *tag, const char *tags, size_t sz) +{ + bool found = false; + char *p, *str = strndup(tags, sz); + + assert(str); + p = strtok(str, ","); + while (p) { + //fprintf(stderr, "sz: %zu, compare: %s <-> %.*s\n", sz, tag, (int) sz, p); + if (strcmp(p, tag) == 0) { + found = true; + break; + } + p = strtok(NULL, ","); + } + free(str); + return found; +} + +static bool tags_ok(const char *tags, size_t sz, struct lls_parse_result *lpr) +{ + unsigned n, given; + const struct lls_opt_result *r; + const char *arg; + + r = lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_ACCEPT, lpr); + given = lls_opt_given(r); + if (given == 0) + goto check_reject; + /* check if any of the given accept tags is set */ + for (n = 0; n < given; n++) { + arg = lls_string_val(n, r); + if (tag_given(arg, tags, sz)) + goto check_reject; + } + return false; /* accept tag(s) given, but none was set */ +check_reject: + /* check if none of the given reject tags is set */ + r = 
lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_REJECT, lpr); + given = lls_opt_given(r); + for (n = 0; n < given; n++) { + arg = lls_string_val(n, r); + if (tag_given(arg, tags, sz)) + return false; + } + return true; +} + +static void print_tags(const char *tags, size_t sz, FILE *f) +{ + char *p, *str = strndup(tags, sz); + + assert(str); + p = strtok(str, ","); + while (p) { + fprintf(f, "%s\n", p); + p = strtok(NULL, ","); + } + free(str); +} + +static void print_random_cookie(const struct tf_cookie *cookies, + unsigned num_cookies, const struct tf_map *maps) +{ + long unsigned r; + const struct tf_cookie *cookie; + struct timeval tv; + + if (num_cookies == 0) { + fprintf(stderr, "no matching cookie\n"); + return; + } + gettimeofday(&tv, NULL); + srandom((unsigned)tv.tv_usec); + r = ((num_cookies + 0.0) * (random() / (RAND_MAX + 1.0))); + cookie = cookies + r; + assert(r < num_cookies); + printf("%.*s", (int)cookie->len, + (char *)maps[cookie->input_num].map + cookie->offset); +} + +static void make_cookies(struct tf_map *maps, struct lls_parse_result *lpr) +{ + struct tf_cookie *cookies = NULL; + unsigned n, cookies_size = 0, num_cookies = 0, num_inputs; + size_t sep_len = strlen(TF_SEP); + const struct lls_opt_result *r_s; + FILE *f = NULL; + + r_s = lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_STATISTICS, lpr); + if (lls_opt_given(r_s)) { + f = popen("sort | uniq -c | sort -n", "w"); + assert(f); + } + num_inputs = lls_num_inputs(lpr); + for (n = 0; n < num_inputs; n++) { + struct tf_map *m = maps + n; + const char *start = m->map, *end = m->map + m->len; + const char *buf = start, *cookie_start = start; + + while (buf < end) { + struct tf_cookie *cookie; + const char *p, *cr, *tags; + size_t sz; + + cr = memchr(buf, '\n', end - buf); + if (!cr) + break; + p = cr + 1; + if (!cookie_start) + cookie_start = p; + if (p + sep_len >= end) + break; + if (strncmp(p, TF_SEP, sep_len) != 0) { + buf = p; + continue; + } + tags = p + sep_len; + cr = memchr(tags, '\n', end - 
tags); + if (cr) + sz = cr - tags; + else + sz = end - tags; + if (!tags_ok(tags, sz, lpr)) { + if (!cr) + break; + buf = cr; + cookie_start = NULL; + continue; + } + num_cookies++; + if (lls_opt_given(r_s)) { + print_tags(tags, sz, f); + if (!cr) + break; + buf = cr; + continue; + } + if (num_cookies > cookies_size) { + cookies_size = 2 * cookies_size + 1; + cookies = realloc(cookies, + cookies_size * sizeof(*cookies)); + assert(cookies); + } + cookie = cookies + num_cookies - 1; + cookie->input_num = n; + cookie->offset = cookie_start - start; + cookie->len = p - cookie_start; + buf = p + sep_len; + cookie_start = NULL; + } + } + if (f) + pclose(f); + if (!lls_opt_given(r_s)) + print_random_cookie(cookies, num_cookies, maps); + else + printf("num cookies: %d\n", num_cookies); + free(cookies); +} + +int main(int argc, char **argv) +{ + char *errctx; + struct lls_parse_result *lpr; + int ret; + struct tf_map *maps; + unsigned n, num_inputs; + const struct lls_command *cmd = lls_cmd(0, tfortune_suite); + + ret = lls_parse(argc, argv, cmd, &lpr, &errctx); + if (ret < 0) { + fprintf(stderr, "%s: %s\n", errctx? errctx : "", + lls_strerror(-ret)); + free(errctx); + exit(EXIT_FAILURE); + } + if (lls_num_inputs(lpr) == 0) { + char *help = lls_short_help(cmd); + fprintf(stderr, "%s\n", help); + free(help); + ret = 0; + goto free_lpr; + } + num_inputs = lls_num_inputs(lpr); + maps = xmalloc(num_inputs * sizeof(*maps)); + for (n = 0; n < num_inputs; n++) + mmap_file(lls_input(n, lpr), maps + n); + make_cookies(maps, lpr); + free(maps); + ret = EXIT_SUCCESS; +free_lpr: + lls_free_parse_result(lpr, cmd); + return ret; +} diff --git a/tfortune.suite b/tfortune.suite new file mode 100644 index 0000000..6545865 --- /dev/null +++ b/tfortune.suite @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-2.0 + +[suite tfortune] +[supercommand tfortune] + purpose = fortune cookies with tags + [description] + Like fortune(1), tfortune prints out a random epigram. 
However, + the epigrams in the input files must contain additional information, + called "tags". The program picks only epigrams which are considered + "admissible" based on the tags and the command line options. + [/description] + non-opts-name = ... + [option accept] + short_opt = a + summary = epigrams which contain this tag are admissible + arg_info = required_arg + arg_type = string + typestr = tag + flag multiple + [help] + This option may be given multiple times. + [/help] + [option reject] + short_opt = r + summary = epigrams which contain this tag are inadmissible + arg_info = required_arg + arg_type = string + typestr = tag + flag multiple + [help] + Like --accept, this may be given multiple times. See the discussion + below on how --accept and --reject interact. + [/help] + [option statistics] + short_opt = s + summary = print tags found in input files + [help] + If this option is specified, tfortune does not print any + epigrams. Instead, it prints all tags found in the given input file(s), + together with how many times each tag occurred. The list is sorted by + occurrence count. + [/help] + [option accept-reject] + summary = Admissible epigrams + flag ignored + [help] + tfortune picks a random epigram from the set of admissible epigrams, + which is computed as follows. If neither --accept nor --reject are + specified, all epigrams are considered admissible. If only --accept + is specified, epigrams with at least one tag given as an argument to + --accept are admissible, all others are inadmissible. Similarly, if + only --reject is specified, epigrams with at least one tag given as an + argument to --reject are inadmissible, all others are admissible. If + both --accept and --reject are specified, an epigram is admissible + if and only if it has at least one tag which is given as an argument + to --accept but no tag which is given as an argument to --reject. 
+ [/help] + [option format] + summary = Input file format + flag ignored + [help] + Input files may contain arbitrarily many epigrams. The end of each + epigram must be marked with a "tag" line. The tag line consists of + four dashes, a space character, and a comma-separated list of tags. + Tags may span multiple words, but no comma is allowed. + [/help] + +[section example] +The following is an example input file for tfortune. It contains a single epigram +with two tags. + +.RS +.EX +Anyone who attempts to generate random numbers by deterministic means +is, of course, living in a state of sin. -- John von Neumann +---- math,religion +.EE +.RE +[/section] + +[section copyright] + Written by Andre Noll + .br + Copyright (C) 2016-present Andre Noll + .br + License: GNU GPL version 3 + .br + This is free software: you are free to change and redistribute it. + .br + There is NO WARRANTY, to the extent permitted by law. + .br + Report bugs to + .MT + Andre Noll + .ME + .br + Homepage: + .UR http://people.tuebingen.mpg.de/~maan/tfortune/ + .UE +[/section] +[section see also] + .BR fortune (6) +[/section] diff --git a/txp.lex b/txp.lex new file mode 100644 index 0000000..4b04fb3 --- /dev/null +++ b/txp.lex @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + /* + * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to + * behave as though yywrap() returned 1. + */ +%option noyywrap + + /* + * We don't want symbols to clash with those of other flex users, particularly + * lopsub. + */ +%option prefix="txp_yy" + + /* + * Generate a scanner that maintains the number of the current line read from + * its input in the yylineno variable. + */ +%option yylineno + + /* Generate a bison-compatible scanner. */ +%option bison-bridge bison-locations + + /* + * Warn (in particular) if the default rule can be matched but no default rule + * has been given. 
+ */ +%option warn + + /* + * Generate a scanner which is portable and safe to use in one or more threads + * of control. + */ +%option reentrant + + /* + * Generate a scanner which always looks one extra character ahead. This is a + * bit faster than an interactive scanner for which look ahead happens only + * when necessary. + */ +%option never-interactive + +%{ +#include "tf.h" + +#define YYSTYPE TXP_YYSTYPE +#define YYLTYPE TXP_YYLTYPE +#define YY_DECL int txp_yylex(TXP_YYSTYPE *yylval_param, TXP_YYLTYPE *yylloc_param, \ + struct txp_context *ctx, struct txp_ast_node **ast, txp_yyscan_t yyscanner) +#include "txp.bison.h" +#define TXP_YY_USER_ACTION do {txp_yylloc->first_line = txp_yylineno;} while (0); +%} +DECIMAL_CONSTANT (0|([[:digit:]]{-}[0])[[:digit:]]*) +STRING_LITERAL \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\" +REGEX_PATTERN \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])* +%% + +tag {return TAG;} +num_lines {return NUM_LINES;} +true {return TRUE;} +false {return FALSE;} + +[[:space:]]+|#.*\n /* skip comments and whitespace */ + +"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];} + +"||" {return OR;} +"&&" {return AND;} +"!" 
{REGEX_PATTERN} {
	int ret;
	yylval->node = txp_new_ast_leaf_node(REGEX_PATTERN);
	ret = txp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern);
	if (ret < 0) {
		/*
		 * The leaf node has not been handed to the parser yet, so
		 * it must be released here, like the rule for decimal
		 * constants does on conversion errors.
		 */
		free(yylval->node);
		txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext,
			tf_strerror(-ret));
		return -E_TXP;
	}
	return REGEX_PATTERN;
}
+ */ +%define api.pure full + +/* Additional arguments to yylex(), yyparse() and yyerror() */ +%param {struct txp_context *ctx} +%param {struct txp_ast_node **ast} +%param {txp_yyscan_t yyscanner} /* reentrant lexers */ + +%{ +#include "tf.h" +#include "txp.bison.h" + +int yylex(TXP_YYSTYPE *lvalp, TXP_YYLTYPE *llocp, struct txp_context *ctx, + struct txp_ast_node **ast, txp_yyscan_t yyscanner); +static void yyerror(YYLTYPE *llocp, struct txp_context *ctx, + struct txp_ast_node **ast, txp_yyscan_t yyscanner, const char *msg); + +%} + +%union { + struct txp_ast_node *node; +} + +/* terminals */ +%token NUM +%token STRING_LITERAL +%token REGEX_PATTERN + +/* keywords with semantic value */ +%token NUM_LINES +%token FALSE TRUE + +/* keywords without semantic value */ +%token TAG + +/* operators, ordered by precedence */ +%left OR +%left AND +%left EQUAL NOT_EQUAL +%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH +%left '-' '+' +%left '*' '/' +%right NOT NEG /* negation (unary minus) */ + +/* nonterminals */ +%type string +%type exp +%type boolexp + +%% + +program: + /* empty */ {*ast = NULL; return 0;} + | string {*ast = $1; return 0;} + | exp {*ast = $1; return 0;} + | boolexp {*ast = $1; return 0;} + +string: STRING_LITERAL {$$ = $1;} +; + +exp: NUM {$$ = $1;} + | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);} + | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);} + | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);} + | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);} + | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);} + | '(' exp ')' {$$ = $2;} + | NUM_LINES {$$ = txp_new_ast_leaf_node(NUM_LINES);} +; + +boolexp: TRUE {$$ = txp_new_ast_leaf_node(TRUE);} + | FALSE {$$ = txp_new_ast_leaf_node(FALSE);} + | '(' boolexp ')' {$$ = $2;} + | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);} + | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);} + | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);} + | exp 
/*
 * Called by yyparse() on error.
 *
 * llocp: Location of the error. Only its first_line member is used.
 * ctx:   Passed through to txp_parse_error().
 * msg:   The error message, forwarded verbatim through a "%s" format.
 *
 * The ast and yyscanner arguments are unused.
 */
static void yyerror(YYLTYPE *llocp, struct txp_context *ctx,
		__attribute__ ((unused)) struct txp_ast_node **ast,
		__attribute__ ((unused)) txp_yyscan_t yyscanner,
		const char *msg)
{
	txp_parse_error(llocp->first_line, ctx, "%s", msg);
}
/*
 * Resize a memory area, terminating the process on allocation failure.
 *
 * p may be NULL, in which case realloc() acts like malloc(size), so this
 * case needs no special treatment. size must not be zero.
 *
 * Returns the pointer to the resized area. This function never returns NULL:
 * if the allocation fails, a message is logged and exit() is called.
 */
__attribute__ ((warn_unused_result))
void *xrealloc(void *p, size_t size)
{
	void *area;

	assert(size);
	area = realloc(p, size);
	if (area)
		return area;
	EMERG_LOG("realloc failed (size = %zu), aborting\n", size);
	exit(EXIT_FAILURE);
}
+ */ +__attribute__ ((format (printf, 2, 0))) +unsigned xvasprintf(char **result, const char *fmt, va_list ap) +{ + int ret; + size_t size = 150; + va_list aq; + + *result = xmalloc(size + 1); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0); + if ((size_t)ret < size) + return ret; + size = ret + 1; + *result = xrealloc(*result, size); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0 && (size_t)ret < size); + return ret; +} + +__attribute__ ((format (printf, 2, 3))) +/* Print to a dynamically allocated string, variable number of arguments. */ +unsigned xasprintf(char **result, const char *fmt, ...) +{ + va_list ap; + unsigned ret; + + va_start(ap, fmt); + ret = xvasprintf(result, fmt, ap); + va_end(ap); + return ret; +} + +/* + * Compile a regular expression. + * + * This simple wrapper calls regcomp(3) and logs a message on errors. + */ +int xregcomp(regex_t *preg, const char *regex, int cflags) +{ + char *buf; + size_t size; + int ret = regcomp(preg, regex, cflags); + + if (ret == 0) + return 1; + size = regerror(ret, preg, NULL, 0); + buf = xmalloc(size); + regerror(ret, preg, buf, size); + ERROR_LOG("%s\n", buf); + free(buf); + return -E_REGEX; +} + +static int loglevel_arg_val; + +__attribute__ ((format (printf, 2, 3))) +void txp_log(int ll, const char* fmt,...) +{ + va_list argp; + if (ll < loglevel_arg_val) + return; + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); +} diff --git a/version-gen.sh b/version-gen.sh new file mode 100755 index 0000000..5c6b965 --- /dev/null +++ b/version-gen.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# SPDX-License-Identifier: GPL-2.0 + +version_file='version.c' +ver='unnamed_version' +# First try git, then gitweb, then default. 
# Determine the version string. Inside a git checkout it is derived from
# "git describe". Otherwise, if the name of the current directory starts with
# "tfortune-" (as for unpacked snapshots), the remainder of the directory name
# is used. In all other cases the default set above stays in effect.
if [ -e '.git' ] || [ -e '../.git' ]; then
	git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null)
	[ -z "$git_ver" ] && git_ver="$ver"
	# update stat information in index to match working tree
	git update-index -q --refresh > /dev/null
	# if there are differences (exit code 1), the working tree is dirty
	git diff-index --quiet HEAD || git_ver=$git_ver-dirty
	ver=$git_ver
else
	# Look at the last path component only. Stripping with %%-* removes
	# everything from the first dash on, so the result can never end in
	# a dash and a comparison against 'tfortune-' would always fail.
	case "${PWD##*/}" in
	tfortune-?*)
		ver=${PWD##*/tfortune-}
		;;
	esac
fi
ver=${ver#v}

echo "$ver"

# Update the version file only if its content changes, so that its timestamp
# does not trigger needless rebuilds.
content="const char *lls_version(void) {return \"$ver\";};"
[ -r "$version_file" ] && echo "$content" | cmp -s - "$version_file" && exit 0
echo >&2 "new git version: $ver"
echo "$content" > "$version_file"