From: Andre Noll Date: Thu, 11 Jan 2018 22:40:23 +0000 (+0100) Subject: initial X-Git-Tag: initial X-Git-Url: http://git.tue.mpg.de/?a=commitdiff_plain;h=b49fe2bb7608a623a44d5ba45beec0087bc90c1d;p=tfortune.git initial --- b95de7e4b629c3ffb90ae09b53d0178285e73629 diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..87528ee --- /dev/null +++ b/INSTALL @@ -0,0 +1,10 @@ +Dependencies: autoconf, gnu make, flex, bison, gcc or clang, lopsub + +Run + + ./autogen.sh && ./configure && make && sudo make install + +to build and install this software. + +The configure script checks if all required dependencies are installed +and prints an error message if one of them is missing. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..eb561ac --- /dev/null +++ b/Makefile @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: GPL-2.0 + +.SUFFIXES: +MAKEFLAGS += -Rr +.ONESHELL: +.SHELLFLAGS := -ec + +RM := rm -f +MKDIR_P := mkdir -p + +ifeq ("$(origin CC)", "default") + CC := cc +endif +ifeq ("$(origin V)", "command line") + SAY = +else + SAY = @echo '$(strip $(1))' +endif + +COPYRIGHT_YEAR := 2018 +LOGLEVELS := LL_DEBUG,LL_INFO,LL_NOTICE,LL_WARNING,LL_ERROR,LL_CRIT,LL_EMERG +GIT_VERSION := $(shell ./version-gen.sh) +cc_version := $(shell $(CC) --version | head -n 1) +build_date := $(shell date) +uname_rs := $(shell uname -rs) + +all := tfortune tfortune.1 +all: $(all) + +deps := txp.bison.d txp.flex.d ast.d tfortune.d util.d txp.flex.d \ + tfortune.lsg.d version.d + +ifeq ($(findstring clean, $(MAKECMDGOALS)),) +-include $(deps) +include config.mak +endif + +.PRECIOUS: %.flex.c %.bison.c %.bison.h %.lsg.h %.lsg.c %.lsg.h + +# created by version-gen.sh +version.c: + +%.lsg.c: %.suite + $(call SAY, LSGC $<) + $(LOPSUBGEN) --gen-c < $< + +%.lsg.h: %.suite + $(call SAY, LSGH $<) + $(LOPSUBGEN) --gen-header < $< + +%.1: %.suite + $(call SAY, LSGM $<) + $(LOPSUBGEN) --gen-man=$@ --version-string $(GIT_VERSION) < $< + +%.flex.c: %.lex + $(call SAY, FLEX $<) + $(FLEX) 
-o $@ $< + +%.bison.c %.bison.h: %.y + $(call SAY, BISON $<) + $(BISON) --defines=$(notdir $(<:.y=.bison.h)) \ + --output=$(notdir $(<:.y=.bison.c)) $< + +TF_CPPFLAGS += -DCOPYRIGHT_YEAR='"$(COPYRIGHT_YEAR)"' +TF_CPPFLAGS += -DLOGLEVELS='$(LOGLEVELS)' +TF_CPPFLAGS += -DBUILD_DATE='"$(build_date)"' +TF_CPPFLAGS += -DCC_VERSION='"$(cc_version)"' +TF_CPPFLAGS += -DUNAME_RS='"$(uname_rs)"' +TF_CPPFLAGS += -I/usr/local/include + +TF_CFLAGS += -g +TF_CFLAGS += -O2 +TF_CFLAGS += -Wall +TF_CFLAGS += -Wundef -W -Wuninitialized +TF_CFLAGS += -Wchar-subscripts +TF_CFLAGS += -Werror-implicit-function-declaration +TF_CFLAGS += -Wmissing-noreturn +TF_CFLAGS += -Wbad-function-cast +TF_CFLAGS += -Wredundant-decls +TF_CFLAGS += -Wdeclaration-after-statement +TF_CFLAGS += -Wformat -Wformat-security -Wmissing-format-attribute + +%.flex.o: TF_CFLAGS += -Wno-all + +%.o: %.c tfortune.lsg.h txp.bison.h + $(call SAY, CC $<) + $(CC) \ + -o $@ -c -MMD -MF $(*F).d \ + -MT $@ $(TF_CPPFLAGS) $(CPPFLAGS) $(TF_CFLAGS) $(CFLAGS) $< + +TF_LDFLAGS=-llopsub +tfortune: $(deps:.d=.o) + $(call SAY, LD $@) + $(CC) $^ -o $@ $(TF_LDFLAGS) $(LDFLAGS) + +.PHONY: all mostlyclean clean install install-strip + +mostlyclean: + $(RM) tfortune *.o *.d +clean: mostlyclean + $(RM) *.lsg.* *.flex.* *.bison.* *.1 version.c +distclean: clean + $(RM) config.mak config.status config.log config.h configure config.h.in + $(RM) -r autom4te.cache +maintainer-clean: distclean + git clean -dfqx > /dev/null 2>&1 + +mandir := $(datarootdir)/man/man1 +INSTALL ?= install +INSTALL_PROGRAM ?= $(INSTALL) -m 755 +INSTALL_DATA ?= $(INSTALL) -m 644 +ifneq ($(findstring strip, $(MAKECMDGOALS)),) + strip_option := -s +endif + +install install-strip: all + $(MKDIR_P) $(DESTDIR)$(bindir) $(DESTDIR)$(mandir) + $(INSTALL_PROGRAM) $(strip_option) tfortune $(DESTDIR)$(bindir) + $(INSTALL_DATA) tfortune.1 $(DESTDIR)$(mandir) diff --git a/ast.c b/ast.c new file mode 100644 index 0000000..d7ce1c0 --- /dev/null +++ b/ast.c @@ -0,0 +1,395 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ + +#include "tf.h" +#include "txp.bison.h" + +enum semantic_types { + ST_STRVAL, + ST_INTVAL, + ST_BOOLVAL, + ST_REGEX_PATTERN, +}; + +struct txp_context { + /* global context */ + char *errmsg; + struct txp_ast_node *ast; + /* per tag expression context */ + unsigned num_lines; + unsigned num_tags; + char **tags; +}; + +/* + * Store an error message in the parser context and log it. + * + * This is called if the lexer or the parser detects an error. Only the first + * error is recorded and logged (with a severity of "warning"); subsequent + * calls return early because ctx->errmsg is already set. + */ +__attribute__ ((format (printf, 3, 4))) +void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...) +{ + va_list ap; + char *tmp; + + if (ctx->errmsg) /* we already printed an error message */ + return; + va_start(ap, fmt); + xvasprintf(&tmp, fmt, ap); + va_end(ap); + xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); + free(tmp); + WARNING_LOG("%s\n", ctx->errmsg); +} + +/* + * Parse a (generalized) string literal. + * + * This function turns the generalized C99 string literal given by src into a C + * string. For example, the string literal "xyz\n" is transformed into an + * array containing the three characters 'x', 'y' and 'z', followed by a + * newline character and the terminating zero byte. The function allows the + * caller to specify different quote characters so that, for example, regular + * expression patterns enclosed in '/' can be parsed as well. To parse a proper + * string literal, one has to pass two double quotes as the second argument. + * + * The function strips off the opening and closing quote characters, replaces + * double backslashes by single backslashes and handles the usual escapes like + * \n and \". + * + * The caller must make sure that the input is well-formed. The function simply + * aborts if the input is not a valid C99 string literal (modulo the quote + * characters). + * + * The return value is the offset of the first character after the closing + * quote. 
For proper string literals this will be the terminating zero byte of + * the input string, for regular expression patterns it is the beginning of the + * flags which modify the matching behaviour. + */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result) +{ + size_t n, len = strlen(src); + char *dst, *p; + bool backslash; + + assert(len >= 2); + assert(src[0] == quote_chars[0]); + p = dst = xmalloc(len - 1); + backslash = false; + for (n = 1;; n++) { + char c; + assert(n < len); + c = src[n]; + if (!backslash) { + if (c == '\\') { + backslash = true; + continue; + } + if (c == quote_chars[1]) + break; + *p++ = c; + continue; + } + if (c == quote_chars[1]) + *p++ = quote_chars[1]; + else switch (c) { + case '\\': *p++ = '\\'; break; + case 'a': *p++ = '\a'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + case 'v': *p++ = '\v'; break; + default: assert(false); + } + backslash = false; + } + assert(src[n] == quote_chars[1]); + *p = '\0'; + *result = dst; + return n + 1; +} + +/* + * Parse and compile an extended regular expression pattern, including flags. + * + * A regex pattern is identical to a C99 string literal except (a) it is + * enclosed in '/' characters rather than double quotes, (b) double quote + * characters which are part of the pattern do not need to be quoted with + * backslashes, but slashes must be quoted in this way, and (c) the closing + * slash may be followed by one or more flag characters which modify the + * matching behaviour. + * + * The only flags which are currently supported are 'i' to ignore case in match + * (REG_ICASE) and 'n' to change the handling of newline characters + * (REG_NEWLINE). + * + * This function calls parse_quoted_string(), hence it aborts if the input + * string is malformed. However, errors from regcomp(3) are returned without + * aborting the process. 
The rationale behind this difference is that passing a + * malformed string must be considered an implementation bug because malformed + * strings should be rejected earlier by the lexer. + */ +int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result) +{ + int ret; + char *pat; + unsigned n = parse_quoted_string(src, "//", &pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'i': result->flags |= REG_ICASE; break; + case 'n': result->flags |= REG_NEWLINE; break; + default: assert(false); + } + } + ret = xregcomp(&result->preg, pat, result->flags); + free(pat); + return ret; +} + +static struct txp_ast_node *ast_node_raw(int id) +{ + struct txp_ast_node *node = xmalloc(sizeof(*node)); + node->id = id; + return node; +} + +/* This is non-static because it is also called from the lexer. */ +struct txp_ast_node *txp_new_ast_leaf_node(int id) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 0; + return node; +} + +struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 1; + node->children = xmalloc(sizeof(struct txp_ast_node *)); + node->children[0] = child; + return node; +} + +struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left, + struct txp_ast_node *right) +{ + struct txp_ast_node *node = ast_node_raw(id); + node->num_children = 2; + node->children = xmalloc(2 * sizeof(struct txp_ast_node *)); + node->children[0] = left; + node->children[1] = right; + return node; +} + +void txp_free_ast(struct txp_ast_node *root) +{ + if (!root) + return; + if (root->num_children > 0) { + int i; + for (i = 0; i < root->num_children; i++) + txp_free_ast(root->children[i]); + free(root->children); + } else { + union txp_semantic_value *sv = &root->sv; + switch (root->id) { + case STRING_LITERAL: + free(sv->strval); + break; + case REGEX_PATTERN: + regfree(&sv->re_pattern.preg); + break; + } + } + 
free(root); +} + +static int eval_node(struct txp_ast_node *node, struct txp_context *ctx, + union txp_semantic_value *result); + +static void eval_binary_op(struct txp_ast_node *node, struct txp_context *ctx, + union txp_semantic_value *v1, union txp_semantic_value *v2) +{ + eval_node(node->children[0], ctx, v1); + eval_node(node->children[1], ctx, v2); +} + +static int eval_node(struct txp_ast_node *node, struct txp_context *ctx, + union txp_semantic_value *result) +{ + int ret; + union txp_semantic_value v1, v2; + + switch (node->id) { + /* strings */ + case STRING_LITERAL: + result->strval = node->sv.strval; + return ST_STRVAL; + /* integers */ + case NUM: + result->intval = node->sv.intval; + return ST_INTVAL; + case '+': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval + v2.intval; + return ST_INTVAL; + case '-': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval - v2.intval; + return ST_INTVAL; + case '*': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval * v2.intval; + return ST_INTVAL; + case '/': + eval_binary_op(node, ctx, &v1, &v2); + if (v2.intval == 0) { + static bool warned; + if (!warned) + ERROR_LOG("division by zero\n"); + warned = true; + result->intval = 0; + } else + result->intval = v1.intval / v2.intval; + return ST_INTVAL; + case NEG: + eval_node(node->children[0], ctx, &v1); + result->intval = -v1.intval; + return ST_INTVAL; + case NUM_LINES: + result->intval = ctx->num_lines; + return ST_INTVAL; + /* bools */ + case TRUE: + result->boolval = true; + return ST_BOOLVAL; + case FALSE: + result->boolval = false; + return ST_BOOLVAL; + case OR: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval || v2.boolval; + return ST_BOOLVAL; + case AND: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval && v2.boolval; + return ST_BOOLVAL; + case NOT: + eval_node(node->children[0], ctx, &v1); + result->boolval = !v1.boolval; + return ST_BOOLVAL; + case EQUAL: + ret = 
eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = !strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval == v2.intval; + return ST_BOOLVAL; + case NOT_EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval != v2.intval; + return ST_BOOLVAL; + case '<': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval < v2.intval; + return ST_BOOLVAL; + case '>': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval > v2.intval; + return ST_BOOLVAL; + case LESS_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval <= v2.intval; + return ST_BOOLVAL; + case GREATER_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval >= v2.intval; + return ST_BOOLVAL; + case REGEX_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = regexec(&v2.re_pattern.preg, v1.strval, + 0, NULL, 0) == 0; + return ST_BOOLVAL; + case REGEX_PATTERN: + result->re_pattern = node->sv.re_pattern; + return ST_REGEX_PATTERN; + default: + EMERG_LOG("bug: invalid node id %d\n", node->id); + exit(EXIT_FAILURE); + } +} + +bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx) +{ + union txp_semantic_value v; + int ret = eval_node(root, ctx, &v); + + if (ret == ST_INTVAL) + return v.intval != 0; + if (ret == ST_STRVAL) + return v.strval[0] != 0; + if (ret == ST_BOOLVAL) + return v.boolval; + assert(false); +} + +int txp_yylex_init(txp_yyscan_t *yyscanner); +struct yy_buffer_state *txp_yy_scan_bytes(const char *buf, int len, + txp_yyscan_t yyscanner); +void txp_yy_delete_buffer(struct yy_buffer_state *bs, txp_yyscan_t yyscanner); +int txp_yylex_destroy(txp_yyscan_t yyscanner); +void txp_yyset_lineno(int lineno, txp_yyscan_t scanner); + +/* + * Initialize the tag expression 
parser. + * + * This allocates and sets up the internal structures of the tag expression + * parser and creates an abstract syntax tree from the given tag expression + * definition. It must be called before txp_eval_ast() can be called. + * + * The context pointer returned by this function may be passed to txp_eval_ast() + * to determine whether an epigram is admissible. + * + * The error message pointer may be NULL in which case no error message is + * returned. Otherwise, the caller must free the returned string. + */ +int txp_init(const char *definition, int nbytes, struct txp_context **result, + char **errmsg) +{ + int ret; + txp_yyscan_t scanner; + struct txp_context *ctx; + struct yy_buffer_state *buffer_state; + + ctx = xcalloc(sizeof(*ctx)); + ret = txp_yylex_init(&scanner); + assert(ret == 0); + buffer_state = txp_yy_scan_bytes(definition, nbytes, scanner); + txp_yyset_lineno(1, scanner); + NOTICE_LOG("creating abstract syntax tree from tag expression\n"); + ret = txp_yyparse(ctx, &ctx->ast, scanner); + txp_yy_delete_buffer(buffer_state, scanner); + txp_yylex_destroy(scanner); + if (ctx->errmsg) { /* parse error */ + if (errmsg) + *errmsg = ctx->errmsg; + else + free(ctx->errmsg); + free(ctx); + return -E_TXP; + } + if (errmsg) + *errmsg = NULL; + *result = ctx; + return 1; +} diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..72b2fab --- /dev/null +++ b/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# SPDX-License-Identifier: GPL-2.0 + +autoheader && autoconf diff --git a/config.mak.in b/config.mak.in new file mode 100644 index 0000000..20ddfe6 --- /dev/null +++ b/config.mak.in @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +prefix := @prefix@ +exec_prefix := @exec_prefix@ + +# These two use prefix and exec_prefix +bindir := @bindir@ +datarootdir := @datarootdir@ + +PACKAGE_TARNAME := @PACKAGE_TARNAME@ +PACKAGE_VERSION := @PACKAGE_VERSION@ + +FLEX := @FLEX@ +BISON := @BISON@ +M4 := @M4@ +LOPSUBGEN := @LOPSUBGEN@ diff --git 
a/configure.ac b/configure.ac new file mode 100644 index 0000000..845315f --- /dev/null +++ b/configure.ac @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 + +AC_PREREQ([2.61]) + +AC_INIT([tfortune], [m4_esyscmd_s(./version-gen.sh)], + [maan@tuebingen.mpg.de], [], [http://people.tuebingen.mpg.de/maan/tfortune/]) +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([config.mak]) +AC_USE_SYSTEM_EXTENSIONS +AC_PROG_CC +AC_PROG_CPP + +AC_DEFUN([REQUIRE_EXECUTABLE], [ + AC_PATH_PROG(m4_toupper([$1]), [$1]) + test -z "$m4_toupper([$1])" && AC_MSG_ERROR([$1 is required]) +]) +REQUIRE_EXECUTABLE([flex]) +REQUIRE_EXECUTABLE([bison]) +REQUIRE_EXECUTABLE([lopsubgen]) + +HAVE_LOPSUB=yes +AC_CHECK_HEADER(lopsub.h, [], [HAVE_LOPSUB=no]) +AC_CHECK_LIB([lopsub], [lls_merge], [], [HAVE_LOPSUB=no]) +if test $HAVE_LOPSUB = no; then AC_MSG_ERROR([ + The lopsub library is required to build this software, but + the above checks indicate it is not installed on your system. + Run the following command to download a copy. + git clone git://git.tuebingen.mpg.de/lopsub.git + Install the library, then run this configure script again. 
+]) +fi +AC_OUTPUT diff --git a/err.h b/err.h new file mode 100644 index 0000000..897deee --- /dev/null +++ b/err.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define TF_ERRORS \ + TF_ERROR(SUCCESS, "success"), \ + TF_ERROR(ATOI_OVERFLOW, "value too large"), \ + TF_ERROR(ATOI_NO_DIGITS, "no digits found in string"), \ + TF_ERROR(ATOI_JUNK_AT_END, "further characters after number"), \ + TF_ERROR(TXP, "tag expression parse error"), \ + TF_ERROR(REGEX, "regular expression error"), \ + TF_ERROR(LOPSUB, "lopsub error"), \ + +/* + * This is temporarily defined to expand to its first argument (prefixed by + * 'E_') and gets later redefined to expand to the error text only + */ +#define TF_ERROR(err, msg) E_ ## err +enum tf_error_codes {TF_ERRORS}; +#undef TF_ERROR +#define TF_ERROR(err, msg) msg +#define DEFINE_TF_ERRLIST char *tf_errlist[] = {TF_ERRORS} + +extern char *tf_errlist[]; + +/** + * This bit indicates whether a number is considered a system error number + * If yes, the system errno is just the result of clearing this bit from + * the given number. + */ +#define SYSTEM_ERROR_BIT 30 + +/** Check whether the system error bit is set. */ +#define IS_SYSTEM_ERROR(num) (!!((num) & (1 << SYSTEM_ERROR_BIT))) + +/** Set the system error bit for the given number. */ +#define ERRNO_TO_TF_ERROR(num) ((num) | (1 << SYSTEM_ERROR_BIT)) + +static inline char *tf_strerror(int num) +{ + assert(num > 0); + if (IS_SYSTEM_ERROR(num)) + return strerror((num) & ((1 << SYSTEM_ERROR_BIT) - 1)); + else + return tf_errlist[num]; +} + diff --git a/list.h b/list.h new file mode 100644 index 0000000..ab52253 --- /dev/null +++ b/list.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copied from the Linux kernel source tree, version 2.6.13. + * + * Licensed under the GPL v2 as per the whole kernel source tree. 
+ * + */ + +/** \file list.h doubly linked list implementation */ + +#include <stddef.h> /* offsetof */ + +/** get the struct this entry is embedded in */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/** + * Non-NULL pointers that will result in page faults under normal + * circumstances, used to verify that nobody uses non-initialized list entries. + * Used for poisoning the \a next pointer of struct list_head. + */ +#define LIST_POISON1 ((void *) 0x00100100) +/** Non-null pointer, used for poisoning the \a prev pointer of struct + * list_head + */ +#define LIST_POISON2 ((void *) 0x00200200) + +/** Simple doubly linked list implementation. */ +struct list_head { + /** pointer to the next list entry */ + struct list_head *next; + /** pointer to the previous list entry */ + struct list_head *prev; +}; + +/** Define an initialized list head. */ +#define INITIALIZED_LIST_HEAD(name) struct list_head name = { &(name), &(name) } + + +/** must be called before using any other list functions */ +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + + +/* + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * add a new entry + * + * \param new new entry to be added + * \param head list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * Delete entry from list. + * + * \param entry the element to delete from the list. + * + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** + * delete from one list and add as another's head + * + * \param list: the entry to move + * \param head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + _list_add(list, head); +} + +/** + * test whether a list is empty + * + * \param head the list to test. 
+ */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * get the struct for this entry + * + * \param ptr the &struct list_head pointer. + * \param type the type of the struct this is embedded in. + * \param member the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * iterate over list of given type + * + * \param pos the type * to use as a loop counter. + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * iterate over list of given type safe against removal of list entry + * + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * iterate backwards over list of given type safe against removal of list entry + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. 
+ */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +/** + * Get the first element from a list + * \param ptr the list head to take the element from. + * \param type The type of the struct this is embedded in. + * \param member The name of the list_struct within the struct. + * + * Note that list is expected to be not empty. + */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/** + * Test whether a list has just one entry. + * + * \param head The list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + diff --git a/logo.svg b/logo.svg new file mode 100644 index 0000000..f64d714 --- /dev/null +++ b/logo.svg @@ -0,0 +1,30 @@ +<-- SPDX-License-Identifier: GPL-2.0 --> + + + + + + + + + diff --git a/tf.h b/tf.h new file mode 100644 index 0000000..4c01764 --- /dev/null +++ b/tf.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "err.h" + +/* Opaque, only known to ast.c. Passed to the generated txp_yyparse(). */ +struct txp_context; + +/* + * Since we use a reentrant lexer, all functions generated by flex(1) + * receive an additional argument of this type. + */ +typedef void *txp_yyscan_t; + +/* Parsed regex pattern. */ +struct txp_re_pattern { + regex_t preg; /* Pre-compiled regex. */ + unsigned flags; /* Subset of the cflags described in regex(3). */ +}; + +/* + * The possible values of a node in the abstract syntax tree (AST). 
+ * + * Constant semantic values (string literals, numeric constants and regex + * patterns which are part of the tag expression) are determined during + * txp_init() while values which depend on the epigram (tags, number of lines, + * etc.) are determined during txp_eval_ast(). + * + * This union, and the txp_ast_node structure below are used extensively in + * txp.y. However, both need to be public because the lexer must be able to + * create AST nodes for the constant semantic values. + */ +union txp_semantic_value { + bool boolval; /* Comparators, =~ and =|. */ + char *strval; /* String literals, tags. */ + int64_t intval; /* Numeric constants, number of lines, etc. */ + struct txp_re_pattern re_pattern; /* Right-hand side operand of =~. */ +}; + +/* + * A node is either interior or a leaf node. Interior nodes have at least one + * child while leaf nodes have a semantic value and no children. + * + * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string + * literal) and no children, (b) NEG (unary minus) has no semantic value but + * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL + * has no semantic value and two children (the two numeric expressions being + * compared). + */ +struct txp_ast_node { + /* Corresponds to a token type, for example LESS_OR_EQUAL. */ + int id; + union { + /* Pointers to the child nodes (interior nodes only). */ + struct txp_ast_node **children; + /* Leaf nodes only. */ + union txp_semantic_value sv; + }; + /* + * The number of children is implicitly given by the id, but we include + * it here to avoid having to maintain a lookup table. The AST is + * usually small, so we can afford to waste a byte per node. + */ + uint8_t num_children; +}; + +enum loglevels {LOGLEVELS, NUM_LOGLEVELS}; +#define DEBUG_LOG(f,...) txp_log(LL_DEBUG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define INFO_LOG(f,...) txp_log(LL_INFO, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define NOTICE_LOG(f,...) 
txp_log(LL_NOTICE, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define WARNING_LOG(f,...) txp_log(LL_WARNING, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define ERROR_LOG(f,...) txp_log(LL_ERROR, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define CRIT_LOG(f,...) txp_log(LL_CRIT, "%s: " f, __FUNCTION__, ## __VA_ARGS__) +#define EMERG_LOG(f,...) txp_log(LL_EMERG, "%s: " f, __FUNCTION__, ## __VA_ARGS__) + +/* tfortune.c */ + +void txp_log(int ll, const char* fmt,...); + +/* Called from both the lexer and the parser. */ +__attribute__ ((format (printf, 3, 4))) +void txp_parse_error(int line, struct txp_context *ctx, const char *fmt, ...); + +/* Helper functions for the lexer. */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result); +int txp_parse_regex_pattern(const char *src, struct txp_re_pattern *result); + +/* ast.c */ +struct txp_ast_node *ast_node_new_unary(int id, struct txp_ast_node *child); +struct txp_ast_node *ast_node_new_binary(int id, struct txp_ast_node *left, + struct txp_ast_node *right); + +/* + * Allocate a new leaf node for the abstract syntax tree. + * + * This returns a pointer to a node whose ->num_children field is initialized + * to zero. The ->id field is initialized with the given id. The caller is + * expected to initialize the ->sv field. + */ +struct txp_ast_node *txp_new_ast_leaf_node(int id); + +/* + * Evaluate an abstract syntax tree, starting at the root node. + * + * The root node argument should be the pointer that was returned from an + * earlier call to txp_init() via the context pointer. The context contains the + * information about the epigram. + * + * Returns true if the AST evaluates to true, a non-empty string, or a non-zero + * number, false otherwise. + */ +bool txp_eval_ast(struct txp_ast_node *root, struct txp_context *ctx); + +/* + * Deallocate an abstract syntax tree. 
+ * + * This frees the memory occupied by the nodes of the AST, the child pointers + * of the internal nodes and the (constant) semantic values of the leaf nodes + * (string literals and pre-compiled regular expressions). + */ +void txp_free_ast(struct txp_ast_node *root); + + +/* util.c */ +int atoi64(const char *str, int64_t *value); +unsigned xvasprintf(char **result, const char *fmt, va_list ap); +unsigned xasprintf(char **result, const char *fmt, ...); +void *xmalloc(size_t size); +void *xcalloc(size_t size); +int xregcomp(regex_t *preg, const char *regex, int cflags); + +/* txp.c */ + +/* txp.y. */ + diff --git a/tfortune.c b/tfortune.c new file mode 100644 index 0000000..7447059 --- /dev/null +++ b/tfortune.c @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tf.h" +#include "tfortune.lsg.h" + +#define TF_SEP "---- " + +struct tf_map { + void *map; + off_t len; +}; + +struct tf_cookie { + unsigned input_num; + off_t offset, len; +}; + +static void mmap_file(const char *path, struct tf_map *map) +{ + int fd, ret; + + ret = open(path, 0); + if (ret < 0) { + perror("open"); + exit(EXIT_FAILURE); + } + fd = ret; + map->len = lseek(fd, 0, SEEK_END); + if (map->len == (off_t)-1) { + perror("lseek"); + exit(EXIT_FAILURE); + } + map->map = mmap(NULL, map->len, PROT_READ, MAP_PRIVATE, fd, 0); + if (map->map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + ret = close(fd); + assert(ret >= 0); +} + +static bool tag_given(const char *tag, const char *tags, size_t sz) +{ + bool found = false; + char *p, *str = strndup(tags, sz); + + assert(str); + p = strtok(str, ","); + while (p) { + //fprintf(stderr, "sz: %zu, compare: %s <-> %.*s\n", sz, tag, (int) sz, p); + if (strcmp(p, tag) == 0) { + found = true; + break; + } + p = strtok(NULL, ","); + } + free(str); + return found; +} + +static 
bool tags_ok(const char *tags, size_t sz, struct lls_parse_result *lpr) +{ + unsigned n, given; + const struct lls_opt_result *r; + const char *arg; + + r = lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_ACCEPT, lpr); + given = lls_opt_given(r); + if (given == 0) + goto check_reject; + /* check if any of the given accept tags is set */ + for (n = 0; n < given; n++) { + arg = lls_string_val(n, r); + if (tag_given(arg, tags, sz)) + goto check_reject; + } + return false; /* accept tag(s) given, but none was set */ +check_reject: + /* check if none of the given reject tags is set */ + r = lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_REJECT, lpr); + given = lls_opt_given(r); + for (n = 0; n < given; n++) { + arg = lls_string_val(n, r); + if (tag_given(arg, tags, sz)) + return false; + } + return true; +} + +static void print_tags(const char *tags, size_t sz, FILE *f) +{ + char *p, *str = strndup(tags, sz); + + assert(str); + p = strtok(str, ","); + while (p) { + fprintf(f, "%s\n", p); + p = strtok(NULL, ","); + } + free(str); +} + +static void print_random_cookie(const struct tf_cookie *cookies, + unsigned num_cookies, const struct tf_map *maps) +{ + long unsigned r; + const struct tf_cookie *cookie; + struct timeval tv; + + if (num_cookies == 0) { + fprintf(stderr, "no matching cookie\n"); + return; + } + gettimeofday(&tv, NULL); + srandom((unsigned)tv.tv_usec); + r = ((num_cookies + 0.0) * (random() / (RAND_MAX + 1.0))); + cookie = cookies + r; + assert(r < num_cookies); + printf("%.*s", (int)cookie->len, + (char *)maps[cookie->input_num].map + cookie->offset); +} + +static void make_cookies(struct tf_map *maps, struct lls_parse_result *lpr) +{ + struct tf_cookie *cookies = NULL; + unsigned n, cookies_size = 0, num_cookies = 0, num_inputs; + size_t sep_len = strlen(TF_SEP); + const struct lls_opt_result *r_s; + FILE *f = NULL; + + r_s = lls_opt_result(LSG_TFORTUNE_TFORTUNE_OPT_STATISTICS, lpr); + if (lls_opt_given(r_s)) { + f = popen("sort | uniq -c | sort -n", "w"); + 
assert(f); + } + num_inputs = lls_num_inputs(lpr); + for (n = 0; n < num_inputs; n++) { + struct tf_map *m = maps + n; + const char *start = m->map, *end = m->map + m->len; + const char *buf = start, *cookie_start = start; + + while (buf < end) { + struct tf_cookie *cookie; + const char *p, *cr, *tags; + size_t sz; + + cr = memchr(buf, '\n', end - buf); + if (!cr) + break; + p = cr + 1; + if (!cookie_start) + cookie_start = p; + if (p + sep_len >= end) + break; + if (strncmp(p, TF_SEP, sep_len) != 0) { + buf = p; + continue; + } + tags = p + sep_len; + cr = memchr(tags, '\n', end - tags); + if (cr) + sz = cr - tags; + else + sz = end - tags; + if (!tags_ok(tags, sz, lpr)) { + if (!cr) + break; + buf = cr; + cookie_start = NULL; + continue; + } + num_cookies++; + if (lls_opt_given(r_s)) { + print_tags(tags, sz, f); + if (!cr) + break; + buf = cr; + continue; + } + if (num_cookies > cookies_size) { + cookies_size = 2 * cookies_size + 1; + cookies = realloc(cookies, + cookies_size * sizeof(*cookies)); + assert(cookies); + } + cookie = cookies + num_cookies - 1; + cookie->input_num = n; + cookie->offset = cookie_start - start; + cookie->len = p - cookie_start; + buf = p + sep_len; + cookie_start = NULL; + } + } + if (f) + pclose(f); + if (!lls_opt_given(r_s)) + print_random_cookie(cookies, num_cookies, maps); + else + printf("num cookies: %u\n", num_cookies); /* num_cookies is unsigned */ + free(cookies); +} + +int main(int argc, char **argv) +{ + char *errctx; + struct lls_parse_result *lpr; + int ret; + struct tf_map *maps; + unsigned n, num_inputs; + const struct lls_command *cmd = lls_cmd(0, tfortune_suite); + + ret = lls_parse(argc, argv, cmd, &lpr, &errctx); + if (ret < 0) { + fprintf(stderr, "%s: %s\n", errctx?
errctx : "", + lls_strerror(-ret)); + free(errctx); + exit(EXIT_FAILURE); + } + if (lls_num_inputs(lpr) == 0) { + char *help = lls_short_help(cmd); + fprintf(stderr, "%s\n", help); + free(help); + ret = 0; + goto free_lpr; + } + num_inputs = lls_num_inputs(lpr); + maps = xmalloc(num_inputs * sizeof(*maps)); + for (n = 0; n < num_inputs; n++) + mmap_file(lls_input(n, lpr), maps + n); + make_cookies(maps, lpr); + free(maps); + ret = EXIT_SUCCESS; +free_lpr: + lls_free_parse_result(lpr, cmd); + return ret; +} diff --git a/tfortune.suite b/tfortune.suite new file mode 100644 index 0000000..6545865 --- /dev/null +++ b/tfortune.suite @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-2.0 + +[suite tfortune] +[supercommand tfortune] + purpose = fortune cookies with tags + [description] + Like fortune(1), tfortune prints out a random epigram. However, + the epigrams in the input files must contain additional information, + called "tags". The program picks only epigrams which are considered + "admissible" based on the tags and the command line options. + [/description] + non-opts-name = ... + [option accept] + short_opt = a + summary = epigrams which contain this tag are admissible + arg_info = required_arg + arg_type = string + typestr = tag + flag multiple + [help] + This option may be given multiple times. + [/help] + [option reject] + short_opt = r + summary = epigrams which contain this tag are inadmissible + arg_info = required_arg + arg_type = string + typestr = tag + flag multiple + [help] + Like --accept, this may be given multiple times. See the discussion + below how --accept and --reject interact. + [/help] + [option statistics] + short_opt = s + summary = print tags found in input files + [help] + If this option is specified, tfortune does not print any + epigrams. Instead, it prints all tags found in the given input file(s), + together with how many times each tag occurred. The list is sorted by + occurrence count.
+ [/help] + [option accept-reject] + summary = Admissible epigrams + flag ignored + [help] + tfortune picks a random epigram from the set of admissible epigrams, + which is computed as follows. If neither --accept nor --reject is + specified, all epigrams are considered admissible. If only --accept + is specified, epigrams with at least one tag given as an argument to + --accept are admissible, all others are inadmissible. Similarly, if + only --reject is specified, epigrams with at least one tag given as an + argument to --reject are inadmissible, all others are admissible. If + both --accept and --reject are specified, an epigram is admissible + if and only if it has at least one tag which is given as an argument + to --accept but no tag which is given as an argument to --reject. + [/help] + [option format] + summary = Input file format + flag ignored + [help] + Input files may contain arbitrarily many epigrams. The end of each + epigram must be marked with a "tag" line. The tag line consists of + four dashes, a space character, and a comma-separated list of tags. + Tags may span multiple words, but no comma is allowed. + [/help] + +[section example] +The following is an example input file for tfortune. It contains a single epigram +with two tags. + +.RS +.EX +Anyone who attempts to generate random numbers by deterministic means +is, of course, living in a state of sin. -- John von Neumann +---- math,religion +.EE +.RE +[/section] + +[section copyright] + Written by Andre Noll + .br + Copyright (C) 2016-present Andre Noll + .br + License: GNU GPL version 3 + .br + This is free software: you are free to change and redistribute it. + .br + There is NO WARRANTY, to the extent permitted by law.
+ .br + Report bugs to + .MT + Andre Noll + .ME + .br + Homepage: + .UR http://people.tuebingen.mpg.de/~maan/tfortune/ + .UE +[/section] +[section see also] + .BR fortune (6) +[/section] diff --git a/txp.lex b/txp.lex new file mode 100644 index 0000000..4b04fb3 --- /dev/null +++ b/txp.lex @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + /* + * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to + * behave as though yywrap() returned 1. + */ +%option noyywrap + + /* + * We don't want symbols to clash with those of other flex users, particularly + * lopsub. + */ +%option prefix="txp_yy" + + /* + * Generate a scanner that maintains the number of the current line read from + * its input in the yylineno variable. + */ +%option yylineno + + /* Generate a bison-compatible scanner. */ +%option bison-bridge bison-locations + + /* + * Warn (in particular) if the default rule can be matched but no default rule + * has been given. + */ +%option warn + + /* + * Generate a scanner which is portable and safe to use in one or more threads + * of control. + */ +%option reentrant + + /* + * Generate a scanner which always looks one extra character ahead. This is a + * bit faster than an interactive scanner for which look ahead happens only + * when necessary. 
+ */ +%option never-interactive + +%{ +#include "tf.h" + +#define YYSTYPE TXP_YYSTYPE +#define YYLTYPE TXP_YYLTYPE +#define YY_DECL int txp_yylex(TXP_YYSTYPE *yylval_param, TXP_YYLTYPE *yylloc_param, \ + struct txp_context *ctx, struct txp_ast_node **ast, txp_yyscan_t yyscanner) +#include "txp.bison.h" +#define TXP_YY_USER_ACTION do {txp_yylloc->first_line = txp_yylineno;} while (0); +%} +DECIMAL_CONSTANT (0|([[:digit:]]{-}[0])[[:digit:]]*) +STRING_LITERAL \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\" +REGEX_PATTERN \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])* +%% + +tag {return TAG;} +num_lines {return NUM_LINES;} +true {return TRUE;} +false {return FALSE;} + +[[:space:]]+|#.*\n /* skip comments and whitespace */ + +"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];} + +"||" {return OR;} +"&&" {return AND;} +"!" {return NOT;} +"==" {return EQUAL;} +"!=" {return NOT_EQUAL;} +"<=" {return LESS_OR_EQUAL;} +">=" {return GREATER_OR_EQUAL;} +"=~" {return REGEX_MATCH;} + +{DECIMAL_CONSTANT} { + int ret; + yylval->node = txp_new_ast_leaf_node(NUM); + ret = atoi64(yytext, &yylval->node->sv.intval); + if (ret < 0) { + free(yylval->node); + txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + tf_strerror(-ret)); + return -E_TXP; + } + return NUM; +} + +{STRING_LITERAL} { + yylval->node = txp_new_ast_leaf_node(STRING_LITERAL); + parse_quoted_string(yytext, "\"\"", &yylval->node->sv.strval); + return STRING_LITERAL; +} + +{REGEX_PATTERN} { + int ret; + yylval->node = txp_new_ast_leaf_node(REGEX_PATTERN); + ret = txp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern); + if (ret < 0) { + txp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + tf_strerror(-ret)); + return -E_TXP; + } + return REGEX_PATTERN; +} + +. 
{ + txp_parse_error(yylloc->first_line, ctx, "unrecognized text: %s", + yytext); + return -E_TXP; +} diff --git a/txp.y b/txp.y new file mode 100644 index 0000000..578b2a4 --- /dev/null +++ b/txp.y @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Provide more verbose and specific error messages instead of just "syntax + * error". + */ +%define parse.error verbose + +/* + * Verbose error messages may contain incorrect information if LAC (Lookahead + * Correction) is not enabled. + */ +%define parse.lac full + +/* Avoid symbol clashes (lopsub might also expose yy* symbols). */ +%define api.prefix {txp_yy} + +/* + * Although locations are automatically enabled as soon as the grammar uses the + * special @N tokens, specifying %locations explicitly allows for more accurate + * syntax error messages. + */ +%locations + +/* + * Generate a pure (reentrant) parser. With this option enabled, yylval and + * yylloc become local variables in yyparse(), and a different calling + * convention is used for yylex(). 
+ */ +%define api.pure full + +/* Additional arguments to yylex(), yyparse() and yyerror() */ +%param {struct txp_context *ctx} +%param {struct txp_ast_node **ast} +%param {txp_yyscan_t yyscanner} /* reentrant lexers */ + +%{ +#include "tf.h" +#include "txp.bison.h" + +int yylex(TXP_YYSTYPE *lvalp, TXP_YYLTYPE *llocp, struct txp_context *ctx, + struct txp_ast_node **ast, txp_yyscan_t yyscanner); +static void yyerror(YYLTYPE *llocp, struct txp_context *ctx, + struct txp_ast_node **ast, txp_yyscan_t yyscanner, const char *msg); + +%} + +%union { + struct txp_ast_node *node; +} + +/* terminals */ +%token <node> NUM +%token <node> STRING_LITERAL +%token <node> REGEX_PATTERN + +/* keywords with semantic value */ +%token <node> NUM_LINES +%token <node> FALSE TRUE + +/* keywords without semantic value */ +%token TAG + +/* operators, ordered by precedence */ +%left OR +%left AND +%left EQUAL NOT_EQUAL +%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH +%left '-' '+' +%left '*' '/' +%right NOT NEG /* negation (unary minus) */ + +/* nonterminals */ +%type <node> string +%type <node> exp +%type <node> boolexp + +%% + +program: + /* empty */ {*ast = NULL; return 0;} + | string {*ast = $1; return 0;} + | exp {*ast = $1; return 0;} + | boolexp {*ast = $1; return 0;} + +string: STRING_LITERAL {$$ = $1;} +; + +exp: NUM {$$ = $1;} + | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);} + | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);} + | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);} + | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);} + | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);} + | '(' exp ')' {$$ = $2;} + | NUM_LINES {$$ = txp_new_ast_leaf_node(NUM_LINES);} +; + +boolexp: TRUE {$$ = txp_new_ast_leaf_node(TRUE);} + | FALSE {$$ = txp_new_ast_leaf_node(FALSE);} + | '(' boolexp ')' {$$ = $2;} + | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);} + | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);} + | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);} + | exp
EQUAL exp {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | exp NOT_EQUAL exp {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} + | exp '<' exp {$$ = ast_node_new_binary('<', $1, $3);} + | exp '>' exp {$$ = ast_node_new_binary('>', $1, $3);} + | exp LESS_OR_EQUAL exp { + $$ = ast_node_new_binary(LESS_OR_EQUAL, $1, $3); + } + | exp GREATER_OR_EQUAL exp { + $$ = ast_node_new_binary(GREATER_OR_EQUAL, $1, $3); + } + | string REGEX_MATCH REGEX_PATTERN { + $$ = ast_node_new_binary(REGEX_MATCH, $1, $3); + } + | string EQUAL string {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | string NOT_EQUAL string {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} +; +%% + +/* Called by yyparse() on error */ +static void yyerror(YYLTYPE *llocp, struct txp_context *ctx, + __attribute__ ((unused)) struct txp_ast_node **ast, + __attribute__ ((unused)) txp_yyscan_t yyscanner, + const char *msg) +{ + txp_parse_error(llocp->first_line, ctx, "%s", msg); +} diff --git a/util.c b/util.c new file mode 100644 index 0000000..8645889 --- /dev/null +++ b/util.c @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "tf.h" + +DEFINE_TF_ERRLIST; + +int atoi64(const char *str, int64_t *value) +{ + char *endptr; + long long tmp; + + errno = 0; /* To distinguish success/failure after call */ + tmp = strtoll(str, &endptr, 10); + if (errno == ERANGE && (tmp == LLONG_MAX || tmp == LLONG_MIN)) + return -E_ATOI_OVERFLOW; + /* + * If there were no digits at all, strtoll() stores the original value + * of str in *endptr. + */ + if (endptr == str) + return -E_ATOI_NO_DIGITS; + /* + * The implementation may also set errno and return 0 in case no + * conversion was performed. 
+ */ + if (errno != 0 && tmp == 0) + return -E_ATOI_NO_DIGITS; + if (*endptr != '\0') /* Further characters after number */ + return -E_ATOI_JUNK_AT_END; + *value = tmp; + return 1; +} + +__attribute__ ((warn_unused_result)) +void *xrealloc(void *p, size_t size) +{ + /* + * No need to check for NULL pointers: If p is NULL, the call + * to realloc is equivalent to malloc(size) + */ + assert(size); + if (!(p = realloc(p, size))) { + EMERG_LOG("realloc failed (size = %zu), aborting\n", size); + exit(EXIT_FAILURE); + } + return p; +} + +__attribute__ ((warn_unused_result)) +void *xmalloc(size_t size) +{ + return xrealloc(NULL, size); +} + +__attribute__ ((warn_unused_result)) +void *xcalloc(size_t size) +{ + void *p = xmalloc(size); + memset(p, 0, size); + return p; +} + +/* + * Print a formatted message to a dynamically allocated string. + * + * This function is similar to vasprintf(), a GNU extension which is not in C + * or POSIX. It allocates a string large enough to hold the output including + * the terminating null byte. The allocated string is returned via the first + * argument and must be freed by the caller. However, unlike vasprintf(), this + * function calls exit() if insufficient memory is available, while vasprintf() + * returns -1 in this case. + * + * It returns the number of bytes written, not including the terminating + * null byte.
+ */ +__attribute__ ((format (printf, 2, 0))) +unsigned xvasprintf(char **result, const char *fmt, va_list ap) +{ + int ret; + size_t size = 150; + va_list aq; + + *result = xmalloc(size + 1); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0); + if ((size_t)ret < size) + return ret; + size = ret + 1; + *result = xrealloc(*result, size); + va_copy(aq, ap); + ret = vsnprintf(*result, size, fmt, aq); + va_end(aq); + assert(ret >= 0 && (size_t)ret < size); + return ret; +} + +__attribute__ ((format (printf, 2, 3))) +/* Print to a dynamically allocated string, variable number of arguments. */ +unsigned xasprintf(char **result, const char *fmt, ...) +{ + va_list ap; + unsigned ret; + + va_start(ap, fmt); + ret = xvasprintf(result, fmt, ap); + va_end(ap); + return ret; +} + +/* + * Compile a regular expression. + * + * This simple wrapper calls regcomp(3) and logs a message on errors. + */ +int xregcomp(regex_t *preg, const char *regex, int cflags) +{ + char *buf; + size_t size; + int ret = regcomp(preg, regex, cflags); + + if (ret == 0) + return 1; + size = regerror(ret, preg, NULL, 0); + buf = xmalloc(size); + regerror(ret, preg, buf, size); + ERROR_LOG("%s\n", buf); + free(buf); + return -E_REGEX; +} + +static int loglevel_arg_val; + +__attribute__ ((format (printf, 2, 3))) +void txp_log(int ll, const char* fmt,...) +{ + va_list argp; + if (ll < loglevel_arg_val) + return; + va_start(argp, fmt); + vfprintf(stderr, fmt, argp); + va_end(argp); +} diff --git a/version-gen.sh b/version-gen.sh new file mode 100755 index 0000000..5c6b965 --- /dev/null +++ b/version-gen.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# SPDX-License-Identifier: GPL-2.0 + +version_file='version.c' +ver='unnamed_version' +# First try git, then gitweb, then default. 
+if [ -e '.git' -o -e '../.git' ]; then + git_ver=$(git describe --abbrev=4 HEAD 2>/dev/null) + [ -z "$git_ver" ] && git_ver="$ver" + # update stat information in index to match working tree + git update-index -q --refresh > /dev/null + # if there are differences (exit code 1), the working tree is dirty + git diff-index --quiet HEAD || git_ver=$git_ver-dirty + ver=$git_ver +elif [ "${PWD##*/tfortune-}" != "$PWD" ]; then # inside a tfortune-<version> snapshot directory + ver=${PWD##*/tfortune-} +fi +ver=${ver#v} + +echo "$ver" + +# update version file if necessary +content="const char *lls_version(void) {return \"$ver\";}" +[ -r "$version_file" ] && echo "$content" | cmp -s - $version_file && exit 0 +echo >&2 "new git version: $ver" +echo "$content" > $version_file