From: Andre Noll Date: Tue, 11 Jul 2017 14:08:32 +0000 (+0200) Subject: Version 2 moods. X-Git-Tag: v0.6.1~7^2 X-Git-Url: http://git.tue.mpg.de/?a=commitdiff_plain;h=3d3a2f50a05501cf27f1155629799953f952bd4b;p=paraslash.git Version 2 moods. This introduces a context-free grammar for moods. The parser for the corresponding language (the version 2 mood parser) is generated by flex and bison and will eventually replace the open-coded parser for traditional (version 1) moods. Those are still supported, but the server now logs a deprecation warning when a version 1 mood is loaded. Loading a version 2 mood is a two-step procedure. In the first step the bison parser reads the mood definition stored in the moods table of the afs database. If there are no errors, the parser returns an abstract syntax tree which represents the syntactic structure of the mood definition. In the second step the abstract syntax tree is evaluated for each row of the audio file table in turn. If the evaluation function returns true, the audio file is considered admissible. In this case a reference to the row is added to the score table in the same way the version 1 mood parser stores the set of admissible files. The commit adds the following new files to the repository: * yy/mp.lex: the lexer * yy/mp.y: the bison parser * yy/makefile: rules for building the parser, included from main Makefile * mp.c: frontend (high-level API) and backend (helpers for yy/mp.[ch]) * mp.h: backend data structures and function prototypes The frontend is only needed in the existing mood.c, which now also contains the declarations of the frontend API so that mp.h needs only be included by the lexer and the parser. The comment at the top of mp.c gives an overview of the mood parser API. All non-static functions of mp.c (both frontend and backend) are fully documented. The section on moods of the user manual has been rewritten and extended. 
Since flex and bison are required to build para_server, the list of optional software packages now mentions these tools, with links to their home page. If either tool is not installed, the configure script succeeds but para_server will not be built. --- diff --git a/Makefile.in b/Makefile.in index 556a926c..d4a83a77 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,6 +8,8 @@ datarootdir := @datarootdir@ PACKAGE_TARNAME := @PACKAGE_TARNAME@ PACKAGE_VERSION := @PACKAGE_VERSION@ +FLEX := @FLEX@ +BISON := @BISON@ M4 := @M4@ LOPSUBGEN := @LOPSUBGEN@ diff --git a/Makefile.real b/Makefile.real index 3631a5c9..b60c5698 100644 --- a/Makefile.real +++ b/Makefile.real @@ -32,12 +32,15 @@ m4depdir := $(build_dir)/m4deps lls_suite_dir := $(build_dir)/lls lls_m4_dir := m4/lls test_dir := t +yy_src_dir = yy +yy_build_dir = $(build_dir)/yy # sort removes duplicate words, which is all we need here all_objs := $(sort $(recv_objs) $(filter_objs) $(client_objs) $(gui_objs) \ $(audiod_objs) $(audioc_objs) $(mixer_objs) $(server_objs) \ $(write_objs) $(afh_objs) $(play_objs)) deps := $(addprefix $(dep_dir)/, $(all_objs:.o=.d)) +deps += $(addprefix $(dep_dir)/, mp.bison.d mp.flex.d) afh_objs += afh.lsg.o audioc_objs += audioc.lsg.o @@ -57,6 +60,12 @@ suites := $(addprefix $(lls_suite_dir)/, $(cmd_suites) $(executables)) m4_lls_deps := $(addsuffix .m4d, $(suites)) lsg_h := $(addsuffix .lsg.h, $(suites)) +# flex/bison objects and headers are only needed if para_server is built +ifeq ("$(findstring server, $(executables))", "server") + server_objs += mp.flex.o mp.bison.o + yy_h := $(yy_build_dir)/mp.bison.h +endif + # now prefix all objects with object dir recv_objs := $(addprefix $(object_dir)/, $(recv_objs)) filter_objs := $(addprefix $(object_dir)/, $(filter_objs)) @@ -85,12 +94,14 @@ man: $(man_pages) include $(lls_m4_dir)/makefile include $(test_dir)/makefile.test +include $(yy_src_dir)/makefile ifeq ($(findstring clean, $(MAKECMDGOALS)),) -include $(deps) -include $(m4_lls_deps) endif 
-$(object_dir) $(man_dir) $(dep_dir) $(m4depdir) $(lls_suite_dir): +$(object_dir) $(man_dir) $(dep_dir) $(m4depdir) $(lls_suite_dir) \ + $(yy_build_dir): $(Q) $(MKDIR_P) $@ CPPFLAGS += -DBINDIR='"$(bindir)"' @@ -101,6 +112,7 @@ CPPFLAGS += -DUNAME_RS='"$(uname_rs)"' CPPFLAGS += -DCC_VERSION='"$(cc_version)"' CPPFLAGS += -I/usr/local/include CPPFLAGS += -I$(lls_suite_dir) +CPPFLAGS += -I$(yy_build_dir) CPPFLAGS += $(lopsub_cppflags) STRICT_CFLAGS += -fno-strict-aliasing @@ -234,7 +246,7 @@ $(object_dir)/mm.o \ $(object_dir)/compress_filter.o: CFLAGS += -O3 -$(object_dir)/%.o: %.c | $(object_dir) $(dep_dir) $(lsg_h) +$(object_dir)/%.o: %.c | $(object_dir) $(dep_dir) $(lsg_h) $(yy_h) @[ -z "$(Q)" ] || echo 'CC $<' $(Q) $(CC) -c -o $@ -MMD -MF $(dep_dir)/$(*F).d -MT $@ $(CPPFLAGS) \ $(STRICT_CFLAGS) $(CFLAGS) $< diff --git a/afs.c b/afs.c index ef05a473..5623d7e9 100644 --- a/afs.c +++ b/afs.c @@ -466,23 +466,30 @@ no_admissible_files: } /* Never fails if arg == NULL */ -static int activate_mood_or_playlist(const char *arg, int *num_admissible) +static int activate_mood_or_playlist(const char *arg, int *num_admissible, + char **errmsg) { enum play_mode mode; int ret; if (!arg) { - ret = change_current_mood(NULL); /* always successful */ + ret = change_current_mood(NULL, NULL); /* always successful */ mode = PLAY_MODE_MOOD; } else { if (!strncmp(arg, "p/", 2)) { ret = playlist_open(arg + 2); + if (ret < 0 && errmsg) + *errmsg = make_message( "could not open %s", + arg); mode = PLAY_MODE_PLAYLIST; } else if (!strncmp(arg, "m/", 2)) { - ret = change_current_mood(arg + 2); + ret = change_current_mood(arg + 2, errmsg); mode = PLAY_MODE_MOOD; - } else + } else { + if (errmsg) + *errmsg = make_message("%s: parse error", arg); return -ERRNO_TO_PARA_ERROR(EINVAL); + } if (ret < 0) return ret; } @@ -564,6 +571,7 @@ static int com_select_callback(struct afs_callback_arg *aca) const struct lls_command *cmd = SERVER_CMD_CMD_PTR(SELECT); const char *arg; int num_admissible, ret; + 
char *errmsg; ret = lls_deserialize_parse_result(aca->query.data, cmd, &aca->lpr); assert(ret >= 0); @@ -577,22 +585,27 @@ static int com_select_callback(struct afs_callback_arg *aca) close_current_mood(); else playlist_close(); - ret = activate_mood_or_playlist(arg, &num_admissible); + ret = activate_mood_or_playlist(arg, &num_admissible, &errmsg); if (ret >= 0) goto out; /* ignore subsequent errors (but log them) */ + para_printf(&aca->pbout, "%s\n", errmsg); + free(errmsg); para_printf(&aca->pbout, "could not activate %s\n", arg); if (current_mop && strcmp(current_mop, arg) != 0) { int ret2; para_printf(&aca->pbout, "switching back to %s\n", current_mop); - ret2 = activate_mood_or_playlist(current_mop, &num_admissible); + ret2 = activate_mood_or_playlist(current_mop, &num_admissible, + &errmsg); if (ret2 >= 0) goto out; + para_printf(&aca->pbout, "%s\n", errmsg); + free(errmsg); para_printf(&aca->pbout, "could not reactivate %s: %s\n", current_mop, para_strerror(-ret2)); } para_printf(&aca->pbout, "activating dummy mood\n"); - activate_mood_or_playlist(NULL, &num_admissible); + activate_mood_or_playlist(NULL, &num_admissible, NULL); out: para_printf(&aca->pbout, "activated %s (%d admissible files)\n", current_mop? 
current_mop : "dummy mood", num_admissible); @@ -617,12 +630,12 @@ EXPORT_SERVER_CMD_HANDLER(select); static void init_admissible_files(const char *arg) { - int ret = activate_mood_or_playlist(arg, NULL); + int ret = activate_mood_or_playlist(arg, NULL, NULL); if (ret < 0) { assert(arg); PARA_WARNING_LOG("could not activate %s: %s\n", arg, para_strerror(-ret)); - activate_mood_or_playlist(NULL, NULL); /* always successful */ + activate_mood_or_playlist(NULL, NULL, NULL); } } diff --git a/configure.ac b/configure.ac index b6aac81b..bf55bd3c 100644 --- a/configure.ac +++ b/configure.ac @@ -50,6 +50,9 @@ AC_DEFUN([LIB_SUBST_FLAGS], [ AC_USE_SYSTEM_EXTENSIONS AC_C_BIGENDIAN() +AC_PATH_PROG([BISON], [bison]) +AC_PATH_PROG([FLEX], [flex]) + AC_PATH_PROG([M4], [m4]) test -z "$M4" && AC_MSG_ERROR( [The m4 macro processor is required to build this package]) @@ -169,6 +172,17 @@ AC_CHECK_TYPE([struct ucred], [ #include #include ]) +################################################################### FNM_EXTMATCH +AC_MSG_CHECKING(for extended wildcard pattern matching) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #include +]], [[ + unsigned n = FNM_EXTMATCH; +]])], [have_fnm_extmatch=yes], [have_fnm_extmatch=no]) +AC_MSG_RESULT($have_fnm_extmatch) +if test $have_fnm_extmatch = yes; then + AC_DEFINE(HAVE_FNM_EXTMATCH, 1, define to 1 if FNM_EXTMATCH is defined) +fi ########################################################################### curses STASH_FLAGS LIB_ARG_WITH([curses], []) @@ -334,7 +348,8 @@ AC_CHECK_LIB([samplerate], [src_process], [], HAVE_SAMPLERATE=no) LIB_SUBST_FLAGS(samplerate) UNSTASH_FLAGS ######################################################################### server -if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then +if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes && test -n "$BISON" && \ + test -n "$FLEX"; then build_server="yes" executables="$executables server" server_errlist_objs=" @@ -361,6 +376,7 @@ if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; 
then afs aft mood + mp score attribute blob diff --git a/error.h b/error.h index e792b058..7afd772b 100644 --- a/error.h +++ b/error.h @@ -137,6 +137,7 @@ PARA_ERROR(MAX_CLIENTS, "maximal number of clients exceeded"), \ PARA_ERROR(MISSING_COLON, "syntax error: missing colon"), \ PARA_ERROR(MOOD_SYNTAX, "mood syntax error"), \ + PARA_ERROR(MOOD_PARSE, "mood parse error"), \ PARA_ERROR(MP3DEC_CORRUPT, "too many corrupt frames"), \ PARA_ERROR(MP3DEC_EOF, "mp3dec: end of file"), \ PARA_ERROR(MP3_INFO, "could not read mp3 info"), \ diff --git a/mood.c b/mood.c index 40228be5..584c46b8 100644 --- a/mood.c +++ b/mood.c @@ -19,6 +19,16 @@ #include "mm.h" #include "mood.h" +/* + * Mood parser API. It's overkill to have an own header file for + * these declarations as they are only needed in this .c file. + */ +struct mp_context; +int mp_init(const char *definition, int nbytes, struct mp_context **result, + char **errmsg); +bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx); +void mp_shutdown(struct mp_context *ctx); + /** * Contains statistical data of the currently admissible audio files. * @@ -73,6 +83,8 @@ struct mood { struct list_head deny_list; /** The list of mood items of type \p score. */ struct list_head score_list; + /* Only used for version 2 moods. 
*/ + struct mp_context *parser_context; }; /* @@ -150,6 +162,10 @@ static int row_is_admissible(const struct osl_row *aft_row, struct mood *m, if (!m) return -E_NO_MOOD; + if (m->parser_context) { + *scorep = 0; + return mp_eval_row(aft_row, m->parser_context); + } ret = get_afsi_of_row(aft_row, &afsi); if (ret < 0) return ret; @@ -214,6 +230,7 @@ static void destroy_mood(struct mood *m) list_for_each_entry_safe(item, tmp, &m->score_list, mood_item_node) cleanup_list_entry(item); free(m->name); + mp_shutdown(m->parser_context); free(m); } @@ -367,7 +384,8 @@ out: return ret; } -static int load_mood(const struct osl_row *mood_row, struct mood **m) +static int load_mood(const struct osl_row *mood_row, struct mood **m, + char **errmsg) { char *mood_name; struct osl_object mood_def; @@ -383,15 +401,21 @@ static int load_mood(const struct osl_row *mood_row, struct mood **m) mlpd.m = alloc_new_mood(mood_name); ret = for_each_line(FELF_READ_ONLY, mood_def.data, mood_def.size, parse_mood_line, &mlpd); - osl_close_disk_object(&mood_def); if (ret < 0) { - PARA_ERROR_LOG("unable to load mood %s: %s\n", mlpd.m->name, - para_strerror(-ret)); - destroy_mood(mlpd.m); - return ret; + PARA_INFO_LOG("opening version 2 mood %s\n", mlpd.m->name); + ret = mp_init(mood_def.data, mood_def.size, &mlpd.m->parser_context, + errmsg); + if (ret < 0) + destroy_mood(mlpd.m); + } else { + PARA_WARNING_LOG("loaded version 1 mood %s\n", mlpd.m->name); + PARA_WARNING_LOG("please convert to version 2\n"); + ret = 1; } - *m = mlpd.m; - return 1; + osl_close_disk_object(&mood_def); + if (ret >= 0) + *m = mlpd.m; + return ret; } static int check_mood(struct osl_row *mood_row, void *data) @@ -409,12 +433,24 @@ static int check_mood(struct osl_row *mood_row, void *data) } if (!*mood_name) /* ignore dummy row */ goto out; - para_printf(pb, "checking mood %s...\n", mood_name); ret = for_each_line(FELF_READ_ONLY, mood_def.data, mood_def.size, parse_mood_line, &mlpd); - if (ret < 0) - para_printf(pb, "mood 
%s: error in line %u: %s\n", mood_name, - mlpd.line_num, para_strerror(-ret)); + if (ret < 0) { + char *errmsg; + struct mood *m = alloc_new_mood("check"); + ret = mp_init(mood_def.data, mood_def.size, &m->parser_context, + &errmsg); + if (ret < 0) { + para_printf(pb, "%s: %s\n", mood_name, errmsg); + free(errmsg); + para_printf(pb, "%s\n", para_strerror(-ret)); + } else + destroy_mood(m); + } else { + para_printf(pb, "%s: v1 mood, please convert to v2\n", + mood_name); + + } ret = 1; /* don't fail the loop on invalid mood definitions */ out: osl_close_disk_object(&mood_def); @@ -784,18 +820,22 @@ void close_current_mood(void) * Change the current mood. * * \param mood_name The name of the mood to open. + * \param errmsg Error description is returned here. * * If \a mood_name is \a NULL, load the dummy mood that accepts every audio file * and uses a scoring method based only on the \a last_played information. * + * The errmsg pointer may be NULL, in which case no error message will be + * returned. If a non-NULL pointer is given, the caller must free *errmsg. + * * If there is already an open mood, it will be closed first. * * \return Positive on success, negative on errors. Loading the dummy mood * always succeeds. * - * \sa struct \ref afs_info::last_played. + * \sa struct \ref afs_info::last_played, \ref mp_eval_row(). 
*/ -int change_current_mood(const char *mood_name) +int change_current_mood(const char *mood_name, char **errmsg) { int i, ret; struct admissible_array aa = { @@ -815,7 +855,7 @@ int change_current_mood(const char *mood_name) PARA_NOTICE_LOG("no such mood: %s\n", mood_name); return ret; } - ret = load_mood(row, &m); + ret = load_mood(row, &m, errmsg); if (ret < 0) return ret; close_current_mood(); @@ -866,7 +906,7 @@ static int reload_current_mood(void) if (current_mood->name) mood_name = para_strdup(current_mood->name); close_current_mood(); - ret = change_current_mood(mood_name); + ret = change_current_mood(mood_name, NULL); free(mood_name); return ret; } diff --git a/mood.h b/mood.h index f7055753..87050142 100644 --- a/mood.h +++ b/mood.h @@ -6,6 +6,6 @@ /** \file mood.h Public functions of mood.c. */ -int change_current_mood(const char *mood_name); +int change_current_mood(const char *mood_name, char **errmsg); void close_current_mood(void); int mood_check_callback(struct afs_callback_arg *aca); diff --git a/mp.c b/mp.c new file mode 100644 index 00000000..12fe336e --- /dev/null +++ b/mp.c @@ -0,0 +1,572 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** + * \file mp.c Mood parser helper functions. + * + * This file contains the public and the private API of the flex/bison based + * mood parser. + * + * The public API (at the bottom of the file) allows to parse the same mood + * definition many times in an efficient manner. + * + * The first function to call is \ref mp_init(), which analyzes the given mood + * definition syntactically. It returns the abstract syntax tree of the mood + * definition and pre-compiles all regular expression patterns to make later + * pattern matching efficient. + * + * Semantic analysis is performed in \ref mp_eval_row(). This function is + * called from \ref mood.c once for each file in the audio file table. 
It + * utilizes the abstract syntax tree and the pre-compiled regular expressions + * to determine the set of admissible audio files. + * + * If the mood is no longer needed, \ref mp_shutdown() should be called to free + * the resources. + * + * The internal API is described in \ref mp.h. + */ + +#include "para.h" + +#include +#include +#include +#include + +#include "string.h" +#include "error.h" +#include "afh.h" +#include "afs.h" +#include "mp.h" +#include "mp.bison.h" + +struct mp_context { + /* global context */ + char *errmsg; + struct mp_ast_node *ast; + /* per audio file context */ + const struct osl_row *aft_row; + char *path; + bool have_afsi; + struct afs_info afsi; + bool have_afhi; + struct afh_info afhi; +}; + +/** + * Parse a (generalized) string literal. + * + * \param src The string to parse. + * \param quote_chars Opening and closing quote characters. + * \param result The corresponding C string is returned here. + * + * This function turns a generalized C99 string literal like "xyz\n" into a C + * string (containing the three characters 'x', 'y' and 'z', followed by a + * newline character and the terminating zero byte). The function allows to + * specify different quote characters so that, for example, regular expression + * patterns enclosed in '/' can be parsed as well. To parse a proper string + * literal, one has to pass two double quotes as the second argument. + * + * The function strips off the opening and closing quote characters, replaces + * double backslashes by single backslashes and handles the usual escapes like + * \n and \". + * + * The caller must make sure that the input is well-formed. The function simply + * aborts if the input is not a valid C99 string literal (modulo the quote + * characters). + * + * \return Offset of the first character after the closing quote. 
For proper + * string literals this will be the terminating zero byte of the input string, + * for regular expression patterns it is the beginning of the flags which + * modify the matching behaviour. + * + * \sa \ref mp_parse_regex_pattern(), \ref mp_parse_wildcard_pattern(). + */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result) +{ + size_t n, len = strlen(src); + char *dst, *p; + bool backslash; + + assert(len >= 2); + assert(src[0] == quote_chars[0]); + p = dst = para_malloc(len - 1); + backslash = false; + for (n = 1;; n++) { + char c; + assert(n < len); + c = src[n]; + if (!backslash) { + if (c == '\\') { + backslash = true; + continue; + } + if (c == quote_chars[1]) + break; + *p++ = c; + continue; + } + if (c == quote_chars[1]) + *p++ = quote_chars[1]; + else switch (c) { + case '\\': *p++ = '\\'; break; + case 'a': *p++ = '\a'; break; + case 'b': *p++ = '\b'; break; + case 'f': *p++ = '\f'; break; + case 'n': *p++ = '\n'; break; + case 'r': *p++ = '\r'; break; + case 't': *p++ = '\t'; break; + case 'v': *p++ = '\v'; break; + default: assert(false); + } + backslash = false; + } + assert(src[n] == quote_chars[1]); + *p = '\0'; + *result = dst; + return n + 1; +} + +/** + * Parse and compile an extended regular expression pattern, including flags. + * + * \param src The pattern to parse. + * \param result C-string and flags are returned here. + * + * A regex pattern is identical to a C99 string literal except (a) it is + * enclosed in '/' characters rather than double quotes, (b) double quote + * characters which are part of the pattern do not need to be quoted with + * backslashes, but slashes must be quoted in this way, and (c) the closing + * slash may be followed by one or more flag characters which modify the + * matching behaviour. + * + * The only flags which are currently supported are 'i' to ignore case in match + * (REG_ICASE) and 'n' to change the handling of newline characters + * (REG_NEWLINE). 
+ * + * \return Standard. This function calls \ref parse_quoted_string(), hence it + * aborts if the input string is malformed. However, errors from \ref + * para_regcomp are returned without aborting the process. The rationale behind + * this difference is that passing a malformed string must be considered an + * implementation bug because malformed strings should be rejected earlier by + * the lexer. + * + * \sa \ref mp_parse_wildcard_pattern(), \ref parse_quoted_string(), + * \ref para_regcomp(), regex(3). + */ +int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result) +{ + int ret; + char *pat; + unsigned n = parse_quoted_string(src, "//", &pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'i': result->flags |= REG_ICASE; break; + case 'n': result->flags |= REG_NEWLINE; break; + default: assert(false); + } + } + ret = para_regcomp(&result->preg, pat, result->flags); + free(pat); + return ret; +} + +/** + * Parse a wildcard pattern, including flags. + * + * \param src The pattern to parse. + * \param result C-string and flags are returned here. + * + * This function parses a shell wildcard pattern. It is similar to \ref + * mp_parse_regex_pattern(), so the remarks mentioned there apply to this + * function as well. + * + * Wildcard patterns differ from regular expression patterns in that (a) they + * must be enclosed in '|' characters, (b) they support different flags for + * modifying matching behaviour, and (c) there is no cache for them. + * + * The following flags, whose meaning is explained in fnmatch(3), are currently + * supported: 'n' (FNM_NOESCAPE), 'p' (FNM_PATHNAME), 'P' (FNM_PERIOD), 'l' + * (FNM_LEADING_DIR), 'i' (FNM_CASEFOLD), 'e' (FNM_EXTMATCH). The last flag is + * a GNU extension. It is silently ignored on non GNU systems. + * + * \sa \ref parse_quoted_string(), \ref mp_parse_regex_pattern(), fnmatch(3). 
+ */ +void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result) +{ + unsigned n = parse_quoted_string(src, "||", &result->pat); + + result->flags = 0; + for (; src[n]; n++) { + switch (src[n]) { + case 'n': result->flags |= FNM_NOESCAPE; break; + case 'p': result->flags |= FNM_PATHNAME; break; + case 'P': result->flags |= FNM_PERIOD; break; + /* not POSIX, but both FreeBSD and NetBSD have it */ + case 'l': result->flags |= FNM_LEADING_DIR; break; + case 'i': result->flags |= FNM_CASEFOLD; break; + /* GNU only */ +#ifdef HAVE_FNM_EXTMATCH + case 'e': result->flags |= FNM_EXTMATCH; break; +#else /* silently ignore extglob flag */ + case 'e': break; +#endif + default: assert(false); + } + } +} + +/** + * Set the error bit in the parser context and log a message. + * + * \param line The number of the input line which caused the error. + * \param ctx Contains the error bit. + * \param fmt Usual format string. + * + * This is called if the lexer or the parser detect an error in the mood + * definition. Only the first error is logged (with a severity of "warn"). + */ +__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx, + const char *fmt, ...) +{ + va_list ap; + char *tmp; + + if (ctx->errmsg) /* we already printed an error message */ + return; + va_start(ap, fmt); + xvasprintf(&tmp, fmt, ap); + va_end(ap); + xasprintf(&ctx->errmsg, "line %d: %s", line, tmp); + free(tmp); + PARA_WARNING_LOG("%s\n", ctx->errmsg); +} + +static int get_afsi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afsi) + return 0; + ret = get_afsi_of_row(ctx->aft_row, &ctx->afsi); + if (ret < 0) + return ret; + ctx->have_afsi = true; + return 1; +} + +static int get_afhi(struct mp_context *ctx) +{ + int ret; + + if (ctx->have_afhi) + return 0; + ret = get_afhi_of_row(ctx->aft_row, &ctx->afhi); + if (ret < 0) + return ret; + ctx->have_afhi = true; + return 1; +} + +/** + * Return the full path to the audio file. 
+ * + * \param ctx Contains a reference to the row of the audio file table which + * corresponds to the current audio file. The path of the audio file, the + * afs_info and the afh_info structures (which contain the tag information) can + * be retrieved through this reference. + * + * \return A reference to the path. Must not be freed by the caller. + * + * \sa \ref get_audio_file_path_of_row(). + */ +char *mp_path(struct mp_context *ctx) +{ + if (!ctx->path) + get_audio_file_path_of_row(ctx->aft_row, &ctx->path); + return ctx->path; +} + +/** + * Check whether the given attribute is set for the current audio file. + * + * \param attr The string to look up in the attribute table. + * \param ctx See \ref mp_path(). + * + * First, determine the bit number which corresponds to the attribute, then + * check if this bit is set in the ->attributes field of the afs_info structure + * of the audio file. + * + * \return True if the attribute is set, false if it is not. On errors, for + * example if the given string is no attribute, the function returns false. + * + * \sa \ref get_attribute_bitnum_by_name(). + */ +bool mp_is_set(const char *attr, struct mp_context *ctx) +{ + int ret; + unsigned char bitnum; + const uint64_t one = 1; + + ret = get_attribute_bitnum_by_name(attr, &bitnum); + if (ret < 0) /* treat invalid attributes as not set */ + return false; + ret = get_afsi(ctx); + if (ret < 0) + return false; + return (one << bitnum) & ctx->afsi.attributes; +} + +/** + * Count the number of attributes set. + * + * \param ctx See \ref mp_path(). + * + * \return The number of bits which are set in the ->attributes field of the + * afs_info structure of the current audio file. 
+ */ +int64_t mp_num_attributes_set(struct mp_context *ctx) +{ + const uint64_t m = ~(uint64_t)0; + int ret; + uint64_t v; + + ret = get_afsi(ctx); + if (ret < 0) + return 0; + + v = ctx->afsi.attributes; + /* taken from https://graphics.stanford.edu/~seander/bithacks.html */ + v = v - ((v >> 1) & m / 3); + v = (v & m / 15 * 3) + ((v >> 2) & m / 15 * 3); + v = (v + (v >> 4)) & m / 255 * 15; + v = (v * (m / 255)) >> 56; + assert(v <= 64); + return v; +} + +/** + * Define a function which returns a field of the afs_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFSI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afsi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afsi._name; \ + } +/** \cond MP_AFSI */ +MP_AFSI(num_played) +MP_AFSI(image_id) +MP_AFSI(lyrics_id) +/** \endcond */ + +/** + * Define a function which returns a field of the afh_info structure. + * + * \param _name The name of the field. + * + * The defined function casts the value to int64_t. On errors, zero is returned. + */ +#define MP_AFHI(_name) \ + int64_t mp_ ## _name(struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return 0; \ + return ctx->afhi._name; \ + } +/** \cond MP_AFHI */ +MP_AFHI(bitrate) +MP_AFHI(frequency) +MP_AFHI(channels) +/** \endcond */ + +/** + * Define a function which extracts and returns the value of a meta tag. + * + * \param _name The name of the tag (artist, title, ...). + * + * The function will return a pointer to memory owned by the audio file + * selector. On errors, or if the current audio file has no tag of the given + * name, the function returns the empty string. The caller must not attempt to + * free the returned string. 
+ */ +#define MP_TAG(_name) \ + char *mp_ ## _name (struct mp_context *ctx) \ + { \ + int ret = get_afhi(ctx); \ + if (ret < 0) \ + return ""; \ + return ctx->afhi.tags._name; \ + } +/** \cond MP_TAG */ +MP_TAG(artist) +MP_TAG(title) +MP_TAG(album) +MP_TAG(comment) +/** \endcond */ + +/** + * Parse and return the value of the year tag. + * + * \param ctx See \ref mp_path(). + * + * \return If the year tag is not present, can not be parsed, or its value is + * less than zero, the function returns 0. If the value is less than 100, we + * add 1900. + */ +int64_t mp_year(struct mp_context *ctx) +{ + int64_t year; + int ret = get_afhi(ctx); + + if (ret < 0) + return 0; + assert(ctx->afhi.tags.year); + ret = para_atoi64(ctx->afhi.tags.year, &year); + if (ret < 0) + return 0; + if (year < 0) + return 0; + if (year < 100) + year += 1900; + return year; +} + +/* + * Ideally, these functions should be declared in a header file which is + * created by flex with the --header-file option. However, for flex-2.6.x + * (2017) this option is broken: if --reentrant is also given, the generated + * header file contains syntax errors. As a workaround we declare the functions + * here. + */ +/** \cond flex_workaround */ +int mp_yylex_init(mp_yyscan_t *yyscanner); +struct yy_buffer_state *mp_yy_scan_bytes(const char *buf, int len, + mp_yyscan_t yyscanner); +void mp_yy_delete_buffer(struct yy_buffer_state *bs, mp_yyscan_t yyscanner); +int mp_yylex_destroy(mp_yyscan_t yyscanner); +void mp_yyset_lineno(int lineno, mp_yyscan_t scanner); +/** \endcond */ + +/* Public API */ + +/** + * Initialize the mood parser. + * + * This allocates and sets up the internal structures of the mood parser + * and creates an abstract syntax tree from the given mood definition. + * It must be called before \ref mp_eval_row() can be called. + * + * The context pointer returned by this function may be passed to \ref + * mp_eval_row() to determine whether an audio file is admissible. 
+ * + * \param definition A reference to the mood definition. + * \param nbytes The size of the mood definition. + * \param result Opaque context pointer is returned here. + * \param errmsg Optional error message is returned here. + * + * It's OK to pass a NULL pointer or a zero sized buffer as the mood + * definition. This corresponds to the "dummy" mood for which all audio files + * are admissible. + * + * The error message pointer may also be NULL in which case no error message + * is returned. Otherwise, the caller must free the returned string. + * + * \return Standard. On success *errmsg is set to NULL. + */ +int mp_init(const char *definition, int nbytes, struct mp_context **result, + char **errmsg) +{ + int ret; + mp_yyscan_t scanner; + struct mp_context *ctx; + struct yy_buffer_state *buffer_state; + + if (!definition || nbytes == 0) { /* dummy mood */ + if (errmsg) + *errmsg = NULL; + *result = NULL; + return 0; + } + ctx = para_calloc(sizeof(*ctx)); + ctx->errmsg = NULL; + ctx->ast = NULL; + + ret = mp_yylex_init(&scanner); + assert(ret == 0); + buffer_state = mp_yy_scan_bytes(definition, nbytes, scanner); + mp_yyset_lineno(1, scanner); + PARA_NOTICE_LOG("creating abstract syntax tree\n"); + ret = mp_yyparse(ctx, &ctx->ast, scanner); + mp_yy_delete_buffer(buffer_state, scanner); + mp_yylex_destroy(scanner); + if (ctx->errmsg) { /* parse error */ + if (errmsg) + *errmsg = ctx->errmsg; + else + free(ctx->errmsg); + free(ctx); + return -E_MOOD_PARSE; + } + if (errmsg) + *errmsg = NULL; + *result = ctx; + return 1; +} + +/** + * Determine whether the given audio file is admissible. + * + * \param aft_row The audio file to check for admissibility. + * \param ctx As returned from \ref mp_init(). + * + * \return Whether the audio file is admissible. 
+ * + * If the mood parser was set up without an input buffer (dummy mood), this + * function returns true (without looking at the audio file metadata) to + * indicate that the given audio file should be considered admissible. + * + * \sa \ref change_current_mood(), \ref mp_eval_ast(). + */ +bool mp_eval_row(const struct osl_row *aft_row, struct mp_context *ctx) +{ + if (!ctx) /* dummy mood */ + return true; + assert(aft_row); + ctx->aft_row = aft_row; + ctx->have_afsi = false; + ctx->have_afhi = false; + ctx->path = NULL; + return mp_eval_ast(ctx->ast, ctx); +} + +/** + * Deallocate the resources of a mood parser. + * + * This function frees the abstract syntax tree which was created by \ref + * mp_init(). + * + * \param ctx As returned from \ref mp_init(). + * + * It's OK to pass a NULL pointer, in which case the function does nothing. + */ +void mp_shutdown(struct mp_context *ctx) +{ + if (!ctx) + return; + mp_free_ast(ctx->ast); + free(ctx); +} diff --git a/mp.h b/mp.h new file mode 100644 index 00000000..93bbab3e --- /dev/null +++ b/mp.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/** + * \file mp.h Internal mood parser API (backend). + * + * This header is included from the lexer, the parser, and from \ref mp.c, but + * not from \ref mood.c, the only user of the mood parser front end. It + * contains structures and function prototypes which are considered + * implementation details. + * + * There is one function for each keyword in the context-free grammar of the + * parser. These functions return the semantic value of the keyword. + * + * The functions declared here are defined either in mp.c or in mp.y. + */ + +/** Opaque, only known to mp.c. Passed to the generated mp_yyparse(). */ +struct mp_context; + +/** + * Since we use a reentrant lexer, all functions generated by flex(1) + * receive an additional argument of this type. 
+ */ +typedef void *mp_yyscan_t; + +/** Parsed regex pattern. */ +struct mp_re_pattern { + regex_t preg; /**< Pre-compiled regex. **/ + unsigned flags; /**< Subset of the cflags described in regex(3). */ +}; + +/** Parsed wildcard pattern. */ +struct mp_wc_pattern { + char *pat; /**< Unescaped C string (without quotes and flags). */ + unsigned flags; /**< For modifying matching behaviour. */ +}; + +/** + * The possible values of a node in the abstract syntax tree (AST). + * + * Constant semantic values (string literals, numeric constants, wildcard and + * regex patterns which are part of the mood definition) are determined during + * \ref mp_init() while values which depend on the audio file (path, bitrate, + * etc.) are determined during mp_eval_row(). + * + * This union, and the \ref mp_ast_node structure below are used extensively in + * mp.y. However, both need to be public because the lexer must be able to + * create AST nodes for the constant semantic values. + */ +union mp_semantic_value { + bool boolval; /**< Comparators, =~ and =|. */ + char *strval; /**< String literals, tags, path. */ + int64_t intval; /**< Constants, bitrate, frequency, etc. */ + struct mp_wc_pattern wc_pattern; /**< Right-hand side operand of =|. */ + struct mp_re_pattern re_pattern; /**< Right-hand side operand of =~. */ +}; + +/** + * Describes one node of the abstract syntax tree. + * + * A node is either interior or a leaf node. Interior nodes have at least one + * child while leaf nodes have a semantic value and no children. + * + * Examples: (a) STRING_LITERAL has a semantic value (the unescaped string + * literal) and no children, (b) NEG (unary minus) has no semantic value but + * one child (the numeric expression that is to be negated), (c) LESS_OR_EQUAL + * has no semantic value and two children (the two numeric expressions being + * compared). + */ +struct mp_ast_node { + /** Corresponds to a token type, for example LESS_OR_EQUAL. 
*/ + int id; + union { + /** Pointers to the child nodes (interior nodes only). */ + struct mp_ast_node **children; + /** Leaf nodes only. */ + union mp_semantic_value sv; + }; + /** + * The number of children is implicitly given by the id, but we include + * it here to avoid having to maintain a lookup table. The AST is + * usually small, so we can afford to waste a byte per node. + */ + uint8_t num_children; +}; + +/* Called from both the lexer and the parser. */ +__printf_3_4 void mp_parse_error(int line, struct mp_context *ctx, + const char *fmt, ...); + +/* Helper functions for the lexer. */ +unsigned parse_quoted_string(const char *src, const char quote_chars[2], + char **result); +int mp_parse_regex_pattern(const char *src, struct mp_re_pattern *result); +void mp_parse_wildcard_pattern(const char *src, struct mp_wc_pattern *result); + +/* + * The functions below are implemented in mp.y. They are documented here + * because mp.y is not doxyfied. + */ + +/** + * Allocate a new leaf node for the abstract syntax tree. + * + * \param id Initial value for the ->id field of the new node + * + * \return Pointer to a node whose ->num_children field is initialized to zero. + * The caller is expected to initialize the ->sv field. + */ +struct mp_ast_node *mp_new_ast_leaf_node(int id); + +/** + * Evaluate an abstract syntax tree, starting at the root node. + * + * \param root As returned from \ref mp_init() via the context pointer. + * \param ctx Contains the aft row to evaluate. + * + * \return True if the AST evaluates to true, a non-empty string, or a + * non-zero number. False otherwise. + * + * \sa mp_eval_row(). + */ +bool mp_eval_ast(struct mp_ast_node *root, struct mp_context *ctx); + +/** + * Deallocate an abstract syntax tree. 
+ * + * This frees the memory occupied by the nodes of the AST, the child pointers + * of the internal nodes and the (constant) semantic values of the leaf nodes + * (string literals, unescaped wildcard patterns and pre-compiled regular + * expressions). + * + * \param root It's OK to pass NULL here. + */ +void mp_free_ast(struct mp_ast_node *root); + +/* Helper functions for the parser. */ +bool mp_is_set(const char *attr, struct mp_context *ctx); +char *mp_path(struct mp_context *ctx); +int64_t mp_year(struct mp_context *ctx); +int64_t mp_num_attributes_set(struct mp_context *ctx); + +/* Generated with MP_AFSI() */ +/** \cond MP_AFSI */ +int64_t mp_num_played(struct mp_context *ctx); +int64_t mp_image_id(struct mp_context *ctx); +int64_t mp_lyrics_id(struct mp_context *ctx); +/** \endcond */ + +/* Generated with MP_AFHI() */ +/** \cond MP_AFHI */ +int64_t mp_bitrate(struct mp_context *ctx); +int64_t mp_frequency(struct mp_context *ctx); +int64_t mp_channels(struct mp_context *ctx); +/** \endcond */ + +/* Generated with MP_TAG() */ +/** \cond MP_TAG */ +char *mp_artist(struct mp_context *ctx); +char *mp_title(struct mp_context *ctx); +char *mp_album(struct mp_context *ctx); +char *mp_comment(struct mp_context *ctx); +/** \endcond */ diff --git a/web/manual.md b/web/manual.md index 96d724c9..904ac180 100644 --- a/web/manual.md +++ b/web/manual.md @@ -338,6 +338,11 @@ libgcrypt are usually shipped with the distro, but you might have to install the development package (`libssl-dev` or `libgcrypt-dev` on debian systems) as well. +- [flex](https://github.com/westes/flex) and +[bison](https://www.gnu.org/software/bison) are needed to build the +mood parser of para_server. The build system will skip para_server +if these tools are not installed. + - [libmad](http://www.underbit.com/products/mad/). To compile in MP3 support for paraslash, the development package must be installed. It is called `libmad0-dev` on debian-based systems. 
Note that libmad is @@ -976,124 +981,140 @@ the score table (but not from the playlist).

Moods

-A mood consists of a unique name and its *mood definition*, which is -a set of *mood lines* containing expressions in terms of attributes -and other data contained in the database. - -At any time at most one mood can be *active* which means that -para_server is going to select only files from that subset of -admissible files. - -So in order to create a mood definition one has to write a set of -mood lines. Mood lines come in three flavours: Accept lines, deny -lines and score lines. - -The general syntax of the three types of mood lines is - - - accept [with score ] [if] [not] [options] - deny [with score ] [if] [not] [options] - score [if] [not] [options] - - -Here is either an integer or the string "random" which assigns -a random score to all matching files. The score value changes the -order in which admissible files are going to be selected, but is of -minor importance for this introduction. - -So we concentrate on the first two forms, i.e. accept and deny -lines. As usual, everything in square brackets is optional, i.e. -accept/deny lines take the following form when ignoring scores: - - accept [if] [not] [options] - -and analogously for the deny case. The "if" keyword is only syntactic -sugar and has no function. The "not" keyword just inverts the result, -so the essence of a mood line is the mood method part and the options -following thereafter. - -A *mood method* is realized as a function which takes an audio file -and computes a number from the data contained in the database. -If this number is non-negative, we say the file *matches* the mood -method. The file matches the full mood line if it either - - - matches the mood method and the "not" keyword is not given, -or - - does not match the mood method, but the "not" keyword is given. - -The set of admissible files for the whole mood is now defined as those -files which match at least one accept mood line, but no deny mood line. 
-More formally, an audio file F is admissible if and only if - - (F ~ AL1 or F ~ AL2...) and not (F ~ DL1 or F ~ DN2 ...) - -where AL1, AL2... are the accept lines, DL1, DL2... are the deny -lines and "~" means "matches". - -The cases where no mood lines of accept/deny type are defined need -special treatment: - - - Neither accept nor deny lines: This treats all files as - admissible (in fact, that is the definition of the dummy mood - which is activated automatically if no moods are available). - - - Only accept lines: A file is admissible iff it matches at - least one accept line: - - F ~ AL1 or F ~ AL2 or ... - - - Only deny lines: A file is admissible iff it matches no - deny line: - - not (F ~ DL1 or F ~ DN2 ...) - - - -

List of mood_methods

- - no_attributes_set - -Takes no arguments and matches an audio file if and only if no -attributes are set. - - is_set - -Takes the name of an attribute and matches iff that attribute is set. - - path_matches - -Takes a filename pattern and matches iff the path of the audio file -matches the pattern. - - artist_matches - album_matches - title_matches - comment_matches - -Takes an extended regular expression and matches iff the text of the -corresponding tag of the audio file matches the pattern. If the tag -is not set, the empty string is matched against the pattern. - - year ~ - bitrate ~ - frequency ~ - channels ~ - num_played ~ - image_id ~ - lyrics_id ~ - -Takes a comparator ~ of the set {<, =, <=, >, >=, !=} and a number -. Matches an audio file iff the condition ~ is -satisfied where val is the corresponding value of the audio file -(value of the year tag, bitrate in kbit/s, etc.). - -The year tag is special as its value is undefined if the audio file -has no year tag or the content of the year tag is not a number. Such -audio files never match. Another difference is the special treatment -if the year tag is a two-digit number. In this case either 1900 or -2000 is added to the tag value, depending on whether the number is -greater than 2000 plus the current year. - +A mood consists of a unique name and a definition. The definition +is an expression which describes which audio files are considered +admissible. At any time at most one mood can be active, meaning +that para_server will only stream files which are admissible for the +active mood. + +The expression may refer to attributes and other metadata stored in +the database. Expressions may be combined by means of logical and +arithmetical operators in a natural way. Moreover, string matching +based on regular expression or wildcard patterns is supported. + +The set of admissible files is determined by applying the expression +to each audio file in turn. 
For a mood definition to be valid, its +expression must evaluate to a number, a string or a boolean value +("true" or "false"). For numbers, any value other than zero means the +file is admissible. For strings, any non-empty string indicates an +admissible file. For boolean values, true means admissible and false +means not admissible. As a special case, the empty expression treats +all files as admissible. + +

Mood grammar

+ +Expressions are based on a context-free grammar which distinguishes +between several types for syntactic units or groupings. The grammar +defines a set of keywords which have a type and a corresponding +semantic value, as shown in the following table. + +Keyword | Type | Semantic value +:--------------------|--------:|:---------------------------------- +`path` | string | Full path of the current audio file +`artist` | string | Content of the artist meta tag +`title` | string | Content of the title meta tag +`album` | string | Content of the album meta tag +`comment` | string | Content of the comment meta tag +`num_attributes_set` | integer | Number of attributes which are set +`year` | integer | Content of the year meta tag [\*] +`num_played` | integer | How many times the file has been streamed +`image_id` | integer | The identifier of the (cover art) image +`lyrics_id` | integer | The identifier of the lyrics blob +`bitrate` | integer | The average bitrate +`frequency` | integer | The output sample rate +`channels` | integer | The number of channels +`is_set("foo")` | boolean | True if attribute "foo" is set. + +[\*] For most audio formats, the year tag is stored as a string. It +is converted to an integer by the mood parser. If the audio file +has no year tag or the content of the year tag is not a number, the +semantic value is zero. A special convention applies if the year tag +is a one-digit or a two-digit number. In this case 1900 is added to +the tag value. + +Expressions may be grouped using parentheses, logical and +arithmetical operators or string matching operators. The following +table lists the available operators. + +Token | Meaning +:------|:------- +`\|\|` | Logical Or +`&&` | Logical And +`!` | Logical Not +`==` | Equal (can be applied to all types) +`!=` | Not equal. 
Likewise +`<` | Less than +`<=` | Less or equal +`>` | Greater than +`>=` | Greater or equal +`+` | Arithmetical plus +`-` | Binary/unary minus +`*` | Multiplication +`/` | Division +`=~` | Regular expression match +`=\|` | Filename match + +Besides integers, strings and booleans there is an additional type +which describes regular expression or wildcard patterns. Patterns +are not just strings because they also include a list of flags which +modify matching behaviour. + +Regular expression patterns are of the form `/pattern/[flags]`. That +is, the pattern is delimited by slashes, and is followed by zero or +more characters, each specifying a flag according to the following +table + +Flag | POSIX name | Meaning +:----|--------------:|-------- +`i` | `REG_ICASE` | Ignore case in match +`n` | `REG_NEWLINE` | Newline is not matched by `.` or `[^...]`; `^` and `$` match at line boundaries + +Note that only extended regular expression patterns are supported. See +regex(3) for details. + +Wildcard patterns are similar, but the pattern must be delimited by +`'|'` characters rather than slashes. For wildcard patterns different +flags exist, as shown below. + +Flag | POSIX name | Meaning +:----|-----------------------:|-------- +`n` | `FNM_NOESCAPE` | Treat backslash as an ordinary character +`p` | `FNM_PATHNAME` | Match a slash only with a slash in pattern +`P` | `FNM_PERIOD` | Leading period has to be matched exactly +`l` | `FNM_LEADING_DIR` [\*] | Ignore "/\*" rest after successful matching +`i` | `FNM_CASEFOLD` [\*] | Ignore case in match +`e` | `FNM_EXTMATCH` [\*\*] | Enable extended pattern matching + +[\*] Not in POSIX, but both FreeBSD and NetBSD have it. + +[\*\*] GNU extension, silently ignored on non-GNU systems. + +See fnmatch(3) for details. + +Mood definitions may contain arbitrary whitespace and comments. +A comment is a word beginning with #. This word and all remaining +characters of the line are ignored. + +

Example moods

+ +* Files with no/invalid year tag: `year == 0` + +* Only oldies: `year != 0 && year < 1980` + +* Only 80's Rock or Metal: `(year >= 1980 && year < 1990) && + (is_set("rock") || is_set("metal"))` + +* Files with incomplete tags: `artist == "" || title == "" || album == +"" || comment == "" || year == 0` + +* Files with no attributes defined so far: `num_attributes_set == 0` + +* Only newly added files: `num_played == 0` + +* Only poor quality files: `bitrate < 96` + +* Cope with different spellings of Motörhead: `artist =~ /mot(ö|oe{0,1})rhead/i` + +* The same with extended wildcard patterns: `artist =| |mot+(o\|oe\|ö)rhead|ie`

Mood usage

@@ -1122,27 +1143,6 @@ if the "-a" switch is given: para ls -a - -

Example mood definition

- -Suppose you have defined attributes "punk" and "rock" and want to define -a mood containing only Punk-Rock songs. That is, an audio file should be -admissible if and only if both attributes are set. Since - - punk and rock - -is obviously the same as - - not (not punk or not rock) - -(de Morgan's rule), a mood definition that selects only Punk-Rock -songs is - - deny if not is_set punk - deny if not is_set rock - - - File renames and content changes -------------------------------- diff --git a/yy/makefile b/yy/makefile new file mode 100644 index 00000000..ed70d655 --- /dev/null +++ b/yy/makefile @@ -0,0 +1,17 @@ +.PRECIOUS: $(yy_build_dir)/%.flex.c $(yy_build_dir)/%.bison.c \ + $(yy_build_dir)/%.bison.h + +$(yy_build_dir)/%.flex.c: $(yy_src_dir)/%.lex | $(yy_build_dir) + @[ -z "$(Q)" ] || echo 'FLEX $<' + @$(FLEX) -o $@ $< + +$(yy_build_dir)/%.bison.c $(yy_build_dir)/%.bison.h: $(yy_src_dir)/%.y \ + | $(yy_build_dir) + @[ -z "$(Q)" ] || echo 'BISON $<' + @$(BISON) --defines=$(yy_build_dir)/$(notdir $(<:.y=.bison.h)) \ + --output=$(yy_build_dir)/$(notdir $(<:.y=.bison.c)) $< + +$(object_dir)/%.o: $(yy_build_dir)/%.c | $(object_dir) + @[ -z "$(Q)" ] || echo 'CC $<' + @$(Q) $(CC) -g -c -o $@ $(CPPFLAGS) -MMD -MF $(dep_dir)/$(*F).d \ + -MT $@ -iquote . -Wno-unused-macros $< diff --git a/yy/mp.lex b/yy/mp.lex new file mode 100644 index 00000000..1e06b8dc --- /dev/null +++ b/yy/mp.lex @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + + /* + * Since we do not supply yywrap(), we use noyywrap to instruct the scanner to + * behave as though yywrap() returned 1. + */ +%option noyywrap + + /* + * We don't want symbols to clash with those of other flex users, particularly + * lopsub. + */ +%option prefix="mp_yy" + + /* + * Generate a scanner that maintains the number of the current line read from + * its input in the yylineno variable. 
+ */ +%option yylineno + + /* Generate a bison-compatible scanner. */ +%option bison-bridge bison-locations + + /* + * Warn (in particular) if the default rule can be matched but no default rule + * has been given. + */ +%option warn + + /* + * Generate a scanner which is portable and safe to use in one or more threads + * of control. + */ +%option reentrant + + /* + * Generate a scanner which always looks one extra character ahead. This is a + * bit faster than an interactive scanner for which look ahead happens only + * when necessary. + */ +%option never-interactive + +%{ +#include +#include "para.h" +#include "string.h" +#include "mp.h" +#include "error.h" + +#define YYSTYPE MP_YYSTYPE +#define YYLTYPE MP_YYLTYPE +#define YY_DECL int mp_yylex(MP_YYSTYPE *yylval_param, MP_YYLTYPE *yylloc_param, \ + struct mp_context *ctx, struct mp_ast_node **ast, mp_yyscan_t yyscanner) +#include "mp.bison.h" +#define MP_YY_USER_ACTION do {mp_yylloc->first_line = mp_yylineno;} while (0); +%} +DECIMAL_CONSTANT (0|([[:digit:]]{-}[0])[[:digit:]]*) +STRING_LITERAL \"([^\"\\\n]|(\\[\"\\abfnrtv]))*\" +REGEX_PATTERN \/([^\/\\\n]|(\\[\/\\abfnrtv]))*\/([in])* +WILDCARD_PATTERN \|([^\|\\\n]|(\\[\|\\abfnrtv]))*\|([npPlie])* +%% + +is_set {return IS_SET;} +num_attributes_set {return NUM_ATTRIBUTES_SET;} +path {return PATH;} +artist {return ARTIST;} +title {return TITLE;} +album {return ALBUM;} +comment {return COMMENT;} +year {return YEAR;} +num_played {return NUM_PLAYED;} +image_id {return IMAGE_ID;} +lyrics_id {return LYRICS_ID;} +bitrate {return BITRATE;} +frequency {return FREQUENCY;} +channels {return CHANNELS;} +true {return TRUE;} +false {return FALSE;} + +[[:space:]]+|#.*\n /* skip comments and whitespace */ + +"("|")"|","|"+"|"-"|"*"|"/"|"<"|">" {return yytext[0];} + +"||" {return OR;} +"&&" {return AND;} +"!" 
{return NOT;} +"==" {return EQUAL;} +"!=" {return NOT_EQUAL;} +"<=" {return LESS_OR_EQUAL;} +">=" {return GREATER_OR_EQUAL;} +"=~" {return REGEX_MATCH;} +"=|" {return FILENAME_MATCH;} + +{DECIMAL_CONSTANT} { + int ret; + yylval->node = mp_new_ast_leaf_node(NUM); + ret = para_atoi64(yytext, &yylval->node->sv.intval); + if (ret < 0) { + free(yylval->node); + mp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + para_strerror(-ret)); + return -E_MOOD_PARSE; + } + return NUM; +} + +{STRING_LITERAL} { + yylval->node = mp_new_ast_leaf_node(STRING_LITERAL); + parse_quoted_string(yytext, "\"\"", &yylval->node->sv.strval); + //PARA_CRIT_LOG("strval: %s\n", yylval->node->sv.strval); + //PARA_CRIT_LOG("node: %p\n", yylval->node); + return STRING_LITERAL; +} + +{REGEX_PATTERN} { + int ret; + yylval->node = mp_new_ast_leaf_node(REGEX_PATTERN); + ret = mp_parse_regex_pattern(yytext, &yylval->node->sv.re_pattern); + if (ret < 0) { + mp_parse_error(yylloc->first_line, ctx, "%s: %s", yytext, + para_strerror(-ret)); + return -E_MOOD_PARSE; + } + return REGEX_PATTERN; +} + +{WILDCARD_PATTERN} { + yylval->node = mp_new_ast_leaf_node(WILDCARD_PATTERN); + mp_parse_wildcard_pattern(yytext, &yylval->node->sv.wc_pattern); + return WILDCARD_PATTERN; +} + +. { + mp_parse_error(yylloc->first_line, ctx, "unrecognized text: %s", + yytext); + return -E_MOOD_PARSE; +} diff --git a/yy/mp.y b/yy/mp.y new file mode 100644 index 00000000..82ef5140 --- /dev/null +++ b/yy/mp.y @@ -0,0 +1,419 @@ +/* + * Copyright (C) 2017 Andre Noll + * + * Licensed under the GPL v2. For licencing details see COPYING. + */ + +/* + * Provide more verbose and specific error messages instead of just "syntax + * error". + */ +%define parse.error verbose + +/* + * Verbose error messages may contain incorrect information if LAC (Lookahead + * Correction) is not enabled. + */ +%define parse.lac full + +/* Avoid symbol clashes (lopsub might also expose yy* symbols). 
*/ +%define api.prefix {mp_yy} + +/* + * Although locations are automatically enabled as soon as the grammar uses the + * special @N tokens, specifying %locations explicitly allows for more accurate + * syntax error messages. + */ +%locations + +/* + * Generate a pure (reentrant) parser. With this option enabled, yylval and + * yylloc become local variables in yyparse(), and a different calling + * convention is used for yylex(). + */ +%define api.pure full + +/* Additional arguments to yylex(), yyparse() and yyerror() */ +%param {struct mp_context *ctx} +%param {struct mp_ast_node **ast} +%param {mp_yyscan_t yyscanner} /* reentrant lexers */ + +%{ +#include +#include + +#include "para.h" +#include "string.h" +#include "mp.h" +#include "mp.bison.h" +#include "error.h" + +int yylex(MP_YYSTYPE *lvalp, MP_YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner); +static void yyerror(YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner, const char *msg); + +enum semantic_types { + ST_STRVAL, + ST_INTVAL, + ST_BOOLVAL, + ST_REGEX_PATTERN, + ST_WC_PATTERN +}; + +static struct mp_ast_node *ast_node_raw(int id) +{ + struct mp_ast_node *node = para_malloc(sizeof(struct mp_ast_node)); + node->id = id; + return node; +} + +/* This is non-static because it is also called from the lexer. 
*/ +struct mp_ast_node *mp_new_ast_leaf_node(int id) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 0; + return node; +} + +static struct mp_ast_node *ast_node_new_unary(int id, struct mp_ast_node *child) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 1; + node->children = para_malloc(sizeof(struct mp_ast_node *)); + node->children[0] = child; + return node; +} + +static struct mp_ast_node *ast_node_new_binary(int id, struct mp_ast_node *left, + struct mp_ast_node *right) +{ + struct mp_ast_node *node = ast_node_raw(id); + node->num_children = 2; + node->children = para_malloc(2 * sizeof(struct mp_ast_node *)); + node->children[0] = left; + node->children[1] = right; + return node; +} + +void mp_free_ast(struct mp_ast_node *root) +{ + if (!root) + return; + if (root->num_children > 0) { + int i; + for (i = 0; i < root->num_children; i++) + mp_free_ast(root->children[i]); + free(root->children); + } else { + union mp_semantic_value *sv = &root->sv; + switch (root->id) { + case STRING_LITERAL: + free(sv->strval); + break; + case REGEX_PATTERN: + regfree(&sv->re_pattern.preg); + break; + case WILDCARD_PATTERN: + free(sv->wc_pattern.pat); + break; + } + } + free(root); +} + +static int eval_node(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *result); + +static void eval_binary_op(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *v1, union mp_semantic_value *v2) +{ + eval_node(node->children[0], ctx, v1); + eval_node(node->children[1], ctx, v2); +} + +static int eval_node(struct mp_ast_node *node, struct mp_context *ctx, + union mp_semantic_value *result) +{ + int ret; + char *arg; + union mp_semantic_value v1, v2; + + switch (node->id) { + /* strings */ + case STRING_LITERAL: + result->strval = node->sv.strval; + return ST_STRVAL; + case PATH: + result->strval = mp_path(ctx); + return ST_STRVAL; + case ARTIST: + result->strval = mp_artist(ctx); + return 
ST_STRVAL; + case TITLE: + result->strval = mp_title(ctx); + return ST_STRVAL; + case ALBUM: + result->strval = mp_album(ctx); + return ST_STRVAL; + case COMMENT: + result->strval = mp_comment(ctx); + return ST_STRVAL; + /* integers */ + case NUM: + result->intval = node->sv.intval; + return ST_INTVAL; + case '+': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval + v2.intval; + return ST_INTVAL; + case '-': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval - v2.intval; + return ST_INTVAL; + case '*': + eval_binary_op(node, ctx, &v1, &v2); + result->intval = v1.intval * v2.intval; + return ST_INTVAL; + case '/': + eval_binary_op(node, ctx, &v1, &v2); + if (v2.intval == 0) { + static bool warned; + if (!warned) + PARA_ERROR_LOG("division by zero\n"); + warned = true; + result->intval = 0; + } else + result->intval = v1.intval / v2.intval; + return ST_INTVAL; + case NEG: + eval_node(node->children[0], ctx, &v1); + result->intval = -v1.intval; + return ST_INTVAL; + case YEAR: + result->intval = mp_year(ctx); + return ST_INTVAL; + case NUM_ATTRIBUTES_SET: + result->intval = mp_num_attributes_set(ctx); + return ST_INTVAL; + case NUM_PLAYED: + result->intval = mp_num_played(ctx); + return ST_INTVAL; + case IMAGE_ID: + result->intval = mp_image_id(ctx); + return ST_INTVAL; + case LYRICS_ID: + result->intval = mp_lyrics_id(ctx); + return ST_INTVAL; + case BITRATE: + result->intval = mp_bitrate(ctx); + return ST_INTVAL; + case FREQUENCY: + result->intval = mp_frequency(ctx); + return ST_INTVAL; + case CHANNELS: + result->intval= mp_channels(ctx); + return ST_INTVAL; + /* bools */ + case IS_SET: + arg = node->children[0]->sv.strval; + result->boolval = mp_is_set(arg, ctx); + return ST_BOOLVAL; + case TRUE: + result->boolval = true; + return ST_BOOLVAL; + case FALSE: + result->boolval = false; + return ST_BOOLVAL; + case OR: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval || v2.boolval; + return ST_BOOLVAL; + case AND: 
+ eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.boolval && v2.boolval; + return ST_BOOLVAL; + case NOT: + eval_node(node->children[0], ctx, &v1); + result->boolval = !v1.boolval; + return ST_BOOLVAL; + case EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = !strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval == v2.intval; + return ST_BOOLVAL; + case NOT_EQUAL: + ret = eval_node(node->children[0], ctx, &v1); + eval_node(node->children[1], ctx, &v2); + if (ret == ST_STRVAL) + result->boolval = strcmp(v1.strval, v2.strval); + else + result->boolval = v1.intval != v2.intval; + return ST_BOOLVAL; + case '<': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval < v2.intval; + return ST_BOOLVAL; + case '>': + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval > v2.intval; + return ST_BOOLVAL; + case LESS_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval <= v2.intval; + return ST_BOOLVAL; + case GREATER_OR_EQUAL: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = v1.intval >= v2.intval; + return ST_BOOLVAL; + case FILENAME_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = fnmatch(v2.wc_pattern.pat, v1.strval, + v2.wc_pattern.flags) == 0; + return ST_BOOLVAL; + case REGEX_MATCH: + eval_binary_op(node, ctx, &v1, &v2); + result->boolval = regexec(&v2.re_pattern.preg, v1.strval, + 0, NULL, 0) == 0; + return ST_BOOLVAL; + case REGEX_PATTERN: + result->re_pattern = node->sv.re_pattern; + return ST_REGEX_PATTERN; + case WILDCARD_PATTERN: + result->wc_pattern = node->sv.wc_pattern; + return ST_WC_PATTERN; + default: + PARA_EMERG_LOG("bug: invalid node id %d\n", node->id); + exit(EXIT_FAILURE); + } +} + +bool mp_eval_ast(struct mp_ast_node *root, struct mp_context *ctx) +{ + union mp_semantic_value v; + int ret = eval_node(root, ctx, &v); + + if (ret == ST_INTVAL) + return 
v.intval != 0; + if (ret == ST_STRVAL) + return v.strval[0] != 0; + if (ret == ST_BOOLVAL) + return v.boolval; + assert(false); +} + +%} + +%union { + struct mp_ast_node *node; +} + +/* terminals */ +%token NUM +%token STRING_LITERAL +%token REGEX_PATTERN +%token WILDCARD_PATTERN + +/* keywords with semantic value */ +%token PATH +%token ARTIST +%token TITLE +%token ALBUM +%token COMMENT +%token YEAR +%token NUM_ATTRIBUTES_SET +%token NUM_PLAYED +%token IMAGE_ID +%token LYRICS_ID +%token BITRATE +%token FREQUENCY +%token CHANNELS +%token FALSE TRUE + +/* keywords without semantic value */ +%token IS_SET + +/* operators, ordered by precedence */ +%left OR +%left AND +%left EQUAL NOT_EQUAL +%left LESS_THAN LESS_OR_EQUAL GREATER_OR_EQUAL REGEX_MATCH FILENAME_MATCH +%left '-' '+' +%left '*' '/' +%right NOT NEG /* negation (unary minus) */ + +/* nonterminals */ +%type string +%type exp +%type boolexp + +%% + +program: + /* empty */ {*ast = NULL; return 0;} + | string {*ast = $1; return 0;} + | exp {*ast = $1; return 0;} + | boolexp {*ast = $1; return 0;} + +string: STRING_LITERAL {$$ = $1;} + | PATH {$$ = mp_new_ast_leaf_node(PATH);} + | ARTIST {$$ = mp_new_ast_leaf_node(ARTIST);} + | TITLE {$$ = mp_new_ast_leaf_node(TITLE);} + | ALBUM {$$ = mp_new_ast_leaf_node(ALBUM);} + | COMMENT {$$ = mp_new_ast_leaf_node(COMMENT);} +; + +exp: NUM {$$ = $1;} + | exp '+' exp {$$ = ast_node_new_binary('+', $1, $3);} + | exp '-' exp {$$ = ast_node_new_binary('-', $1, $3);} + | exp '*' exp {$$ = ast_node_new_binary('*', $1, $3);} + | exp '/' exp {$$ = ast_node_new_binary('/', $1, $3);} + | '-' exp %prec NEG {$$ = ast_node_new_unary(NEG, $2);} + | '(' exp ')' {$$ = $2;} + | YEAR {$$ = mp_new_ast_leaf_node(YEAR);} + | NUM_ATTRIBUTES_SET {$$ = mp_new_ast_leaf_node(NUM_ATTRIBUTES_SET);} + | NUM_PLAYED {$$ = mp_new_ast_leaf_node(NUM_PLAYED);} + | IMAGE_ID {$$ = mp_new_ast_leaf_node(IMAGE_ID);} + | LYRICS_ID {$$ = mp_new_ast_leaf_node(LYRICS_ID);} + | BITRATE {$$ = 
mp_new_ast_leaf_node(BITRATE);} + | FREQUENCY {$$ = mp_new_ast_leaf_node(FREQUENCY);} + | CHANNELS {$$ = mp_new_ast_leaf_node(CHANNELS);} +; + +boolexp: IS_SET '(' STRING_LITERAL ')' {$$ = ast_node_new_unary(IS_SET, $3);} + | TRUE {$$ = mp_new_ast_leaf_node(TRUE);} + | FALSE {$$ = mp_new_ast_leaf_node(FALSE);} + | '(' boolexp ')' {$$ = $2;} + | boolexp OR boolexp {$$ = ast_node_new_binary(OR, $1, $3);} + | boolexp AND boolexp {$$ = ast_node_new_binary(AND, $1, $3);} + | NOT boolexp {$$ = ast_node_new_unary(NOT, $2);} + | exp EQUAL exp {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | exp NOT_EQUAL exp {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} + | exp '<' exp {$$ = ast_node_new_binary('<', $1, $3);} + | exp '>' exp {$$ = ast_node_new_binary('>', $1, $3);} + | exp LESS_OR_EQUAL exp { + $$ = ast_node_new_binary(LESS_OR_EQUAL, $1, $3); + } + | exp GREATER_OR_EQUAL exp { + $$ = ast_node_new_binary(GREATER_OR_EQUAL, $1, $3); + } + | string REGEX_MATCH REGEX_PATTERN { + $$ = ast_node_new_binary(REGEX_MATCH, $1, $3); + } + | string FILENAME_MATCH WILDCARD_PATTERN { + $$ = ast_node_new_binary(FILENAME_MATCH, $1, $3); + } + | string EQUAL string {$$ = ast_node_new_binary(EQUAL, $1, $3);} + | string NOT_EQUAL string {$$ = ast_node_new_binary(NOT_EQUAL, $1, $3);} +; +%% + +/* Called by yyparse() on error */ +static void yyerror(YYLTYPE *llocp, struct mp_context *ctx, + struct mp_ast_node **ast, mp_yyscan_t yyscanner, const char *msg) +{ + mp_parse_error(llocp->first_line, ctx, "%s", msg); +}