From: Andre Noll Date: Mon, 25 Nov 2019 20:22:35 +0000 (+0100) Subject: Rework score formula. X-Git-Tag: v0.7.0~17^2 X-Git-Url: http://git.tue.mpg.de/?a=commitdiff_plain;h=86b5aba882056a6ff6d8645684e59222ba74a818;p=paraslash.git Rework score formula. Currently the two scales for the num_played and the last_played components of the score value are computed independently of each other. There is, however, a natural link between the two scales: a file with best possible num_played value (zero) and worst possible last_played value (now) should receive the average score zero. This patch employs this idea to rescale the two components. See the new comment to compute_score() for details about the implementation. --- diff --git a/mood.c b/mood.c index a63d4d2a..a5d2b025 100644 --- a/mood.c +++ b/mood.c @@ -39,10 +39,16 @@ struct afs_statistics { int64_t num_played_qd; /** Quadratic deviation of last played time. */ int64_t last_played_qd; + /** Correction factor for the num played score. */ + int64_t num_played_correction; + /** Correction factor for the last played score. */ + int64_t last_played_correction; + /** Common divisor of the correction factors. */ + int64_t normalization_divisor; /** Number of admissible files */ unsigned num; }; -static struct afs_statistics statistics; +static struct afs_statistics statistics = {.normalization_divisor = 1}; /** * Each line of the current mood corresponds to a mood_item. @@ -499,20 +505,59 @@ int mood_check_callback(struct afs_callback_arg *aca) check_mood)); } -static int64_t normalized_value(int64_t x, int64_t n, int64_t sum, int64_t qd) -{ - if (!n || !qd) - return 0; - return 100 * (n * x - sum) / (int64_t)int_sqrt(n) / (int64_t)int_sqrt(qd); -} - +/* + * The normalized num_played and last_played values are defined as + * + * nn := -(np - mean_n) / sigma_n and nl := -(lp - mean_l) / sigma_l + * + * For a (hypothetical) file with np = 0 and lp = now we thus have + * + * nn = mean_n / sigma_n =: hn > 0 + * nl = -(now - mean_l) / sigma_l =: hl < 0 + * + * We design the score function so that both contributions get the same + * weight. Define the np and lp score of an arbitrary file as + * + * sn := nn * -hl and sl := nl * hn + * + * Example: + * num_played mean/sigma: 87/14 + * last_played mean/sigma: 45/32 days + * + * We have hn = 87 / 14 = 6.21 and hl = -45 / 32 = -1.41. Multiplying + * nn of every file with the correction factor 1.41 and nl with + * 6.21 makes the weight of the two contributions equal. + * + * The total score s := sn + sl has the representation + * + * s = -cn * (np - mean_n) - cl * (lp - mean_l) + * + * with positive correction factors + * + * cn = (now - mean_l) / (sqrt(ql) * sqrt(qn) / n) + * cl = mean_n / (sqrt(ql) * sqrt(qn) / n) + * + * where ql and qn are the quadratic deviations stored in the statistics + * structure and n is the number of admissible files. To avoid integer + * overflows and rounding errors we store the common divisor of the + * correction factors separately. + */ static long compute_score(struct afs_info *afsi, long mood_score) { - mood_score -= normalized_value(afsi->num_played, statistics.num, - statistics.num_played_sum, statistics.num_played_qd); - mood_score -= normalized_value(afsi->last_played, statistics.num, - statistics.last_played_sum, statistics.last_played_qd); - return mood_score / 3; + int64_t mean_n, mean_l,score_n, score_l; + + assert(statistics.normalization_divisor > 0); + assert(statistics.num > 0); + mean_n = statistics.num_played_sum / statistics.num; + mean_l = statistics.last_played_sum / statistics.num; + + score_n = -((int64_t)afsi->num_played - mean_n) + * statistics.num_played_correction + / statistics.normalization_divisor; + score_l = -((int64_t)afsi->last_played - mean_l) + * statistics.last_played_correction + / statistics.normalization_divisor; + return (mood_score + score_n + score_l) / 3; } static int add_afs_statistics(const struct osl_row *row) @@ -556,6 +601,7 @@ static int del_afs_statistics(const struct osl_row *row) assert(n); if (n == 1) { memset(&statistics, 0, sizeof(statistics)); + statistics.normalization_divisor = 1; return 1; } @@ -804,15 +850,11 @@ static int mood_update_audio_file(const struct osl_row *aft_row, return score_update(aft_row, percent); } -static void log_statistics(void) +/* sse: seconds since epoch. */ +static void log_statistics(int64_t sse) { unsigned n = statistics.num; int mean_days, sigma_days; - /* - * We can not use the "now" pointer from sched.c here because we are - * called before schedule(), which initializes "now". - */ - struct timeval rnow; assert(current_mood); PARA_NOTICE_LOG("loaded mood %s\n", current_mood->name? @@ -822,13 +864,18 @@ static void log_statistics(void) return; } PARA_NOTICE_LOG("%u admissible files\n", statistics.num); - clock_get_realtime(&rnow); - mean_days = (rnow.tv_sec - statistics.last_played_sum / n) / 3600 / 24; + mean_days = (sse - statistics.last_played_sum / n) / 3600 / 24; sigma_days = int_sqrt(statistics.last_played_qd / n) / 3600 / 24; PARA_NOTICE_LOG("last_played mean/sigma: %d/%d days\n", mean_days, sigma_days); - PARA_NOTICE_LOG("num_played mean/sigma: %llu/%llu\n", - (long long unsigned)statistics.num_played_sum / n, - (long long unsigned)int_sqrt(statistics.num_played_qd / n)); + PARA_NOTICE_LOG("num_played mean/sigma: %" PRId64 "/%" PRIu64 "\n", + statistics.num_played_sum / n, + int_sqrt(statistics.num_played_qd / n)); + PARA_NOTICE_LOG("num_played correction factor: %" PRId64 "\n", + statistics.num_played_correction); + PARA_NOTICE_LOG("last_played correction factor: %" PRId64 "\n", + statistics.last_played_correction); + PARA_NOTICE_LOG("normalization divisor: %" PRId64 "\n", + statistics.normalization_divisor); } /** @@ -841,6 +888,25 @@ void close_current_mood(void) destroy_mood(current_mood); current_mood = NULL; memset(&statistics, 0, sizeof(statistics)); + statistics.normalization_divisor = 1; +} + +static void compute_correction_factors(int64_t sse) +{ + struct afs_statistics *s = &statistics; + + if (s->num > 0) { + s->normalization_divisor = int_sqrt(s->last_played_qd) + * int_sqrt(s->num_played_qd) / s->num / 100; + s->num_played_correction = sse - s->last_played_sum / s->num; + s->last_played_correction = s->num_played_sum / s->num; + } + if (s->num_played_correction == 0) + s->num_played_correction = 1; + if (s->normalization_divisor == 0) + s->normalization_divisor = 1; + if (s->last_played_correction == 0) + s->last_played_correction = 1; } /** @@ -869,6 +935,11 @@ int change_current_mood(const char *mood_name, char **errmsg) .size = 0, .array = NULL }; + /* + * We can not use the "now" pointer from sched.c here because we are + * called before schedule(), which initializes "now". + */ + struct timeval rnow; if (mood_name) { struct mood *m; @@ -901,6 +972,9 @@ int change_current_mood(const char *mood_name, char **errmsg) *errmsg = make_message("audio file loop failed"); return ret; } + clock_get_realtime(&rnow); + compute_correction_factors(rnow.tv_sec); + log_statistics(rnow.tv_sec); for (i = 0; i < statistics.num; i++) { struct admissible_file_info *a = aa.array + i; ret = add_to_score_table(a->aft_row, a->score); @@ -911,7 +985,6 @@ int change_current_mood(const char *mood_name, char **errmsg) goto out; } } - log_statistics(); ret = statistics.num; out: free(aa.array);