]> git.tue.mpg.de Git - paraslash.git/commitdiff
base64: Speed up decoder by using a table.
author Andre Noll <maan@tuebingen.mpg.de>
Sat, 9 Apr 2016 18:44:28 +0000 (20:44 +0200)
committer Andre Noll <maan@tuebingen.mpg.de>
Tue, 23 Aug 2016 14:51:24 +0000 (16:51 +0200)
The current implementation calls strchr() for each character in the
encoded data to find the offset in the Base64[] array that corresponds
to six decoded bits. This makes the algorithm scale poorly.

This commit introduces a lookup table of size 256 which simplifies
the code and improves the performance of the decoder.

base64.c

index cee51571a7416b396583932be2376398e0670573..7b8fe2920cf6e55fb02a23cb7b57624e3139d587 100644 (file)
--- a/base64.c
+++ b/base64.c
 
 static const char Base64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const unsigned char base64_tab[256] = {
+       255, 255, 255, 255, 255, 255, 255, 255, /* 00-07 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 08-0f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 10-17 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 18-1f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 20-27 */
+       255, 255, 255,  62, 255, 255, 255,  63, /* 28-2f */
+       52 ,  53,  54,  55,  56,  57,  58,  59, /* 30-37 */
+       60 ,  61, 255, 255, 255, 255, 255, 255, /* 38-3f */
+       255,   0,   1,   2,   3,   4,   5,   6, /* 40-47 */
+       7  ,   8,   9,  10,  11,  12,  13,  14, /* 48-4f */
+       15 ,  16,  17,  18,  19,  20,  21,  22, /* 50-57 */
+       23 ,  24,  25, 255, 255, 255, 255, 255, /* 58-5f */
+       255,  26,  27,  28,  29,  30,  31,  32, /* 60-67 */
+       33 ,  34,  35,  36,  37,  38,  39,  40, /* 68-6f */
+       41 ,  42,  43,  44,  45,  46,  47,  48, /* 70-77 */
+       49 ,  50,  51, 255, 255, 255, 255, 255, /* 78-7f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 80-87 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 88-8f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 90-97 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 98-9f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* a0-a7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* a8-af */
+       255, 255, 255, 255, 255, 255, 255, 255, /* b0-b7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* b8-bf */
+       255, 255, 255, 255, 255, 255, 255, 255, /* c0-c7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* c8-cf */
+       255, 255, 255, 255, 255, 255, 255, 255, /* d0-d7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* d8-df */
+       255, 255, 255, 255, 255, 255, 255, 255, /* e0-e7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* e8-ef */
+       255, 255, 255, 255, 255, 255, 255, 255, /* f0-f7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* f8-ff */
+};
 
 /** Maximal possible size of the decoded data. */
 #define BASE64_MAX_DECODED_SIZE(_encoded_size) ((_encoded_size) / 4 * 3)
@@ -41,61 +75,54 @@ static const char Base64[] =
 int base64_decode(char const *src, size_t encoded_size, char **result,
                size_t *decoded_size)
 {
-       unsigned int tarindex, state;
-       int ch;
-       char *pos;
-       const char *end = src + encoded_size;
-       unsigned char *target;
+       size_t i, j, state; /* source/target indices */
+       const char *end = src + encoded_size, *p;
+       unsigned char *target, uch;
 
        if (encoded_size == (size_t)-1)
                encoded_size = strlen(src);
        target = para_malloc(BASE64_MAX_DECODED_SIZE(encoded_size) + 1);
 
-       state = 0;
-       tarindex = 0;
-
-       while (src < end) {
-               ch = *src++;
-               if (para_isspace(ch)) /* Skip whitespace anywhere. */
+       for (
+               i = 0, j = 0, state = 0;
+               i < encoded_size && (uch = src[i]) != '\0';
+               i++
+       ) {
+               if (para_isspace(uch)) /* Skip whitespace anywhere. */
                        continue;
-
-               if (ch == PAD64)
+               if (uch == PAD64)
                        break;
-
-               pos = strchr(Base64, ch);
-               if (pos == NULL) /* A non-base64 character. */
+               if (base64_tab[uch] == 255) /* A non-base64 character. */
                        goto fail;
-
+               uch = base64_tab[uch];
                switch (state) {
                case 0:
-                       target[tarindex] = (pos - Base64) << 2;
-                       state = 1;
+                       target[j] = uch << 2;
                        break;
                case 1:
-                       target[tarindex] |= (pos - Base64) >> 4;
-                       target[tarindex + 1] = ((pos - Base64) & 0x0f) << 4;
-                       tarindex++;
-                       state = 2;
+                       target[j] |= uch >> 4;
+                       j++;
+                       target[j] = (uch & 0x0f) << 4;
                        break;
                case 2:
-                       target[tarindex] |= (pos - Base64) >> 2;
-                       target[tarindex + 1] = ((pos - Base64) & 0x03) << 6;
-                       tarindex++;
-                       state = 3;
+                       target[j] |= uch >> 2;
+                       j++;
+                       target[j] = (uch & 0x03) << 6;
                        break;
                case 3:
-                       target[tarindex] |= pos - Base64;
-                       tarindex++;
-                       state = 0;
+                       target[j] |= uch;
+                       j++;
                        break;
                }
+               state = (state + 1) % 4;
        }
+       p = (i < encoded_size)? src + i : NULL;
        /*
         * We are done decoding Base-64 chars.  Let's see if we ended
         * on a byte boundary, and/or with erroneous trailing characters.
         */
-       if (*src == PAD64) { /* We got a pad char. */
-               ch = *src++; /* Skip it, get next. */
+       if (p && *p == PAD64) { /* We got a pad char. Skip it, get next. */
+               p++;
                switch (state) {
                case 0: /* Invalid = in first position */
                case 1: /* Invalid = in second position */
@@ -103,22 +130,22 @@ int base64_decode(char const *src, size_t encoded_size, char **result,
 
                case 2: /* Valid, means one byte of info */
                        /* Skip any number of spaces. */
-                       for (; ch != '\0'; ch = *src++)
-                               if (!para_isspace(ch))
+                       for (; p < end && *p != '\0'; p++)
+                               if (!para_isspace(*p))
                                        break;
                        /* Make sure there is another trailing = sign. */
-                       if (ch != PAD64)
+                       if (*p != PAD64)
                                goto fail;
-                       ch = *src++;            /* Skip the = */
                        /* Fall through to "single trailing =" case. */
+                       p++;
 
                case 3: /* Valid, means two bytes of info */
                        /*
                         * We know this char is an =.  Is there anything but
                         * whitespace after it?
                         */
-                       for (; ch != '\0'; ch = *src++)
-                               if (!para_isspace(ch))
+                       for (; p < end && *p != '\0'; p++)
+                               if (!para_isspace(*p))
                                        goto fail;
                        /*
                         * Now make sure for cases 2 and 3 that the "extra"
@@ -126,7 +153,7 @@ int base64_decode(char const *src, size_t encoded_size, char **result,
                         * zeros.  If we don't check them, they become a
                         * subliminal channel.
                         */
-                       if (target[tarindex] != 0)
+                       if (target[j] != 0)
                                goto fail;
                }
        } else {
@@ -138,10 +165,10 @@ int base64_decode(char const *src, size_t encoded_size, char **result,
                        goto fail;
        }
        /* success */
-       target[tarindex] = '\0'; /* just to be sure */
-       *result = (char *)target;
+       target[j] = '\0'; /* just to be sure */
        if (decoded_size)
-               *decoded_size = tarindex;
+               *decoded_size = j;
+       *result = (char *)target;
        return 1;
 fail:
        free(target);