denyhosts/clamscan/libclamav/message.c

/*
 *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Nigel Horne
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 * TODO: Optimise messageExport, decodeLine, messageIsEncoding
 */

#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif

#ifdef CL_THREAD_SAFE
#ifndef _REENTRANT
#define _REENTRANT /* for Solaris 2.8 */
#endif
#endif

#ifdef C_DARWIN
#include <sys/types.h>
#endif
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include <ctype.h>
#include <stdio.h>
#include <stdbool.h>

#ifdef CL_THREAD_SAFE
#include <pthread.h>
#endif

#include "others.h"
#include "str.h"
#include "filetypes.h"

#include "mbox.h"
#include "clamav.h"
#include "json_api.h"

#ifndef isblank
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
#endif

#define RFC2045LENGTH 76 /* maximum number of characters on a line */

static int messageHasArgument(const message *m, const char *variable);
static void messageIsEncoding(message *m);
static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
static void sanitiseBase64(char *s);
#ifdef __GNUC__
static unsigned char hex(char c) __attribute__((const));
static unsigned char base64(char c) __attribute__((const));
static unsigned char uudecode(char c) __attribute__((const));
#else
static unsigned char hex(char c);
static unsigned char base64(char c);
static unsigned char uudecode(char c);
#endif
static const char *messageGetArgument(const message *m, size_t arg);
static void *messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), int (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
static int usefulArg(const char *arg);
static void messageDedup(message *m);
static char *rfc2231(const char *in);
static int simil(const char *str1, const char *str2);

/*
 * These maps are ordered in decreasing likelihood of their appearance
 * in an e-mail. Probably these should be in a table...
 *
 * encoding_map pairs a Content-Transfer-Encoding token with the
 * encoding_type used for it. The list ends with a {NULL, NOENCODING}
 * sentinel; messageSetEncoding() walks it until it reaches the entry whose
 * string is NULL. Entries marked "incorrect" are tokens that are listed here
 * even though they are not the proper name of the encoding.
 */
static const struct encoding_map {
    const char *string; /* token as it appears in the header, NULL marks the end */
    encoding_type type; /* encoding associated with that token */
} encoding_map[] = {/* rfc2045 */
                    {"7bit", NOENCODING},
                    {"text/plain", NOENCODING},
                    {"quoted-printable", QUOTEDPRINTABLE}, /* rfc2045 */
                    {"base64", BASE64},                    /* rfc2045 */
                    {"8bit", EIGHTBIT},
                    {"binary", BINARY},
                    {"x-uuencode", UUENCODE}, /* uuencode(5) */
                    {"x-yencode", YENCODE},
                    {"x-binhex", BINHEX},
                    {"us-ascii", NOENCODING}, /* incorrect */
                    {"x-uue", UUENCODE},      /* incorrect */
                    {"uuencode", UUENCODE},   /* incorrect */
                    {NULL, NOENCODING}};

/*
 * Pairs a top-level MIME type name with its mime_type value. The list ends
 * with a {NULL, TEXT} sentinel. messageSetMimeType() loads these entries into
 * its lookup table and also scans them when guessing the closest match for an
 * unknown type.
 */
static const struct mime_map {
    const char *string; /* top-level type name, NULL marks the end */
    mime_type type;     /* value stored in the message for that name */
} mime_map[] = {
    {"text", TEXT},
    {"multipart", MULTIPART},
    {"application", APPLICATION},
    {"audio", AUDIO},
    {"image", IMAGE},
    {"message", MESSAGE},
    {"video", VIDEO},
    {NULL, TEXT}};

/*
 * See RFC2045, section 6.8, table 1
 *
 * Indexed by byte value (0..255). 'A'-'Z' give 0-25, 'a'-'z' give 26-51,
 * '0'-'9' give 52-61, '+' gives 62 and '/' gives 63. The padding character
 * '=' gives 0. Every other byte gives 255.
 * NOTE(review): 255 presumably means "not a base64 character" to the
 * consumer of this table, which is outside this part of the file - confirm.
 */
static const unsigned char base64Table[256] = {
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 0, 255, 255,
    255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
    255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};

/*
 * Allocate a zero-filled message and mark it as having no MIME type yet.
 * Returns the new message, or NULL if the allocation failed.
 */
message *
messageCreate(void)
{
    message *fresh = (message *)cli_calloc(1, sizeof(message));

    if (fresh == NULL)
        return NULL;

    fresh->mimeType = NOMIME;
    return fresh;
}

/*
 * Reset the message (releasing what it owns) and then free the message
 * structure itself. A NULL pointer is silently ignored.
 */
void messageDestroy(message *m)
{
    if (m != NULL) {
        messageReset(m);
        free(m);
    }
}

/*
 * Release everything the message owns (subtype, disposition type, argument
 * list, body text, encoding list and, when built with JSON support, the JSON
 * object), then zero the structure and set its MIME type back to NOMIME.
 * The message structure itself is not freed. A NULL pointer is ignored.
 */
void messageReset(message *m)
{
    size_t i;

    if (NULL == m) {
        return;
    }

    /* free(NULL) is a no-op, so these members need no guard */
    free(m->mimeSubtype);
    free(m->mimeDispositionType);

    if (m->mimeArguments) {
        for (i = 0; i < m->numberOfArguments; i++)
            free(m->mimeArguments[i]);
        free(m->mimeArguments);
    }

    if (m->body_first)
        textDestroy(m->body_first);

    if (0 != m->base64chars) {
        cli_errmsg("Internal email parse error: message base64chars should be 0 when resetting the message\n");
    }

    if (m->encodingTypes) {
        /* An allocated list with a zero count means the two fields got out of step */
        if (0 == m->numberOfEncTypes) {
            cli_errmsg("Internal email parse error: message numberOfEncTypes should not be 0 if encoding types are set\n");
        }

        free(m->encodingTypes);
    }

#if HAVE_JSON
    if (m->jobj)
        cli_json_delobj(m->jobj);
#endif

    memset(m, '\0', sizeof(message));
    m->mimeType = NOMIME;
}

/*
 * Handle the Content-Type header. The syntax is in RFC1341.
 *
 * mess: message whose mimeType is to be set
 * type: the top-level type name; leading non-alphabetic characters are skipped
 *
 * Return success (1) or failure (0). Failure happens when:
 *  - mess or type is NULL, or type contains no alphabetic character,
 *  - the lookup table could not be created,
 *  - the type is unknown and the message already has a MIME type.
 * An unknown type on a message that has no type yet is mapped as follows:
 * "x-..." becomes MEXTENSION, "plain" becomes TEXT, a name that is at least
 * 50% similar to a known one becomes that type, anything else APPLICATION.
 */
int messageSetMimeType(message *mess, const char *type)
{
#ifdef CL_THREAD_SAFE
    static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
    const struct mime_map *m;
    int typeval;
    static table_t *mime_table;

    if (mess == NULL) {
        cli_dbgmsg("messageSetMimeType: NULL message pointer\n");
        return 0;
    }

    if (type == NULL) {
        cli_dbgmsg("messageSetMimeType: Empty content-type field\n");
        return 0;
    }

    cli_dbgmsg("messageSetMimeType: '%s'\n", type);

    /*
     * Ignore leading spaces (and anything else that is not a letter).
     * The cast is required: passing a negative char to isalpha() is
     * undefined, and header bytes may well have the top bit set.
     */
    while (!isalpha((unsigned char)*type)) {
        if (*type == '\0')
            return 0;
        type++;
    }

    /* The lookup table is built once, on first use, under the mutex */
#ifdef CL_THREAD_SAFE
    pthread_mutex_lock(&mime_mutex);
#endif
    if (mime_table == NULL) {
        mime_table = tableCreate();
        if (mime_table == NULL) {
#ifdef CL_THREAD_SAFE
            pthread_mutex_unlock(&mime_mutex);
#endif
            return 0;
        }

        for (m = mime_map; m->string; m++)
            if (!tableInsert(mime_table, m->string, m->type)) {
                tableDestroy(mime_table);
                mime_table = NULL;
#ifdef CL_THREAD_SAFE
                pthread_mutex_unlock(&mime_mutex);
#endif
                return 0;
            }
    }
#ifdef CL_THREAD_SAFE
    pthread_mutex_unlock(&mime_mutex);
#endif

    typeval = tableFind(mime_table, type);

    if (typeval != -1) {
        mess->mimeType = (mime_type)typeval;
        return 1;
    }
    if (mess->mimeType == NOMIME) {
        if (strncasecmp(type, "x-", 2) == 0)
            mess->mimeType = MEXTENSION;
        else {
            /*
             * Force scanning of strange messages
             */
            if (strcasecmp(type, "plain") == 0) {
                cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
                mess->mimeType = TEXT;
            } else {
                /*
                 * Not a known name and not the common
                 *    Content-Type: plain/text
                 * mix-up handled above, so pick the known type
                 * whose name is most similar to what we were given
                 */
                int highestSimil = 0, t = -1;
                const char *closest = NULL;

                for (m = mime_map; m->string; m++) {
                    const int s = simil(m->string, type);

                    if (s > highestSimil) {
                        highestSimil = s;
                        closest      = m->string;
                        t            = m->type;
                    }
                }
                if (highestSimil >= 50) {
                    cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%d%% certainty)\n",
                               type, closest,
                               highestSimil);
                    mess->mimeType = (mime_type)t;
                } else {
                    cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
                    mess->mimeType = APPLICATION;
                }
            }
        }
        return 1;
    }
    return 0;
}

/*
 * Return the MIME type stored in the message.
 * A NULL message is reported as an error and yields NOMIME.
 */
mime_type
messageGetMimeType(const message *m)
{
    if (m != NULL)
        return m->mimeType;

    cli_errmsg("Internal email parser error: message is pointer is NULL when trying to get MIME type\n");
    return NOMIME;
}

/*
 * Replace the message's MIME subtype with a copy of the given string.
 * A NULL subtype is stored as the empty string. A NULL message is reported
 * as an error and nothing is changed.
 */
void messageSetMimeSubtype(message *m, const char *subtype)
{
    const char *value = subtype;

    if (m == NULL) {
        cli_errmsg("Internal email parser error: message is pointer is NULL when trying to set MIME sub-type\n");
        return;
    }

    if (value == NULL) {
        /*
         * Broken content-type lines do turn up, e.g.
         *    Content-Type: text/
         */
        cli_dbgmsg("Empty content subtype\n");
        value = "";
    }

    /* drop the previous value (if any) before storing the new copy */
    free(m->mimeSubtype);
    m->mimeSubtype = cli_strdup(value);
}

const char *
messageGetMimeSubtype(const message *m)
{
    return (m->mimeSubtype) ? m->mimeSubtype : "";
}

/*
 * Replace the message's Content-Disposition type.
 *
 * Any previous value is freed. Leading white space in disptype is skipped
 * and the stored copy is passed through strstrip(). A NULL, empty or
 * all-white-space disptype leaves the disposition type unset (NULL).
 * A NULL message is reported as an error and nothing is changed.
 */
void messageSetDispositionType(message *m, const char *disptype)
{
    if (m == NULL) {
        cli_errmsg("Internal email parser error: message is pointer is NULL when trying to set disposition type\n");
        return;
    }

    free(m->mimeDispositionType);
    m->mimeDispositionType = NULL;

    if (disptype == NULL)
        return;

    /*
     * It's broken for there to be an entry such as "Content-Disposition:"
     * However some spam and viruses are rather broken, it's a sign
     * that something is wrong if we get that - maybe we should force a
     * scan of this part
     *
     * The cast must be to unsigned char: isspace() is undefined for
     * negative values, and a cast to int keeps the sign.
     */
    while (isspace((unsigned char)*disptype))
        disptype++;

    if (*disptype) {
        m->mimeDispositionType = cli_strdup(disptype);
        if (m->mimeDispositionType)
            strstrip(m->mimeDispositionType);
    }
}

const char *
messageGetDispositionType(const message *m)
{
    return (m->mimeDispositionType) ? m->mimeDispositionType : "";
}

/*
 * Add one "name=value" MIME argument to the message.
 *
 * Leading white space is skipped. Empty arguments, arguments rejected by
 * usefulArg() and arguments already stored (compared case-insensitively)
 * are ignored. The argument is stored as the string returned by rfc2231();
 * a result without an '=' is discarded unless it starts with "filename".
 * Storing a "filename=" or "name=" argument on a message that has no MIME
 * type yet forces the type to application.
 *
 * TODO:
 *    Arguments are held on a per message basis, they should be held on
 * a per section basis. Otherwise what happens if two sections have two
 * different values for charset? Probably doesn't matter for the use this
 * code will be given, but will need fixing if this code is used elsewhere
 */
void messageAddArgument(message *m, const char *arg)
{
    size_t offset;
    char *p;

    if (m == NULL) {
        cli_errmsg("Internal email parser error: message is pointer is NULL when trying to add an argument\n");
        return;
    }

    if (arg == NULL)
        return; /* Note: this is not an error condition */

    /* unsigned char cast: isspace() is undefined for negative char values */
    while (isspace((unsigned char)*arg))
        arg++;

    if (*arg == '\0')
        /* Empty argument? Probably a broken mail client... */
        return;

    cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);

    if (!usefulArg(arg))
        return;

    /* Find the first unused (NULL) slot, bailing out on a duplicate */
    for (offset = 0; offset < m->numberOfArguments; offset++)
        if (m->mimeArguments[offset] == NULL)
            break;
        else if (strcasecmp(arg, m->mimeArguments[offset]) == 0)
            return; /* already in there */

    if (offset == m->numberOfArguments) {
        /* No free slot: grow the array by one entry */
        char **q;

        m->numberOfArguments++;
        q = (char **)cli_realloc(m->mimeArguments, m->numberOfArguments * sizeof(char *));
        if (q == NULL) {
            m->numberOfArguments--;
            return;
        }
        m->mimeArguments = q;
    }

    /* The slot is assigned even on failure, so it is never left uninitialised */
    p = m->mimeArguments[offset] = rfc2231(arg);
    if (!p) {
        /* problem inside rfc2231() */
        cli_dbgmsg("messageAddArgument, error from rfc2231()\n");
        return;
    }

    if (strchr(p, '=') == NULL) {
        if (strncmp(p, "filename", 8) == 0) {
            /*
             * FIXME: Bounce message handling is corrupting the in
             * core copies of headers
             */
            if (strlen(p) > 8) {
                cli_dbgmsg("Possible data corruption fixed\n");
                p[8] = '=';
            } else {
                cli_dbgmsg("Possible data corruption not fixed\n");
            }
        } else {
            if (*p)
                cli_dbgmsg("messageAddArgument, '%s' contains no '='\n", p);
            free(m->mimeArguments[offset]);
            m->mimeArguments[offset] = NULL;
            return;
        }
    }

    /*
     * This is terribly broken from an RFC point of view but is useful
     * for catching viruses which have a filename but no type of
     * mime. By pretending defaulting to an application rather than
     * to nomime we can ensure they're saved and scanned
     */
    if ((strncasecmp(p, "filename=", 9) == 0) || (strncasecmp(p, "name=", 5) == 0))
        if (messageGetMimeType(m) == NOMIME) {
            cli_dbgmsg("Force mime encoding to application\n");
            messageSetMimeType(m, "application");
        }
}

/*
 * Add in all the arguments.
 * Cope with:
 *    name="foo bar.doc"
 *    charset=foo name=bar
 *
 * s is split into "key=value" fields, each of which is handed to
 * messageAddArgument(). Fields are separated by white space and/or ';'.
 * A value may be enclosed in double quotes, in which case the quotes are
 * dropped and the value may contain white space. A ':' is accepted in
 * place of '=' when the remaining text contains no '='.
 * Parsing stops at the first field that cannot be understood.
 */
void messageAddArguments(message *m, const char *s)
{
    const char *string = s;

    /* Check before logging: passing NULL for a %s conversion is undefined */
    if (string == NULL) {
        cli_errmsg("Internal email parser error: message is pointer is NULL when trying to add message arguments\n");
        return;
    }

    cli_dbgmsg("Add arguments '%s'\n", string);

    while (*string) {
        const char *key, *cptr;
        char *data, *field = NULL;
        size_t datasz = 0;

        /* unsigned char casts below: isspace() is undefined for negative char values */
        if (isspace((unsigned char)*string) || (*string == ';')) {
            string++;
            continue;
        }

        key = string;

        data = strchr(string, '=');

        /*
         * Some spam breaks RFC2045 by using ':' instead of '='
         * e.g.:
         *    Content-Type: text/html; charset:ISO-8859-1
         * should be:
         *    Content-type: text/html; charset=ISO-8859-1
         *
         * We give up with lines that are completely broken because
         * we don't have ESP and don't know what was meant to be there.
         * It's unlikely to really be a problem.
         */
        if (data == NULL)
            data = strchr(string, ':');

        if (data == NULL) {
            /*
             * Completely broken, give up
             */
            cli_dbgmsg("Can't parse header \"%s\"\n", s);
            return;
        }

        string = &data[1];

        /*
         * Handle white space to the right of the equals sign
         * This breaks RFC2045 which has:
         *    parameter := attribute "=" value
         *    attribute := token   ; case-insensitive
         *    token  :=  1*<any (ASCII) CHAR except SPACE, CTLs,
         *        or tspecials>
         * But too many MUAs ignore this
         */
        while (isspace((unsigned char)*string))
            string++;

        cptr = string;

        if (*string)
            string++;

        if (*cptr == '"') {
            char *ptr, *kcopy;

            /*
             * The field is in quotes, so look for the
             * closing quotes
             */
            kcopy = cli_strdup(key);

            if (kcopy == NULL)
                return;

            /* Cut the copy at the separator so that it holds just the key */
            ptr = strchr(kcopy, '=');
            if (ptr == NULL) {
                ptr = strchr(kcopy, ':');
                if (ptr == NULL) {
                    cli_dbgmsg("Can't parse header \"%s\"\n", s);
                    free(kcopy);
                    return;
                }
            }

            *ptr = '\0';

            /* Resume parsing after the closing quote, or stop if there is none */
            string = strchr(++cptr, '"');

            if (string == NULL) {
                cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
                string = "";
            } else
                string++;

            if (!usefulArg(kcopy)) {
                free(kcopy);
                continue;
            }

            data = cli_strdup(cptr);

            if (!data) {
                cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a missed virus, report it to bugs@clamav.net\n", s);
                free(kcopy);
                return;
            }

            ptr = strchr(data, '"');

            if (ptr == NULL) {
                /*
                 * Weird e-mail header such as:
                 * Content-Type: application/octet-stream; name="
                 * "
                 * Content-Transfer-Encoding: base64
                 * Content-Disposition: attachment; filename="
                 * "
                 *
                 * Use the end of line as data.
                 */
            } else
                *ptr = '\0';

            /* key + '=' + value + terminating NUL */
            datasz = strlen(kcopy) + strlen(data) + 2;
            field  = cli_realloc(kcopy, datasz);
            if (field) {
                cli_strlcat(field, "=", datasz);
                cli_strlcat(field, data, datasz);
            } else {
                free(kcopy);
            }
            free(data);
        } else {
            size_t len;

            if (*cptr == '\0') {
                cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
                return;
            }

            /*
             * The field is not in quotes, so look for the closing
             * white space
             */
            while ((*string != '\0') && !isspace((unsigned char)*string))
                string++;

            /* Copy everything from the start of the key to the end of the value */
            len   = (size_t)string - (size_t)key + 1;
            field = cli_malloc(len);

            if (field) {
                memcpy(field, key, len - 1);
                field[len - 1] = '\0';
            }
        }
        if (field) {
            messageAddArgument(m, field);
            free(field);
        }
    }
}

/*
 * Return the stored MIME argument at index arg without copying it.
 * Yields "" when the index is out of range or the slot is empty; a NULL
 * message is additionally reported as an error.
 */
static const char *
messageGetArgument(const message *m, size_t arg)
{
    const char *value;

    if (m == NULL) {
        cli_errmsg("Internal email parse error: message pointer is NULL when trying to get a message argument\n");
        return "";
    }

    if (arg >= m->numberOfArguments)
        return "";

    value = m->mimeArguments[arg];
    if (value == NULL)
        return "";

    return value;
}

/*
 * Find a MIME variable from the header and return a COPY to the value of that
 * variable. The caller must free the copy
 *
 * The stored arguments are searched in order for the first one that starts
 * with `variable' (case-insensitive prefix match). If the value is enclosed
 * in double quotes, the copy holds the text between the opening quote and
 * the next quote.
 *
 * Returns NULL if m or variable is NULL, if no argument matches, if the
 * first matching argument has no '=' after the name, or if memory runs out.
 *
 * NOTE(review): because the match is a prefix match and the search stops at
 * the first hit, looking for "name" can stop on an argument whose name merely
 * begins with "name" - confirm that callers are happy with this.
 */
char *
messageFindArgument(const message *m, const char *variable)
{
    size_t i;
    size_t len;

    if (m == NULL || variable == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when finding message arguments\n");
        return NULL;
    }

    len = strlen(variable);

    for (i = 0; i < m->numberOfArguments; i++) {
        const char *ptr;

        ptr = messageGetArgument(m, i);
        if ((ptr == NULL) || (*ptr == '\0'))
            continue;
#ifdef CL_DEBUG
        cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
                   (unsigned long)len, variable, ptr);
#endif
        if (strncasecmp(ptr, variable, len) == 0) {
            ptr = &ptr[len];
            /* unsigned char cast: isspace() is undefined for negative char values */
            while (isspace((unsigned char)*ptr))
                ptr++;
            if (*ptr != '=') {
                cli_dbgmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
                return NULL;
            }
            ptr++;
            if ((strlen(ptr) > 1) && (*ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
                /* Remove any quote characters */
                char *ret = cli_strdup(++ptr);
                char *p;

                if (ret == NULL)
                    return NULL;

                /*
                 * fix un-quoting of boundary strings from
                 * header, occurs if boundary was given as
                 *    'boundary="_Test_";'
                 *
                 * At least two quotes in string, assume
                 * quoted argument
                 * end string at next quote
                 */
                if ((p = strchr(ret, '"')) != NULL)
                    *p = '\0';

                return ret;
            }
            return cli_strdup(ptr);
        }
    }
    return NULL;
}

/*
 * Return a copy of the message's "filename" argument, falling back to the
 * "name" argument when there is no "filename". Both lookups go through
 * messageFindArgument(); the result is NULL when neither yields a value.
 */
char *
messageGetFilename(const message *m)
{
    char *found = (char *)messageFindArgument(m, "filename");

    if (found == NULL)
        found = (char *)messageFindArgument(m, "name");

    return found;
}

/*
 * Returns true (1) or false (0)
 *
 * Reports whether the first stored argument that starts with `variable'
 * (case-insensitive prefix match) is followed, after optional white space,
 * by an '=' sign. Returns 0 when m or variable is NULL, when nothing
 * matches, or when that first match has no '='.
 */
static int
messageHasArgument(const message *m, const char *variable)
{
    size_t i;
    size_t len;

    if (m == NULL || variable == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when checking if message has arguments\n");
        return 0;
    }

    len = strlen(variable);

    for (i = 0; i < m->numberOfArguments; i++) {
        const char *ptr;

        ptr = messageGetArgument(m, i);
        if ((ptr == NULL) || (*ptr == '\0'))
            continue;
#ifdef CL_DEBUG
        cli_dbgmsg("messageHasArgument: compare %lu bytes of %s with %s\n",
                   (unsigned long)len, variable, ptr);
#endif
        if (strncasecmp(ptr, variable, len) == 0) {
            ptr = &ptr[len];
            /* unsigned char cast: isspace() is undefined for negative char values */
            while (isspace((unsigned char)*ptr))
                ptr++;
            if (*ptr != '=') {
                cli_dbgmsg("messageHasArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
                return 0;
            }
            return 1;
        }
    }
    return 0;
}

/*
 * Returns 1 if the message has a "filename" argument or, failing that,
 * a "file" argument (both checked with messageHasArgument()), else 0.
 */
int messageHasFilename(const message *m)
{
    if (messageHasArgument(m, "filename"))
        return 1;

    return messageHasArgument(m, "file") ? 1 : 0;
}

/*
 * Record the transfer encoding(s) named in a Content-Transfer-Encoding
 * value.
 *
 * enctype may name several mechanisms separated by spaces or tabs; each
 * one is compared with encoding_map using simil(). An exact match (100) is
 * appended to m->encodingTypes unless that type is already present. A token
 * with no exact match is replaced by the closest known name when that is at
 * least 50% similar; otherwise both base64 and quoted-printable are enabled.
 * Those replacements are made by calling this function recursively.
 */
void messageSetEncoding(message *m, const char *enctype)
{
    const struct encoding_map *e;
    int i;
    char *type;

    if (m == NULL || enctype == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when setting message encoding type\n");
        return;
    }

    /*m->encodingType = EEXTENSION;*/

    /* unsigned char cast: the <ctype.h> functions are undefined for negative char values */
    while (isblank((unsigned char)*enctype))
        enctype++;

    cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);

    if (strcasecmp(enctype, "8 bit") == 0) {
        cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
        enctype = "8bit";
    }

    /*
     * Iterate through
     *    Content-Transfer-Encoding: base64 binary
     * cli_strtok's fieldno counts from 0
     */
    i = 0;
    while ((type = cli_strtok(enctype, i++, " \t")) != NULL) {
        int highestSimil    = 0;
        const char *closest = NULL;
        /* the same for every map entry, so work it out once per token */
        const int lowertype = tolower((unsigned char)type[0]);

        for (e = encoding_map; e->string; e++) {
            int sim;

            if ((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
                /*
                 * simil is expensive, I'm yet to encounter only
                 * one example of a missent encoding when the
                 * first character was wrong, so lets assume no
                 * match to save the call.
                 *
                 * That example was quoted-printable sent as
                 * X-quoted-printable.
                 */
                continue;

            if (strcmp(e->string, "uuencode") == 0)
                /*
                 * No need to test here - fast track visa will have
                 * handled uuencoded files
                 */
                continue;

            sim = simil(type, e->string);

            if (sim == 100) {
                int j;
                encoding_type *et;

                for (j = 0; j < m->numberOfEncTypes; j++)
                    if (m->encodingTypes[j] == e->type)
                        break;

                if (j < m->numberOfEncTypes) {
                    cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
                               type);
                    break;
                }

                et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
                if (et == NULL)
                    break;

                m->encodingTypes                        = et;
                m->encodingTypes[m->numberOfEncTypes++] = e->type;

                cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
                break;
            } else if (sim > highestSimil) {
                closest      = e->string;
                highestSimil = sim;
            }
        }

        /* Reaching the sentinel means no break above, i.e. no exact match */
        if (e->string == NULL) {
            /*
             * The stated encoding type is illegal, so we
             * use a best guess of what it should be.
             *
             * 50% is arbitrary. For example 7bi will match as
             * 66% certain to be 7bit
             */
            if (highestSimil >= 50) {
                /* highestSimil is an int, so it is printed with %d */
                cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%d%% certainty)\n",
                           type, closest, highestSimil);
                messageSetEncoding(m, closest);
            } else {
                cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
                /*
                 * Err on the side of safety, enable all
                 * decoding modules
                 */
                messageSetEncoding(m, "base64");
                messageSetEncoding(m, "quoted-printable");
            }
        }

        free(type);
    }
}

/*
 * Return the first encoding type recorded for the message, or NOENCODING
 * when none has been recorded. A NULL message is reported as an error and
 * also yields NOENCODING.
 */
encoding_type
messageGetEncoding(const message *m)
{
    if (m == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when checking message encoding type\n");
        return NOENCODING;
    }

    return (m->numberOfEncTypes != 0) ? m->encodingTypes[0] : NOENCODING;
}

/*
 * Append a line to the end of the message body.
 *
 * A new text node is added to the body list. If line is non-NULL and
 * carries data, the node refers to it via lineLink() and
 * messageIsEncoding() is run; otherwise the node holds an empty line.
 *
 * Returns 1 on success, -1 on failure (NULL message, inconsistent body
 * list, or out of memory). On failure the existing body list is left
 * untouched.
 */
int messageAddLine(message *m, line_t *line)
{
    text *node;

    if (m == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when adding line to message.\n");
        return -1;
    }

    if ((m->body_first != NULL) && (m->body_last == NULL)) {
        cli_errmsg("Internal email parser error: message 'body_last' pointer should not be NULL if 'body_first' is set.\n");
        return -1;
    }

    /*
     * Allocate into a local first, so that a failed allocation cannot
     * overwrite body_last and lose the tail of an existing list
     */
    node = (text *)cli_malloc(sizeof(text));
    if (node == NULL) {
        cli_errmsg("messageAddLine: out of memory for m->body_last\n");
        return -1;
    }

    node->t_next = NULL;
    node->t_line = NULL;

    if (m->body_first == NULL)
        m->body_first = node;
    else
        m->body_last->t_next = node;
    m->body_last = node;

    if (line && lineGetData(line)) {
        node->t_line = lineLink(line);

        messageIsEncoding(m);
    }

    return 1;
}

/*
 * Add the given line to the end of the given message
 * If needed a copy of the given line is taken which the caller must free
 * Line must not be terminated by a \n
 *
 * A NULL or empty data adds a blank line (a node whose t_line is NULL).
 * A line made up only of white space is stored as a single space.
 * A line identical to the previous one shares that line via lineLink()
 * instead of being created again.
 *
 * Returns 1 on success - including the case where a second consecutive
 * blank line is deliberately not stored - and -1 on failure (NULL message,
 * inconsistent body list, or out of memory).
 */
int messageAddStr(message *m, const char *data)
{
    /* set when the new line has the same text as the current last line */
    line_t *repeat = NULL;

    if (m == NULL) {
        cli_errmsg("messageAddStr: invalid arguments\n");
        return -1;
    }

    if (data) {
        if (*data == '\0')
            data = NULL;
        else {
            /*
             * If it's only white space, just store one space to
             * save memory. You must store something since it may
             * be a header line
             */
            int iswhite = 1;
            const char *p;

            /* bytes with the top bit set are treated as non-white without calling isspace() */
            for (p = data; *p; p++)
                if (((*p) & 0x80) || !isspace(*p)) {
                    iswhite = 0;
                    break;
                }
            if (iswhite) {
                /*cli_dbgmsg("messageAddStr: empty line: '%s'\n", data);*/
                data = " ";
            }
        }
    }

    if (m->body_first == NULL)
        /* first line of the body: the new node is both head and tail */
        m->body_last = m->body_first = (text *)cli_malloc(sizeof(text));
    else {
        if (m->body_last == NULL) {
            /* body_last stays NULL, so the check after this block returns -1 */
            cli_errmsg("Internal email parser error: message 'body_last' pointer should not be NULL if 'body_first' is set.\n");
        } else {
            if ((data == NULL) && (m->body_last->t_line == NULL))
                /*
                 * Although this would save time and RAM, some
                 * phish signatures have been built which need the
                 * blank lines
                 */
                if (messageGetMimeType(m) != TEXT)
                    /* don't save two blank lines in succession */
                    return 1;

            m->body_last->t_next = (text *)cli_malloc(sizeof(text));
            if (m->body_last->t_next == NULL) {
                /* allocation failed: run messageDedup() and try once more */
                messageDedup(m);
                m->body_last->t_next = (text *)cli_malloc(sizeof(text));
                if (m->body_last->t_next == NULL) {
                    cli_errmsg("messageAddStr: out of memory\n");
                    return -1;
                }
            }

            /* compare with the previous line before body_last is advanced */
            if (data && m->body_last->t_line && (strcmp(data, lineGetData(m->body_last->t_line)) == 0))
                repeat = m->body_last->t_line;

            m->body_last = m->body_last->t_next;
        }
    }

    if (m->body_last == NULL) {
        cli_errmsg("messageAddStr: out of memory\n");
        return -1;
    }

    m->body_last->t_next = NULL;

    if (data && *data) {
        if (repeat)
            /* same text as the previous line: share it */
            m->body_last->t_line = lineLink(repeat);
        else {
            m->body_last->t_line = lineCreate(data);

            if (m->body_last->t_line == NULL) {
                /* creation failed: run messageDedup() and try once more */
                messageDedup(m);
                m->body_last->t_line = lineCreate(data);

                if (m->body_last->t_line == NULL) {
                    cli_errmsg("messageAddStr: out of memory\n");
                    return -1;
                }
            }
            /* cli_chomp(m->body_last->t_text); */
            messageIsEncoding(m);
        }
    } else
        m->body_last->t_line = NULL;

    return 1;
}

/*
 * Put the contents of the given text at the end of the current object.
 * Can be used either to move a text object into a message, or to move a
 * message's text into another message only moving from a given offset.
 * The given text is emptied; it can be used again if needed, though be warned
 * that it will have an empty line at the start.
 *
 * m:           destination message
 * t:           first text node to move
 * old_message: message that t currently belongs to, or NULL
 *
 * Returns 0 on success and -1 on failure.
 *
 * NOTE(review): m is dereferenced without a NULL check.
 * NOTE(review): when the sanity check fails (t is not found in
 * old_message) every node of old_message has already been freed, but
 * m->body_first and old_message->body_first are not cleared before
 * returning -1 - confirm that callers do not touch them afterwards.
 */
int messageMoveText(message *m, text *t, message *old_message)
{
    int rc;

    if (m->body_first == NULL) {
        if ((NULL != old_message) &&
            (NULL != old_message->body_first)) {
            text *u;
            /*
             * t is within old_message which is about to be
             * destroyed
             */

            /* take over the list from t onwards; free the nodes that precede t */
            m->body_first = t;
            for (u = old_message->body_first; u != t;) {
                text *next;

                if (u->t_line) {
                    lineUnlink(u->t_line);
                    u->t_line = NULL;
                }
                next = u->t_next;

                free(u);
                u = next;

                if (u == NULL) {
                    cli_dbgmsg("messageMoveText sanity check: t not within old_message\n");
                    return -1;
                }
            }

            m->body_last            = old_message->body_last;
            old_message->body_first = old_message->body_last = NULL;

            /* Do any pointers need to be reset? */
            if ((old_message->bounce == NULL) &&
                (old_message->encoding == NULL) &&
                (old_message->binhex == NULL) &&
                (old_message->yenc == NULL))
                return 0;

            /* restart from the head so that the loop at the end visits every moved line */
            m->body_last = m->body_first;
            rc           = 0;
        } else {
            /* nothing to take over in place: let textMove() start a new list */
            m->body_last = m->body_first = textMove(NULL, t);
            if (m->body_first == NULL)
                return -1;
            else
                rc = 0;
        }
    } else {
        /* m already has a body: append t after the current last node */
        m->body_last = textMove(m->body_last, t);
        if (m->body_last == NULL) {
            /* remember the failure, but restore body_last by walking from the head */
            rc           = -1;
            m->body_last = m->body_first;
        } else
            rc = 0;
    }

    /* advance body_last to the real end, running messageIsEncoding() on each non-empty line passed */
    while (m->body_last->t_next) {
        m->body_last = m->body_last->t_next;
        if (m->body_last->t_line)
            messageIsEncoding(m);
    }

    return rc;
}

/*
 * See if the last line of the body (m->body_last) marks the start of a
 * non MIME inclusion that will need to be scanned. If it does, and no
 * line of that kind has been noted before, remember where it is.
 *
 * At most one marker is set per call; the tests are tried in the order
 * encoding, bounce, BinHex, yEnc. uuencode begin lines are not tracked here.
 *
 * The caller must ensure that m->body_last holds a non-empty line.
 */
static void
messageIsEncoding(message *m)
{
    static const char encoding[] = "Content-Transfer-Encoding";
    static const char binhex[]   = "(This file must be converted with BinHex 4.0)";
    text *const current          = m->body_last;
    const char *line             = lineGetData(current->t_line);

    /* A Content-Transfer-Encoding header other than 7bit */
    if ((m->encoding == NULL) &&
        (strncasecmp(line, encoding, sizeof(encoding) - 1) == 0) &&
        (strstr(line, "7bit") == NULL)) {
        m->encoding = current;
        return;
    }

    /* A Received: header that the file type matcher says starts an e-mail */
    if ((m->bounce == NULL) && m->ctx &&
        (strncasecmp(line, "Received: ", 10) == 0) &&
        (cli_compare_ftm_file((const unsigned char *)line, strlen(line), m->ctx->engine) == CL_TYPE_MAIL)) {
        m->bounce = current;
        return;
    }

    /*
     * Look for close matches for BinHex, but simil() is expensive so
     * only do it if it's likely to be found
     */
    if ((m->binhex == NULL) &&
        strstr(line, "BinHex") &&
        (simil(line, binhex) > 90)) {
        m->binhex = current;
        return;
    }

    if ((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
        m->yenc = current;
}

/*
 * Returns a pointer to the first line of the body of the message, or NULL
 * if there is no message. Note that it does NOT return a copy of the data,
 * the lines still belong to the message
 */
text *
messageGetBody(message *m)
{
    return (m != NULL) ? m->body_first : NULL;
}

/*
 * Export a message using the given export routines
 *
 * m:            the message to export, NULL is tolerated
 * dir:          passed unchanged as the second argument of setFilename
 * create:       creates an empty output object
 * destroy:      destroys an output object
 * setFilename:  gives the output object its name
 * addData:      appends decoded bytes to the output object
 * exportText:   copies a list of lines into the output object without
 *               decoding them (the "fast copy" path)
 * setCTX:       may be NULL; if not, it is called with m->ctx when that is set
 * destroy_text: passed on to exportText; when set, lines are also unlinked as
 *               they are decoded during the last encoding pass
 *
 * Returns the output object, or NULL if there is no message, no body, the
 * object could not be created or there was nothing to decode. On the fast
 * copy paths the value returned by exportText is returned instead.
 *
 * TODO: It really should export into an array, one
 * for each encoding algorithm. However, what it does is it returns the
 * last item that was exported. That's sufficient for now.
 */
static void *
messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), int (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text)
{
    void *ret;
    text *t_line;
    char *filename;
    int i;

    if (NULL == m)
        return NULL;

    if (messageGetBody(m) == NULL)
        return NULL;

    ret = (*create)();

    if (ret == NULL)
        return NULL;

    cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);

    if (m->numberOfEncTypes == 0) {
        /*
         * Fast copy
         */
        cli_dbgmsg("messageExport: Entering fast copy mode\n");

#if 0
        filename = messageGetFilename(m);

        if(filename == NULL) {
            cli_dbgmsg("Unencoded attachment sent with no filename\n");
            messageAddArgument(m, "name=attachment");
        } else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0))
            /*
             * Some virus attachments don't say how they've
             * been encoded. We assume base64
             */
            messageSetEncoding(m, "base64");
#else
        /* Prefer the "filename" argument, fall back to "name" */
        filename = (char *)messageFindArgument(m, "filename");
        if (filename == NULL) {
            filename = (char *)messageFindArgument(m, "name");

            if (filename == NULL) {
                cli_dbgmsg("Unencoded attachment sent with no filename\n");
                messageAddArgument(m, "name=attachment");
            } else
                /*
                 * Some virus attachments don't say how they've
                 * been encoded. We assume base64.
                 * RFC says encoding should be 7-bit.
                 */
                messageSetEncoding(m, "7-bit");
        }
#endif

        (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");

        if (filename)
            free((char *)filename);

        /*
         * The encoding count is tested again because the code above may
         * have called messageSetEncoding. If there is still nothing to
         * decode the body is copied as is, otherwise carry on to the
         * decoding loop
         */
        if (m->numberOfEncTypes == 0)
            return exportText(messageGetBody(m), ret, destroy_text);
    }

    if (setCTX && m->ctx)
        (*setCTX)(ret, m->ctx);

    /*
     * One pass over the body for each claimed encoding type
     */
    for (i = 0; i < m->numberOfEncTypes; i++) {
        encoding_type enctype = m->encodingTypes[i];
        size_t size;

        if (i > 0) {
            void *newret;

            /*
             * Each pass after the first writes to a fresh object which
             * replaces the previous one. If it can't be created, the
             * result of the previous pass is returned
             */
            newret = (*create)();
            if (newret == NULL) {
                cli_dbgmsg("Not all decoding algorithms were run\n");
                return ret;
            }
            (*destroy)(ret);
            ret = newret;
        }
        cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype);
        /*
         * Find the filename to decode
         */
        if (((enctype == YENCODE) || (i == 0)) && yEncBegin(m)) {
            const char *f;

            /*
             * TODO: handle multipart yEnc encoded files
             */
            t_line = yEncBegin(m);
            f      = lineGetData(t_line->t_line);

            /* The name is whatever follows " name=" on the =ybegin line */
            if ((filename = strstr(f, " name=")) != NULL) {
                filename = cli_strdup(&filename[6]);
                if (filename) {
                    cli_chomp(filename);
                    strstrip(filename);
                    cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
                }
            }

            (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
            if (filename) {
                free((char *)filename);
                filename = NULL;
            }
            /* The encoded data starts on the line after =ybegin */
            t_line  = t_line->t_next;
            enctype = YENCODE;
            /* Forget the marker so that later passes don't take this branch */
            m->yenc = NULL;
        } else {
            if (enctype == UUENCODE) {
                /*
                 * The body will have been stripped out by the
                 * fast track visa system. Treat as plain/text,
                 * which means we'll still scan for funnies
                 * outside of the uuencoded portion.
                 */
                cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
                enctype = m->encodingTypes[i] = NOENCODING;
            }
            filename = messageGetFilename(m);

            if (filename == NULL) {
                cli_dbgmsg("Attachment sent with no filename\n");
                messageAddArgument(m, "name=attachment");
            } else if (enctype == NOENCODING)
                /*
                 * Some virus attachments don't say how
                 * they've been encoded. We assume
                 * base64.
                 *
                 * FIXME: don't do this if it's a fall
                 * through from uuencode
                 */
                messageSetEncoding(m, "base64");

            (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");

            t_line = messageGetBody(m);
        }

        /* Already freed and set to NULL in the yEnc branch above */
        if (filename)
            free((char *)filename);

        /*
         * t_line should now point to the first (encoded) line of the
         * message
         */
        if (t_line == NULL) {
            cli_dbgmsg("Empty attachment not saved\n");
            (*destroy)(ret);
            return NULL;
        }

        if (enctype == NOENCODING) {
            /*
             * Fast copy
             */
            if (i == m->numberOfEncTypes - 1) {
                /* last one */
                (void)exportText(t_line, ret, destroy_text);
                break;
            }
            /* more passes to come, so the text must not be destroyed yet */
            (void)exportText(t_line, ret, 0);
            continue;
        }

        size = 0;
        do {
            unsigned char smallbuf[1024];
            unsigned char *uptr, *data;
            const char *line = lineGetData(t_line->t_line);
            unsigned char *bigbuf;
            size_t datasize;

            if (enctype == YENCODE) {
                if (line == NULL) {
                    /* in a do/while, continue jumps to the test, i.e. the next line */
                    continue;
                }
                if (strncmp(line, "=yend ", 6) == 0) {
                    break;
                }
            }

            /*
             * Add two bytes for '\n' and '\0'
             */
            datasize = (line) ? strlen(line) + 2 : 0;

            /*
             * Decode into the stack buffer when the line fits, otherwise
             * into a heap buffer. bigbuf is only non-NULL in the latter
             * case, so (data == bigbuf) means "data must be freed"
             */
            if (datasize >= sizeof(smallbuf)) {
                data = bigbuf = (unsigned char *)cli_malloc(datasize);
                if (NULL == data) {
                    cli_dbgmsg("Failed to allocate data buffer of size %zu\n", datasize);
                    break;
                }
            } else {
                bigbuf   = NULL;
                data     = smallbuf;
                datasize = sizeof(smallbuf);
            }

            uptr = decodeLine(m, enctype, line, data, datasize);
            if (uptr == NULL) {
                if (data == bigbuf) {
                    free(data);
                }
                break;
            }

            /* uptr is the end of the decoded data, don't add empty output */
            if (uptr != data) {
                (*addData)(ret, data, (size_t)(uptr - data));
                size += (size_t)(uptr - data);
            }

            if (data == bigbuf) {
                free(data);
            }

            /*
             * According to RFC2045, '=' is used to pad out
             * the last byte and should be used as evidence
             * of the end of the data. Some mail clients
             * annoyingly then put plain text after the '='
             * byte and viruses exploit this bug. Sigh
             */
            /*if(enctype == BASE64)
                if(strchr(line, '='))
                    break;*/
            /* Release each line once the last pass has finished with it */
            if (line && destroy_text && (i == m->numberOfEncTypes - 1)) {
                lineUnlink(t_line->t_line);
                t_line->t_line = NULL;
            }
        } while ((t_line = t_line->t_next) != NULL);

        cli_dbgmsg("Exported %lu bytes using enctype %d\n",
                   (unsigned long)size, (int)enctype);

        /* Verify we have nothing left to flush out */
        if (m->base64chars) {
            unsigned char data[4];
            unsigned char *ptr;

            ptr = base64Flush(m, data);
            if (ptr) {
                (*addData)(ret, data, (size_t)(ptr - data));
            }
        }
    }

    return ret;
}

/*
 * Decode any characters that decode() has carried over from earlier lines
 * (m->base64chars of them) into buf and forget them.
 *
 * Returns what decode() returns for the flush, or NULL if there was
 * nothing waiting to be flushed
 */
unsigned char *
base64Flush(message *m, unsigned char *buf)
{
    unsigned char *end;

    cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);

    if (m->base64chars == 0)
        return NULL;

    end            = decode(m, NULL, buf, base64, false);
    m->base64chars = 0;

    return end;
}

/*
 * Export the message as one part of a partial message. The name handed to
 * the export routines is built from dir, the current time, md5id and the
 * part number. The fileblob is destroyed again as soon as the export is done.
 *
 * Returns CL_SUCCESS, or CL_EFORMAT if nothing could be exported
 */
int messageSavePartial(message *m, const char *dir, const char *md5id, unsigned part)
{
    char fullname[1024];
    unsigned long time_val;
    fileblob *fb;

    cli_dbgmsg("messageSavePartial\n");

    time_val = time(NULL);
    snprintf(fullname, sizeof(fullname), "%s" PATHSEP "clamav-partial-%lu_%s-%u", dir, time_val, md5id, part);

    fb = messageExport(m, fullname,
                       (void *(*)(void))fileblobCreate,
                       (void (*)(void *))fileblobDestroy,
                       (void (*)(void *, const char *, const char *))fileblobPartialSet,
                       (int (*)(void *, const unsigned char *, size_t))fileblobAddData,
                       (void *(*)(text *, void *, int))textToFileblob,
                       (void (*)(void *, cli_ctx *))fileblobSetCTX,
                       0);

    if (fb == NULL)
        return CL_EFORMAT;

    fileblobDestroy(fb);

    return CL_SUCCESS;
}

/*
 * Decode and transfer the contents of the message into a fileblob
 * The caller must free the returned fileblob
 *
 * m:       the message, NULL is tolerated and gives NULL
 * dir:     handed to fileblobSetFilename along with the attachment's name
 * destroy: if set, the body of the message is destroyed once it has been
 *          exported
 *
 * Returns the fileblob, or NULL if nothing was exported
 */
fileblob *
messageToFileblob(message *m, const char *dir, int destroy)
{
    fileblob *fb;

    cli_dbgmsg("messageToFileblob\n");
    fb = messageExport(m, dir,
                       (void *(*)(void))fileblobCreate,
                       (void (*)(void *))fileblobDestroy,
                       (void (*)(void *, const char *, const char *))fileblobSetFilename,
                       (int (*)(void *, const unsigned char *, size_t))fileblobAddData,
                       (void *(*)(text *, void *, int))textToFileblob,
                       (void (*)(void *, cli_ctx *))fileblobSetCTX,
                       destroy);

    /*
     * messageExport copes with a NULL message, so m must be checked
     * here too before looking at its body
     */
    if (destroy && (m != NULL) && m->body_first) {
        textDestroy(m->body_first);
        m->body_first = m->body_last = NULL;
    }
    return fb;
}

/*
 * Decode and transfer the contents of the message into a closed blob
 * The caller must free the returned blob
 *
 * m:       the message, NULL is tolerated and gives NULL
 * destroy: if set, the body of the message is destroyed once it has been
 *          exported
 *
 * Returns the blob, or NULL if nothing was exported
 */
blob *
messageToBlob(message *m, int destroy)
{
    blob *b;

    cli_dbgmsg("messageToBlob\n");

    b = messageExport(m, NULL,
                      (void *(*)(void))blobCreate,
                      (void (*)(void *))blobDestroy,
                      (void (*)(void *, const char *, const char *))blobSetFilename,
                      (int (*)(void *, const unsigned char *, size_t))blobAddData,
                      (void *(*)(text *, void *, int))textToBlob,
                      (void (*)(void *, cli_ctx *))NULL,
                      destroy);

    /*
     * messageExport copes with a NULL message, so m must be checked
     * here too before looking at its body
     */
    if (destroy && (m != NULL) && m->body_first) {
        textDestroy(m->body_first);
        m->body_first = m->body_last = NULL;
    }
    return b;
}

/*
 * Helper for messageToText: add an empty node to the end of the list that
 * is being built.
 *
 * first: address of the head of the list, set if the list was empty
 * last:  the current tail, NULL if the list is empty
 *
 * Returns the new tail, or NULL if the node could not be allocated. Once an
 * allocation has failed (a list with a head but no tail) nothing more is
 * added and NULL is returned again.
 */
static text *
messageToTextAppendNode(text **first, text *last)
{
    text *node;

    if ((*first != NULL) && (last == NULL))
        return NULL;

    node = cli_malloc(sizeof(text));

    if (*first == NULL)
        *first = node;
    else
        last->t_next = node;

    if (node != NULL) {
        /* the list is always left properly terminated */
        node->t_line = NULL;
        node->t_next = NULL;
    }

    return node;
}

/*
 * Decode and transfer the contents of the message into a text area
 * The caller must free the returned text
 *
 * The body is scanned once for each encoding type that the message claims,
 * the output of every pass is added to the same list.
 *
 * Returns the decoded text, or NULL if there is no message, there was nothing
 * to decode, memory ran out while copying unencoded lines, or the message
 * is uuencoded or claims to be yEncoded without a =ybegin line
 */
text *
messageToText(message *m)
{
    int i;
    text *first = NULL, *last = NULL;
    const text *t_line;

    if (m == NULL) {
        cli_errmsg("Internal email parser error: invalid arguments when converting message to text.\n");
        return NULL;
    }

    if (m->numberOfEncTypes == 0) {
        /*
         * Fast copy
         */
        for (t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
            last = messageToTextAppendNode(&first, last);

            if (last == NULL) {
                if (first)
                    textDestroy(first);
                return NULL;
            }
            /* an empty line is left as NULL */
            if (t_line->t_line)
                last->t_line = lineLink(t_line->t_line);
        }

        return first;
    }
    /*
     * Scan over the data a number of times once for each claimed encoding
     * type
     */
    for (i = 0; i < m->numberOfEncTypes; i++) {
        const encoding_type enctype = m->encodingTypes[i];

        cli_dbgmsg("messageToText: export transfer method %d = %d\n",
                   i, (int)enctype);

        switch (enctype) {
            case NOENCODING:
            case BINARY:
            case EIGHTBIT:
                /*
                 * Fast copy
                 */
                for (t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
                    last = messageToTextAppendNode(&first, last);

                    if (last == NULL) {
                        if (first)
                            textDestroy(first);
                        return NULL;
                    }
                    /* an empty line is left as NULL */
                    if (t_line->t_line)
                        last->t_line = lineLink(t_line->t_line);
                }
                continue;
            case UUENCODE:
                cli_warnmsg("messageToText: Unexpected attempt to handle uuencoded file\n");
                if (first)
                    textDestroy(first);
                return NULL;
            case YENCODE:
                t_line = yEncBegin(m);

                if (t_line == NULL) {
                    /*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
                    if (first)
                        textDestroy(first);
                    return NULL;
                }
                /*
                 * The encoded data starts on the line after =ybegin.
                 * This must not fall into the default case, which
                 * would start again from the top of the body
                 */
                t_line = t_line->t_next;
                break;
            default:
                t_line = messageGetBody(m);
                break;
        }

        if ((i == 0) && binhexBegin(m))
            cli_warnmsg("Binhex messages not supported yet.\n");

        for (; t_line; t_line = t_line->t_next) {
            unsigned char data[1024];
            unsigned char *uptr;
            const char *line = lineGetData(t_line->t_line);

            /*
             * ignore blanks - breaks RFC which is
             * probably the point!
             */
            if ((enctype == BASE64) && (line == NULL))
                continue;

            /*
             * decodeLine can produce as many bytes as there are in the
             * line, and then adds a '\n' and a '\0', so two bytes of
             * data must be kept spare
             */
            if ((line != NULL) && (strlen(line) + 2 > sizeof(data))) {
                cli_errmsg("Internal email parser error: line size greater than size of receiving data buffer\n");
                break;
            }

            uptr = decodeLine(m, enctype, line, data, sizeof(data));

            if (uptr == NULL)
                break;

            if ((size_t)(uptr - data) > sizeof(data)) {
                cli_errmsg("Internal email parser error: line size greater than size of receiving data buffer\n");
                break;
            }

            last = messageToTextAppendNode(&first, last);

            if (last == NULL)
                break;

            /*
             * If the decoded line is the same as the encoded
             * there's no need to take a copy, just link it.
             * Note that the comparison is done without the
             * trailing newline that the decoding routine may have
             * added - that's why there's a strncmp rather than a
             * strcmp - that'd be bad for MIME decoders, but is OK
             * for AV software
             */
            if ((data[0] == '\n') || (data[0] == '\0'))
                last->t_line = NULL;
            else if (line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
#ifdef CL_DEBUG
                cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
#endif
                last->t_line = lineLink(t_line->t_line);
            } else
                last->t_line = lineCreate((char *)data);

            /* '=' is padding, which marks the end of the base64 data */
            if (line && (enctype == BASE64) && strchr(line, '='))
                break;
        }
        if (m->base64chars) {
            unsigned char data[4];

            /* flush what has been carried over from the last line */
            memset(data, '\0', sizeof(data));
            if (decode(m, NULL, data, base64, false) && data[0]) {
                last = messageToTextAppendNode(&first, last);

                if (last != NULL)
                    last->t_line = lineCreate((char *)data);
            }
            m->base64chars = 0;
        }
    }

    return first;
}

/*
 * Returns the line noted in m->yenc as the start of yEncoded data,
 * or NULL if no such line has been noted
 */
text *
yEncBegin(message *m)
{
    return m->yenc;
}

/*
 * Returns the line that marks the start of the BINHEX message (if any).
 * The body isn't scanned here, messageIsEncoding() notes the line in
 * m->binhex as lines are added to the message
 */
text *
binhexBegin(message *m)
{
    return m->binhex;
}

/*
 * Returns the line that marks the start of a bounce message (if any).
 * There is no standard for these, not even a convention, so don't expect
 * this to be foolproof.
 * The body isn't scanned here, messageIsEncoding() notes the line in
 * m->bounce as lines are added to the message
 */
text *
bounceBegin(message *m)
{
    return m->bounce;
}

/*
 * Returns the first line of the body that looks like a
 * Content-Transfer-Encoding header (if any). If a message doesn't contain
 * another message which could be harmful it is deemed to be safe.
 * The body isn't scanned here, messageIsEncoding() notes the line in
 * m->encoding as lines are added to the message
 *
 * TODO: ensure nothing can get through this
 *
 * TODO: check to see if we need to
 * find anything else, perhaps anything
 * from the RFC821 table?
 */
text *
encodingLine(message *m)
{
    return m->encoding;
}

/*
 * Decode a line and add it to a buffer, return the end of the buffer
 * to help appending callers. There is no new line at the end of "line"
 *
 * m:      the message, which holds the base64 characters carried over from
 *         one line to the next
 * et:     how the line is encoded, unknown types are copied as they are
 * line:   the line to decode, NULL for an empty line
 * buf:    where to put the decoded data
 * buflen: the size of buf in bytes
 *
 * Unencoded and quoted-printable lines need room for the data, a '\n' and
 * a '\0'. An unencoded line that doesn't fit is rejected, quoted-printable
 * and yEnc data is truncated to fit.
 * NOTE: base64 output is not checked against buflen here; the callers in
 * this file size the buffer from the length of the line.
 *
 * Returns the end of the decoded data, or NULL if the arguments are
 * invalid or the buffer is too small
 */
unsigned char *
decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
{
    size_t len, reallen, room;
    bool softbreak;
    char *p2, *copy;
    char base64buf[RFC2045LENGTH + 1];

    /*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
    if (NULL == m || NULL == buf || 0 == buflen) {
        cli_dbgmsg("decodeLine: invalid parameters\n");
        return NULL;
    }

    switch (et) {
        case BINARY:
            /*
             * TODO: find out what this is, encoded as binary??
             */
            /* fall through */
        case NOENCODING:
        case EIGHTBIT:
        default: /* unknown encoding type - try our best */
            /* The line, the new line and the terminator must all fit */
            len = (line != NULL) ? strlen(line) : 0;
            if (len + 2 > buflen) {
                cli_dbgmsg("decodeLine: line too long for the output buffer\n");
                return NULL;
            }
            if (line) /* empty line? */
                buf = (unsigned char *)cli_strrcpy((char *)buf, line);
            /* Put the new line back in */
            return (unsigned char *)cli_strrcpy((char *)buf, "\n");

        case QUOTEDPRINTABLE:
            if (buflen < 2) {
                /* no room for the new line and the terminator */
                cli_dbgmsg("decodeLine: line too long for the output buffer\n");
                return NULL;
            }

            if (line == NULL) { /* empty line */
                *buf++ = '\n';
                break;
            }

            softbreak = false;
            /*
             * Two bytes are kept back for the '\n' and the '\0' that
             * are added once the loop has finished
             */
            room = buflen - 2;
            while (room && *line) {
                if (*line == '=') {
                    unsigned char byte;

                    if ((*++line == '\0') || (*line == '\n')) {
                        softbreak = true;
                        /* soft line break */
                        break;
                    }

                    byte = hex(*line);

                    if ((*++line == '\0') || (*line == '\n')) {
                        /*
                         * broken e-mail, not
                         * adhering to RFC2045
                         */
                        *buf++ = byte;
                        break;
                    }

                    /*
                     * Handle messages that use a broken
                     * quoted-printable encoding of
                     * href=\"http://, instead of =3D
                     */
                    if (byte != '=')
                        byte = (byte << 4) | hex(*line);
                    else
                        line -= 2;

                    *buf++ = byte;
                } else {
                    *buf++ = *line;
                }
                ++line;
                --room;
            }
            if (!softbreak) {
                /* Put the new line back in */
                *buf++ = '\n';
            }
            break;

        case BASE64:
            if (line == NULL)
                break;
            /*
             * RFC2045 sets the maximum length to 76 bytes
             * but many e-mail clients ignore that
             */
            if (strlen(line) < sizeof(base64buf)) {
                strcpy(base64buf, line);
                copy = base64buf;
            } else {
                copy = cli_strdup(line);
                if (copy == NULL)
                    break;
            }

            /* Anything from the first '=' onwards is padding */
            p2 = strchr(copy, '=');
            if (p2)
                *p2 = '\0';

            sanitiseBase64(copy);

            /*
             * Klez doesn't always put "=" on the last line
             */
            buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));

            if (copy != base64buf)
                free(copy);
            break;

        case UUENCODE:
            if (0 != m->base64chars)
                break;

            if ((line == NULL) || (*line == '\0')) /* empty line */
                break;
            if (strcasecmp(line, "end") == 0)
                break;
            if (isuuencodebegin(line))
                break;

            if ((line[0] & 0x3F) == ' ')
                break;

            /*
             * reallen contains the number of bytes that were
             *    encoded
             */
            reallen = (size_t)uudecode(*line++);
            if (reallen <= 0)
                break;
            if (reallen > 62)
                break;
            len = strlen(line);

            if ((len > buflen) || (reallen > len))
                /*
                 * In practice this should never occur since
                 * the maximum length of a uuencoded line is
                 * 62 characters
                 */
                cli_dbgmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
            else {
                (void)decode(m, line, buf, uudecode, (len & 3) == 0);
                buf = &buf[reallen];
            }
            m->base64chars = 0; /* this happens with broken uuencoded files */
            break;
        case YENCODE:
            if ((line == NULL) || (*line == '\0')) /* empty line */
                break;
            if (strncmp(line, "=yend ", 6) == 0)
                break;

            /* One byte is kept back for the terminator */
            room = buflen - 1;
            while (room && *line) {
                if (*line == '=') {
                    /* escaped character */
                    if (*++line == '\0')
                        break;
                    *buf++ = ((*line++ - 64) & 255);
                } else
                    *buf++ = ((*line++ - 42) & 255);
                --room;
            }
            break;
    }

    *buf = '\0';
    return buf;
}

/*
 * Remove the non base64 characters such as spaces from a string. Spaces
 * shouldn't appear mid string in base64 files, but some broken mail clients
 * ignore such errors rather than discarding the mail, and virus writers
 * exploit this bug
 *
 * The string is modified in place. It is compacted in one pass, rather than
 * shuffling the rest of the string down each time a bad character is found
 */
static void
sanitiseBase64(char *s)
{
    char *out;

    cli_dbgmsg("sanitiseBase64 '%s'\n", s);

    /* out never gets ahead of s, so nothing is overwritten before it's read */
    for (out = s; *s != '\0'; s++)
        if (base64Table[(unsigned int)(*s & 0xFF)] != 255)
            *out++ = *s;

    *out = '\0';
}

/*
 * Returns one byte after the end of the decoded data in "out"
 *
 * Update m->base64chars with the last few bytes of data that we haven't
 * decoded. After the last line is found, decode will be called with in = NULL
 * to flush these out
 *
 * m:       the message, holds the values carried over between calls in
 *          base64_1, base64_2 and base64_3, base64chars is how many are in use
 * in:      the characters to decode, or NULL to flush
 * out:     where to put the decoded bytes, no bounds checking is done here
 * decoder: converts one character to its value, e.g. base64 or uudecode
 * isFast:  decode four characters at a time with no end of string checks.
 *          It is ignored if there are carried over values to use first
 *
 * Every four input values give three output bytes. Note that the carried
 * over values have already been through the decoder.
 */
static unsigned char *
decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
{
    unsigned char b1, b2, b3, b4;
    unsigned char cb1, cb2, cb3; /* carried over from last line */

    /*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
        in ? strlen(in) : 0,
        isFast, m->base64chars);*/

    cb1 = cb2 = cb3 = '\0';

    /* Pick up what was left over last time */
    switch (m->base64chars) {
        case 3:
            cb3 = m->base64_3;
            /* FALLTHROUGH */
        case 2:
            cb2 = m->base64_2;
            /* FALLTHROUGH */
        case 1:
            cb1    = m->base64_1;
            isFast = false;
            break;
        default:
            if (3 < m->base64chars) {
                cli_errmsg("email message decode error: invalid base64chars value: %d\n", m->base64chars);
                return out;
            }
    }

    if (isFast)
        /* Fast decoding if not last line */
        while (*in) {
            b1 = (*decoder)(*in++);
            b2 = (*decoder)(*in++);
            b3 = (*decoder)(*in++);
            /*
             * Put this line here to help on some compilers which
             * can make use of some architecture's ability to
             * multiprocess when different variables can be
             * updated at the same time - here b3 is used in
             * one line, b1/b2 in the next and b4 in the next after
             * that, b3 and b4 rely on in but b1/b2 don't
             */
            *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
            b4     = (*decoder)(*in++);
            *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
            *out++ = (b3 << 6) | (b4 & 0x3F);
        }
    else if (in == NULL) { /* flush */
        int nbytes;

        if (m->base64chars == 0)
            return out;

        cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
                   isalnum(cb1) ? cb1 : '@',
                   isalnum(cb2) ? cb2 : '@',
                   isalnum(cb3) ? cb3 : '@');

        /* Use up the carried over values, nbytes is how many count */
        m->base64chars--;
        b1     = cb1;
        nbytes = 1;

        if (m->base64chars) {
            m->base64chars--;
            b2 = cb2;

            if (m->base64chars) {
                nbytes = 2; /* overwritten two lines below */
                m->base64chars--;
                b3     = cb3;
                nbytes = 3;
            } else if (b2)
                /* a second value of zero adds nothing */
                nbytes = 2;
        }

        switch (nbytes) {
            case 3:
                b4 = '\0';
                /* fall through */
            case 4: /* nbytes is never set to 4 when flushing */
                *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
                *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
                if ((nbytes == 4) || (b3 & 0x3))
                    *out++ = (b3 << 6) | (b4 & 0x3F);
                break;
            case 2:
                *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
                /* only output the second byte if it has any bits set */
                if ((b2 << 4) & 0xFF)
                    *out++ = b2 << 4;
                break;
            case 1:
                *out++ = b1 << 2;
                break;
            default:
                cli_errmsg("email message decode error: invalid nbytes value: %d\n", nbytes);
                return out;
        }
    } else
        /*
         * Slow decoding: use up the carried over values first, then take
         * values from the input, checking for the end of the input
         * before each one. nbytes is how many values were found
         */
        while (*in) {
            int nbytes;

            if (m->base64chars) {
                m->base64chars--;
                b1 = cb1;
            } else
                b1 = (*decoder)(*in++);

            if (*in == '\0') {
                b2     = '\0';
                nbytes = 1;
            } else {
                if (m->base64chars) {
                    m->base64chars--;
                    b2 = cb2;
                } else
                    b2 = (*decoder)(*in++);

                if (*in == '\0') {
                    b3     = '\0';
                    nbytes = 2;
                } else {
                    if (m->base64chars) {
                        m->base64chars--;
                        b3 = cb3;
                    } else
                        b3 = (*decoder)(*in++);

                    if (*in == '\0') {
                        b4     = '\0';
                        nbytes = 3;
                    } else {
                        b4     = (*decoder)(*in++);
                        nbytes = 4;
                    }
                }
            }

            switch (nbytes) {
                case 4:
                    *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
                    *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
                    *out++ = (b3 << 6) | (b4 & 0x3F);
                    continue;
                /*
                 * Fewer than four values: save them in the message for
                 * the next call rather than decoding them now
                 */
                case 3:
                    m->base64_3 = b3;
                    /* fall-through */
                case 2:
                    m->base64_2 = b2;
                    /* fall-through */
                case 1:
                    m->base64_1    = b1;
                    m->base64chars = nbytes;
                    break;
                default:
                    cli_errmsg("email message decode error: invalid nbytes value: %d\n", nbytes);
                    return out;
            }
            break; /* nbytes != 4 => EOL */
        }
    return out;
}

/*
 * Convert one hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
 * numeric value (0-15).
 *
 * Any other character is reported through cli_dbgmsg() and '=' is returned
 * instead of a digit value.
 */
static unsigned char
hex(char c)
{
    /*
     * Plain char may be signed; handing a negative value to a <ctype.h>
     * function is undefined behaviour, so convert to unsigned char first
     */
    if (isdigit((unsigned char)c))
        return c - '0';
    if ((c >= 'A') && (c <= 'F'))
        return c - 'A' + 10;
    if ((c >= 'a') && (c <= 'f'))
        return c - 'a' + 10;
    cli_dbgmsg("Illegal hex character '%c'\n", c);

    /*
     * Some mails (notably some spam) break RFC2045 by failing to encode
     * the '=' character
     */
    return '=';
}

/*
 * Look up the 6-bit value of a base64 character in base64Table.
 *
 * The low 8 bits of c are used as the table index. A table entry of 255
 * marks a character that is not valid base64; such characters are silently
 * decoded as the value 63.
 */
static unsigned char
base64(char c)
{
    const unsigned int idx    = (unsigned int)(c & 0xFF);
    const unsigned char value = base64Table[idx];

    /* 255 == "illegal character in base64 encoding" */
    return (value == 255) ? 63 : value;
}

/*
 * Decode one uuencoded character: the result is the character's distance
 * from ' ' (space), truncated to an unsigned char.
 */
static unsigned char
uudecode(char c)
{
    const int offset = c - ' ';

    return (unsigned char)offset;
}

/*
 * Decide whether a header argument is worth storing.
 *
 * Only arguments that start (case-insensitively) with one of the prefixes
 * listed below are kept; everything else is logged and discarded, which can
 * save a lot of memory. The list has to be extended by hand whenever a new
 * argument name is looked up elsewhere in the code.
 *
 * Returns 1 if the argument should be kept, 0 if it can be dropped.
 */
static int
usefulArg(const char *arg)
{
    static const struct {
        const char *prefix;
        size_t len;
    } wanted[] = {
        {"name", 4},
        {"filename", 8},
        {"boundary", 8},
        {"protocol", 8},
        {"id", 2},
        {"number", 6},
        {"total", 5},
        {"type", 4},
    };
    size_t i;

    for (i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
        if (strncasecmp(arg, wanted[i].prefix, wanted[i].len) == 0)
            return 1;

    cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
    return 0;
}

/*
 * Store ctx in the message's ctx field. No validation is done and m must
 * not be NULL.
 */
void messageSetCTX(message *m, cli_ctx *ctx)
{
    m->ctx = ctx;
}

/*
 * Report the message's isInfected flag, normalised to true (1) or
 * false (0). m must not be NULL.
 */
int messageContainsVirus(const message *m)
{
    if (m->isInfected)
        return true;

    return false;
}

/*
 * We've run out of memory. Try to recover some by making body lines that
 * have identical contents share a single line object.
 *
 * For each body line of at least 8 characters, every later line with the
 * same text is unlinked and re-pointed at the earlier line via lineLink().
 * The scan stops once roughly 100,000 bytes have been reclaimed, and a line
 * is no longer used as a link target once its reference count reaches 255.
 *
 * FIXME: this can take a long time. The real solution is for system admins
 *    to refrain from setting ulimits too low, then this routine won't be
 *    called
 */
static void
messageDedup(message *m)
{
    const text *t1;
    size_t saved = 0;

    cli_dbgmsg("messageDedup\n");

    /*
     * NOTE(review): the scan always starts at the top of the body. Earlier
     * code loaded m->dedupedThisFar into t1 just before this loop, but the
     * loop initialiser overwrote it straight away, so the saved position
     * has never actually been honoured. That dead store has been removed.
     * Resuming from dedupedThisFar would only be safe if that pointer is
     * guaranteed to stay valid between calls - confirm before enabling.
     */
    for (t1 = m->body_first; t1; t1 = t1->t_next) {
        const char *d1;
        text *t2;
        line_t *l1;
        unsigned int r1;

        if (saved >= 100 * 1000)
            break; /* that's enough */
        l1 = t1->t_line;
        if (l1 == NULL)
            continue;
        d1 = lineGetData(l1);
        if (strlen(d1) < 8)
            continue; /* wouldn't recover many bytes */

        r1 = (unsigned int)lineGetRefCount(l1);
        if (r1 == 255)
            continue; /* reference count already at its limit */
        /*
         * We don't want to foul up any pointers
         */
        if (t1 == m->encoding)
            continue;
        if (t1 == m->bounce)
            continue;
        if (t1 == m->binhex)
            continue;
        if (t1 == m->yenc)
            continue;

        /* Look for later lines holding the same text as t1 */
        for (t2 = t1->t_next; t2; t2 = t2->t_next) {
            const char *d2;
            line_t *l2 = t2->t_line;

            if (l2 == NULL)
                continue;
            d2 = lineGetData(l2);
            if (d1 == d2)
                /* already linked */
                continue;
            if (strcmp(d1, d2) == 0) {
                /* Bytes are only counted when lineUnlink() returns NULL */
                if (lineUnlink(l2) == NULL)
                    saved += strlen(d1) + 1;
                t2->t_line = lineLink(l1);
                if (t2->t_line == NULL) {
                    cli_errmsg("messageDedup: out of memory\n");
                    return;
                }
                if (++r1 == 255)
                    break; /* l1 can't take any more references */
            }
        }
    }

    cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
    /* Remember where the scan stopped (NULL if it ran to the end) */
    m->dedupedThisFar = t1;
}

/*
 * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
 * free, or NULL on error (out of memory).
 *
 * Three kinds of input are distinguished:
 *  - "name*0*=..."  parameter continuations are not decoded; the text up to
 *                   the '=' is copied (dropping "*...*" sections) and the
 *                   value is replaced by "rfc2231failure"
 *  - "name*0=..."   the value is taken as is, with %XX escapes decoded
 *  - "name*=a'b'value"  the two quote-delimited fields are skipped and the
 *                   value is %XX-decoded; if the two quotes are missing an
 *                   empty string is returned
 * Anything else is returned as a copy with the top bit of every byte cleared.
 *
 * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
 *     protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
 */
static char *
rfc2231(const char *in)
{
    const char *ptr;
    char *ret, *out;
    size_t prefix_len;
    enum { LANGUAGE,
           CHARSET,
           CONTENTS } field;

    if (strstr(in, "*0*=") != NULL) {
        char *p;

        /*
         * Don't handle continuations, decode what we can.
         * The extra 16 bytes leave room for "=rfc2231failure" and the NUL
         */
        p = ret = cli_malloc(strlen(in) + 16);
        if (ret == NULL) {
            cli_errmsg("rfc2231: out of memory, unable to proceed\n");
            return NULL;
        }

        do {
            switch (*in) {
                default:
                    *p++ = *in++;
                    continue;
                case '*':
                    /* skip everything up to and including the next '*' */
                    do
                        in++;
                    while ((*in != '*') && *in);
                    if (*in) {
                        in++;
                        continue;
                    }
                    break;
                case '=':
                    strcpy(p, "=rfc2231failure");
                    p += strlen("=rfc2231failure");
                    break;
            }
            break;
        } while (*in);
        *p = '\0';

        cli_dbgmsg("RFC2231 parameter continuations are not yet handled, returning \"%s\"\n",
                   ret);
        return ret;
    }

    ptr = strstr(in, "*0=");
    if (ptr != NULL)
        /*
         * Parameter continuation, with no continuation
         * Thunderbird 1.5 (and possibly other versions) does this
         */
        field = CONTENTS;
    else {
        ptr   = strstr(in, "*=");
        field = LANGUAGE;
    }

    if (ptr == NULL) { /* quick return */
        out = ret = cli_strdup(in);
        if (ret == NULL) {
            /* previously the NULL was dereferenced by the loop below */
            cli_errmsg("rfc2231: out of memory, unable to proceed\n");
            return NULL;
        }
        while (*out)
            *out++ &= 0x7F;
        return ret;
    }

    cli_dbgmsg("rfc2231 '%s'\n", in);

    /* The decoded form is never longer than the input */
    ret = cli_malloc(strlen(in) + 1);

    if (ret == NULL) {
        cli_errmsg("rfc2231: out of memory for ret\n");
        return NULL;
    }

    /*
     * Copy the parameter name. 'in' is deliberately left untouched so that
     * the diagnostic below can still print the whole header
     */
    prefix_len = (size_t)(ptr - in);
    memcpy(ret, in, prefix_len);
    out = ret + prefix_len;

    *out++ = '=';

    /* Step over the "*=" / "*0=" marker; the '=' is known to be there */
    while (*ptr++ != '=') continue;

    /*
     * We don't do anything with the language and character set, just skip
     * over them!
     */
    while (*ptr) {
        switch (field) {
            case LANGUAGE:
                if (*ptr == '\'')
                    field = CHARSET;
                break;
            case CHARSET:
                if (*ptr == '\'')
                    field = CONTENTS;
                break;
            case CONTENTS:
                if (*ptr == '%') {
                    unsigned char byte;

                    /* '%' at the very end of the value: nothing to decode */
                    if ((*++ptr == '\0') || (*ptr == '\n'))
                        break;

                    byte = hex(*ptr);

                    /* only one hex digit present: emit its value as is */
                    if ((*++ptr == '\0') || (*ptr == '\n')) {
                        *out++ = byte;
                        break;
                    }

                    byte <<= 4;
                    byte += hex(*ptr);
                    *out++ = byte;
                } else
                    *out++ = *ptr;
                break;
        }
        if (*ptr++ == '\0')
            /*
             * Incorrect message that has just one character after
             * a '%'.
             * FIXME: stash something in out that would, for example
             *    treat %2 as %02, assuming field == CONTENTS
             */
            break;
    }

    if (field != CONTENTS) {
        free(ret);
        cli_dbgmsg("Invalid RFC2231 header: '%s'\n", in);
        return cli_strdup("");
    }

    *out = '\0';

    cli_dbgmsg("rfc2231 returns '%s'\n", ret);

    return ret;
}

/*
 * common/simil:
 *    From Computing Magazine 20/8/92
 * Returns a percentage from 0 to 100 - how similar are 2 strings?
 * 100 for exact match, < 0 for error
 */
struct pstr_list { /* internal stack */
    char *d1;               /* heap copy of the stacked string, made by push() and freed by pop() */
    struct pstr_list *next; /* element underneath this one, NULL at the bottom */
};

/*
 * Status codes used by simil(), push() and pop(). They are all negative,
 * so they cannot be mistaken for a 0-100 similarity score.
 */
#define OUT_OF_MEMORY (-2)
#define FAILURE (-3)
#define SUCCESS (-4)
#define ARRAY_OVERFLOW (-5)
typedef struct pstr_list ELEMENT1;
typedef ELEMENT1 *LINK1;

/* Stack primitives and the common-substring finder used by simil() */
static int push(LINK1 *top, const char *string);
static int pop(LINK1 *top, char *buffer);
static unsigned int compare(char *ls1, char **rs1, char *ls2, char **rs2);

#define MAX_PATTERN_SIZ 50 /* size of simil()'s work buffers; longer strings (after stripping) are rejected */

static int
simil(const char *str1, const char *str2)
{
    LINK1 top          = NULL;
    unsigned int score = 0;
    size_t common, total;
    size_t len1, len2;
    char *rs1 = NULL, *rs2 = NULL;
    char *s1, *s2;
    char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];

    if (strcasecmp(str1, str2) == 0)
        return 100;

    if ((s1 = cli_strdup(str1)) == NULL)
        return OUT_OF_MEMORY;
    if ((s2 = cli_strdup(str2)) == NULL) {
        free(s1);
        return OUT_OF_MEMORY;
    }

    if (((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
        free(s1);
        free(s2);
        return ARRAY_OVERFLOW;
    }

    total += len2;

    if ((push(&top, s1) == OUT_OF_MEMORY) ||
        (push(&top, s2) == OUT_OF_MEMORY)) {
        free(s1);
        free(s2);
        return OUT_OF_MEMORY;
    }

    while (pop(&top, ls2) == SUCCESS) {
        pop(&top, ls1);
        common = compare(ls1, &rs1, ls2, &rs2);
        if (common > 0) {
            score += (unsigned int)common;
            len1 = strlen(ls1);
            len2 = strlen(ls2);

            if ((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
                if ((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
                    free(s1);
                    free(s2);
                    return OUT_OF_MEMORY;
                }
            len1 = strlen(rs1);
            len2 = strlen(rs2);

            if ((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
                if ((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
                    free(s1);
                    free(s2);
                    return OUT_OF_MEMORY;
                }
        }
    }
    free(s1);
    free(s2);
    return (total > 0) ? ((score * 200) / total) : 0;
}

/*
 * Find the longest run of characters common to ls1 and ls2, ignoring case.
 *
 * If the strings share at least one character, both are modified in place:
 * a NUL is written at the start of the common run, so ls1/ls2 become the
 * text to the left of it, and *rs1 / *rs2 are set to point just past the
 * run, i.e. at the text to the right of it. If nothing matches, the strings
 * and *rs1 / *rs2 are left untouched.
 *
 * Returns the length of the run found, 0 if there is none.
 */
static unsigned int
compare(char *ls1, char **rs1, char *ls2, char **rs2)
{
    unsigned int common, maxchars = 0;
    bool some_similarity = false;
    char *s1, *s2;
    char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
    char *cs1, *cs2, *start1, *end1, *end2;

    end1   = ls1 + strlen(ls1);
    end2   = ls2 + strlen(ls2);
    start1 = ls1;

    /* Try every starting position in ls1 that is still below end1 */
    for (;;) {
        s1 = start1;
        s2 = ls2;

        if (s1 < end1) {
            while (s1 < end1 && s2 < end2) {
                /*
                 * The casts matter: plain char may be negative, and
                 * tolower() is only defined for unsigned char values and EOF
                 */
                if (tolower((unsigned char)*s1) == tolower((unsigned char)*s2)) {
                    some_similarity = true;
                    cs1             = s1;
                    cs2             = s2;
                    common          = 0;
                    /* extend the run for as long as both strings agree */
                    do
                        if (s1 == end1 || s2 == end2)
                            break;
                        else {
                            s1++;
                            s2++;
                            common++;
                        }
                    while (tolower((unsigned char)*s1) == tolower((unsigned char)*s2));

                    if (common > maxchars) {
                        /* new best run: remember where it starts and ends */
                        unsigned int diff = common - maxchars;
                        maxchars          = common;
                        maxs1             = cs1;
                        maxs2             = cs2;
                        maxe1             = s1;
                        maxe2             = s2;
                        /* pull both search limits in by the improvement */
                        end1 -= diff;
                        end2 -= diff;
                    } else
                        /* not an improvement: rewind s1 to where the run began */
                        s1 -= common;
                } else
                    s2++;
            }
            start1++;
        } else
            break;
    }
    if (some_similarity) {
        *maxs1 = '\0';
        *maxs2 = '\0';
        *rs1   = maxe1;
        *rs2   = maxe2;
    }
    return maxchars;
}

/*
 * Put a private copy of string on top of the stack.
 *
 * Returns SUCCESS, or OUT_OF_MEMORY (leaving the stack untouched) if either
 * the element or the copy of the string cannot be allocated.
 */
static int
push(LINK1 *top, const char *string)
{
    LINK1 node = (LINK1)cli_malloc(sizeof(*node));

    if (node == NULL)
        return OUT_OF_MEMORY;

    node->d1 = cli_strdup(string);
    if (node->d1 == NULL) {
        free(node);
        return OUT_OF_MEMORY;
    }

    /* the new element becomes the head of the list */
    node->next = *top;
    *top       = node;

    return SUCCESS;
}

/*
 * Remove the top element of the stack, copying its string into buffer.
 * buffer must be large enough for the string; no bound is checked here.
 *
 * Returns SUCCESS, or FAILURE if the stack is empty (buffer is then left
 * untouched).
 */
static int
pop(LINK1 *top, char *buffer)
{
    LINK1 head = *top;

    if (head == NULL)
        return FAILURE;

    (void)strcpy(buffer, head->d1);
    *top = head->next;

    /* the element owns its string, so release both */
    free(head->d1);
    free((char *)head);
    return SUCCESS;
}

/*
 * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
 *
 * The line must start with a lower case 'b', continue as "begin "
 * (remaining letters in any case), and be followed by three digits and a
 * space, e.g. "begin 644 filename".
 *
 * Returns 1 if it does, 0 otherwise.
 */
int isuuencodebegin(const char *line)
{
    /*
     * Examine the bytes as unsigned char: handing a negative plain char to
     * isdigit() is undefined behaviour
     */
    const unsigned char *p = (const unsigned char *)line;

    if (p[0] != 'b') /* quick check */
        return 0;

    /*
     * No separate length check is needed. Each test below can only succeed
     * on a non-NUL byte, and && stops at the first failure, so nothing past
     * the terminating NUL is ever read - and a long line doesn't have to be
     * scanned to its end just to learn that it has at least 10 characters.
     */
    return (strncasecmp(line, "begin ", 6) == 0) &&
           isdigit(p[6]) && isdigit(p[7]) &&
           isdigit(p[8]) && (p[9] == ' ');
}

#if HAVE_JSON
/*
 * Return the JSON object attached to the message, creating it with
 * cli_jsonobj(NULL, NULL) the first time it is asked for.
 *
 * Returns NULL if m is NULL; otherwise returns m->jobj, which is whatever
 * cli_jsonobj() handed back when the object was created.
 */
json_object *messageGetJObj(message *m)
{
    if (m != NULL) {
        /* created on first use */
        if (m->jobj == NULL) {
            m->jobj = cli_jsonobj(NULL, NULL);
        }
        return m->jobj;
    }

    return NULL;
}
#endif