4682 lines
156 KiB
C
4682 lines
156 KiB
C
|
/*
|
||
|
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
||
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
||
|
*
|
||
|
* Authors: Nigel Horne
|
||
|
*
|
||
|
* Acknowledgements: Some ideas came from Stephen White <stephen@earth.li>,
|
||
|
* Michael Dankov <misha@btrc.ru>, Gianluigi Tiesi <sherpya@netfarm.it>,
|
||
|
* Everton da Silva Marques, Thomas Lamy <Thomas.Lamy@in-online.net>,
|
||
|
* James Stevens <James@kyzo.com>
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU General Public License version 2 as
|
||
|
* published by the Free Software Foundation.
|
||
|
*
|
||
|
* This program is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with this program; if not, write to the Free Software
|
||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||
|
* MA 02110-1301, USA.
|
||
|
*/
|
||
|
#if HAVE_CONFIG_H
|
||
|
#include "clamav-config.h"
|
||
|
#endif
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
#ifndef _REENTRANT
|
||
|
#define _REENTRANT /* for Solaris 2.8 */
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <errno.h>
|
||
|
#include <assert.h>
|
||
|
#include <string.h>
|
||
|
#include <stdbool.h>
|
||
|
#ifdef HAVE_STRINGS_H
|
||
|
#include <strings.h>
|
||
|
#endif
|
||
|
#ifdef HAVE_STRING_H
|
||
|
#include <string.h>
|
||
|
#endif
|
||
|
#include <ctype.h>
|
||
|
#include <time.h>
|
||
|
#include <fcntl.h>
|
||
|
#ifdef HAVE_SYS_PARAM_H
|
||
|
#include <sys/param.h>
|
||
|
#endif
|
||
|
#include <dirent.h>
|
||
|
#include <limits.h>
|
||
|
#include <signal.h>
|
||
|
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
#include <pthread.h>
|
||
|
#endif
|
||
|
|
||
|
#if defined(_WIN32) || defined(_WIN64)
|
||
|
#define strtok_r strtok_s
|
||
|
#endif
|
||
|
|
||
|
#include "clamav.h"
|
||
|
#include "others.h"
|
||
|
#include "str.h"
|
||
|
#include "filetypes.h"
|
||
|
#include "mbox.h"
|
||
|
#include "dconf.h"
|
||
|
#include "fmap.h"
|
||
|
#include "json_api.h"
|
||
|
#include "msxml_parser.h"
|
||
|
|
||
|
#if HAVE_LIBXML2
|
||
|
#include <libxml/xmlversion.h>
|
||
|
#include <libxml/HTMLtree.h>
|
||
|
#include <libxml/HTMLparser.h>
|
||
|
#include <libxml/xmlreader.h>
|
||
|
#endif
|
||
|
|
||
|
#define DCONF_PHISHING mctx->ctx->dconf->phishing
|
||
|
|
||
|
#ifdef CL_DEBUG
|
||
|
|
||
|
#if defined(C_LINUX)
|
||
|
#include <features.h>
|
||
|
#endif
|
||
|
|
||
|
#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
|
||
|
#define HAVE_BACKTRACE
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#ifdef HAVE_BACKTRACE
|
||
|
#include <execinfo.h>
|
||
|
|
||
|
#ifdef USE_SYSLOG
|
||
|
#include <syslog.h>
|
||
|
#endif
|
||
|
|
||
|
static void sigsegv(int sig);
|
||
|
static void print_trace(int use_syslog);
|
||
|
|
||
|
/*#define SAVE_TMP */ /* Save the file being worked on in tmp */
|
||
|
#endif
|
||
|
|
||
|
#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
|
||
|
#undef strtok_r
|
||
|
#undef __strtok_r
|
||
|
#define strtok_r(a, b, c) strtok(a, b)
|
||
|
#endif
|
||
|
|
||
|
/*
 * Result of parsing a message (or part of one).
 *
 * cli_parse_mbox() translates these into cl_error_t style return codes:
 * FAIL -> CL_EFORMAT, VIRUS -> CL_VIRUS, MAXREC -> CL_EMAXREC,
 * MAXFILES -> CL_EMAXFILES; OK and OK_ATTACHMENTS_NOT_SAVED leave the
 * return code untouched.
 */
typedef enum {
    FAIL                     = 0,
    OK                       = 1,
    OK_ATTACHMENTS_NOT_SAVED = 2,
    VIRUS                    = 3,
    MAXREC                   = 4,
    MAXFILES                 = 5
} mbox_status;
|
||
|
|
||
|
#ifndef isblank
|
||
|
#define isblank(c) (((c) == ' ') || ((c) == '\t'))
|
||
|
#endif
|
||
|
|
||
|
#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
|
||
|
|
||
|
#include "htmlnorm.h"
|
||
|
|
||
|
#include "phishcheck.h"
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
#include <sys/time.h>
|
||
|
#include <netdb.h>
|
||
|
#include <sys/socket.h>
|
||
|
#include <netinet/in.h>
|
||
|
#if !defined(C_BEOS) && !defined(C_INTERIX)
|
||
|
#include <net/if.h>
|
||
|
#include <arpa/inet.h>
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#include <fcntl.h>
|
||
|
|
||
|
/*
|
||
|
* Use CL_SCAN_MAIL_PARTIAL_MESSAGE to handle messages covered by section 7.3.2 of RFC1341.
|
||
|
* This is experimental code so it is up to YOU to (1) ensure it's secure
|
||
|
* (2) periodically trim the directory of old files
|
||
|
*
|
||
|
* If you use the load balancing feature of clamav-milter to run clamd on
|
||
|
* more than one machine you must make sure that .../partial is on a shared
|
||
|
* network filesystem
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Slows things down a lot and only catches unencoded copies
|
||
|
* of EICAR within bounces, which don't matter
|
||
|
*/
|
||
|
//#define SCAN_UNENCODED_BOUNCES
|
||
|
|
||
|
typedef struct mbox_ctx {
|
||
|
const char *dir;
|
||
|
const table_t *rfc821Table;
|
||
|
const table_t *subtypeTable;
|
||
|
cli_ctx *ctx;
|
||
|
unsigned int files; /* number of files extracted */
|
||
|
#if HAVE_JSON
|
||
|
json_object *wrkobj;
|
||
|
#endif
|
||
|
} mbox_ctx;
|
||
|
|
||
|
/* if supported by the system, use the optimized
|
||
|
* version of getc, that doesn't do locking,
|
||
|
* and is possibly implemented entirely as a macro */
|
||
|
#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
|
||
|
#define GETC(fp) getc_unlocked(fp)
|
||
|
#define LOCKFILE(fp) flockfile(fp)
|
||
|
#define UNLOCKFILE(fp) funlockfile(fp)
|
||
|
#else
|
||
|
#define GETC(fp) getc(fp)
|
||
|
#define LOCKFILE(fp)
|
||
|
#define UNLOCKFILE(fp)
|
||
|
#endif
|
||
|
|
||
|
static int cli_parse_mbox(const char *dir, cli_ctx *ctx);
|
||
|
static message *parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821Table, const char *firstLine, const char *dir, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static message *parseEmailHeaders(message *m, const table_t *rfc821Table, bool *heuristicFound);
|
||
|
static int parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata);
|
||
|
static mbox_status parseRootMHTML(mbox_ctx *mctx, message *m, text *t);
|
||
|
static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
|
||
|
static int boundaryStart(const char *line, const char *boundary);
|
||
|
static int boundaryEnd(const char *line, const char *boundary);
|
||
|
static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
|
||
|
static int getTextPart(message *const messages[], size_t size);
|
||
|
static size_t strip(char *buf, int len);
|
||
|
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static int saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
|
||
|
static char *rfc2047(const char *in);
|
||
|
static char *rfc822comments(const char *in, char *out);
|
||
|
static int rfc1341(mbox_ctx *mctx, message *m);
|
||
|
static bool usefulHeader(int commandNumber, const char *cmd);
|
||
|
static char *getline_from_mbox(char *buffer, size_t len, fmap_t *map, size_t *at);
|
||
|
static bool isBounceStart(mbox_ctx *mctx, const char *line);
|
||
|
static bool exportBinhexMessage(mbox_ctx *mctx, message *m);
|
||
|
static int exportBounceMessage(mbox_ctx *ctx, text *start);
|
||
|
static const char *getMimeTypeStr(mime_type mimetype);
|
||
|
static const char *getEncTypeStr(encoding_type enctype);
|
||
|
static message *do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
|
||
|
static int count_quotes(const char *buf);
|
||
|
static bool next_is_folded_header(const text *t);
|
||
|
static bool newline_in_header(const char *line);
|
||
|
|
||
|
static blob *getHrefs(message *m, tag_arguments_t *hrefs);
|
||
|
static void hrefs_done(blob *b, tag_arguments_t *hrefs);
|
||
|
static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
|
||
|
|
||
|
static bool haveTooManyMIMEPartsPerMessage(size_t mimePartCnt, cli_ctx *ctx, mbox_status *rc);
|
||
|
static bool hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static bool haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static bool haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx, bool *heuristicFound);
|
||
|
static bool haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristicFound);
|
||
|
|
||
|
/* Maximum line length according to RFC2821 */
#define RFC2821LENGTH 1000

/* Hashcodes for our hash tables (values stored in rfc821headers[]) */
#define CONTENT_TYPE 1
#define CONTENT_TRANSFER_ENCODING 2
#define CONTENT_DISPOSITION 3

/* Mime sub types (values stored in mimeSubtypes[]) */
#define PLAIN 1
#define ENRICHED 2
#define HTML 3
#define RICHTEXT 4
#define MIXED 5
#define ALTERNATIVE 6 /* RFC1521 */
#define DIGEST 7
#define SIGNED 8
#define PARALLEL 9
#define RELATED 10     /* RFC2387 */
#define REPORT 11      /* RFC1892 */
#define APPLEDOUBLE 12 /* Handling of this is only rudimentary for now */

/*
 * RFC3458
 * Drafts stated to treat it as mixed if it is not known. This disappeared
 * in the final version (except when talking about voice-message), but it
 * is good enough for us since we do no validation of coversheet presence
 * etc. (which also has disappeared in the final version)
 */
#define FAX MIXED

/*
 * e.g. RFC2015
 *      Content-Type: multipart/encrypted;
 *      boundary="nextPart1383049.XCRrrar2yq";
 *      protocol="application/pgp-encrypted"
 */
#define ENCRYPTED 13

/*
 * BeOS; expect two parts: the file and its attributes. The attributes
 * part comes as
 *      Content-Type: application/x-be_attribute
 *      name="foo"
 * I can't find where it is defined, any pointers would be appreciated.
 * For now we treat it as multipart/related
 */
#define X_BFILE RELATED

#define KNOWBOT 14 /* Unknown and undocumented format? */

/*
 * Limits behind the Heuristics.Limits.Exceeded.Email* detections; each one
 * is compared against a counter by the matching hitLineFoldCnt() /
 * haveTooMany*() helper.
 */
#define HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER (256 * 1024)
#define HEURISTIC_EMAIL_MAX_HEADER_BYTES (1024 * 256)
#define HEURISTIC_EMAIL_MAX_HEADERS 1024
#define HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE 1024
#define HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER 256
|
||
|
|
||
|
static const struct tableinit {
|
||
|
const char *key;
|
||
|
int value;
|
||
|
} rfc821headers[] = {
|
||
|
/* TODO: make these regular expressions */
|
||
|
{"Content-Type", CONTENT_TYPE},
|
||
|
{"Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING},
|
||
|
{"Content-Disposition", CONTENT_DISPOSITION},
|
||
|
{NULL, 0}},
|
||
|
mimeSubtypes[] = {/* see RFC2045 */
|
||
|
/* subtypes of Text */
|
||
|
{"plain", PLAIN},
|
||
|
{"enriched", ENRICHED},
|
||
|
{"html", HTML},
|
||
|
{"richtext", RICHTEXT},
|
||
|
/* subtypes of Multipart */
|
||
|
{"mixed", MIXED},
|
||
|
{"alternative", ALTERNATIVE},
|
||
|
{"digest", DIGEST},
|
||
|
{"signed", SIGNED},
|
||
|
{"parallel", PARALLEL},
|
||
|
{"related", RELATED},
|
||
|
{"report", REPORT},
|
||
|
{"appledouble", APPLEDOUBLE},
|
||
|
{"fax-message", FAX},
|
||
|
{"encrypted", ENCRYPTED},
|
||
|
{"x-bfile", X_BFILE}, /* BeOS */
|
||
|
{"knowbot", KNOWBOT}, /* ??? */
|
||
|
{"knowbot-metadata", KNOWBOT}, /* ??? */
|
||
|
{"knowbot-code", KNOWBOT}, /* ??? */
|
||
|
{"knowbot-state", KNOWBOT}, /* ??? */
|
||
|
{NULL, 0}},
|
||
|
mimeTypeStr[] = {{"NOMIME", NOMIME}, {"APPLICATION", APPLICATION}, {"AUDIO", AUDIO}, {"IMAGE", IMAGE}, {"MESSAGE", MESSAGE}, {"MULTIPART", MULTIPART}, {"TEXT", TEXT}, {"VIDEO", VIDEO}, {"MEXTENSION", MEXTENSION}, {NULL, 0}}, encTypeStr[] = {{"NOENCODING", NOENCODING}, {"QUOTEDPRINTABLE", QUOTEDPRINTABLE}, {"BASE64", BASE64}, {"EIGHTBIT", EIGHTBIT}, {"BINARY", BINARY}, {"UUENCODE", UUENCODE}, {"YENCODE", YENCODE}, {"EEXTENSION", EEXTENSION}, {"BINHEX", BINHEX}, {NULL, 0}};
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||
|
#endif
|
||
|
static table_t *rfc821 = NULL;
|
||
|
static table_t *subtype = NULL;
|
||
|
|
||
|
/*
 * Public entry point: validate the arguments and hand the mail held in
 * ctx->fmap to cli_parse_mbox().
 *
 * dir  directory argument forwarded to the parser (must not be NULL)
 * ctx  scan context; its fmap supplies the data to parse (must not be NULL)
 *
 * Returns CL_ENULLARG when a required argument is missing, otherwise
 * whatever cli_parse_mbox() returns.
 */
int cli_mbox(const char *dir, cli_ctx *ctx)
{
    if (dir == NULL) {
        cli_dbgmsg("cli_mbox called with NULL dir\n");
        return CL_ENULLARG;
    }

    /* cli_parse_mbox() dereferences ctx->fmap straight away, so reject a
     * missing context or map here rather than crashing there */
    if ((ctx == NULL) || (ctx->fmap == NULL)) {
        cli_dbgmsg("cli_mbox called with NULL ctx or fmap\n");
        return CL_ENULLARG;
    }

    return cli_parse_mbox(dir, ctx);
}
|
||
|
|
||
|
/*
|
||
|
* TODO: when signal handling is added, need to remove temp files when a
|
||
|
* signal is received
|
||
|
* TODO: add option to scan in memory not via temp files, perhaps with a
|
||
|
* named pipe or memory mapped file, though this won't work on big e-mails
|
||
|
* containing many levels of encapsulated messages - it'd just take too much
|
||
|
* RAM
|
||
|
* TODO: parse .msg format files
|
||
|
* TODO: fully handle AppleDouble format, see
|
||
|
* http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
|
||
|
* TODO: ensure parseEmailHeaders is always called before parseEmailBody
|
||
|
* TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
|
||
|
* TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
|
||
|
* e.g. \0Content-Type: application/binary;
|
||
|
*/
|
||
|
static int
|
||
|
cli_parse_mbox(const char *dir, cli_ctx *ctx)
|
||
|
{
|
||
|
int retcode;
|
||
|
message *body;
|
||
|
char buffer[RFC2821LENGTH + 1];
|
||
|
mbox_ctx mctx;
|
||
|
size_t at = 0;
|
||
|
fmap_t *map = ctx->fmap;
|
||
|
|
||
|
cli_dbgmsg("in mbox()\n");
|
||
|
|
||
|
if (!fmap_gets(map, buffer, &at, sizeof(buffer) - 1)) {
|
||
|
/* empty message */
|
||
|
return CL_CLEAN;
|
||
|
}
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
pthread_mutex_lock(&tables_mutex);
|
||
|
#endif
|
||
|
|
||
|
if (initialiseTables(&rfc821, &subtype) < 0) {
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
pthread_mutex_unlock(&tables_mutex);
|
||
|
#endif
|
||
|
return CL_EMEM;
|
||
|
}
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
pthread_mutex_unlock(&tables_mutex);
|
||
|
#endif
|
||
|
|
||
|
retcode = CL_SUCCESS;
|
||
|
body = NULL;
|
||
|
|
||
|
mctx.dir = dir;
|
||
|
mctx.rfc821Table = rfc821;
|
||
|
mctx.subtypeTable = subtype;
|
||
|
mctx.ctx = ctx;
|
||
|
mctx.files = 0;
|
||
|
#if HAVE_JSON
|
||
|
mctx.wrkobj = ctx->wrkproperty;
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* Is it a UNIX style mbox with more than one
|
||
|
* mail message, or just a single mail message?
|
||
|
*
|
||
|
* TODO: It would be better if we called cli_magic_scan_dir here rather than
|
||
|
* in cli_scanmail. Then we could improve the way mailboxes with more
|
||
|
* than one message is handled, e.g. giving a better indication of
|
||
|
* which message within the mailbox is infected
|
||
|
*/
|
||
|
/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
|
||
|
if (strncmp(buffer, "From ", 5) == 0) {
|
||
|
/*
|
||
|
* Have been asked to check a UNIX style mbox file, which
|
||
|
* may contain more than one e-mail message to decode
|
||
|
*
|
||
|
* It would be far better for scanners.c to do this splitting
|
||
|
* and do this
|
||
|
* FOR EACH mail in the mailbox
|
||
|
* DO
|
||
|
* pass this mail to cli_mbox --
|
||
|
* scan this file
|
||
|
* IF this file has a virus quit
|
||
|
* THEN
|
||
|
* return CL_VIRUS
|
||
|
* FI
|
||
|
* END
|
||
|
* This would remove a problem with this code that it can
|
||
|
* fill up the tmp directory before it starts scanning
|
||
|
*/
|
||
|
bool lastLineWasEmpty;
|
||
|
int messagenumber;
|
||
|
message *m = messageCreate(); /*Create an empty email */
|
||
|
|
||
|
if (m == NULL)
|
||
|
return CL_EMEM;
|
||
|
|
||
|
lastLineWasEmpty = false;
|
||
|
messagenumber = 1;
|
||
|
messageSetCTX(m, ctx);
|
||
|
|
||
|
do {
|
||
|
cli_chomp(buffer);
|
||
|
/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) */
|
||
|
if (lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
|
||
|
cli_dbgmsg("Deal with message number %d\n", messagenumber++);
|
||
|
/*
|
||
|
* End of a message in the mail box
|
||
|
*/
|
||
|
bool heuristicFound = false;
|
||
|
body = parseEmailHeaders(m, rfc821, &heuristicFound);
|
||
|
if (body == NULL) {
|
||
|
messageReset(m);
|
||
|
messageSetCTX(m, ctx);
|
||
|
if (heuristicFound) {
|
||
|
retcode = CL_VIRUS;
|
||
|
break;
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
messageSetCTX(body, ctx);
|
||
|
messageDestroy(m);
|
||
|
if (messageGetBody(body)) {
|
||
|
mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
|
||
|
if (rc == FAIL) {
|
||
|
m = body;
|
||
|
messageReset(m);
|
||
|
messageSetCTX(m, ctx);
|
||
|
continue;
|
||
|
} else if (rc == VIRUS) {
|
||
|
cli_dbgmsg("Message number %d is infected\n",
|
||
|
messagenumber - 1);
|
||
|
retcode = CL_VIRUS;
|
||
|
m = NULL;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
/*
|
||
|
* Starting a new message, throw away all the
|
||
|
* information about the old one. It would
|
||
|
* be best to be able to scan this message
|
||
|
* now, but cli_magic_scan_file needs arguments
|
||
|
* that haven't been passed here so it can't be
|
||
|
* called
|
||
|
*/
|
||
|
m = body;
|
||
|
messageReset(m);
|
||
|
messageSetCTX(m, ctx);
|
||
|
|
||
|
cli_dbgmsg("Finished processing message\n");
|
||
|
} else
|
||
|
lastLineWasEmpty = (bool)(buffer[0] == '\0');
|
||
|
|
||
|
if (isuuencodebegin(buffer)) {
|
||
|
/*
|
||
|
* Fast track visa to uudecode.
|
||
|
* TODO: binhex, yenc
|
||
|
*/
|
||
|
if (uudecodeFile(m, buffer, dir, map, &at) < 0)
|
||
|
if (messageAddStr(m, buffer) < 0)
|
||
|
break;
|
||
|
} else
|
||
|
/* at this point, the \n has been removed */
|
||
|
if (messageAddStr(m, buffer) < 0)
|
||
|
break;
|
||
|
} while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1));
|
||
|
|
||
|
if (retcode == CL_SUCCESS) {
|
||
|
cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
|
||
|
bool heuristicFound = false;
|
||
|
body = parseEmailHeaders(m, rfc821, &heuristicFound);
|
||
|
if (heuristicFound) {
|
||
|
retcode = CL_VIRUS;
|
||
|
}
|
||
|
}
|
||
|
if (m)
|
||
|
messageDestroy(m);
|
||
|
} else {
|
||
|
/*
|
||
|
* It's a single message, parse the headers then the body
|
||
|
*/
|
||
|
if (strncmp(buffer, "P I ", 4) == 0)
|
||
|
/*
|
||
|
* CommuniGate Pro format: ignore headers until
|
||
|
* blank line
|
||
|
*/
|
||
|
while (fmap_gets(map, buffer, &at, sizeof(buffer) - 1) &&
|
||
|
(strchr("\r\n", buffer[0]) == NULL))
|
||
|
;
|
||
|
/* getline_from_mbox could be using unlocked_stdio(3),
|
||
|
* so lock file here */
|
||
|
/*
|
||
|
* Ignore any blank lines at the top of the message
|
||
|
*/
|
||
|
while (strchr("\r\n", buffer[0]) &&
|
||
|
(getline_from_mbox(buffer, sizeof(buffer) - 1, map, &at) != NULL))
|
||
|
;
|
||
|
|
||
|
buffer[sizeof(buffer) - 1] = '\0';
|
||
|
|
||
|
bool heuristicFound = false;
|
||
|
body = parseEmailFile(map, &at, rfc821, buffer, dir, ctx, &heuristicFound);
|
||
|
if (heuristicFound) {
|
||
|
retcode = CL_VIRUS;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (body) {
|
||
|
/*
|
||
|
* Write out the last entry in the mailbox
|
||
|
*/
|
||
|
if ((retcode == CL_SUCCESS) && messageGetBody(body)) {
|
||
|
messageSetCTX(body, ctx);
|
||
|
switch (parseEmailBody(body, NULL, &mctx, 0)) {
|
||
|
case OK:
|
||
|
case OK_ATTACHMENTS_NOT_SAVED:
|
||
|
break;
|
||
|
case FAIL:
|
||
|
/*
|
||
|
* beware: cli_magic_scan_desc(),
|
||
|
* changes this into CL_CLEAN, so only
|
||
|
* use it to inform the higher levels
|
||
|
* that we couldn't decode it because
|
||
|
* it isn't an mbox, not to signal
|
||
|
* decoding errors on what *is* a valid
|
||
|
* mbox
|
||
|
*/
|
||
|
retcode = CL_EFORMAT;
|
||
|
break;
|
||
|
case MAXREC:
|
||
|
retcode = CL_EMAXREC;
|
||
|
cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion"); // Doing this now because it's actually tracking email recursion,-
|
||
|
// not fmap recursion, but it still is aborting with stuff not scanned.
|
||
|
// Also, we didn't have access to the ctx when this happened earlier.
|
||
|
break;
|
||
|
case MAXFILES:
|
||
|
retcode = CL_EMAXFILES;
|
||
|
cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); // Doing this now because it's actually tracking email parts,-
|
||
|
// not actual files, but it still is aborting with stuff not scanned.
|
||
|
// Also, we didn't have access to the ctx when this happened earlier.
|
||
|
break;
|
||
|
case VIRUS:
|
||
|
retcode = CL_VIRUS;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (body->isTruncated && retcode == CL_SUCCESS)
|
||
|
retcode = CL_EMEM;
|
||
|
/*
|
||
|
* Tidy up and quit
|
||
|
*/
|
||
|
messageDestroy(body);
|
||
|
}
|
||
|
|
||
|
if ((retcode == CL_CLEAN) && ctx->found_possibly_unwanted &&
|
||
|
(*ctx->virname == NULL || SCAN_ALLMATCHES)) {
|
||
|
retcode = cli_append_virus(ctx, "Heuristics.Phishing.Email");
|
||
|
ctx->found_possibly_unwanted = 0;
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("cli_mbox returning %d\n", retcode);
|
||
|
|
||
|
return retcode;
|
||
|
}
|
||
|
|
||
|
/*TODO: move these to a header.*/
|
||
|
/*
 * Allocation helpers.
 *
 * DO_STRDUP, DO_MALLOC, DO_CALLOC and DO_VERIFY_POINTER jump to a label
 * named "done" when the resulting/tested pointer is NULL, so they may only
 * be used inside a function that provides that label. Every macro argument
 * is parenthesised so that expressions such as "a ? b : c" can be passed
 * safely.
 */

/* var = strdup(buf); goto done on failure */
#define DO_STRDUP(buf, var)    \
    do {                       \
        (var) = strdup(buf);   \
        if (NULL == (var)) {   \
            goto done;         \
        }                      \
    } while (0)

/* free var (free(NULL) is a no-op) and reset it to NULL */
#define DO_FREE(var)     \
    do {                 \
        free(var);       \
        (var) = NULL;    \
    } while (0)

/* var = malloc(size); goto done on failure */
#define DO_MALLOC(var, size)   \
    do {                       \
        (var) = malloc(size);  \
        if (NULL == (var)) {   \
            goto done;         \
        }                      \
    } while (0)

/* var = zeroed array of "size" elements of *var; goto done on failure */
#define DO_CALLOC(var, size)                   \
    do {                                       \
        (var) = calloc((size), sizeof *(var)); \
        if (NULL == (var)) {                   \
            goto done;                         \
        }                                      \
    } while (0)

/* goto done if ptr is NULL */
#define DO_VERIFY_POINTER(ptr) \
    do {                       \
        if (NULL == (ptr)) {   \
            goto done;         \
        }                      \
    } while (0)
|
||
|
|
||
|
/* Number of payload bytes one ReadStruct can hold (excluding the NUL) */
#define READ_STRUCT_BUFFER_LEN 1024

/*
 * Node of a singly linked list of text chunks. appendReadStruct() fills the
 * nodes and getMallocedBufferFromList() joins them back into one string.
 */
typedef struct _ReadStruct {
    /* chunk text; one spare byte for a terminating NUL */
    char buffer[READ_STRUCT_BUFFER_LEN + 1];

    /* number of bytes of buffer currently in use */
    size_t bufferLen;

    /* following chunk, or NULL for the last node */
    struct _ReadStruct *next;
} ReadStruct;
|
||
|
|
||
|
static ReadStruct *
|
||
|
appendReadStruct(ReadStruct *rs, const char *const buffer)
|
||
|
{
|
||
|
if (NULL == rs) {
|
||
|
cli_dbgmsg("appendReadStruct: Invalid argument\n");
|
||
|
goto done;
|
||
|
}
|
||
|
|
||
|
size_t spaceLeft = (READ_STRUCT_BUFFER_LEN - rs->bufferLen);
|
||
|
|
||
|
if (strlen(buffer) > spaceLeft) {
|
||
|
ReadStruct *next = NULL;
|
||
|
int part = spaceLeft;
|
||
|
strncpy(&(rs->buffer[rs->bufferLen]), buffer, part);
|
||
|
rs->bufferLen += part;
|
||
|
|
||
|
DO_CALLOC(next, 1);
|
||
|
|
||
|
rs->next = next;
|
||
|
strcpy(next->buffer, &(buffer[part]));
|
||
|
next->bufferLen = strlen(&(buffer[part]));
|
||
|
|
||
|
rs = next;
|
||
|
} else {
|
||
|
strcpy(&(rs->buffer[rs->bufferLen]), buffer);
|
||
|
rs->bufferLen += strlen(buffer);
|
||
|
}
|
||
|
|
||
|
done:
|
||
|
return rs;
|
||
|
}
|
||
|
|
||
|
static char *
|
||
|
getMallocedBufferFromList(const ReadStruct *head)
|
||
|
{
|
||
|
|
||
|
const ReadStruct *rs = head;
|
||
|
int bufferLen = 1;
|
||
|
char *working = NULL;
|
||
|
char *ret = NULL;
|
||
|
|
||
|
while (rs) {
|
||
|
bufferLen += rs->bufferLen;
|
||
|
rs = rs->next;
|
||
|
}
|
||
|
|
||
|
DO_MALLOC(working, bufferLen);
|
||
|
|
||
|
rs = head;
|
||
|
bufferLen = 0;
|
||
|
while (rs) {
|
||
|
memcpy(&(working[bufferLen]), rs->buffer, rs->bufferLen);
|
||
|
bufferLen += rs->bufferLen;
|
||
|
working[bufferLen] = 0;
|
||
|
rs = rs->next;
|
||
|
}
|
||
|
|
||
|
ret = working;
|
||
|
done:
|
||
|
if (NULL == ret) {
|
||
|
DO_FREE(working);
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
freeList(ReadStruct *head)
|
||
|
{
|
||
|
while (head) {
|
||
|
ReadStruct *rs = head->next;
|
||
|
DO_FREE(head);
|
||
|
head = rs;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifndef FREELIST_REALLOC
/*
 * Reset a chunk list for reuse: free every node after "head" (only done
 * when "curr" has moved past head), mark head as empty and point "curr"
 * back at it. Both arguments must be ReadStruct pointer lvalues.
 */
#define FREELIST_REALLOC(head, curr)    \
    do {                                \
        if ((curr) != (head)) {         \
            freeList((head)->next);     \
        }                               \
        (head)->bufferLen = 0;          \
        (head)->next      = NULL;       \
        (curr)            = (head);     \
    } while (0)
#endif /*FREELIST_REALLOC*/
|
||
|
|
||
|
/*
 * Check if we have repeated blank lines with only a semicolon at the end.
 * Semicolon is a delimiter for parameters, but if there is no data, it
 * isn't a parameter. Allow the first one because it may be the
 * continuation of a previous line that actually had data in it.
 *
 * line             line to inspect; NULL is ignored (returns false and
 *                  leaves *lastWasOnlySemi alone)
 * lastWasOnlySemi  in/out: whether the previously inspected line consisted
 *                  only of blanks and semicolons; updated for this line
 *
 * Returns true when both this line and the previous one consist only of
 * blanks and semicolons, i.e. the caller should skip this line.
 */
static bool
doContinueMultipleEmptyOptions(const char *const line, bool *lastWasOnlySemi)
{
    const char *p;

    if (NULL == line) {
        return false;
    }

    /* single pass; no strlen() per iteration */
    for (p = line; *p != '\0'; p++) {
        /* <ctype.h> classifiers need an unsigned char value */
        if (!isblank((unsigned char)*p) && (';' != *p)) {
            *lastWasOnlySemi = false;
            return false;
        }
    }

    if (*lastWasOnlySemi) {
        return true;
    }

    *lastWasOnlySemi = true;
    return false;
}
|
||
|
|
||
|
static bool
|
||
|
hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
|
||
|
if (line) {
|
||
|
if (isblank(line[0])) {
|
||
|
(*lineFoldCnt)++;
|
||
|
} else {
|
||
|
(*lineFoldCnt) = 0;
|
||
|
}
|
||
|
|
||
|
if ((*lineFoldCnt) >= HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER) {
|
||
|
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
|
||
|
cli_append_virus(ctx, "Heuristics.Limits.Exceeded.EmailLineFoldCnt");
|
||
|
*heuristicFound = true;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static bool
|
||
|
haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
|
||
|
if (totalLen > HEURISTIC_EMAIL_MAX_HEADER_BYTES) {
|
||
|
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
|
||
|
cli_append_virus(ctx, "Heuristics.Limits.Exceeded.EmailHeaderBytes");
|
||
|
*heuristicFound = true;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static bool
|
||
|
haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
|
||
|
if (totalHeaderCnt > HEURISTIC_EMAIL_MAX_HEADERS) {
|
||
|
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
|
||
|
cli_append_virus(ctx, "Heuristics.Limits.Exceeded.EmailHeaders");
|
||
|
*heuristicFound = true;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static bool
|
||
|
haveTooManyMIMEPartsPerMessage(size_t mimePartCnt, cli_ctx *ctx, mbox_status *rc)
|
||
|
{
|
||
|
|
||
|
if (mimePartCnt >= HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE) {
|
||
|
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
|
||
|
cli_append_virus(ctx, "Heuristics.Limits.Exceeded.EmailMIMEPartsPerMessage");
|
||
|
*rc = VIRUS;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static bool
|
||
|
haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
|
||
|
if (argCnt >= HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER) {
|
||
|
if (SCAN_HEURISTIC_EXCEEDS_MAX) {
|
||
|
cli_append_virus(ctx, "Heuristics.Limits.Exceeded.EmailMIMEArguments");
|
||
|
*heuristicFound = true;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Read in an email message from map (starting at offset *at), parse it, and return the message
|
||
|
*
|
||
|
* FIXME: files full of new lines and nothing else are
|
||
|
* handled ungracefully...
|
||
|
*/
|
||
|
static message *
|
||
|
parseEmailFile(fmap_t *map, size_t *at, const table_t *rfc821, const char *firstLine, const char *dir, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
bool inHeader = true;
|
||
|
bool bodyIsEmpty = true;
|
||
|
bool lastWasBlank = false, lastBodyLineWasBlank = false;
|
||
|
message *ret;
|
||
|
bool anyHeadersFound = false;
|
||
|
int commandNumber = -1;
|
||
|
char *boundary = NULL;
|
||
|
char buffer[RFC2821LENGTH + 1];
|
||
|
bool lastWasOnlySemi = false;
|
||
|
int err = 1;
|
||
|
size_t totalHeaderBytes = 0;
|
||
|
size_t totalHeaderCnt = 0;
|
||
|
|
||
|
size_t lineFoldCnt = 0;
|
||
|
|
||
|
*heuristicFound = false;
|
||
|
|
||
|
ReadStruct *head = NULL;
|
||
|
ReadStruct *curr = NULL;
|
||
|
DO_CALLOC(head, 1);
|
||
|
curr = head;
|
||
|
|
||
|
cli_dbgmsg("parseEmailFile\n");
|
||
|
|
||
|
ret = messageCreate();
|
||
|
if (ret == NULL)
|
||
|
return NULL;
|
||
|
|
||
|
strncpy(buffer, firstLine, sizeof(buffer) - 1);
|
||
|
do {
|
||
|
const char *line;
|
||
|
|
||
|
(void)cli_chomp(buffer);
|
||
|
|
||
|
if (buffer[0] == '\0')
|
||
|
line = NULL;
|
||
|
else
|
||
|
line = buffer;
|
||
|
|
||
|
if (doContinueMultipleEmptyOptions(line, &lastWasOnlySemi)) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (hitLineFoldCnt(line, &lineFoldCnt, ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Don't blank lines which are only spaces from headers,
|
||
|
* otherwise they'll be treated as the end of header marker
|
||
|
*/
|
||
|
if (lastWasBlank) {
|
||
|
lastWasBlank = false;
|
||
|
if (boundaryStart(buffer, boundary)) {
|
||
|
cli_dbgmsg("Found a header line with space that should be blank\n");
|
||
|
inHeader = false;
|
||
|
}
|
||
|
}
|
||
|
if (inHeader) {
|
||
|
cli_dbgmsg("parseEmailFile: check '%s'\n", buffer);
|
||
|
|
||
|
/*
|
||
|
* Ensure wide characters are handled where
|
||
|
* sizeof(char) > 1
|
||
|
*/
|
||
|
if (line && isspace(line[0] & 0xFF)) {
|
||
|
char copy[sizeof(buffer)];
|
||
|
|
||
|
strcpy(copy, buffer);
|
||
|
strstrip(copy);
|
||
|
if (copy[0] == '\0') {
|
||
|
/*
|
||
|
* The header line contains only white
|
||
|
* space. This is not the end of the
|
||
|
* headers according to RFC2822, but
|
||
|
* some MUAs will handle it as though
|
||
|
* it were, and virus writers exploit
|
||
|
* this bug. We can't just break from
|
||
|
* the loop here since that would allow
|
||
|
* other exploits such as inserting a
|
||
|
* white space line before the
|
||
|
* content-type line. So we just have
|
||
|
* to make a best guess. Sigh.
|
||
|
*/
|
||
|
if (head->bufferLen) {
|
||
|
char *header = getMallocedBufferFromList(head);
|
||
|
int needContinue = 0;
|
||
|
DO_VERIFY_POINTER(header);
|
||
|
|
||
|
totalHeaderCnt++;
|
||
|
if (haveTooManyEmailHeaders(totalHeaderCnt, ctx, heuristicFound)) {
|
||
|
DO_FREE(header);
|
||
|
break;
|
||
|
}
|
||
|
needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
|
||
|
if (*heuristicFound) {
|
||
|
DO_FREE(header);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
DO_FREE(header);
|
||
|
FREELIST_REALLOC(head, curr);
|
||
|
|
||
|
if (needContinue) {
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (boundary ||
|
||
|
((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
|
||
|
lastWasBlank = true;
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if ((line == NULL) && (0 == head->bufferLen)) { /* empty line */
|
||
|
/*
|
||
|
* A blank line signifies the end of
|
||
|
* the header and the start of the text
|
||
|
*/
|
||
|
if (!anyHeadersFound)
|
||
|
/* Ignore the junk at the top */
|
||
|
continue;
|
||
|
|
||
|
cli_dbgmsg("End of header information\n");
|
||
|
inHeader = false;
|
||
|
bodyIsEmpty = true;
|
||
|
} else {
|
||
|
char *ptr;
|
||
|
const char *lookahead;
|
||
|
bool lineAdded = true;
|
||
|
|
||
|
if (0 == head->bufferLen) {
|
||
|
char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
|
||
|
|
||
|
/*
|
||
|
* Continuation of line we're ignoring?
|
||
|
*/
|
||
|
if (isblank(line[0]))
|
||
|
continue;
|
||
|
|
||
|
/*
|
||
|
* Is this a header we're interested in?
|
||
|
*/
|
||
|
if ((strchr(line, ':') == NULL) ||
|
||
|
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
|
||
|
if (strncmp(line, "From ", 5) == 0)
|
||
|
anyHeadersFound = true;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
ptr = rfc822comments(cmd, out);
|
||
|
commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
|
||
|
|
||
|
switch (commandNumber) {
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
case CONTENT_DISPOSITION:
|
||
|
case CONTENT_TYPE:
|
||
|
anyHeadersFound = true;
|
||
|
break;
|
||
|
default:
|
||
|
if (!anyHeadersFound)
|
||
|
anyHeadersFound = usefulHeader(commandNumber, cmd);
|
||
|
continue;
|
||
|
}
|
||
|
curr = appendReadStruct(curr, line);
|
||
|
if (NULL == curr) {
|
||
|
if (ret) {
|
||
|
ret->isTruncated = true;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
} else if (line != NULL) {
|
||
|
curr = appendReadStruct(curr, line);
|
||
|
} else {
|
||
|
lineAdded = false;
|
||
|
}
|
||
|
|
||
|
if (lineAdded) {
|
||
|
totalHeaderBytes += strlen(line);
|
||
|
if (haveTooManyHeaderBytes(totalHeaderBytes, ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ((lookahead = fmap_need_off_once(map, *at, 1))) {
|
||
|
/*
|
||
|
* Section B.2 of RFC822 says TAB or
|
||
|
* SPACE means a continuation of the
|
||
|
* previous entry.
|
||
|
*
|
||
|
* Add all the arguments on the line
|
||
|
*/
|
||
|
if (isblank(*lookahead))
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle broken headers, where the next
|
||
|
* line isn't indented by whitespace
|
||
|
*/
|
||
|
{
|
||
|
char *header = getMallocedBufferFromList(head); /*This is the issue */
|
||
|
int needContinue = 0;
|
||
|
DO_VERIFY_POINTER(header);
|
||
|
|
||
|
needContinue = (header[strlen(header) - 1] == ';');
|
||
|
if (0 == needContinue) {
|
||
|
needContinue = (line && (count_quotes(header) & 1));
|
||
|
}
|
||
|
|
||
|
if (0 == needContinue) {
|
||
|
totalHeaderCnt++;
|
||
|
if (haveTooManyEmailHeaders(totalHeaderCnt, ctx, heuristicFound)) {
|
||
|
DO_FREE(header);
|
||
|
break;
|
||
|
}
|
||
|
needContinue = (parseEmailHeader(ret, header, rfc821, ctx, heuristicFound) < 0);
|
||
|
if (*heuristicFound) {
|
||
|
DO_FREE(header);
|
||
|
break;
|
||
|
}
|
||
|
/*Check total headers here;*/
|
||
|
}
|
||
|
|
||
|
DO_FREE(header);
|
||
|
if (needContinue) {
|
||
|
continue;
|
||
|
}
|
||
|
FREELIST_REALLOC(head, curr);
|
||
|
}
|
||
|
}
|
||
|
} else if (line && isuuencodebegin(line)) {
|
||
|
/*
|
||
|
* Fast track visa to uudecode.
|
||
|
* TODO: binhex, yenc
|
||
|
*/
|
||
|
bodyIsEmpty = false;
|
||
|
if (uudecodeFile(ret, line, dir, map, at) < 0)
|
||
|
if (messageAddStr(ret, line) < 0)
|
||
|
break;
|
||
|
} else {
|
||
|
if (line == NULL) {
|
||
|
/*
|
||
|
* Although this would save time and RAM, some
|
||
|
* phish signatures have been built which need
|
||
|
* the blank lines
|
||
|
*/
|
||
|
if (lastBodyLineWasBlank &&
|
||
|
(messageGetMimeType(ret) != TEXT)) {
|
||
|
cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
|
||
|
continue;
|
||
|
}
|
||
|
lastBodyLineWasBlank = true;
|
||
|
} else {
|
||
|
if (bodyIsEmpty) {
|
||
|
/*
|
||
|
* Broken message: new line in the
|
||
|
* middle of the headers, so the first
|
||
|
* line of the body is in fact
|
||
|
* the last lines of the header
|
||
|
*/
|
||
|
if (newline_in_header(line))
|
||
|
continue;
|
||
|
bodyIsEmpty = false;
|
||
|
}
|
||
|
lastBodyLineWasBlank = false;
|
||
|
}
|
||
|
|
||
|
if (messageAddStr(ret, line) < 0)
|
||
|
break;
|
||
|
}
|
||
|
} while (getline_from_mbox(buffer, sizeof(buffer) - 1, map, at) != NULL);
|
||
|
|
||
|
err = 0;
|
||
|
done:
|
||
|
if (err) {
|
||
|
cli_errmsg("parseEmailFile: ERROR parsing file\n");
|
||
|
ret->isTruncated = true;
|
||
|
}
|
||
|
|
||
|
DO_FREE(boundary);
|
||
|
|
||
|
freeList(head);
|
||
|
|
||
|
if (!anyHeadersFound) {
|
||
|
/*
|
||
|
* False positive in believing we have an e-mail when we don't
|
||
|
*/
|
||
|
messageDestroy(ret);
|
||
|
cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (*heuristicFound) {
|
||
|
messageDestroy(ret);
|
||
|
cli_dbgmsg("parseEmailFile: found heuristic\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("parseEmailFile: return\n");
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The given message contains a raw e-mail.
|
||
|
*
|
||
|
* Returns the message's body with the correct arguments set, empties the
|
||
|
* given message's contents (note that it isn't destroyed)
|
||
|
*
|
||
|
* TODO: remove the duplication with parseEmailFile
|
||
|
*/
|
||
|
static message *
|
||
|
parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
|
||
|
{
|
||
|
bool inHeader = true;
|
||
|
bool bodyIsEmpty = true;
|
||
|
text *t;
|
||
|
message *ret;
|
||
|
bool anyHeadersFound = false;
|
||
|
int commandNumber = -1;
|
||
|
char *fullline = NULL;
|
||
|
size_t fulllinelength = 0;
|
||
|
bool lastWasOnlySemi = false;
|
||
|
size_t lineFoldCnt = 0;
|
||
|
size_t totalHeaderCnt = 0;
|
||
|
|
||
|
cli_dbgmsg("parseEmailHeaders\n");
|
||
|
|
||
|
*heuristicFound = false;
|
||
|
|
||
|
if (m == NULL)
|
||
|
return NULL;
|
||
|
|
||
|
ret = messageCreate();
|
||
|
|
||
|
for (t = messageGetBody(m); t; t = t->t_next) {
|
||
|
const char *line;
|
||
|
|
||
|
if (t->t_line)
|
||
|
line = lineGetData(t->t_line);
|
||
|
else
|
||
|
line = NULL;
|
||
|
|
||
|
if (doContinueMultipleEmptyOptions(line, &lastWasOnlySemi)) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (hitLineFoldCnt(line, &lineFoldCnt, m->ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (inHeader) {
|
||
|
cli_dbgmsg("parseEmailHeaders: check '%s'\n",
|
||
|
line ? line : "");
|
||
|
if (line == NULL) {
|
||
|
/*
|
||
|
* A blank line signifies the end of
|
||
|
* the header and the start of the text
|
||
|
*/
|
||
|
cli_dbgmsg("End of header information\n");
|
||
|
if (!anyHeadersFound) {
|
||
|
cli_dbgmsg("Nothing interesting in the header\n");
|
||
|
break;
|
||
|
}
|
||
|
inHeader = false;
|
||
|
bodyIsEmpty = true;
|
||
|
} else {
|
||
|
char *ptr;
|
||
|
bool lineAdded = true;
|
||
|
|
||
|
if (fullline == NULL) {
|
||
|
char cmd[RFC2821LENGTH + 1];
|
||
|
|
||
|
/*
|
||
|
* Continuation of line we're ignoring?
|
||
|
*/
|
||
|
if (isblank(line[0]))
|
||
|
continue;
|
||
|
|
||
|
/*
|
||
|
* Is this a header we're interested in?
|
||
|
*/
|
||
|
if ((strchr(line, ':') == NULL) ||
|
||
|
(cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
|
||
|
if (strncmp(line, "From ", 5) == 0)
|
||
|
anyHeadersFound = true;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
ptr = rfc822comments(cmd, NULL);
|
||
|
commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
|
||
|
if (ptr)
|
||
|
free(ptr);
|
||
|
|
||
|
switch (commandNumber) {
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
case CONTENT_DISPOSITION:
|
||
|
case CONTENT_TYPE:
|
||
|
anyHeadersFound = true;
|
||
|
break;
|
||
|
default:
|
||
|
if (!anyHeadersFound)
|
||
|
anyHeadersFound = usefulHeader(commandNumber, cmd);
|
||
|
continue;
|
||
|
}
|
||
|
fullline = cli_strdup(line);
|
||
|
fulllinelength = strlen(line) + 1;
|
||
|
} else if (line) {
|
||
|
fulllinelength += strlen(line) + 1;
|
||
|
ptr = cli_realloc(fullline, fulllinelength);
|
||
|
if (ptr == NULL)
|
||
|
continue;
|
||
|
fullline = ptr;
|
||
|
cli_strlcat(fullline, line, fulllinelength);
|
||
|
} else {
|
||
|
lineAdded = false;
|
||
|
}
|
||
|
|
||
|
/*continue doesn't seem right here, but that is what is done everywhere else when a malloc fails.*/
|
||
|
if (NULL == fullline) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (lineAdded) {
|
||
|
if (haveTooManyHeaderBytes(fulllinelength, m->ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (next_is_folded_header(t))
|
||
|
/* Add arguments to this line */
|
||
|
continue;
|
||
|
|
||
|
lineUnlink(t->t_line);
|
||
|
t->t_line = NULL;
|
||
|
|
||
|
if (count_quotes(fullline) & 1)
|
||
|
continue;
|
||
|
|
||
|
ptr = rfc822comments(fullline, NULL);
|
||
|
if (ptr) {
|
||
|
free(fullline);
|
||
|
fullline = ptr;
|
||
|
}
|
||
|
|
||
|
totalHeaderCnt++;
|
||
|
if (haveTooManyEmailHeaders(totalHeaderCnt, m->ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
if (parseEmailHeader(ret, fullline, rfc821, m->ctx, heuristicFound) < 0) {
|
||
|
continue;
|
||
|
}
|
||
|
if (*heuristicFound) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
free(fullline);
|
||
|
fullline = NULL;
|
||
|
}
|
||
|
} else {
|
||
|
if (bodyIsEmpty) {
|
||
|
if (line == NULL)
|
||
|
/* throw away leading blank lines */
|
||
|
continue;
|
||
|
/*
|
||
|
* Broken message: new line in the
|
||
|
* middle of the headers, so the first
|
||
|
* line of the body is in fact
|
||
|
* the last lines of the header
|
||
|
*/
|
||
|
if (newline_in_header(line))
|
||
|
continue;
|
||
|
bodyIsEmpty = false;
|
||
|
}
|
||
|
/*if(t->t_line && isuuencodebegin(t->t_line))
|
||
|
puts("FIXME: add fast visa here");*/
|
||
|
cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
|
||
|
messageMoveText(ret, t, m);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (fullline) {
|
||
|
if (*fullline) switch (commandNumber) {
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
case CONTENT_DISPOSITION:
|
||
|
case CONTENT_TYPE:
|
||
|
cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
|
||
|
}
|
||
|
free(fullline);
|
||
|
}
|
||
|
|
||
|
if (!anyHeadersFound) {
|
||
|
/*
|
||
|
* False positive in believing we have an e-mail when we don't
|
||
|
*/
|
||
|
messageDestroy(ret);
|
||
|
cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
if (*heuristicFound) {
|
||
|
messageDestroy(ret);
|
||
|
cli_dbgmsg("parseEmailHeaders: found a heuristic, delete message and stop parsing.\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("parseEmailHeaders: return\n");
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle a header line of an email message
|
||
|
*/
|
||
|
static int
|
||
|
parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
int ret = -1;
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
char *strptr;
|
||
|
#endif
|
||
|
const char *separator;
|
||
|
char *cmd, *copy, tokenseparator[2];
|
||
|
|
||
|
cli_dbgmsg("parseEmailHeader '%s'\n", line);
|
||
|
|
||
|
/*
|
||
|
* In RFC822 the separator between the key a value is a colon,
|
||
|
* e.g. Content-Transfer-Encoding: base64
|
||
|
* However some MUA's are lapse about this and virus writers exploit
|
||
|
* this hole, so we need to check all known possibilities
|
||
|
*/
|
||
|
for (separator = ":= "; *separator; separator++)
|
||
|
if (strchr(line, *separator) != NULL)
|
||
|
break;
|
||
|
|
||
|
if (*separator == '\0')
|
||
|
return -1;
|
||
|
|
||
|
copy = rfc2047(line);
|
||
|
if (copy == NULL) {
|
||
|
/* an RFC checker would return -1 here */
|
||
|
copy = cli_strdup(line);
|
||
|
if (NULL == copy) {
|
||
|
goto done;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
tokenseparator[0] = *separator;
|
||
|
tokenseparator[1] = '\0';
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
cmd = strtok_r(copy, tokenseparator, &strptr);
|
||
|
#else
|
||
|
cmd = strtok(copy, tokenseparator);
|
||
|
#endif
|
||
|
|
||
|
if (cmd && (strstrip(cmd) > 0)) {
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
char *arg = strtok_r(NULL, "", &strptr);
|
||
|
#else
|
||
|
char *arg = strtok(NULL, "");
|
||
|
#endif
|
||
|
|
||
|
if (arg) {
|
||
|
/*
|
||
|
* Found a header such as
|
||
|
* Content-Type: multipart/mixed;
|
||
|
* set arg to be
|
||
|
* "multipart/mixed" and cmd to
|
||
|
* be "Content-Type"
|
||
|
*/
|
||
|
ret = parseMimeHeader(m, cmd, rfc821, arg, ctx, heuristicFound);
|
||
|
}
|
||
|
}
|
||
|
done:
|
||
|
DO_FREE(copy);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
#if HAVE_LIBXML2
/*
 * Key tables passed to cli_msxml_parse_document() by parseRootMHTML() and
 * parseMHTMLComment(). Each entry presumably pairs an element name with the
 * name it is reported under and a set of MSXML_* flags - confirm against the
 * definition of struct key_entry.
 */

/* root html tags for microsoft office document */
static const struct key_entry mhtml_keys[] = {
    {"html", "RootHTML", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},

    {"head", "Head", MSXML_JSON_WRKPTR | MSXML_COMMENT_CB},
    {"meta", "Meta", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
    {"link", "Link", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
    {"script", "Script", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_VALUE}};
/* number of entries in mhtml_keys */
static size_t num_mhtml_keys = sizeof(mhtml_keys) / sizeof(mhtml_keys[0]);

/* embedded xml tags (comment) for microsoft office document */
static const struct key_entry mhtml_comment_keys[] = {
    {"o:documentproperties", "DocumentProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},
    {"o:author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lastauthor", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lastsaved", "LastSaved", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:version", "Version", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},

    {"o:officedocumentsettings", "DocumentSettings", MSXML_IGNORE_ELEM},
    {"w:worddocument", "WordDocument", MSXML_IGNORE_ELEM},
    {"w:latentstyles", "LatentStyles", MSXML_IGNORE_ELEM}};
/* number of entries in mhtml_comment_keys */
static size_t num_mhtml_comment_keys = sizeof(mhtml_comment_keys) / sizeof(mhtml_comment_keys[0]);
#endif
|
||
|
|
||
|
/*
|
||
|
* The related multipart root HTML file comment parsing wrapper.
|
||
|
*
|
||
|
* Attempts to leverage msxml parser, cannot operate without LIBXML2.
|
||
|
* This function is only used for Preclassification JSON.
|
||
|
*/
|
||
|
static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata)
|
||
|
{
|
||
|
cl_error_t ret = CL_SUCCESS;
|
||
|
|
||
|
#if HAVE_LIBXML2
|
||
|
const char *xmlsrt, *xmlend;
|
||
|
xmlTextReaderPtr reader;
|
||
|
|
||
|
UNUSEDPARAM(cbdata);
|
||
|
UNUSEDPARAM(wrkjobj);
|
||
|
|
||
|
xmlend = comment;
|
||
|
while ((xmlsrt = strstr(xmlend, "<xml>"))) {
|
||
|
xmlend = strstr(xmlsrt, "</xml>");
|
||
|
if (xmlend == NULL) {
|
||
|
cli_dbgmsg("parseMHTMLComment: unbounded xml tag\n");
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
reader = xmlReaderForMemory(xmlsrt, xmlend - xmlsrt + 6, "comment.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
|
||
|
if (!reader) {
|
||
|
cli_dbgmsg("parseMHTMLComment: cannot initialize xmlReader\n");
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
if (ctx->wrkproperty != NULL)
|
||
|
ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_MEM");
|
||
|
#endif
|
||
|
return ret; // libxml2 failed!
|
||
|
}
|
||
|
|
||
|
/* comment callback is not set to prevent recursion */
|
||
|
/* TODO: should we separate the key dictionaries? */
|
||
|
/* TODO: should we use the json object pointer? */
|
||
|
ret = cli_msxml_parse_document(ctx, reader, mhtml_comment_keys, num_mhtml_comment_keys, MSXML_FLAG_JSON, NULL);
|
||
|
|
||
|
xmlTextReaderClose(reader);
|
||
|
xmlFreeTextReader(reader);
|
||
|
if (ret != CL_SUCCESS)
|
||
|
return ret;
|
||
|
}
|
||
|
#else
|
||
|
UNUSEDPARAM(comment);
|
||
|
UNUSEDPARAM(ctx);
|
||
|
UNUSEDPARAM(wrkjobj);
|
||
|
UNUSEDPARAM(cbdata);
|
||
|
|
||
|
cli_dbgmsg("in parseMHTMLComment\n");
|
||
|
cli_dbgmsg("parseMHTMLComment: parsing html xml-comments requires libxml2!\n");
|
||
|
#endif
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The related multipart root HTML file parsing wrapper.
|
||
|
*
|
||
|
* Attempts to leverage msxml parser, cannot operate without LIBXML2.
|
||
|
* This function is only used for Preclassification JSON.
|
||
|
*/
|
||
|
static mbox_status
|
||
|
parseRootMHTML(mbox_ctx *mctx, message *m, text *t)
|
||
|
{
|
||
|
cli_ctx *ctx = mctx->ctx;
|
||
|
#if HAVE_LIBXML2
|
||
|
#ifdef LIBXML_HTML_ENABLED
|
||
|
struct msxml_ctx mxctx;
|
||
|
blob *input = NULL;
|
||
|
htmlDocPtr htmlDoc;
|
||
|
xmlTextReaderPtr reader;
|
||
|
int ret = CL_SUCCESS;
|
||
|
mbox_status rc = OK;
|
||
|
#if HAVE_JSON
|
||
|
json_object *rhtml;
|
||
|
#endif
|
||
|
|
||
|
cli_dbgmsg("in parseRootMHTML\n");
|
||
|
|
||
|
if (ctx == NULL)
|
||
|
return OK;
|
||
|
|
||
|
if (m == NULL && t == NULL)
|
||
|
return OK;
|
||
|
|
||
|
if (m != NULL)
|
||
|
input = messageToBlob(m, 0);
|
||
|
else /* t != NULL */
|
||
|
input = textToBlob(t, NULL, 0);
|
||
|
|
||
|
if (input == NULL)
|
||
|
return OK;
|
||
|
|
||
|
htmlDoc = htmlReadMemory((char *)input->data, input->len, "mhtml.html", NULL, CLAMAV_MIN_XMLREADER_FLAGS | HTML_PARSE_NOWARNING);
|
||
|
if (htmlDoc == NULL) {
|
||
|
cli_dbgmsg("parseRootMHTML: cannot initialize read html document\n");
|
||
|
#if HAVE_JSON
|
||
|
if (ctx->wrkproperty != NULL)
|
||
|
ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_HTML_READ");
|
||
|
if (ret != CL_SUCCESS)
|
||
|
rc = FAIL;
|
||
|
#endif
|
||
|
blobDestroy(input);
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
if (mctx->wrkobj) {
|
||
|
rhtml = cli_jsonobj(mctx->wrkobj, "RootHTML");
|
||
|
if (rhtml != NULL) {
|
||
|
/* MHTML-specific properties */
|
||
|
cli_jsonstr(rhtml, "Encoding", (const char *)htmlGetMetaEncoding(htmlDoc));
|
||
|
cli_jsonint(rhtml, "CompressMode", xmlGetDocCompressMode(htmlDoc));
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
reader = xmlReaderWalker(htmlDoc);
|
||
|
if (reader == NULL) {
|
||
|
cli_dbgmsg("parseRootMHTML: cannot initialize xmlTextReader\n");
|
||
|
#if HAVE_JSON
|
||
|
if (ctx->wrkproperty != NULL)
|
||
|
ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_IO");
|
||
|
if (ret != CL_SUCCESS)
|
||
|
rc = FAIL;
|
||
|
#endif
|
||
|
blobDestroy(input);
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
memset(&mxctx, 0, sizeof(mxctx));
|
||
|
/* no scanning callback set */
|
||
|
mxctx.comment_cb = parseMHTMLComment;
|
||
|
ret = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, MSXML_FLAG_JSON | MSXML_FLAG_WALK, &mxctx);
|
||
|
switch (ret) {
|
||
|
case CL_SUCCESS:
|
||
|
case CL_ETIMEOUT:
|
||
|
case CL_BREAK:
|
||
|
rc = OK;
|
||
|
break;
|
||
|
|
||
|
case CL_EMAXREC:
|
||
|
rc = MAXREC;
|
||
|
break;
|
||
|
|
||
|
case CL_EMAXFILES:
|
||
|
rc = MAXFILES;
|
||
|
break;
|
||
|
|
||
|
case CL_VIRUS:
|
||
|
rc = VIRUS;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
rc = FAIL;
|
||
|
}
|
||
|
|
||
|
xmlTextReaderClose(reader);
|
||
|
xmlFreeTextReader(reader);
|
||
|
xmlFreeDoc(htmlDoc);
|
||
|
blobDestroy(input);
|
||
|
return rc;
|
||
|
#else /* LIBXML_HTML_ENABLED */
|
||
|
UNUSEDPARAM(m);
|
||
|
UNUSEDPARAM(t);
|
||
|
cli_dbgmsg("in parseRootMHTML\n");
|
||
|
cli_dbgmsg("parseRootMHTML: parsing html documents disabled in libxml2!\n");
|
||
|
#endif /* LIBXML_HTML_ENABLED */
|
||
|
#else /* HAVE_LIBXML2 */
|
||
|
UNUSEDPARAM(m);
|
||
|
UNUSEDPARAM(t);
|
||
|
cli_dbgmsg("in parseRootMHTML\n");
|
||
|
cli_dbgmsg("parseRootMHTML: parsing html documents requires libxml2!\n");
|
||
|
|
||
|
return OK;
|
||
|
#endif /* HAVE_LIBXML2 */
|
||
|
}
|
||
|
|
||
|
/*
 * This is a recursive routine.
 *
 * This function parses the body of mainMessage and saves its attachments
 * in dir.
 *
 * mainMessage is the buffer to be parsed; it contains an e-mail's body,
 * without any headers. On the first call it is the whole message; on later
 * calls it is one part of a multipart message.
 *
 * textIn is the plain text message being built up so far.
 */
|
||
|
static mbox_status
|
||
|
parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
|
||
|
{
|
||
|
mbox_status rc;
|
||
|
text *aText = textIn;
|
||
|
message *mainMessage = messageIn;
|
||
|
fileblob *fb;
|
||
|
bool infected = false;
|
||
|
const struct cl_engine *engine = mctx->ctx->engine;
|
||
|
const int doPhishingScan = engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
|
||
|
#if HAVE_JSON
|
||
|
json_object *saveobj = mctx->wrkobj;
|
||
|
#endif
|
||
|
bool heuristicFound = false;
|
||
|
|
||
|
cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
|
||
|
mctx->files);
|
||
|
|
||
|
/* FIXMELIMITS: this should be better integrated */
|
||
|
if (engine->max_recursion_level)
|
||
|
/*
|
||
|
* This is approximate
|
||
|
*/
|
||
|
if (recursion_level > engine->max_recursion_level) {
|
||
|
// Note: engine->max_recursion_level is re-purposed here out of convenience.
|
||
|
// ole2 recursion does not leverage the ctx->recursion_stack stack.
|
||
|
cli_dbgmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
|
||
|
return MAXREC;
|
||
|
}
|
||
|
if (engine->maxfiles && (mctx->files >= engine->maxfiles)) {
|
||
|
/*
|
||
|
* FIXME: This is only approx - it may have already
|
||
|
* been exceeded
|
||
|
*/
|
||
|
cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", engine->maxfiles);
|
||
|
return MAXFILES;
|
||
|
}
|
||
|
|
||
|
rc = OK;
|
||
|
|
||
|
/* Anything left to be parsed? */
|
||
|
if (mainMessage && (messageGetBody(mainMessage) != NULL)) {
|
||
|
mime_type mimeType;
|
||
|
int subtype, inhead, htmltextPart, inMimeHead, i;
|
||
|
const char *mimeSubtype;
|
||
|
char *boundary;
|
||
|
const text *t_line;
|
||
|
/*bool isAlternative;*/
|
||
|
message *aMessage;
|
||
|
int multiparts = 0;
|
||
|
message **messages = NULL; /* parts of a multipart message */
|
||
|
|
||
|
cli_dbgmsg("Parsing mail file\n");
|
||
|
|
||
|
mimeType = messageGetMimeType(mainMessage);
|
||
|
mimeSubtype = messageGetMimeSubtype(mainMessage);
|
||
|
#if HAVE_JSON
|
||
|
if (mctx->wrkobj != NULL) {
|
||
|
mctx->wrkobj = cli_jsonobj(mctx->wrkobj, "Body");
|
||
|
cli_jsonstr(mctx->wrkobj, "MimeType", getMimeTypeStr(mimeType));
|
||
|
cli_jsonstr(mctx->wrkobj, "MimeSubtype", mimeSubtype);
|
||
|
cli_jsonstr(mctx->wrkobj, "EncodingType", getEncTypeStr(messageGetEncoding(mainMessage)));
|
||
|
cli_jsonstr(mctx->wrkobj, "Disposition", messageGetDispositionType(mainMessage));
|
||
|
if (messageHasFilename(mainMessage)) {
|
||
|
char *filename = messageGetFilename(mainMessage);
|
||
|
cli_jsonstr(mctx->wrkobj, "Filename", filename);
|
||
|
free(filename);
|
||
|
} else {
|
||
|
cli_jsonstr(mctx->wrkobj, "Filename", "(inline)");
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/* pre-process */
|
||
|
subtype = tableFind(mctx->subtypeTable, mimeSubtype);
|
||
|
if ((mimeType == TEXT) && (subtype == PLAIN)) {
|
||
|
/*
|
||
|
* This is effectively no encoding, notice that we
|
||
|
* don't check that charset is us-ascii
|
||
|
*/
|
||
|
cli_dbgmsg("text/plain: Assume no attachments\n");
|
||
|
mimeType = NOMIME;
|
||
|
messageSetMimeSubtype(mainMessage, "");
|
||
|
} else if ((mimeType == MESSAGE) &&
|
||
|
(strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
|
||
|
/*
|
||
|
* RFC1892/RFC3462: section 2 text/rfc822-headers
|
||
|
* incorrectly sent as message/rfc822-headers
|
||
|
*
|
||
|
* Parse as text/plain, i.e. no mime
|
||
|
*/
|
||
|
cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
|
||
|
mimeType = NOMIME;
|
||
|
messageSetMimeSubtype(mainMessage, "");
|
||
|
} else
|
||
|
cli_dbgmsg("mimeType = %d\n", (int)mimeType);
|
||
|
|
||
|
switch (mimeType) {
|
||
|
case NOMIME:
|
||
|
cli_dbgmsg("Not a mime encoded message\n");
|
||
|
aText = textAddMessage(aText, mainMessage);
|
||
|
|
||
|
if (!doPhishingScan) {
|
||
|
break;
|
||
|
}
|
||
|
/*
|
||
|
* Fall through: some phishing mails claim they are
|
||
|
* text/plain, when they are in fact html
|
||
|
*/
|
||
|
/* fall through */
|
||
|
case TEXT:
|
||
|
/* text/plain has been preprocessed as no encoding */
|
||
|
if (doPhishingScan) {
|
||
|
/*
|
||
|
* It would be better to save and scan the
|
||
|
* file and only checkURLs if it's found to be
|
||
|
* clean
|
||
|
*/
|
||
|
checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
|
||
|
/*
|
||
|
* There might be html sent without subtype
|
||
|
* html too, so scan them for phishing
|
||
|
*/
|
||
|
if (rc == VIRUS)
|
||
|
infected = true;
|
||
|
}
|
||
|
break;
|
||
|
case MULTIPART:
|
||
|
cli_dbgmsg("Content-type 'multipart' handler\n");
|
||
|
boundary = messageFindArgument(mainMessage, "boundary");
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
if (mctx->wrkobj != NULL)
|
||
|
cli_jsonstr(mctx->wrkobj, "Boundary", boundary);
|
||
|
#endif
|
||
|
|
||
|
if (boundary == NULL) {
|
||
|
cli_dbgmsg("Multipart/%s MIME message contains no boundary header\n",
|
||
|
mimeSubtype);
|
||
|
/* Broken e-mail message */
|
||
|
mimeType = NOMIME;
|
||
|
/*
|
||
|
* The break means that we will still
|
||
|
* check if the file contains a uuencoded file
|
||
|
*/
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
cli_chomp(boundary);
|
||
|
|
||
|
/* Perhaps it should assume mixed? */
|
||
|
if (mimeSubtype[0] == '\0') {
|
||
|
cli_dbgmsg("Multipart has no subtype assuming alternative\n");
|
||
|
mimeSubtype = "alternative";
|
||
|
messageSetMimeSubtype(mainMessage, "alternative");
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get to the start of the first message
|
||
|
*/
|
||
|
t_line = messageGetBody(mainMessage);
|
||
|
|
||
|
if (t_line == NULL) {
|
||
|
cli_dbgmsg("Multipart MIME message has no body\n");
|
||
|
free((char *)boundary);
|
||
|
mimeType = NOMIME;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
do
|
||
|
if (t_line->t_line) {
|
||
|
if (boundaryStart(lineGetData(t_line->t_line), boundary))
|
||
|
break;
|
||
|
/*
|
||
|
* Found a binhex file before
|
||
|
* the first multipart
|
||
|
* TODO: check yEnc
|
||
|
*/
|
||
|
if (binhexBegin(mainMessage) == t_line) {
|
||
|
if (exportBinhexMessage(mctx, mainMessage)) {
|
||
|
/* virus found */
|
||
|
rc = VIRUS;
|
||
|
infected = true;
|
||
|
break;
|
||
|
}
|
||
|
} else if (t_line->t_next &&
|
||
|
(encodingLine(mainMessage) == t_line->t_next)) {
|
||
|
/*
|
||
|
* We look for the next line
|
||
|
* since later on we'll skip
|
||
|
* over the important line when
|
||
|
* we think it's a blank line
|
||
|
* at the top of the message -
|
||
|
* which it would have been in
|
||
|
* an RFC compliant world
|
||
|
*/
|
||
|
cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
|
||
|
lineGetData(t_line->t_next->t_line));
|
||
|
if (messageGetEncoding(mainMessage) == NOENCODING)
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
while ((t_line = t_line->t_next) != NULL);
|
||
|
|
||
|
if (t_line == NULL) {
|
||
|
cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
|
||
|
boundary);
|
||
|
free((char *)boundary);
|
||
|
mimeType = NOMIME;
|
||
|
/*
|
||
|
* The break means that we will still
|
||
|
* check if the file contains a yEnc/binhex file
|
||
|
*/
|
||
|
break;
|
||
|
}
|
||
|
/*
|
||
|
* Build up a table of all of the parts of this
|
||
|
* multipart message. Remember, each part may itself
|
||
|
* be a multipart message.
|
||
|
*/
|
||
|
inhead = 1;
|
||
|
inMimeHead = 0;
|
||
|
|
||
|
/*
|
||
|
* Re-read this variable in case mimeSubtype has changed
|
||
|
*/
|
||
|
subtype = tableFind(mctx->subtypeTable, mimeSubtype);
|
||
|
|
||
|
/*
|
||
|
* Parse the mainMessage object and create an array
|
||
|
* of objects called messages, one for each of the
|
||
|
* multiparts that mainMessage contains.
|
||
|
*
|
||
|
* This looks like parseEmailHeaders() - maybe there's
|
||
|
* some duplication of code to be cleaned up
|
||
|
*
|
||
|
* We may need to create an array rather than just
|
||
|
* save each part as it is found because not all
|
||
|
* elements will need scanning, and we don't yet know
|
||
|
* which of those elements it will be, except in
|
||
|
* the case of mixed, when all parts need to be scanned.
|
||
|
*/
|
||
|
for (multiparts = 0; t_line && !infected; multiparts++) {
|
||
|
int lines = 0;
|
||
|
message **m;
|
||
|
mbox_status old_rc;
|
||
|
|
||
|
m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
|
||
|
if (m == NULL)
|
||
|
break;
|
||
|
messages = m;
|
||
|
|
||
|
aMessage = messages[multiparts] = messageCreate();
|
||
|
if (aMessage == NULL) {
|
||
|
multiparts--;
|
||
|
/* if allocation failed the first time,
|
||
|
* there's no point in retrying, just
|
||
|
* break out */
|
||
|
break;
|
||
|
}
|
||
|
messageSetCTX(aMessage, mctx->ctx);
|
||
|
|
||
|
cli_dbgmsg("Now read in part %d\n", multiparts);
|
||
|
|
||
|
/*
|
||
|
* Ignore blank lines. There shouldn't be ANY
|
||
|
* but some viruses insert them
|
||
|
*/
|
||
|
while ((t_line = t_line->t_next) != NULL)
|
||
|
if (t_line->t_line &&
|
||
|
/*(cli_chomp(t_line->t_text) > 0))*/
|
||
|
(strlen(lineGetData(t_line->t_line)) > 0))
|
||
|
break;
|
||
|
|
||
|
if (t_line == NULL) {
|
||
|
cli_dbgmsg("Empty part\n");
|
||
|
/*
|
||
|
* Remove this part unless there's
|
||
|
* a binhex portion somewhere in
|
||
|
* the complete message that we may
|
||
|
* throw away by mistake if the MIME
|
||
|
* encoding information is incorrect
|
||
|
*/
|
||
|
if (mainMessage &&
|
||
|
(binhexBegin(mainMessage) == NULL)) {
|
||
|
messageDestroy(aMessage);
|
||
|
--multiparts;
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
do {
|
||
|
const char *line = lineGetData(t_line->t_line);
|
||
|
|
||
|
/*
|
||
|
cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
|
||
|
multiparts, inMimeHead, inhead, boundary, line,
|
||
|
t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");
|
||
|
*/
|
||
|
|
||
|
if (inMimeHead) { /* continuation line */
|
||
|
if (line == NULL) {
|
||
|
/*inhead =*/inMimeHead = 0;
|
||
|
continue;
|
||
|
}
|
||
|
/*
|
||
|
* Handle continuation lines
|
||
|
* because the previous line
|
||
|
* ended with a ; or this line
|
||
|
* starts with a white space
|
||
|
*/
|
||
|
cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
|
||
|
multiparts, line);
|
||
|
/*
|
||
|
* Handle the case when it
|
||
|
* isn't really a continuation
|
||
|
* line:
|
||
|
* Content-Type: application/octet-stream;
|
||
|
* Content-Transfer-Encoding: base64
|
||
|
*/
|
||
|
parseEmailHeader(aMessage, line, mctx->rfc821Table, mctx->ctx, &heuristicFound);
|
||
|
if (heuristicFound) {
|
||
|
rc = VIRUS;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
while (isspace((int)*line))
|
||
|
line++;
|
||
|
|
||
|
if (*line == '\0') {
|
||
|
inhead = inMimeHead = 0;
|
||
|
continue;
|
||
|
}
|
||
|
inMimeHead = false;
|
||
|
messageAddArgument(aMessage, line);
|
||
|
} else if (inhead) { /* handling normal headers */
|
||
|
/*int quotes;*/
|
||
|
char *fullline, *ptr;
|
||
|
|
||
|
if (line == NULL) {
|
||
|
/*
|
||
|
* empty line, should the end of the headers,
|
||
|
* but some base64 decoders, e.g. uudeview, are broken
|
||
|
* and will handle this type of entry, decoding the
|
||
|
* base64 content...
|
||
|
* Content-Type: application/octet-stream; name=text.zip
|
||
|
* Content-Transfer-Encoding: base64
|
||
|
* Content-Disposition: attachment; filename="text.zip"
|
||
|
*
|
||
|
* Content-Disposition: attachment;
|
||
|
* filename=text.zip
|
||
|
* Content-Type: application/octet-stream;
|
||
|
* name=text.zip
|
||
|
* Content-Transfer-Encoding: base64
|
||
|
*
|
||
|
* UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
|
||
|
*/
|
||
|
const text *next = t_line->t_next;
|
||
|
|
||
|
if (next && next->t_line) {
|
||
|
const char *data = lineGetData(next->t_line);
|
||
|
|
||
|
if ((messageGetEncoding(aMessage) == NOENCODING) &&
|
||
|
(messageGetMimeType(aMessage) == APPLICATION) &&
|
||
|
data && strstr(data, "base64")) {
|
||
|
/*
|
||
|
* Handle this nightmare (note the blank
|
||
|
* line in the header and the incorrect
|
||
|
* content-transfer-encoding header)
|
||
|
*
|
||
|
* Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
|
||
|
*
|
||
|
* r-Encoding: base64
|
||
|
* Content-Disposition: attachment; filename="zipped_files.EXE"
|
||
|
*/
|
||
|
messageSetEncoding(aMessage, "base64");
|
||
|
cli_dbgmsg("Ignoring fake end of headers\n");
|
||
|
continue;
|
||
|
}
|
||
|
if ((strncmp(data, "Content", 7) == 0) ||
|
||
|
(strncmp(data, "filename=", 9) == 0)) {
|
||
|
cli_dbgmsg("Ignoring fake end of headers\n");
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
cli_dbgmsg("Multipart %d: End of header information\n",
|
||
|
multiparts);
|
||
|
inhead = 0;
|
||
|
continue;
|
||
|
}
|
||
|
if (isspace((int)*line)) {
|
||
|
/*
|
||
|
* The first line is
|
||
|
* continuation line.
|
||
|
* This is tricky
|
||
|
* to handle, but
|
||
|
* all we can do is our
|
||
|
* best
|
||
|
*/
|
||
|
cli_dbgmsg("Part %d starts with a continuation line\n",
|
||
|
multiparts);
|
||
|
messageAddArgument(aMessage, line);
|
||
|
/*
|
||
|
* Give it a default
|
||
|
* MIME type since
|
||
|
* that may be the
|
||
|
* missing line
|
||
|
*
|
||
|
* Choose application to
|
||
|
* force a save
|
||
|
*/
|
||
|
if (messageGetMimeType(aMessage) == NOMIME)
|
||
|
messageSetMimeType(aMessage, "application");
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
inMimeHead = false;
|
||
|
|
||
|
if (strlen(line) > RFC2821LENGTH) {
|
||
|
cli_dbgmsg("parseEmailBody: line length exceds RFC2821 maximum length (1000)\n");
|
||
|
// We must skip this line because functions like rfc822comments() may accept output buffers
|
||
|
// that [RFC2821LENGTH + 1] in and don't have any length checks to prevent exceeding that max.
|
||
|
// E.g. See `boundaryStart()`.
|
||
|
// TODO: A larger audit would be needed to remove this limitation, though frankly I recommend
|
||
|
// fully re-writing the email parser (in Rust).
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
fullline = rfc822comments(line, NULL);
|
||
|
if (fullline == NULL)
|
||
|
fullline = cli_strdup(line);
|
||
|
|
||
|
/*quotes = count_quotes(fullline);*/
|
||
|
|
||
|
/*
|
||
|
* Fold next lines to the end of this
|
||
|
* if they start with a white space
|
||
|
* or if this line has an odd number of quotes:
|
||
|
* Content-Type: application/octet-stream; name="foo
|
||
|
* "
|
||
|
*/
|
||
|
while (t_line && next_is_folded_header(t_line)) {
|
||
|
const char *data;
|
||
|
size_t datasz;
|
||
|
|
||
|
t_line = t_line->t_next;
|
||
|
|
||
|
data = lineGetData(t_line->t_line);
|
||
|
|
||
|
if (data[1] == '\0') {
|
||
|
/*
|
||
|
* Broken message: the
|
||
|
* blank line at the end
|
||
|
* of the headers isn't blank -
|
||
|
* it contains a space
|
||
|
*/
|
||
|
cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
|
||
|
multiparts);
|
||
|
inhead = false;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
datasz = strlen(fullline) + strlen(data) + 1;
|
||
|
ptr = cli_realloc(fullline, datasz);
|
||
|
|
||
|
if (ptr == NULL)
|
||
|
break;
|
||
|
|
||
|
fullline = ptr;
|
||
|
cli_strlcat(fullline, data, datasz);
|
||
|
|
||
|
/*quotes = count_quotes(data);*/
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
|
||
|
multiparts, fullline);
|
||
|
|
||
|
parseEmailHeader(aMessage, fullline, mctx->rfc821Table, mctx->ctx, &heuristicFound);
|
||
|
free(fullline);
|
||
|
if (heuristicFound) {
|
||
|
rc = VIRUS;
|
||
|
}
|
||
|
} else if (boundaryEnd(line, boundary)) {
|
||
|
/*
|
||
|
* Some viruses put information
|
||
|
* *after* the end of message,
|
||
|
* which presumably some broken
|
||
|
* mail clients find, so we
|
||
|
* can't assume that this
|
||
|
* is the end of the message
|
||
|
*/
|
||
|
/* t_line = NULL;*/
|
||
|
break;
|
||
|
} else if (boundaryStart(line, boundary)) {
|
||
|
inhead = 1;
|
||
|
break;
|
||
|
} else {
|
||
|
if (messageAddLine(aMessage, t_line->t_line) < 0)
|
||
|
break;
|
||
|
lines++;
|
||
|
}
|
||
|
} while ((t_line = t_line->t_next) != NULL);
|
||
|
|
||
|
cli_dbgmsg("Part %d has %d lines, rc = %d\n",
|
||
|
multiparts, lines, (int)rc);
|
||
|
|
||
|
/*
|
||
|
* Only save in the array of messages if some
|
||
|
* decision will be taken on whether to scan.
|
||
|
* If all parts will be scanned then save to
|
||
|
* file straight away
|
||
|
*/
|
||
|
switch (subtype) {
|
||
|
case MIXED:
|
||
|
case ALTERNATIVE:
|
||
|
case REPORT:
|
||
|
case DIGEST:
|
||
|
case APPLEDOUBLE:
|
||
|
case KNOWBOT:
|
||
|
case -1:
|
||
|
old_rc = rc;
|
||
|
mainMessage = do_multipart(mainMessage,
|
||
|
messages, multiparts,
|
||
|
&rc, mctx, messageIn,
|
||
|
&aText, recursion_level);
|
||
|
if ((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
|
||
|
rc = OK;
|
||
|
if (messages[multiparts]) {
|
||
|
messageDestroy(messages[multiparts]);
|
||
|
messages[multiparts] = NULL;
|
||
|
}
|
||
|
--multiparts;
|
||
|
if (rc == VIRUS)
|
||
|
infected = true;
|
||
|
break;
|
||
|
|
||
|
case RELATED:
|
||
|
case ENCRYPTED:
|
||
|
case SIGNED:
|
||
|
case PARALLEL:
|
||
|
/* all the subtypes that we handle
|
||
|
* (all from the switch(tableFind...) below)
|
||
|
* must be listed here */
|
||
|
break;
|
||
|
default:
|
||
|
/* this is a subtype that we
|
||
|
* don't handle anyway,
|
||
|
* don't store */
|
||
|
if (messages[multiparts]) {
|
||
|
messageDestroy(messages[multiparts]);
|
||
|
messages[multiparts] = NULL;
|
||
|
}
|
||
|
--multiparts;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
free((char *)boundary);
|
||
|
|
||
|
if (haveTooManyMIMEPartsPerMessage(multiparts, mctx->ctx, &rc)) {
|
||
|
if (messages) {
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messages[i])
|
||
|
messageDestroy(messages[i]);
|
||
|
}
|
||
|
free(messages);
|
||
|
messages = NULL;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Preprocess. Anything special to be done before
|
||
|
* we handle the multiparts?
|
||
|
*/
|
||
|
switch (subtype) {
|
||
|
case KNOWBOT:
|
||
|
/* TODO */
|
||
|
cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
|
||
|
mimeSubtype = "mixed";
|
||
|
break;
|
||
|
case -1:
|
||
|
/*
|
||
|
* According to section 7.2.6 of
|
||
|
* RFC1521, unrecognized multiparts
|
||
|
* should be treated as multipart/mixed.
|
||
|
*/
|
||
|
cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
|
||
|
mimeSubtype = "mixed";
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* We've finished message we're parsing
|
||
|
*/
|
||
|
if (mainMessage && (mainMessage != messageIn)) {
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("The message has %d parts\n", multiparts);
|
||
|
|
||
|
if (infected || ((multiparts == 0) && (aText == NULL))) {
|
||
|
if (messages) {
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messages[i])
|
||
|
messageDestroy(messages[i]);
|
||
|
}
|
||
|
free(messages);
|
||
|
messages = NULL;
|
||
|
}
|
||
|
if (aText && (textIn == NULL))
|
||
|
textDestroy(aText);
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
/*
|
||
|
* Nothing to do
|
||
|
*/
|
||
|
switch (rc) {
|
||
|
case VIRUS:
|
||
|
return VIRUS;
|
||
|
case MAXREC:
|
||
|
return MAXREC;
|
||
|
default:
|
||
|
return OK_ATTACHMENTS_NOT_SAVED;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
|
||
|
|
||
|
/*
|
||
|
* We now have all the parts of the multipart message
|
||
|
* in the messages array:
|
||
|
* message *messages[multiparts]
|
||
|
* Let's decide what to do with them all
|
||
|
*/
|
||
|
switch (tableFind(mctx->subtypeTable, mimeSubtype)) {
|
||
|
case RELATED:
|
||
|
cli_dbgmsg("Multipart related handler\n");
|
||
|
/*
|
||
|
* Have a look to see if there's HTML code
|
||
|
* which will need scanning
|
||
|
*/
|
||
|
|
||
|
// It's okay if multiparts == 0
|
||
|
|
||
|
htmltextPart = getTextPart(messages, multiparts);
|
||
|
|
||
|
if (htmltextPart >= 0 && messages) {
|
||
|
if (messageGetBody(messages[htmltextPart])) {
|
||
|
|
||
|
aText = textAddMessage(aText, messages[htmltextPart]);
|
||
|
}
|
||
|
} else {
|
||
|
/*
|
||
|
* There isn't an HTML bit. If there's a
|
||
|
* multipart bit, it'll may be in there
|
||
|
* somewhere
|
||
|
*/
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messageGetMimeType(messages[i]) == MULTIPART) {
|
||
|
htmltextPart = i;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (htmltextPart == -1) {
|
||
|
cli_dbgmsg("No HTML code found to be scanned\n");
|
||
|
} else {
|
||
|
#if HAVE_JSON
|
||
|
/* Send root HTML file for preclassification */
|
||
|
if (mctx->ctx->wrkproperty)
|
||
|
(void)parseRootMHTML(mctx, messages[htmltextPart], aText);
|
||
|
#endif
|
||
|
rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
|
||
|
if ((rc == OK) && messages[htmltextPart]) {
|
||
|
messageDestroy(messages[htmltextPart]);
|
||
|
messages[htmltextPart] = NULL;
|
||
|
} else if (rc == VIRUS) {
|
||
|
infected = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The message is confused about the difference
|
||
|
* between alternative and related. Badtrans.B
|
||
|
* suffers from this problem.
|
||
|
*
|
||
|
* Fall through in this case:
|
||
|
* Content-Type: multipart/related;
|
||
|
* type="multipart/alternative"
|
||
|
*/
|
||
|
/* fall through */
|
||
|
case DIGEST:
|
||
|
/*
|
||
|
* According to section 5.1.5 RFC2046, the
|
||
|
* default mime type of multipart/digest parts
|
||
|
* is message/rfc822
|
||
|
*
|
||
|
* We consider them as alternative, wrong in
|
||
|
* the strictest sense since they aren't
|
||
|
* alternatives - all parts a valid - but it's
|
||
|
* OK for our needs since it means each part
|
||
|
* will be scanned
|
||
|
*/
|
||
|
case ALTERNATIVE:
|
||
|
cli_dbgmsg("Multipart alternative handler\n");
|
||
|
|
||
|
/*
|
||
|
* Fall through - some clients are broken and
|
||
|
* say alternative instead of mixed. The Klez
|
||
|
* virus is broken that way, and anyway we
|
||
|
* wish to scan all of the alternatives
|
||
|
*/
|
||
|
/* fall through */
|
||
|
case REPORT:
|
||
|
/*
|
||
|
* According to section 1 of RFC1892, the
|
||
|
* syntax of multipart/report is the same
|
||
|
* as multipart/mixed. There are some required
|
||
|
* parameters, but there's no need for us to
|
||
|
* verify that they exist
|
||
|
*/
|
||
|
case ENCRYPTED:
|
||
|
/* MUAs without encryption plugins can display as multipart/mixed,
|
||
|
* just scan it*/
|
||
|
case MIXED:
|
||
|
case APPLEDOUBLE: /* not really supported */
|
||
|
/*
|
||
|
* Look for attachments
|
||
|
*
|
||
|
* Not all formats are supported. If an
|
||
|
* unsupported format turns out to be
|
||
|
* common enough to implement, it is a simple
|
||
|
* matter to add it
|
||
|
*/
|
||
|
if (aText) {
|
||
|
if (mainMessage && (mainMessage != messageIn))
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
}
|
||
|
|
||
|
cli_dbgmsg("Mixed message with %d parts\n", multiparts);
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
mainMessage = do_multipart(mainMessage,
|
||
|
messages, i, &rc, mctx,
|
||
|
messageIn, &aText, recursion_level + 1);
|
||
|
if (rc == VIRUS) {
|
||
|
infected = true;
|
||
|
break;
|
||
|
}
|
||
|
if (rc == MAXREC)
|
||
|
break;
|
||
|
if (rc == OK_ATTACHMENTS_NOT_SAVED)
|
||
|
rc = OK;
|
||
|
}
|
||
|
|
||
|
/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
|
||
|
break;
|
||
|
case SIGNED:
|
||
|
case PARALLEL:
|
||
|
/*
|
||
|
* If we're here it could be because we have a
|
||
|
* multipart/mixed message, consisting of a
|
||
|
* message followed by an attachment. That
|
||
|
* message itself is a multipart/alternative
|
||
|
* message and we need to dig out the plain
|
||
|
* text part of that alternative
|
||
|
*/
|
||
|
if (messages) {
|
||
|
htmltextPart = getTextPart(messages, multiparts);
|
||
|
if (htmltextPart == -1)
|
||
|
htmltextPart = 0;
|
||
|
rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
assert(0);
|
||
|
}
|
||
|
|
||
|
if (mainMessage && (mainMessage != messageIn))
|
||
|
messageDestroy(mainMessage);
|
||
|
|
||
|
if (aText && (textIn == NULL)) {
|
||
|
if ((!infected) && (fb = fileblobCreate()) != NULL) {
|
||
|
cli_dbgmsg("Save non mime and/or text/plain part\n");
|
||
|
fileblobSetFilename(fb, mctx->dir, "textpart");
|
||
|
/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
|
||
|
fileblobSetCTX(fb, mctx->ctx);
|
||
|
(void)textToFileblob(aText, fb, 1);
|
||
|
|
||
|
fileblobDestroy(fb);
|
||
|
mctx->files++;
|
||
|
}
|
||
|
textDestroy(aText);
|
||
|
}
|
||
|
|
||
|
if (messages) {
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messages[i])
|
||
|
messageDestroy(messages[i]);
|
||
|
}
|
||
|
free(messages);
|
||
|
messages = NULL;
|
||
|
}
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
return rc;
|
||
|
|
||
|
case MESSAGE:
|
||
|
/*
|
||
|
* Check for forbidden encodings
|
||
|
*/
|
||
|
switch (messageGetEncoding(mainMessage)) {
|
||
|
case NOENCODING:
|
||
|
case EIGHTBIT:
|
||
|
case BINARY:
|
||
|
break;
|
||
|
default:
|
||
|
cli_dbgmsg("MIME type 'message' cannot be decoded\n");
|
||
|
break;
|
||
|
}
|
||
|
rc = FAIL;
|
||
|
if ((strcasecmp(mimeSubtype, "rfc822") == 0) ||
|
||
|
(strcasecmp(mimeSubtype, "delivery-status") == 0)) {
|
||
|
message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table, &heuristicFound);
|
||
|
if (m) {
|
||
|
cli_dbgmsg("Decode rfc822\n");
|
||
|
|
||
|
messageSetCTX(m, mctx->ctx);
|
||
|
|
||
|
if (mainMessage && (mainMessage != messageIn)) {
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
} else
|
||
|
messageReset(mainMessage);
|
||
|
if (messageGetBody(m))
|
||
|
rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
|
||
|
|
||
|
messageDestroy(m);
|
||
|
} else if (heuristicFound) {
|
||
|
rc = VIRUS;
|
||
|
}
|
||
|
break;
|
||
|
} else if (strcasecmp(mimeSubtype, "disposition-notification") == 0) {
|
||
|
/* RFC 2298 - handle like a normal email */
|
||
|
rc = OK;
|
||
|
break;
|
||
|
} else if (strcasecmp(mimeSubtype, "partial") == 0) {
|
||
|
if (mctx->ctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE) {
|
||
|
/* RFC1341 message split over many emails */
|
||
|
if (rfc1341(mctx, mainMessage) >= 0)
|
||
|
rc = OK;
|
||
|
} else {
|
||
|
cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
|
||
|
}
|
||
|
} else if (strcasecmp(mimeSubtype, "external-body") == 0)
|
||
|
/* TODO */
|
||
|
cli_warnmsg("Attempt to send Content-type message/external-body trapped\n");
|
||
|
else
|
||
|
cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
|
||
|
|
||
|
if (mainMessage && (mainMessage != messageIn))
|
||
|
messageDestroy(mainMessage);
|
||
|
|
||
|
if (messages) {
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messages[i])
|
||
|
messageDestroy(messages[i]);
|
||
|
}
|
||
|
free(messages);
|
||
|
messages = NULL;
|
||
|
}
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
return rc;
|
||
|
|
||
|
default:
|
||
|
cli_dbgmsg("Message received with unknown mime encoding - assume application\n");
|
||
|
/*
|
||
|
* Some Yahoo emails attach as
|
||
|
* Content-Type: X-unknown/unknown;
|
||
|
* instead of
|
||
|
* Content-Type: application/unknown;
|
||
|
* so let's try our best to salvage something
|
||
|
*/
|
||
|
/* fall through */
|
||
|
case APPLICATION:
|
||
|
/*cptr = messageGetMimeSubtype(mainMessage);
|
||
|
|
||
|
if((strcasecmp(cptr, "octet-stream") == 0) ||
|
||
|
(strcasecmp(cptr, "x-msdownload") == 0)) {*/
|
||
|
{
|
||
|
fb = messageToFileblob(mainMessage, mctx->dir, 1);
|
||
|
|
||
|
if (fb) {
|
||
|
cli_dbgmsg("Saving main message as attachment\n");
|
||
|
if (fileblobScanAndDestroy(fb) == CL_VIRUS)
|
||
|
rc = VIRUS;
|
||
|
mctx->files++;
|
||
|
if (mainMessage != messageIn) {
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
} else
|
||
|
messageReset(mainMessage);
|
||
|
}
|
||
|
} /*else
|
||
|
cli_warnmsg("Discarded application not sent as attachment\n");*/
|
||
|
break;
|
||
|
|
||
|
case AUDIO:
|
||
|
case VIDEO:
|
||
|
case IMAGE:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (messages) {
|
||
|
/* "can't happen" */
|
||
|
cli_warnmsg("messages != NULL\n");
|
||
|
for (i = 0; i < multiparts; i++) {
|
||
|
if (messages[i])
|
||
|
messageDestroy(messages[i]);
|
||
|
}
|
||
|
free(messages);
|
||
|
messages = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (aText && (textIn == NULL)) {
|
||
|
/* Look for a bounce in the text (non mime encoded) portion */
|
||
|
const text *t;
|
||
|
/* isBounceStart() is expensive, reduce the number of calls */
|
||
|
bool lookahead_definately_is_bounce = false;
|
||
|
|
||
|
for (t = aText; t && (rc != VIRUS); t = t->t_next) {
|
||
|
const line_t *l = t->t_line;
|
||
|
const text *lookahead, *topofbounce;
|
||
|
const char *s;
|
||
|
bool inheader;
|
||
|
|
||
|
if (l == NULL) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (lookahead_definately_is_bounce)
|
||
|
lookahead_definately_is_bounce = false;
|
||
|
else if (!isBounceStart(mctx, lineGetData(l)))
|
||
|
continue;
|
||
|
|
||
|
lookahead = t->t_next;
|
||
|
if (lookahead) {
|
||
|
if (isBounceStart(mctx, lineGetData(lookahead->t_line))) {
|
||
|
lookahead_definately_is_bounce = true;
|
||
|
/* don't save worthless header lines */
|
||
|
continue;
|
||
|
}
|
||
|
} else /* don't save a single liner */
|
||
|
break;
|
||
|
|
||
|
/*
|
||
|
* We've found what looks like the start of a bounce
|
||
|
* message. Only bother saving if it really is a bounce
|
||
|
* message, this helps to speed up scanning of ping-pong
|
||
|
* messages that have lots of bounces within bounces in
|
||
|
* them
|
||
|
*/
|
||
|
for (; lookahead; lookahead = lookahead->t_next) {
|
||
|
l = lookahead->t_line;
|
||
|
|
||
|
if (l == NULL)
|
||
|
break;
|
||
|
s = lineGetData(l);
|
||
|
if (strncasecmp(s, "Content-Type:", 13) == 0) {
|
||
|
/*
|
||
|
* Don't bother with text/plain or
|
||
|
* text/html
|
||
|
*/
|
||
|
if (CLI_STRCASESTR(s, "text/plain") != NULL)
|
||
|
/*
|
||
|
* Don't bother to save the
|
||
|
* unuseful part, read past
|
||
|
* the headers then we'll go
|
||
|
* on to look for the next
|
||
|
* bounce message
|
||
|
*/
|
||
|
continue;
|
||
|
if ((!doPhishingScan) &&
|
||
|
(CLI_STRCASESTR(s, "text/html") != NULL))
|
||
|
continue;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lookahead && (lookahead->t_line == NULL)) {
|
||
|
cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
|
||
|
t = lookahead;
|
||
|
/* look for next bounce message */
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Prescan the bounce message to see if there's likely
|
||
|
* to be anything nasty.
|
||
|
* This algorithm is hand crafted and may be breakable
|
||
|
* so all submissions are welcome. It's best NOT to
|
||
|
* remove this however you may be tempted, because it
|
||
|
* significantly speeds up the scanning of multiple
|
||
|
* bounces (i.e. bounces within many bounces)
|
||
|
*/
|
||
|
for (; lookahead; lookahead = lookahead->t_next) {
|
||
|
l = lookahead->t_line;
|
||
|
|
||
|
if (l) {
|
||
|
s = lineGetData(l);
|
||
|
if ((strncasecmp(s, "Content-Type:", 13) == 0) &&
|
||
|
(strstr(s, "multipart/") == NULL) &&
|
||
|
(strstr(s, "message/rfc822") == NULL) &&
|
||
|
(strstr(s, "text/plain") == NULL))
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (lookahead == NULL) {
|
||
|
cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
|
||
|
/* nothing here, move along please */
|
||
|
break;
|
||
|
}
|
||
|
if ((fb = fileblobCreate()) == NULL)
|
||
|
break;
|
||
|
cli_dbgmsg("Save non mime part bounce message\n");
|
||
|
fileblobSetFilename(fb, mctx->dir, "bounce");
|
||
|
fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
|
||
|
fileblobSetCTX(fb, mctx->ctx);
|
||
|
|
||
|
inheader = true;
|
||
|
topofbounce = NULL;
|
||
|
do {
|
||
|
l = t->t_line;
|
||
|
|
||
|
if (l == NULL) {
|
||
|
if (inheader) {
|
||
|
inheader = false;
|
||
|
topofbounce = t;
|
||
|
}
|
||
|
} else {
|
||
|
s = lineGetData(l);
|
||
|
fileblobAddData(fb, (const unsigned char *)s, strlen(s));
|
||
|
}
|
||
|
fileblobAddData(fb, (const unsigned char *)"\n", 1);
|
||
|
lookahead = t->t_next;
|
||
|
if (lookahead == NULL)
|
||
|
break;
|
||
|
t = lookahead;
|
||
|
l = t->t_line;
|
||
|
if ((!inheader) && l) {
|
||
|
s = lineGetData(l);
|
||
|
if (isBounceStart(mctx, s)) {
|
||
|
cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
|
||
|
lookahead_definately_is_bounce = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
} while (!fileblobInfected(fb));
|
||
|
|
||
|
if (fileblobScanAndDestroy(fb) == CL_VIRUS)
|
||
|
rc = VIRUS;
|
||
|
mctx->files++;
|
||
|
|
||
|
if (topofbounce)
|
||
|
t = topofbounce;
|
||
|
}
|
||
|
textDestroy(aText);
|
||
|
aText = NULL;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* No attachments - scan the text portions, often files
|
||
|
* are hidden in HTML code
|
||
|
*/
|
||
|
if (mainMessage && (rc != VIRUS)) {
|
||
|
text *t_line;
|
||
|
|
||
|
/*
|
||
|
* Look for uu-encoded main file
|
||
|
*/
|
||
|
if (mainMessage->body_first != NULL &&
|
||
|
(encodingLine(mainMessage) != NULL) &&
|
||
|
((t_line = bounceBegin(mainMessage)) != NULL))
|
||
|
rc = (exportBounceMessage(mctx, t_line) == CL_VIRUS) ? VIRUS : OK;
|
||
|
else {
|
||
|
bool saveIt;
|
||
|
|
||
|
if (messageGetMimeType(mainMessage) == MESSAGE)
|
||
|
/*
|
||
|
* Quick peek, if the encapsulated
|
||
|
* message has no
|
||
|
* content encoding statement don't
|
||
|
* bother saving to scan, it's safe
|
||
|
*/
|
||
|
saveIt = (bool)(encodingLine(mainMessage) != NULL);
|
||
|
else if (mainMessage->body_last != NULL && (t_line = encodingLine(mainMessage)) != NULL) {
|
||
|
/*
|
||
|
* Some bounces include the message
|
||
|
* body without the headers.
|
||
|
* FIXME: Unfortunately this generates a
|
||
|
* lot of false positives that a bounce
|
||
|
* has been found when it hasn't.
|
||
|
*/
|
||
|
if ((fb = fileblobCreate()) != NULL) {
|
||
|
cli_dbgmsg("Found a bounce message with no header at '%s'\n",
|
||
|
lineGetData(t_line->t_line));
|
||
|
fileblobSetFilename(fb, mctx->dir, "bounce");
|
||
|
fileblobAddData(fb,
|
||
|
(const unsigned char *)"Received: by clamd (bounce)\n",
|
||
|
28);
|
||
|
|
||
|
fileblobSetCTX(fb, mctx->ctx);
|
||
|
if (fileblobScanAndDestroy(textToFileblob(t_line, fb, 1)) == CL_VIRUS)
|
||
|
rc = VIRUS;
|
||
|
mctx->files++;
|
||
|
}
|
||
|
saveIt = false;
|
||
|
} else
|
||
|
/*
|
||
|
* Save the entire text portion,
|
||
|
* since it it may be an HTML file with
|
||
|
* a JavaScript virus or a phish
|
||
|
*/
|
||
|
saveIt = true;
|
||
|
|
||
|
if (saveIt) {
|
||
|
cli_dbgmsg("Saving text part to scan, rc = %d\n",
|
||
|
(int)rc);
|
||
|
if (saveTextPart(mctx, mainMessage, 1) == CL_VIRUS)
|
||
|
rc = VIRUS;
|
||
|
|
||
|
if (mainMessage != messageIn) {
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
} else
|
||
|
messageReset(mainMessage);
|
||
|
}
|
||
|
}
|
||
|
} /*else
|
||
|
rc = OK_ATTACHMENTS_NOT_SAVED; */
|
||
|
/* nothing saved */
|
||
|
|
||
|
if (mainMessage && (mainMessage != messageIn))
|
||
|
messageDestroy(mainMessage);
|
||
|
|
||
|
if ((rc != FAIL) && infected)
|
||
|
rc = VIRUS;
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
|
||
|
cli_dbgmsg("parseEmailBody() returning %d\n", (int)rc);
|
||
|
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Is the current line the start of a new section?
|
||
|
*
|
||
|
* New sections start with --boundary
|
||
|
*/
|
||
|
static int
|
||
|
boundaryStart(const char *line, const char *boundary)
|
||
|
{
|
||
|
const char *ptr;
|
||
|
char *out;
|
||
|
int rc;
|
||
|
char buf[RFC2821LENGTH + 1];
|
||
|
char *newline;
|
||
|
|
||
|
if (line == NULL || *line == '\0')
|
||
|
return 0; /* empty line */
|
||
|
if (boundary == NULL)
|
||
|
return 0;
|
||
|
|
||
|
newline = strdup(line);
|
||
|
if (!(newline))
|
||
|
newline = (char *)line;
|
||
|
|
||
|
if (newline != line && strlen(line)) {
|
||
|
char *p;
|
||
|
/* Trim trailing spaces */
|
||
|
p = newline + strlen(line) - 1;
|
||
|
while (p >= newline && *p == ' ')
|
||
|
*(p--) = '\0';
|
||
|
}
|
||
|
|
||
|
if (newline != line)
|
||
|
cli_chomp(newline);
|
||
|
|
||
|
/* cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary); */
|
||
|
|
||
|
if ((*newline != '-') && (*newline != '(')) {
|
||
|
if (newline != line)
|
||
|
free(newline);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (strchr(newline, '-') == NULL) {
|
||
|
if (newline != line)
|
||
|
free(newline);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (strlen(newline) <= sizeof(buf)) {
|
||
|
out = NULL;
|
||
|
ptr = rfc822comments(newline, buf);
|
||
|
} else
|
||
|
ptr = out = rfc822comments(newline, NULL);
|
||
|
|
||
|
if (ptr == NULL)
|
||
|
ptr = newline;
|
||
|
|
||
|
if ((*ptr++ != '-') || (*ptr == '\0')) {
|
||
|
if (out)
|
||
|
free(out);
|
||
|
if (newline != line)
|
||
|
free(newline);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Gibe.B3 is broken, it has:
|
||
|
* boundary="---- =_NextPart_000_01C31177.9DC7C000"
|
||
|
* but it's boundaries look like
|
||
|
* ------ =_NextPart_000_01C31177.9DC7C000
|
||
|
* notice the one too few '-'.
|
||
|
* Presumably this is a deliberate exploitation of a bug in some mail
|
||
|
* clients.
|
||
|
*
|
||
|
* The trouble is that this creates a lot of false positives for
|
||
|
* boundary conditions, if we're too lax about matches. We do our level
|
||
|
* best to avoid these false positives. For example if we have
|
||
|
* boundary="1" we want to ensure that we don't break out of every line
|
||
|
* that has -1 in it instead of starting --1. This needs some more work.
|
||
|
*
|
||
|
* Look with and without RFC822 comments stripped, I've seen some
|
||
|
* samples where () are taken as comments in boundaries and some where
|
||
|
* they're not. Irrespective of whatever RFC2822 says, we need to find
|
||
|
* viruses in both types of mails.
|
||
|
*/
|
||
|
if ((strstr(&ptr[1], boundary) != NULL) || (strstr(newline, boundary) != NULL)) {
|
||
|
const char *k = ptr;
|
||
|
|
||
|
/*
|
||
|
* We need to ensure that we don't match --11=-=-=11 when
|
||
|
* looking for --1=-=-=1 in well behaved headers, that's a
|
||
|
* false positive problem mentioned above
|
||
|
*/
|
||
|
rc = 0;
|
||
|
do
|
||
|
if (strcmp(++k, boundary) == 0) {
|
||
|
rc = 1;
|
||
|
break;
|
||
|
}
|
||
|
while (*k == '-');
|
||
|
if (rc == 0) {
|
||
|
k = &line[1];
|
||
|
do
|
||
|
if (strcmp(++k, boundary) == 0) {
|
||
|
rc = 1;
|
||
|
break;
|
||
|
}
|
||
|
while (*k == '-');
|
||
|
}
|
||
|
} else if (*ptr++ != '-')
|
||
|
rc = 0;
|
||
|
else
|
||
|
rc = (strcasecmp(ptr, boundary) == 0);
|
||
|
|
||
|
if (out)
|
||
|
free(out);
|
||
|
|
||
|
if (rc == 1)
|
||
|
cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
|
||
|
|
||
|
if (newline != line)
|
||
|
free(newline);
|
||
|
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/*
 * Is the current line the end?
 *
 * The message ends with --boundary--
 *
 * line:     the text line to examine (may be NULL or empty)
 * boundary: the multipart boundary string (may be NULL)
 *
 * Returns 1 if the line starts with "--", followed by the boundary
 * (compared case-insensitively), followed by "--"; 0 otherwise.
 * Anything after the closing "--" is ignored, because some broken mails
 * have white space (or other text) after the boundary.
 *
 * No copy of the line is made: trailing spaces can never be part of the
 * "--boundary--" prefix (it ends in '-'), so trimming them would not
 * change the outcome.
 */
static int
boundaryEnd(const char *line, const char *boundary)
{
    const char *p;
    size_t len;

    if (line == NULL || *line == '\0')
        return 0;
    if (boundary == NULL)
        return 0;

    /* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary); */

    /* Opening "--"; the second test is only reached when line[0] is '-' */
    if (line[0] != '-' || line[1] != '-')
        return 0;

    p   = &line[2];
    len = strlen(boundary);
    if (strncasecmp(p, boundary, len) != 0)
        return 0;

    /*
     * The comparison above succeeded, so p holds at least len characters
     * and p[len] is readable (it may be the terminator). p[len + 1] is only
     * inspected when p[len] is '-', i.e. not the terminator.
     */
    if (p[len] != '-')
        return 0;

    /* cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, p); */
    return (p[len + 1] == '-') ? 1 : 0;
}
|
||
|
|
||
|
/*
|
||
|
* Initialise the various lookup tables
|
||
|
*
|
||
|
* Only initializes the tables if not already initialized.
|
||
|
*/
|
||
|
static int
|
||
|
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
|
||
|
{
|
||
|
const struct tableinit *tableinit;
|
||
|
|
||
|
/*
|
||
|
* Initialise the various look up tables
|
||
|
*/
|
||
|
if (NULL == *rfc821Table) {
|
||
|
*rfc821Table = tableCreate();
|
||
|
if (*rfc821Table == NULL) {
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
for (tableinit = rfc821headers; tableinit->key; tableinit++) {
|
||
|
if (tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
|
||
|
tableDestroy(*rfc821Table);
|
||
|
*rfc821Table = NULL;
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (NULL == *subtypeTable) {
|
||
|
*subtypeTable = tableCreate();
|
||
|
if (*subtypeTable == NULL) {
|
||
|
tableDestroy(*rfc821Table);
|
||
|
*rfc821Table = NULL;
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
for (tableinit = mimeSubtypes; tableinit->key; tableinit++) {
|
||
|
if (tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
|
||
|
tableDestroy(*rfc821Table);
|
||
|
tableDestroy(*subtypeTable);
|
||
|
*rfc821Table = NULL;
|
||
|
*subtypeTable = NULL;
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If there's a HTML text version use that, otherwise
|
||
|
* use the first text part, otherwise just use the
|
||
|
* first one around. HTML text is most likely to include
|
||
|
* a scripting worm
|
||
|
*
|
||
|
* If we can't find one, return -1
|
||
|
*/
|
||
|
static int
|
||
|
getTextPart(message *const messages[], size_t size)
|
||
|
{
|
||
|
size_t i;
|
||
|
int textpart = -1;
|
||
|
|
||
|
for (i = 0; i < size; i++)
|
||
|
if (messages[i] && (messageGetMimeType(messages[i]) == TEXT)) {
|
||
|
if (strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
|
||
|
return (int)i;
|
||
|
textpart = (int)i;
|
||
|
}
|
||
|
|
||
|
return textpart;
|
||
|
}
|
||
|
|
||
|
/*
 * strip -
 *	Remove the trailing spaces from a buffer. Don't call this directly,
 * always call strstrip() which is a wrapper to this routine to be used with
 * NUL terminated strings. This code looks a bit strange because of its
 * heritage from code that worked on strings that weren't necessarily NUL
 * terminated.
 * TODO: rewrite for clamAV
 *
 * buf: the buffer to strip in place (NULL is accepted and yields 0)
 * len: sizeof(buf), not strlen(buf). It must be int rather than size_t
 *      because the loops below rely on the >= 0 test.
 *
 * Returns its new length (a la strlen). If len is larger than
 * strlen(buf) + 1 the buffer is left alone and strlen(buf) is returned.
 *
 * Note that the two platform variants differ: the UNIX/Linux/Darwin loop
 * removes trailing characters that are not isgraph() but stops at '\n' and
 * '\r', while the other loop removes trailing NULs and isspace() characters.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> classifiers; passing a negative plain char is undefined.
 */
static size_t
strip(char *buf, int len)
{
    char *ptr;
    size_t i;

    if ((buf == NULL) || (len <= 0))
        return 0;

    i = strlen(buf);
    if (len > (int)(i + 1))
        return i;
    ptr = &buf[--len];

#if defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) /* watch - it may be in shared text area */
    do
        if (*ptr)
            *ptr = '\0';
    while ((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else /* more characters can be displayed on DOS */
    do
#ifndef REAL_MODE_DOS
        if (*ptr) /* C8.0 puts into a text area */
#endif
            *ptr = '\0';
    while ((--len >= 0) && ((*--ptr == '\0') || isspace((unsigned char)*ptr)));
#endif
    return ((size_t)(len + 1));
}
|
||
|
|
||
|
/*
 * strstrip:
 *	Strip a given NUL terminated string in place by handing it to strip().
 *
 * Returns the new length of the string, or 0 if s is NULL
 */
size_t
strstrip(char *s)
{
    size_t newlen = 0;

    if (s != NULL) {
        /* strip() wants the size of the buffer, so count the NUL as well */
        newlen = strip(s, strlen(s) + 1);
    }

    return newlen;
}
|
||
|
|
||
|
/*
|
||
|
* Returns 0 for OK, -1 for error
|
||
|
*/
|
||
|
static int
|
||
|
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound)
|
||
|
{
|
||
|
char *copy, *p, *buf;
|
||
|
const char *ptr;
|
||
|
int commandNumber;
|
||
|
size_t argCnt = 0;
|
||
|
|
||
|
*heuristicFound = false;
|
||
|
|
||
|
cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
|
||
|
|
||
|
copy = rfc822comments(cmd, NULL);
|
||
|
if (copy) {
|
||
|
commandNumber = tableFind(rfc821Table, copy);
|
||
|
free(copy);
|
||
|
} else {
|
||
|
commandNumber = tableFind(rfc821Table, cmd);
|
||
|
}
|
||
|
|
||
|
copy = rfc822comments(arg, NULL);
|
||
|
|
||
|
if (copy) {
|
||
|
ptr = copy;
|
||
|
} else {
|
||
|
ptr = arg;
|
||
|
}
|
||
|
|
||
|
buf = NULL;
|
||
|
|
||
|
switch (commandNumber) {
|
||
|
case CONTENT_TYPE:
|
||
|
/*
|
||
|
* Fix for non RFC1521 compliant mailers
|
||
|
* that send content-type: Text instead
|
||
|
* of content-type: Text/Plain, or
|
||
|
* just simply "Content-Type:"
|
||
|
*/
|
||
|
if (arg == NULL)
|
||
|
/*
|
||
|
* According to section 4 of RFC1521:
|
||
|
* "Note also that a subtype specification is
|
||
|
* MANDATORY. There are no default subtypes"
|
||
|
*
|
||
|
* We have to break this and make an assumption
|
||
|
* for the subtype because virus writers and
|
||
|
* email client writers don't get it right
|
||
|
*/
|
||
|
cli_dbgmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
|
||
|
else if (strchr(ptr, '/') == NULL)
|
||
|
/*
|
||
|
* Empty field, such as
|
||
|
* Content-Type:
|
||
|
* which I believe is illegal according to
|
||
|
* RFC1521
|
||
|
*/
|
||
|
cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
|
||
|
else {
|
||
|
int i;
|
||
|
|
||
|
buf = cli_malloc(strlen(ptr) + 1);
|
||
|
if (buf == NULL) {
|
||
|
cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
|
||
|
if (copy)
|
||
|
free(copy);
|
||
|
return -1;
|
||
|
}
|
||
|
/*
|
||
|
* Some clients are broken and
|
||
|
* put white space after the ;
|
||
|
*/
|
||
|
if (*arg == '/') {
|
||
|
cli_dbgmsg("Content-type '/' received, assuming application/octet-stream\n");
|
||
|
messageSetMimeType(m, "application");
|
||
|
messageSetMimeSubtype(m, "octet-stream");
|
||
|
} else {
|
||
|
/*
|
||
|
* The content type could be in quotes:
|
||
|
* Content-Type: "multipart/mixed"
|
||
|
* FIXME: this is a hack in that ignores
|
||
|
* the quotes, it doesn't handle
|
||
|
* them properly
|
||
|
*/
|
||
|
while (isspace((const unsigned char)*ptr))
|
||
|
ptr++;
|
||
|
if (ptr[0] == '\"')
|
||
|
ptr++;
|
||
|
|
||
|
if (ptr[0] != '/') {
|
||
|
char *s;
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
char *strptr = NULL;
|
||
|
#endif
|
||
|
|
||
|
s = cli_strtokbuf(ptr, 0, ";", buf);
|
||
|
/*
|
||
|
* Handle
|
||
|
* Content-Type: foo/bar multipart/mixed
|
||
|
* and
|
||
|
* Content-Type: multipart/mixed foo/bar
|
||
|
*/
|
||
|
if (s && *s) {
|
||
|
char *buf2 = cli_strdup(buf);
|
||
|
|
||
|
if (buf2 == NULL) {
|
||
|
if (copy)
|
||
|
free(copy);
|
||
|
free(buf);
|
||
|
return -1;
|
||
|
}
|
||
|
for (;;) {
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
|
||
|
#else
|
||
|
int set = messageSetMimeType(m, strtok(s, "/"));
|
||
|
#endif
|
||
|
|
||
|
#ifdef CL_THREAD_SAFE
|
||
|
s = strtok_r(NULL, ";", &strptr);
|
||
|
#else
|
||
|
s = strtok(NULL, ";");
|
||
|
#endif
|
||
|
if (s == NULL)
|
||
|
break;
|
||
|
if (set) {
|
||
|
size_t len = strstrip(s) - 1;
|
||
|
if (s[len] == '\"') {
|
||
|
s[len] = '\0';
|
||
|
len = strstrip(s);
|
||
|
}
|
||
|
if (len) {
|
||
|
if (strchr(s, ' '))
|
||
|
messageSetMimeSubtype(m,
|
||
|
cli_strtokbuf(s, 0, " ", buf2));
|
||
|
else
|
||
|
messageSetMimeSubtype(m, s);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
while (*s && !isspace((unsigned char)*s))
|
||
|
s++;
|
||
|
if (*s++ == '\0')
|
||
|
break;
|
||
|
if (*s == '\0')
|
||
|
break;
|
||
|
}
|
||
|
free(buf2);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Add in all rest of the the arguments.
|
||
|
* e.g. if the header is this:
|
||
|
* Content-Type:', arg='multipart/mixed; boundary=foo
|
||
|
* we find the boundary argument set it
|
||
|
*/
|
||
|
i = 1;
|
||
|
while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
|
||
|
cli_dbgmsg("mimeArgs = '%s'\n", buf);
|
||
|
|
||
|
argCnt++;
|
||
|
if (haveTooManyMIMEArguments(argCnt, ctx, heuristicFound)) {
|
||
|
break;
|
||
|
}
|
||
|
messageAddArguments(m, buf);
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
messageSetEncoding(m, ptr);
|
||
|
break;
|
||
|
case CONTENT_DISPOSITION:
|
||
|
buf = cli_malloc(strlen(ptr) + 1);
|
||
|
if (buf == NULL) {
|
||
|
cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
|
||
|
if (copy)
|
||
|
free(copy);
|
||
|
return -1;
|
||
|
}
|
||
|
p = cli_strtokbuf(ptr, 0, ";", buf);
|
||
|
if (p && *p) {
|
||
|
messageSetDispositionType(m, p);
|
||
|
messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
|
||
|
}
|
||
|
if (!messageHasFilename(m))
|
||
|
/*
|
||
|
* Handle this type of header, without
|
||
|
* a filename (e.g. some Worm.Torvil.D)
|
||
|
* Content-ID: <nRfkHdrKsAxRU>
|
||
|
* Content-Transfer-Encoding: base64
|
||
|
* Content-Disposition: attachment
|
||
|
*/
|
||
|
messageAddArgument(m, "filename=unknown");
|
||
|
}
|
||
|
if (copy)
|
||
|
free(copy);
|
||
|
if (buf)
|
||
|
free(buf);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Save the text portion of the message
|
||
|
*/
|
||
|
static int
|
||
|
saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
|
||
|
{
|
||
|
fileblob *fb;
|
||
|
|
||
|
messageAddArgument(m, "filename=textportion");
|
||
|
if ((fb = messageToFileblob(m, mctx->dir, destroy_text)) != NULL) {
|
||
|
/*
|
||
|
* Save main part to scan that
|
||
|
*/
|
||
|
cli_dbgmsg("Saving main message\n");
|
||
|
|
||
|
mctx->files++;
|
||
|
return fileblobScanAndDestroy(fb);
|
||
|
}
|
||
|
return CL_ETMPFILE;
|
||
|
}
|
||
|
|
||
|
/*
 * Handle RFC822 comments in headers.
 *
 * Copies "in" without its leading white space and without anything inside
 * (possibly nested) parentheses. Parentheses inside double quotes are kept,
 * and a backslash makes the next character literal; the backslash itself is
 * only kept when it is the last character of the input.
 *
 * If out == NULL, return a buffer without the comments, the caller must free
 * the returned buffer. Otherwise the result is written to out, which must
 * not be the same pointer as in and must have room for strlen(in) + 1 bytes.
 *
 * Return NULL on error or if the input has no comments.
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
    const char *iptr;
    char *optr;
    int backslash, inquote, commentlevel;

    if (in == NULL || out == in) {
        cli_errmsg("rfc822comments: Invalid parameters.\n");
        return NULL;
    }

    /* no '(' means no comment, there's nothing to do */
    if (strchr(in, '(') == NULL) {
        return NULL;
    }

    while (isspace((const unsigned char)*in)) {
        in++;
    }

    if (out == NULL) {
        /* the output can never be longer than the input */
        out = cli_malloc(strlen(in) + 1);
        if (out == NULL) {
            cli_errmsg("rfc822comments: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
            return NULL;
        }
    }

    backslash = commentlevel = inquote = 0;
    optr                               = out;

    cli_dbgmsg("rfc822comments: contains a comment\n");

    for (iptr = in; *iptr; iptr++) {
        if (backslash) {
            /* escaped character: copy it as it is unless it's in a comment */
            if (commentlevel == 0)
                *optr++ = *iptr;
            backslash = 0;
            continue;
        }
        switch (*iptr) {
            case '\\':
                backslash = 1;
                break;
            case '\"':
                *optr++ = '\"';
                inquote = !inquote;
                break;
            case '(':
                if (inquote)
                    *optr++ = '(';
                else
                    commentlevel++;
                break;
            case ')':
                if (inquote)
                    *optr++ = ')';
                else if (commentlevel > 0)
                    commentlevel--;
                break;
            default:
                if (commentlevel == 0)
                    *optr++ = *iptr;
                break;
        }
    }

    if (backslash) /* last character was a single backslash */
        *optr++ = '\\';
    *optr = '\0';

    cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

    return out;
}
|
||
|
|
||
|
/*
|
||
|
* Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
|
||
|
* free, or NULL on error
|
||
|
*/
|
||
|
static char *
|
||
|
rfc2047(const char *in)
|
||
|
{
|
||
|
char *out, *pout;
|
||
|
size_t len;
|
||
|
|
||
|
if ((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
|
||
|
return cli_strdup(in);
|
||
|
|
||
|
cli_dbgmsg("rfc2047 '%s'\n", in);
|
||
|
out = cli_malloc(strlen(in) + 1);
|
||
|
|
||
|
if (out == NULL) {
|
||
|
cli_errmsg("rfc2047: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
pout = out;
|
||
|
|
||
|
/* For each RFC2047 string */
|
||
|
while (*in) {
|
||
|
char encoding, *ptr, *enctext;
|
||
|
message *m;
|
||
|
blob *b;
|
||
|
|
||
|
/* Find next RFC2047 string */
|
||
|
while (*in) {
|
||
|
if ((*in == '=') && (in[1] == '?')) {
|
||
|
in += 2;
|
||
|
break;
|
||
|
}
|
||
|
*pout++ = *in++;
|
||
|
}
|
||
|
/* Skip over charset, find encoding */
|
||
|
while ((*in != '?') && *in)
|
||
|
in++;
|
||
|
if (*in == '\0')
|
||
|
break;
|
||
|
encoding = *++in;
|
||
|
encoding = (char)tolower(encoding);
|
||
|
|
||
|
if ((encoding != 'q') && (encoding != 'b')) {
|
||
|
cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
|
||
|
free(out);
|
||
|
out = NULL;
|
||
|
break;
|
||
|
}
|
||
|
/* Skip to encoded text */
|
||
|
if (*++in != '?')
|
||
|
break;
|
||
|
if (*++in == '\0')
|
||
|
break;
|
||
|
|
||
|
enctext = cli_strdup(in);
|
||
|
if (enctext == NULL) {
|
||
|
free(out);
|
||
|
out = NULL;
|
||
|
break;
|
||
|
}
|
||
|
in = strstr(in, "?=");
|
||
|
if (in == NULL) {
|
||
|
free(enctext);
|
||
|
break;
|
||
|
}
|
||
|
in += 2;
|
||
|
ptr = strstr(enctext, "?=");
|
||
|
if (NULL == ptr) {
|
||
|
free(enctext);
|
||
|
break;
|
||
|
}
|
||
|
*ptr = '\0';
|
||
|
/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
|
||
|
|
||
|
m = messageCreate();
|
||
|
if (m == NULL) {
|
||
|
free(enctext);
|
||
|
break;
|
||
|
}
|
||
|
messageAddStr(m, enctext);
|
||
|
|
||
|
free(enctext);
|
||
|
enctext = NULL;
|
||
|
|
||
|
switch (encoding) {
|
||
|
case 'q':
|
||
|
messageSetEncoding(m, "quoted-printable");
|
||
|
break;
|
||
|
case 'b':
|
||
|
messageSetEncoding(m, "base64");
|
||
|
break;
|
||
|
}
|
||
|
b = messageToBlob(m, 1);
|
||
|
if (b == NULL) {
|
||
|
messageDestroy(m);
|
||
|
break;
|
||
|
}
|
||
|
len = blobGetDataSize(b);
|
||
|
cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
|
||
|
(const char *)blobGetData(b));
|
||
|
memcpy(pout, blobGetData(b), len);
|
||
|
blobDestroy(b);
|
||
|
messageDestroy(m);
|
||
|
if (len > 0 && pout[len - 1] == '\n')
|
||
|
pout += len - 1;
|
||
|
else
|
||
|
pout += len;
|
||
|
}
|
||
|
if (out == NULL)
|
||
|
return NULL;
|
||
|
|
||
|
*pout = '\0';
|
||
|
|
||
|
cli_dbgmsg("rfc2047 returns '%s'\n", out);
|
||
|
return out;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle partial messages
|
||
|
*/
|
||
|
static int
|
||
|
rfc1341(mbox_ctx *mctx, message *m)
|
||
|
{
|
||
|
char *arg, *id, *number, *total, *oldfilename;
|
||
|
const char *tmpdir = NULL;
|
||
|
int n;
|
||
|
char pdir[PATH_MAX + 1];
|
||
|
unsigned char md5_val[16];
|
||
|
char *md5_hex;
|
||
|
|
||
|
if ((NULL == mctx) || (NULL == m)) {
|
||
|
cli_dbgmsg("rfc1341: Invalid NULL arguments\n");
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
id = (char *)messageFindArgument(m, "id");
|
||
|
if (id == NULL) {
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
if (NULL != mctx->ctx) {
|
||
|
tmpdir = cl_engine_get_str((const struct cl_engine *)mctx->ctx->engine, CL_ENGINE_TMPDIR, NULL);
|
||
|
}
|
||
|
if (NULL == tmpdir) {
|
||
|
tmpdir = cli_gettmpdir();
|
||
|
}
|
||
|
|
||
|
snprintf(pdir, sizeof(pdir) - 1, "%s" PATHSEP "clamav-partial", tmpdir);
|
||
|
|
||
|
if ((mkdir(pdir, S_IRUSR | S_IWUSR) < 0) && (errno != EEXIST)) {
|
||
|
cli_errmsg("Can't create the directory '%s'\n", pdir);
|
||
|
free(id);
|
||
|
return -1;
|
||
|
} else if (errno == EEXIST) {
|
||
|
STATBUF statb;
|
||
|
|
||
|
if (CLAMSTAT(pdir, &statb) < 0) {
|
||
|
char err[128];
|
||
|
cli_errmsg("Partial directory %s: %s\n", pdir,
|
||
|
cli_strerror(errno, err, sizeof(err)));
|
||
|
free(id);
|
||
|
return -1;
|
||
|
}
|
||
|
if (statb.st_mode & 077)
|
||
|
cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
|
||
|
pdir,
|
||
|
#ifdef ACCESSPERMS
|
||
|
(int)(statb.st_mode & ACCESSPERMS)
|
||
|
#else
|
||
|
(int)(statb.st_mode & 0777)
|
||
|
#endif
|
||
|
);
|
||
|
}
|
||
|
|
||
|
number = (char *)messageFindArgument(m, "number");
|
||
|
if (number == NULL) {
|
||
|
free(id);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
oldfilename = messageGetFilename(m);
|
||
|
|
||
|
arg = cli_malloc(10 + strlen(id) + strlen(number));
|
||
|
if (arg) {
|
||
|
sprintf(arg, "filename=%s%s", id, number);
|
||
|
messageAddArgument(m, arg);
|
||
|
free(arg);
|
||
|
}
|
||
|
|
||
|
if (oldfilename) {
|
||
|
cli_dbgmsg("Must reset to %s\n", oldfilename);
|
||
|
free(oldfilename);
|
||
|
}
|
||
|
|
||
|
n = atoi(number);
|
||
|
cl_hash_data("md5", id, strlen(id), md5_val, NULL);
|
||
|
md5_hex = cli_str2hex((const char *)md5_val, 16);
|
||
|
|
||
|
if (!md5_hex) {
|
||
|
free(id);
|
||
|
free(number);
|
||
|
return CL_EMEM;
|
||
|
}
|
||
|
|
||
|
if (messageSavePartial(m, pdir, md5_hex, n) < 0) {
|
||
|
free(md5_hex);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
total = (char *)messageFindArgument(m, "total");
|
||
|
cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
|
||
|
if (total) {
|
||
|
int t = atoi(total);
|
||
|
DIR *dd = NULL;
|
||
|
|
||
|
free(total);
|
||
|
/*
|
||
|
* If it's the last one - reassemble it
|
||
|
* FIXME: this assumes that we receive the parts in order
|
||
|
*/
|
||
|
if ((n == t) && ((dd = opendir(pdir)) != NULL)) {
|
||
|
FILE *fout;
|
||
|
char outname[PATH_MAX + 1];
|
||
|
time_t now;
|
||
|
|
||
|
sanitiseName(id);
|
||
|
|
||
|
snprintf(outname, sizeof(outname) - 1, "%s" PATHSEP "%s", mctx->dir, id);
|
||
|
|
||
|
cli_dbgmsg("outname: %s\n", outname);
|
||
|
|
||
|
fout = fopen(outname, "wb");
|
||
|
if (fout == NULL) {
|
||
|
cli_errmsg("Can't open '%s' for writing", outname);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
free(md5_hex);
|
||
|
closedir(dd);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
time(&now);
|
||
|
for (n = 1; n <= t; n++) {
|
||
|
char filename[NAME_MAX + 1];
|
||
|
struct dirent *dent;
|
||
|
|
||
|
snprintf(filename, sizeof(filename), "_%s-%u", md5_hex, n);
|
||
|
|
||
|
while ((dent = readdir(dd))) {
|
||
|
FILE *fin;
|
||
|
char buffer[BUFSIZ], fullname[PATH_MAX + 1 + 256 + 1];
|
||
|
int nblanks;
|
||
|
STATBUF statb;
|
||
|
const char *dentry_idpart;
|
||
|
int test_fd;
|
||
|
|
||
|
if (dent->d_ino == 0)
|
||
|
continue;
|
||
|
|
||
|
if (!strcmp(".", dent->d_name) ||
|
||
|
!strcmp("..", dent->d_name))
|
||
|
continue;
|
||
|
snprintf(fullname, sizeof(fullname) - 1,
|
||
|
"%s" PATHSEP "%s", pdir, dent->d_name);
|
||
|
dentry_idpart = strchr(dent->d_name, '_');
|
||
|
|
||
|
if (!dentry_idpart ||
|
||
|
strcmp(filename, dentry_idpart) != 0) {
|
||
|
if (!m->ctx->engine->keeptmp)
|
||
|
continue;
|
||
|
|
||
|
if ((test_fd = open(fullname, O_RDONLY)) < 0)
|
||
|
continue;
|
||
|
|
||
|
if (FSTAT(test_fd, &statb) < 0) {
|
||
|
close(test_fd);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (now - statb.st_mtime > (time_t)(7 * 24 * 3600)) {
|
||
|
if (cli_unlink(fullname)) {
|
||
|
cli_unlink(outname);
|
||
|
fclose(fout);
|
||
|
free(md5_hex);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
closedir(dd);
|
||
|
close(test_fd);
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
close(test_fd);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
fin = fopen(fullname, "rb");
|
||
|
if (fin == NULL) {
|
||
|
cli_errmsg("Can't open '%s' for reading", fullname);
|
||
|
fclose(fout);
|
||
|
cli_unlink(outname);
|
||
|
free(md5_hex);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
closedir(dd);
|
||
|
return -1;
|
||
|
}
|
||
|
nblanks = 0;
|
||
|
while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
|
||
|
/*
|
||
|
* Ensure that trailing newlines
|
||
|
* aren't copied
|
||
|
*/
|
||
|
if (buffer[0] == '\n')
|
||
|
nblanks++;
|
||
|
else {
|
||
|
if (nblanks)
|
||
|
do {
|
||
|
if (putc('\n', fout) == EOF) break;
|
||
|
} while (--nblanks > 0);
|
||
|
if (nblanks || fputs(buffer, fout) == EOF) {
|
||
|
fclose(fin);
|
||
|
fclose(fout);
|
||
|
cli_unlink(outname);
|
||
|
free(md5_hex);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
closedir(dd);
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
fclose(fin);
|
||
|
|
||
|
/* don't unlink if leave temps */
|
||
|
if (!m->ctx->engine->keeptmp) {
|
||
|
if (cli_unlink(fullname)) {
|
||
|
fclose(fout);
|
||
|
cli_unlink(outname);
|
||
|
free(md5_hex);
|
||
|
free(id);
|
||
|
free(number);
|
||
|
closedir(dd);
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
rewinddir(dd);
|
||
|
}
|
||
|
closedir(dd);
|
||
|
fclose(fout);
|
||
|
}
|
||
|
}
|
||
|
free(number);
|
||
|
free(id);
|
||
|
free(md5_hex);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
hrefs_done(blob *b, tag_arguments_t *hrefs)
|
||
|
{
|
||
|
if (b)
|
||
|
blobDestroy(b);
|
||
|
html_tag_arg_free(hrefs);
|
||
|
}
|
||
|
|
||
|
/* extract URLs from static text */
|
||
|
static void extract_text_urls(const unsigned char *mem, size_t len, tag_arguments_t *hrefs)
|
||
|
{
|
||
|
char url[1024];
|
||
|
size_t off;
|
||
|
for (off = 0; off + 10 < len; off++) {
|
||
|
/* check whether this is the start of a URL */
|
||
|
int32_t proto = cli_readint32(mem + off);
|
||
|
/* convert to lowercase */
|
||
|
proto |= 0x20202020;
|
||
|
/* 'http:', 'https:', or 'ftp:' in little-endian */
|
||
|
if ((proto == 0x70747468 &&
|
||
|
(mem[off + 4] == ':' || (mem[off + 5] == 's' && mem[off + 6] == ':'))) ||
|
||
|
proto == 0x3a707466) {
|
||
|
size_t url_len;
|
||
|
for (url_len = 4; off + url_len < len && url_len < (sizeof(url) - 1); url_len++) {
|
||
|
unsigned char c = mem[off + url_len];
|
||
|
/* smart compilers will compile this if into
|
||
|
* a single bt + jb instruction */
|
||
|
if (c == ' ' || c == '\n' || c == '\t')
|
||
|
break;
|
||
|
}
|
||
|
memcpy(url, mem + off, url_len);
|
||
|
url[url_len] = '\0';
|
||
|
html_tag_arg_add(hrefs, "href", url);
|
||
|
off += url_len;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* This used to be part of checkURLs, split out, because phishingScan needs it
|
||
|
* too, and phishingScan might be used in situations where checkURLs is
|
||
|
* disabled (see ifdef)
|
||
|
*/
|
||
|
static blob *
|
||
|
getHrefs(message *m, tag_arguments_t *hrefs)
|
||
|
{
|
||
|
unsigned char *mem;
|
||
|
blob *b = messageToBlob(m, 0);
|
||
|
size_t len;
|
||
|
|
||
|
if (b == NULL)
|
||
|
return NULL;
|
||
|
|
||
|
len = blobGetDataSize(b);
|
||
|
|
||
|
if (len == 0) {
|
||
|
blobDestroy(b);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/* TODO: make this size customisable */
|
||
|
if (len > 100 * 1024) {
|
||
|
cli_dbgmsg("Viruses pointed to by URLs not scanned in large message\n");
|
||
|
blobDestroy(b);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
hrefs->count = 0;
|
||
|
hrefs->tag = hrefs->value = NULL;
|
||
|
hrefs->contents = NULL;
|
||
|
|
||
|
cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
|
||
|
mem = blobGetData(b);
|
||
|
if (!html_normalise_mem(mem, (off_t)len, NULL, hrefs, m->ctx->dconf)) {
|
||
|
blobDestroy(b);
|
||
|
return NULL;
|
||
|
}
|
||
|
cli_dbgmsg("getHrefs: html_normalise_mem returned\n");
|
||
|
if (!hrefs->count && hrefs->scanContents) {
|
||
|
extract_text_urls(mem, len, hrefs);
|
||
|
}
|
||
|
|
||
|
/* TODO: Do we need to call remove_html_comments? */
|
||
|
return b;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* validate URLs for phishes
|
||
|
* followurls: see if URLs point to malware
|
||
|
*/
|
||
|
static void
|
||
|
checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
|
||
|
{
|
||
|
blob *b;
|
||
|
tag_arguments_t hrefs;
|
||
|
|
||
|
UNUSEDPARAM(is_html);
|
||
|
|
||
|
if (*rc == VIRUS)
|
||
|
return;
|
||
|
|
||
|
hrefs.scanContents = mctx->ctx->engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
|
||
|
|
||
|
if (!hrefs.scanContents)
|
||
|
/*
|
||
|
* Don't waste time extracting hrefs (parsing html), nobody
|
||
|
* will need it
|
||
|
*/
|
||
|
return;
|
||
|
|
||
|
hrefs.count = 0;
|
||
|
hrefs.tag = hrefs.value = NULL;
|
||
|
hrefs.contents = NULL;
|
||
|
|
||
|
b = getHrefs(mainMessage, &hrefs);
|
||
|
if (b) {
|
||
|
if (hrefs.scanContents) {
|
||
|
if (phishingScan(mctx->ctx, &hrefs) == CL_VIRUS) {
|
||
|
/*
|
||
|
* FIXME: message objects' contents are
|
||
|
* encapsulated so we should not access
|
||
|
* the members directly
|
||
|
*/
|
||
|
mainMessage->isInfected = true;
|
||
|
*rc = VIRUS;
|
||
|
cli_dbgmsg("PH:Phishing found\n");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
hrefs_done(b, &hrefs);
|
||
|
}
|
||
|
|
||
|
#ifdef HAVE_BACKTRACE
|
||
|
/*
 * SIGSEGV handler: print a backtrace (to syslog as well) and exit with
 * SIGSEGV as the status. sig is not used.
 */
static void
sigsegv(int sig)
{
    UNUSEDPARAM(sig);

    /* back to the default action before doing anything else */
    signal(SIGSEGV, SIG_DFL);

    print_trace(1);

    exit(SIGSEGV);
}
|
||
|
|
||
|
/*
 * Print up to 10 frames of the current call stack with cli_errmsg(), and
 * send them to syslog at LOG_ERR as well if use_syslog is not 0
 */
static void
print_trace(int use_syslog)
{
    void *array[10];
    size_t size;
    char **strings;
    size_t i;
    pid_t pid = getpid();

    cli_errmsg("Segmentation fault, attempting to print backtrace\n");

    size    = backtrace(array, 10);
    strings = backtrace_symbols(array, size);

    cli_errmsg("Backtrace of pid %d:\n", (int)pid);
    if (use_syslog)
        syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

    /* backtrace_symbols() gives NULL if it can't allocate the strings */
    if (strings == NULL) {
        cli_errmsg("Backtrace symbols are not available\n");
        size = 0;
    }

    for (i = 0; i < size; i++) {
        cli_errmsg("%s\n", strings[i]);
        if (use_syslog)
            syslog(LOG_ERR, "bt[%llu]: %s", (unsigned long long)i, strings[i]);
    }

#ifdef SAVE_TMP
    cli_errmsg("The errant mail file has been saved\n");
#endif
    /* #else TODO: dump the current email */

    /* only the array needs freeing, not the strings it points to */
    free(strings);
}
|
||
|
#endif
|
||
|
|
||
|
/* See also clamav-milter */
|
||
|
static bool
|
||
|
usefulHeader(int commandNumber, const char *cmd)
|
||
|
{
|
||
|
switch (commandNumber) {
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
case CONTENT_DISPOSITION:
|
||
|
case CONTENT_TYPE:
|
||
|
return true;
|
||
|
default:
|
||
|
if (strcasecmp(cmd, "From") == 0)
|
||
|
return true;
|
||
|
if (strcasecmp(cmd, "Received") == 0)
|
||
|
return true;
|
||
|
if (strcasecmp(cmd, "De") == 0)
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
|
||
|
*/
|
||
|
static char *
|
||
|
getline_from_mbox(char *buffer, size_t buffer_len, fmap_t *map, size_t *at)
|
||
|
{
|
||
|
const char *src, *cursrc;
|
||
|
char *curbuf;
|
||
|
size_t i;
|
||
|
size_t input_len = MIN(map->len - *at, buffer_len + 1);
|
||
|
src = cursrc = fmap_need_off_once(map, *at, input_len);
|
||
|
|
||
|
/* we check for eof from the result of GETC()
|
||
|
if(feof(fin))
|
||
|
return NULL;*/
|
||
|
if (!src) {
|
||
|
cli_dbgmsg("getline_from_mbox: fmap need failed\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
if ((buffer_len == 0) || (buffer == NULL)) {
|
||
|
cli_errmsg("Invalid call to getline_from_mbox(). Refer to https://docs.clamav.net/manual/Installing.html\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
curbuf = buffer;
|
||
|
|
||
|
for (i = 0; i < buffer_len - 1; i++) {
|
||
|
char c;
|
||
|
|
||
|
if (!input_len--) {
|
||
|
if (curbuf == buffer) {
|
||
|
/* EOF on first char */
|
||
|
return NULL;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
switch ((c = *cursrc++)) {
|
||
|
case '\0':
|
||
|
continue;
|
||
|
case '\n':
|
||
|
*curbuf++ = '\n';
|
||
|
if (input_len && *cursrc == '\r') {
|
||
|
i++;
|
||
|
cursrc++;
|
||
|
}
|
||
|
break;
|
||
|
case '\r':
|
||
|
*curbuf++ = '\r';
|
||
|
if (input_len && *cursrc == '\n') {
|
||
|
i++;
|
||
|
cursrc++;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
*curbuf++ = c;
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
*at += cursrc - src;
|
||
|
*curbuf = '\0';
|
||
|
|
||
|
return buffer;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Is this line a candidate for the start of a bounce message?
|
||
|
*/
|
||
|
static bool
|
||
|
isBounceStart(mbox_ctx *mctx, const char *line)
|
||
|
{
|
||
|
size_t len;
|
||
|
|
||
|
if (line == NULL)
|
||
|
return false;
|
||
|
if (*line == '\0')
|
||
|
return false;
|
||
|
/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
|
||
|
return false;
|
||
|
if((strncmp(line, ">From ", 6) == 0) && !isalnum(line[6]))
|
||
|
return false;*/
|
||
|
|
||
|
len = strlen(line);
|
||
|
if ((len < 6) || (len >= 72))
|
||
|
return false;
|
||
|
|
||
|
if ((memcmp(line, "From ", 5) == 0) ||
|
||
|
(memcmp(line, ">From ", 6) == 0)) {
|
||
|
int numSpaces = 0, numDigits = 0;
|
||
|
|
||
|
line += 4;
|
||
|
|
||
|
do
|
||
|
if (*line == ' ')
|
||
|
numSpaces++;
|
||
|
else if (isdigit((*line) & 0xFF))
|
||
|
numDigits++;
|
||
|
while (*++line != '\0');
|
||
|
|
||
|
if (numSpaces < 6)
|
||
|
return false;
|
||
|
if (numDigits < 11)
|
||
|
return false;
|
||
|
return true;
|
||
|
}
|
||
|
return (bool)(cli_compare_ftm_file((const unsigned char *)line, len, mctx->ctx->engine) == CL_TYPE_MAIL);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Extract a binhexEncoded message, return if it's found to be infected as we
|
||
|
* extract it
|
||
|
*/
|
||
|
static bool
|
||
|
exportBinhexMessage(mbox_ctx *mctx, message *m)
|
||
|
{
|
||
|
bool infected = false;
|
||
|
fileblob *fb;
|
||
|
|
||
|
if (messageGetEncoding(m) == NOENCODING)
|
||
|
messageSetEncoding(m, "x-binhex");
|
||
|
|
||
|
fb = messageToFileblob(m, mctx->dir, 0);
|
||
|
|
||
|
if (fb) {
|
||
|
cli_dbgmsg("Binhex file decoded to %s\n",
|
||
|
fileblobGetFilename(fb));
|
||
|
|
||
|
if (fileblobScanAndDestroy(fb) == CL_VIRUS)
|
||
|
infected = true;
|
||
|
mctx->files++;
|
||
|
} else
|
||
|
cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
|
||
|
|
||
|
return infected;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Locate any bounce message and extract it. Return cl_status
|
||
|
*/
|
||
|
static int
|
||
|
exportBounceMessage(mbox_ctx *mctx, text *start)
|
||
|
{
|
||
|
int rc = CL_CLEAN;
|
||
|
text *t;
|
||
|
fileblob *fb;
|
||
|
|
||
|
/*
|
||
|
* Attempt to save the original (unbounced)
|
||
|
* message - clamscan will find that in the
|
||
|
* directory and call us again (with any luck)
|
||
|
* having found an e-mail message to handle.
|
||
|
*
|
||
|
* This finds a lot of false positives, the
|
||
|
* search that a content type is in the
|
||
|
* bounce (i.e. it's after the bounce header)
|
||
|
* helps a bit.
|
||
|
*
|
||
|
* messageAddLine
|
||
|
* optimization could help here, but needs
|
||
|
* careful thought, do it with line numbers
|
||
|
* would be best, since the current method in
|
||
|
* messageAddLine of checking encoding first
|
||
|
* must remain otherwise non bounce messages
|
||
|
* won't be scanned
|
||
|
*/
|
||
|
for (t = start; t; t = t->t_next) {
|
||
|
const char *txt = lineGetData(t->t_line);
|
||
|
char cmd[RFC2821LENGTH + 1];
|
||
|
|
||
|
if (txt == NULL)
|
||
|
continue;
|
||
|
if (cli_strtokbuf(txt, 0, ":", cmd) == NULL)
|
||
|
continue;
|
||
|
|
||
|
switch (tableFind(mctx->rfc821Table, cmd)) {
|
||
|
case CONTENT_TRANSFER_ENCODING:
|
||
|
if ((strstr(txt, "7bit") == NULL) &&
|
||
|
(strstr(txt, "8bit") == NULL))
|
||
|
break;
|
||
|
continue;
|
||
|
case CONTENT_DISPOSITION:
|
||
|
break;
|
||
|
case CONTENT_TYPE:
|
||
|
if (strstr(txt, "text/plain") != NULL)
|
||
|
t = NULL;
|
||
|
break;
|
||
|
default:
|
||
|
if (strcasecmp(cmd, "From") == 0)
|
||
|
start = t;
|
||
|
else if (strcasecmp(cmd, "Received") == 0)
|
||
|
start = t;
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
if (t && ((fb = fileblobCreate()) != NULL)) {
|
||
|
cli_dbgmsg("Found a bounce message\n");
|
||
|
fileblobSetFilename(fb, mctx->dir, "bounce");
|
||
|
fileblobSetCTX(fb, mctx->ctx);
|
||
|
if (textToFileblob(start, fb, 1) == NULL) {
|
||
|
cli_dbgmsg("Nothing new to save in the bounce message\n");
|
||
|
fileblobDestroy(fb);
|
||
|
} else
|
||
|
rc = fileblobScanAndDestroy(fb);
|
||
|
mctx->files++;
|
||
|
} else
|
||
|
cli_dbgmsg("Not found a bounce message\n");
|
||
|
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get string representation of mimetype
|
||
|
*/
|
||
|
static const char *getMimeTypeStr(mime_type mimetype)
|
||
|
{
|
||
|
const struct tableinit *entry = mimeTypeStr;
|
||
|
|
||
|
while (entry->key) {
|
||
|
if (mimetype == ((mime_type)entry->value)) {
|
||
|
return entry->key;
|
||
|
}
|
||
|
entry++;
|
||
|
}
|
||
|
return "UNKNOWN";
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Get string representation of encoding type
|
||
|
*/
|
||
|
static const char *getEncTypeStr(encoding_type enctype)
|
||
|
{
|
||
|
const struct tableinit *entry = encTypeStr;
|
||
|
|
||
|
while (entry->key) {
|
||
|
if (enctype == ((encoding_type)entry->value)) {
|
||
|
return entry->key;
|
||
|
}
|
||
|
entry++;
|
||
|
}
|
||
|
return "UNKNOWN";
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Handle the ith element of a number of multiparts, e.g. multipart/alternative
|
||
|
*/
|
||
|
static message *
|
||
|
do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
|
||
|
{
|
||
|
bool addToText = false;
|
||
|
const char *dtype;
|
||
|
#ifndef SAVE_TO_DISC
|
||
|
message *body;
|
||
|
#endif
|
||
|
message *aMessage = messages[i];
|
||
|
const int doPhishingScan = mctx->ctx->engine->dboptions & CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
|
||
|
#if HAVE_JSON
|
||
|
json_object *thisobj = NULL, *saveobj = mctx->wrkobj;
|
||
|
|
||
|
if (mctx->wrkobj != NULL) {
|
||
|
json_object *multiobj = cli_jsonarray(mctx->wrkobj, "Multipart");
|
||
|
if (multiobj == NULL) {
|
||
|
cli_errmsg("Cannot get multipart preclass array\n");
|
||
|
} else if (NULL == (thisobj = messageGetJObj(aMessage))) {
|
||
|
cli_dbgmsg("Cannot get message preclass object\n");
|
||
|
} else if (CL_SUCCESS != cli_json_addowner(multiobj, thisobj, NULL, -1)) {
|
||
|
cli_errmsg("Cannot assign message preclass object to multipart preclass array\n");
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (aMessage == NULL) {
|
||
|
#if HAVE_JSON
|
||
|
if (thisobj != NULL)
|
||
|
cli_jsonstr(thisobj, "MimeType", "NULL");
|
||
|
#endif
|
||
|
return mainMessage;
|
||
|
}
|
||
|
|
||
|
if (*rc != OK)
|
||
|
return mainMessage;
|
||
|
|
||
|
cli_dbgmsg("Mixed message part %d is of type %d\n",
|
||
|
i, messageGetMimeType(aMessage));
|
||
|
|
||
|
#if HAVE_JSON
|
||
|
if (thisobj != NULL) {
|
||
|
cli_jsonstr(thisobj, "MimeType", getMimeTypeStr(messageGetMimeType(aMessage)));
|
||
|
cli_jsonstr(thisobj, "MimeSubtype", messageGetMimeSubtype(aMessage));
|
||
|
cli_jsonstr(thisobj, "EncodingType", getEncTypeStr(messageGetEncoding(aMessage)));
|
||
|
cli_jsonstr(thisobj, "Disposition", messageGetDispositionType(aMessage));
|
||
|
if (messageHasFilename(aMessage)) {
|
||
|
char *filename = messageGetFilename(aMessage);
|
||
|
cli_jsonstr(thisobj, "Filename", filename);
|
||
|
free(filename);
|
||
|
} else {
|
||
|
cli_jsonstr(thisobj, "Filename", "(inline)");
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
switch (messageGetMimeType(aMessage)) {
|
||
|
case APPLICATION:
|
||
|
case AUDIO:
|
||
|
case IMAGE:
|
||
|
case VIDEO:
|
||
|
break;
|
||
|
case NOMIME:
|
||
|
cli_dbgmsg("No mime headers found in multipart part %d\n", i);
|
||
|
if (mainMessage) {
|
||
|
if (binhexBegin(aMessage)) {
|
||
|
cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
|
||
|
|
||
|
if (exportBinhexMessage(mctx, mainMessage))
|
||
|
*rc = VIRUS;
|
||
|
}
|
||
|
if (mainMessage != messageIn)
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
} else if (aMessage) {
|
||
|
if (binhexBegin(aMessage)) {
|
||
|
cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
|
||
|
if (exportBinhexMessage(mctx, aMessage))
|
||
|
*rc = VIRUS;
|
||
|
messageReset(messages[i]);
|
||
|
}
|
||
|
}
|
||
|
addToText = true;
|
||
|
if (messageGetBody(aMessage) == NULL)
|
||
|
/*
|
||
|
* No plain text version
|
||
|
*/
|
||
|
cli_dbgmsg("No plain text alternative\n");
|
||
|
break;
|
||
|
case TEXT:
|
||
|
dtype = messageGetDispositionType(aMessage);
|
||
|
cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
|
||
|
dtype);
|
||
|
if (strcasecmp(dtype, "attachment") == 0)
|
||
|
break;
|
||
|
if ((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
|
||
|
const char *cptr;
|
||
|
|
||
|
if (mainMessage && (mainMessage != messageIn))
|
||
|
messageDestroy(mainMessage);
|
||
|
mainMessage = NULL;
|
||
|
cptr = messageGetMimeSubtype(aMessage);
|
||
|
cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
|
||
|
if ((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
|
||
|
(messageGetEncoding(aMessage) == NOENCODING)) {
|
||
|
/*
|
||
|
* Strictly speaking, a text/plain part
|
||
|
* is not an attachment. We pretend it
|
||
|
* is so that we can decode and scan it
|
||
|
*/
|
||
|
if (!messageHasFilename(aMessage)) {
|
||
|
cli_dbgmsg("Adding part to main message\n");
|
||
|
addToText = true;
|
||
|
} else
|
||
|
cli_dbgmsg("Treating inline as attachment\n");
|
||
|
} else {
|
||
|
const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
|
||
|
if (doPhishingScan)
|
||
|
checkURLs(aMessage, mctx, rc, is_html);
|
||
|
messageAddArgument(aMessage,
|
||
|
"filename=mixedtextportion");
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
cli_dbgmsg("Text type %s is not supported\n", dtype);
|
||
|
return mainMessage;
|
||
|
case MESSAGE:
|
||
|
/* Content-Type: message/rfc822 */
|
||
|
cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
|
||
|
messageGetEncoding(aMessage));
|
||
|
#ifndef SCAN_UNENCODED_BOUNCES
|
||
|
switch (messageGetEncoding(aMessage)) {
|
||
|
case NOENCODING:
|
||
|
case EIGHTBIT:
|
||
|
case BINARY:
|
||
|
if (encodingLine(aMessage) == NULL) {
|
||
|
/*
|
||
|
* This means that the message
|
||
|
* has no attachments
|
||
|
*
|
||
|
* The test for
|
||
|
* messageGetEncoding is needed
|
||
|
* since encodingLine won't have
|
||
|
* been set if the message
|
||
|
* itself has been encoded
|
||
|
*/
|
||
|
cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
|
||
|
messageDestroy(messages[i]);
|
||
|
messages[i] = NULL;
|
||
|
return mainMessage;
|
||
|
}
|
||
|
/* FALLTHROUGH */
|
||
|
default:
|
||
|
cli_dbgmsg("Encoded multipart/message will be scanned\n");
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#ifdef SAVE_TO_DISC
|
||
|
/*
|
||
|
* Save this embedded message
|
||
|
* to a temporary file
|
||
|
*/
|
||
|
if (saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
|
||
|
*rc = VIRUS;
|
||
|
messageDestroy(messages[i]);
|
||
|
messages[i] = NULL;
|
||
|
#else
|
||
|
/*
|
||
|
* Scan in memory, faster but is open to DoS attacks
|
||
|
* when many nested levels are involved.
|
||
|
*/
|
||
|
body = parseEmailHeaders(aMessage, mctx->rfc821Table);
|
||
|
|
||
|
/*
|
||
|
* We've finished with the
|
||
|
* original copy of the message,
|
||
|
* so throw that away and
|
||
|
* deal with the encapsulated
|
||
|
* message as a message.
|
||
|
* This can save a lot of memory
|
||
|
*/
|
||
|
messageDestroy(messages[i]);
|
||
|
messages[i] = NULL;
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = thisobj;
|
||
|
#endif
|
||
|
if (body) {
|
||
|
messageSetCTX(body, mctx->ctx);
|
||
|
*rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
|
||
|
if ((*rc == OK) && messageContainsVirus(body))
|
||
|
*rc = VIRUS;
|
||
|
messageDestroy(body);
|
||
|
}
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
#endif
|
||
|
return mainMessage;
|
||
|
case MULTIPART:
|
||
|
/*
|
||
|
* It's a multi part within a multi part
|
||
|
* Run the message parser on this bit, it won't
|
||
|
* be an attachment
|
||
|
*/
|
||
|
cli_dbgmsg("Found multipart inside multipart\n");
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = thisobj;
|
||
|
#endif
|
||
|
if (aMessage) {
|
||
|
/*
|
||
|
* The headers were parsed when reading in the
|
||
|
* whole multipart section
|
||
|
*/
|
||
|
*rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
|
||
|
cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
|
||
|
messageDestroy(messages[i]);
|
||
|
messages[i] = NULL;
|
||
|
} else {
|
||
|
*rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
|
||
|
if (mainMessage && (mainMessage != messageIn)) {
|
||
|
messageDestroy(mainMessage);
|
||
|
}
|
||
|
mainMessage = NULL;
|
||
|
}
|
||
|
#if HAVE_JSON
|
||
|
mctx->wrkobj = saveobj;
|
||
|
#endif
|
||
|
return mainMessage;
|
||
|
default:
|
||
|
cli_dbgmsg("Only text and application attachments are fully supported, type = %d\n",
|
||
|
messageGetMimeType(aMessage));
|
||
|
/* fall through - we may be able to salvage something */
|
||
|
}
|
||
|
|
||
|
if (*rc != VIRUS) {
|
||
|
fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
|
||
|
#if HAVE_JSON
|
||
|
json_object *arrobj;
|
||
|
#if (JSON_C_MAJOR_VERSION == 0) && (JSON_C_MINOR_VERSION < 13)
|
||
|
int arrlen = 0;
|
||
|
#else
|
||
|
size_t arrlen = 0;
|
||
|
#endif
|
||
|
|
||
|
if (thisobj != NULL) {
|
||
|
/* attempt to determine container size - prevents incorrect type reporting */
|
||
|
if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj)) {
|
||
|
arrlen = json_object_array_length(arrobj);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
if (fb) {
|
||
|
/* aMessage doesn't always have a ctx set */
|
||
|
fileblobSetCTX(fb, mctx->ctx);
|
||
|
if (fileblobScanAndDestroy(fb) == CL_VIRUS) {
|
||
|
*rc = VIRUS;
|
||
|
}
|
||
|
if (!addToText) {
|
||
|
mctx->files++;
|
||
|
}
|
||
|
}
|
||
|
#if HAVE_JSON
|
||
|
if (thisobj != NULL) {
|
||
|
json_object *entry = NULL;
|
||
|
const char *dtype = NULL;
|
||
|
|
||
|
/* attempt to acquire container type */
|
||
|
if (json_object_object_get_ex(mctx->ctx->wrkproperty, "ContainedObjects", &arrobj)) {
|
||
|
if (json_object_array_length(arrobj) > arrlen) {
|
||
|
entry = json_object_array_get_idx(arrobj, arrlen);
|
||
|
}
|
||
|
}
|
||
|
if (entry) {
|
||
|
json_object_object_get_ex(entry, "FileType", &entry);
|
||
|
if (entry) {
|
||
|
dtype = json_object_get_string(entry);
|
||
|
}
|
||
|
}
|
||
|
cli_jsonint(thisobj, "ContainedObjectsIndex", (int32_t)arrlen);
|
||
|
cli_jsonstr(thisobj, "ClamAVFileType", dtype ? dtype : "UNKNOWN");
|
||
|
}
|
||
|
#endif
|
||
|
if (messageContainsVirus(aMessage)) {
|
||
|
*rc = VIRUS;
|
||
|
}
|
||
|
}
|
||
|
messageDestroy(aMessage);
|
||
|
messages[i] = NULL;
|
||
|
|
||
|
return mainMessage;
|
||
|
}
|
||
|
|
||
|
/*
 * Returns the number of quote characters in the given string
 *
 * buf: NUL-terminated string to examine; a NULL pointer is treated as an
 *      empty string.
 * Returns the number of '"' characters found before the terminator.
 */
static int
count_quotes(const char *buf)
{
    int quotes = 0;

    if (buf == NULL)
        return 0;

    for (; *buf != '\0'; buf++) {
        if (*buf == '\"')
            quotes++;
    }

    return quotes;
}
|
||
|
|
||
|
/*
|
||
|
* Will the next line be a folded header? See RFC2822 section 2.2.3
|
||
|
*/
|
||
|
static bool
|
||
|
next_is_folded_header(const text *t)
|
||
|
{
|
||
|
const text *next = t->t_next;
|
||
|
const char *data, *ptr;
|
||
|
|
||
|
if (next == NULL)
|
||
|
return false;
|
||
|
|
||
|
if (next->t_line == NULL)
|
||
|
return false;
|
||
|
|
||
|
data = lineGetData(next->t_line);
|
||
|
|
||
|
/*
|
||
|
* Section B.2 of RFC822 says TAB or SPACE means a continuation of the
|
||
|
* previous entry.
|
||
|
*/
|
||
|
if (isblank(data[0]))
|
||
|
return true;
|
||
|
|
||
|
if (strchr(data, '=') == NULL)
|
||
|
/*
|
||
|
* Avoid false positives with
|
||
|
* Content-Type: text/html;
|
||
|
* Content-Transfer-Encoding: quoted-printable
|
||
|
*/
|
||
|
return false;
|
||
|
|
||
|
/*
|
||
|
* Some are broken and don't fold headers lines
|
||
|
* correctly as per section 2.2.3 of RFC2822.
|
||
|
* Generally they miss the white space at
|
||
|
* the start of the fold line:
|
||
|
* Content-Type: multipart/related;
|
||
|
* type="multipart/alternative";
|
||
|
* boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
||
|
* should read:
|
||
|
* Content-Type: multipart/related;
|
||
|
* type="multipart/alternative";
|
||
|
* boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
|
||
|
* Since we're a virus checker not an RFC
|
||
|
* verifier we need to handle these
|
||
|
*/
|
||
|
data = lineGetData(t->t_line);
|
||
|
|
||
|
ptr = strchr(data, '\0');
|
||
|
|
||
|
while (--ptr > data)
|
||
|
switch (*ptr) {
|
||
|
case ';':
|
||
|
return true;
|
||
|
case '\n':
|
||
|
case ' ':
|
||
|
case '\r':
|
||
|
case '\t':
|
||
|
continue; /* white space at end of line */
|
||
|
default:
|
||
|
return false;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/*
 * This routine is called on the first line of the body of
 * an email to handle broken messages that have newlines
 * in the middle of its headers
 *
 * line: the first body line to inspect.
 *
 * Returns true when the line begins with one of the header names checked
 * below (exact, case-sensitive match including the trailing ": "), i.e. it
 * still looks like a header; false otherwise.
 */
static bool
newline_in_header(const char *line)
{
    static const char *const header_prefixes[] = {
        "Message-Id: ",
        "Date: ",
    };
    const size_t nprefixes = sizeof(header_prefixes) / sizeof(header_prefixes[0]);
    size_t idx;

    cli_dbgmsg("newline_in_header, check \"%s\"\n", line);

    for (idx = 0; idx < nprefixes; idx++) {
        const char *prefix = header_prefixes[idx];

        if (strncmp(line, prefix, strlen(prefix)) == 0) {
            return true;
        }
    }

    cli_dbgmsg("newline_in_header, returning \"%s\"\n", line);

    return false;
}
|