2022-10-22 18:41:00 +08:00
/*
* Copyright ( C ) 2013 - 2022 Cisco Systems , Inc . and / or its affiliates . All rights reserved .
* Copyright ( C ) 2007 - 2013 Sourcefire , Inc .
*
* Authors : Nigel Horne
*
* Acknowledgements : Some ideas came from Stephen White < stephen @ earth . li > ,
* Michael Dankov < misha @ btrc . ru > , Gianluigi Tiesi < sherpya @ netfarm . it > ,
* Everton da Silva Marques , Thomas Lamy < Thomas . Lamy @ in - online . net > ,
* James Stevens < James @ kyzo . com >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston ,
* MA 02110 - 1301 , USA .
*/
# if HAVE_CONFIG_H
# include "clamav-config.h"
# endif
# ifdef CL_THREAD_SAFE
# ifndef _REENTRANT
# define _REENTRANT /* for Solaris 2.8 */
# endif
# endif
# include <stdio.h>
# include <stdlib.h>
# include <errno.h>
# include <assert.h>
# include <string.h>
# include <stdbool.h>
# ifdef HAVE_STRINGS_H
# include <strings.h>
# endif
# ifdef HAVE_STRING_H
# include <string.h>
# endif
# include <ctype.h>
# include <time.h>
# include <fcntl.h>
# ifdef HAVE_SYS_PARAM_H
# include <sys/param.h>
# endif
# include <dirent.h>
# include <limits.h>
# include <signal.h>
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
# ifdef CL_THREAD_SAFE
# include <pthread.h>
# endif
# if defined(_WIN32) || defined(_WIN64)
# define strtok_r strtok_s
# endif
# include "clamav.h"
# include "others.h"
# include "str.h"
# include "filetypes.h"
# include "mbox.h"
# include "dconf.h"
# include "fmap.h"
# include "json_api.h"
# include "msxml_parser.h"
# if HAVE_LIBXML2
# include <libxml/xmlversion.h>
# include <libxml/HTMLtree.h>
# include <libxml/HTMLparser.h>
# include <libxml/xmlreader.h>
# endif
# define DCONF_PHISHING mctx->ctx->dconf->phishing
# ifdef CL_DEBUG
# if defined(C_LINUX)
# include <features.h>
# endif
2023-01-14 18:28:39 +08:00
# if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1 && !defined(__UCLIBC__) || defined(__UCLIBC_HAS_BACKTRACE__)
2022-10-22 18:41:00 +08:00
# define HAVE_BACKTRACE
# endif
# endif
# ifdef HAVE_BACKTRACE
# include <execinfo.h>
# ifdef USE_SYSLOG
# include <syslog.h>
# endif
static void sigsegv ( int sig ) ;
static void print_trace ( int use_syslog ) ;
/*#define SAVE_TMP */ /* Save the file being worked on in tmp */
# endif
# if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
# undef strtok_r
# undef __strtok_r
# define strtok_r(a, b, c) strtok(a, b)
# endif
/*
 * Result of decoding one e-mail body, as returned by parseEmailBody().
 * cli_parse_mbox() translates the value for the last message into its own
 * return code.
 */
typedef enum {
FAIL , /* translated to CL_EFORMAT for the last message; skipped for earlier mbox entries */
OK , /* leaves the return code unchanged */
OK_ATTACHMENTS_NOT_SAVED , /* handled exactly like OK by cli_parse_mbox() */
VIRUS , /* translated to CL_VIRUS */
MAXREC , /* translated to CL_EMAXREC */
MAXFILES /* translated to CL_EMAXFILES */
} mbox_status ;
# ifndef isblank
# define isblank(c) (((c) == ' ') || ((c) == '\t'))
# endif
# define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
# include "htmlnorm.h"
# include "phishcheck.h"
# ifndef _WIN32
# include <sys/time.h>
# include <netdb.h>
# include <sys/socket.h>
# include <netinet/in.h>
# if !defined(C_BEOS) && !defined(C_INTERIX)
# include <net/if.h>
# include <arpa/inet.h>
# endif
# endif
# include <fcntl.h>
/*
* Use CL_SCAN_MAIL_PARTIAL_MESSAGE to handle messages covered by section 7.3 .2 of RFC1341 .
* This is experimental code so it is up to YOU to ( 1 ) ensure it ' s secure
* ( 2 ) periodically trim the directory of old files
*
* If you use the load balancing feature of clamav - milter to run clamd on
* more than one machine you must make sure that . . . / partial is on a shared
* network filesystem
*/
/*
* Slows things down a lot and only catches unencoded copies
* of EICAR within bounces , which don ' t matter
*/
//#define SCAN_UNENCODED_BOUNCES
/*
 * Per-scan state handed to parseEmailBody() and the helpers that take an
 * mbox_ctx. Filled in by cli_parse_mbox().
 */
typedef struct mbox_ctx {
const char * dir ; /* the dir argument given to cli_mbox() */
const table_t * rfc821Table ; /* header-name lookup table (file-scope rfc821) */
const table_t * subtypeTable ; /* MIME subtype lookup table (file-scope subtype) */
cli_ctx * ctx ; /* scan context of the caller */
unsigned int files ; /* number of files extracted */
# if HAVE_JSON
json_object * wrkobj ; /* initialised from ctx->wrkproperty */
# endif
} mbox_ctx ;
/* if supported by the system, use the optimized
* version of getc , that doesn ' t do locking ,
* and is possibly implemented entirely as a macro */
# if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
# define GETC(fp) getc_unlocked(fp)
# define LOCKFILE(fp) flockfile(fp)
# define UNLOCKFILE(fp) funlockfile(fp)
# else
# define GETC(fp) getc(fp)
# define LOCKFILE(fp)
# define UNLOCKFILE(fp)
# endif
static int cli_parse_mbox ( const char * dir , cli_ctx * ctx ) ;
static message * parseEmailFile ( fmap_t * map , size_t * at , const table_t * rfc821Table , const char * firstLine , const char * dir , cli_ctx * ctx , bool * heuristicFound ) ;
static message * parseEmailHeaders ( message * m , const table_t * rfc821Table , bool * heuristicFound ) ;
static int parseEmailHeader ( message * m , const char * line , const table_t * rfc821 , cli_ctx * ctx , bool * heuristicFound ) ;
static cl_error_t parseMHTMLComment ( const char * comment , cli_ctx * ctx , void * wrkjobj , void * cbdata ) ;
static mbox_status parseRootMHTML ( mbox_ctx * mctx , message * m , text * t ) ;
static mbox_status parseEmailBody ( message * messageIn , text * textIn , mbox_ctx * mctx , unsigned int recursion_level ) ;
static int boundaryStart ( const char * line , const char * boundary ) ;
static int boundaryEnd ( const char * line , const char * boundary ) ;
static int initialiseTables ( table_t * * rfc821Table , table_t * * subtypeTable ) ;
static int getTextPart ( message * const messages [ ] , size_t size ) ;
static size_t strip ( char * buf , int len ) ;
static int parseMimeHeader ( message * m , const char * cmd , const table_t * rfc821Table , const char * arg , cli_ctx * ctx , bool * heuristicFound ) ;
static int saveTextPart ( mbox_ctx * mctx , message * m , int destroy_text ) ;
static char * rfc2047 ( const char * in ) ;
static char * rfc822comments ( const char * in , char * out ) ;
static int rfc1341 ( mbox_ctx * mctx , message * m ) ;
static bool usefulHeader ( int commandNumber , const char * cmd ) ;
static char * getline_from_mbox ( char * buffer , size_t len , fmap_t * map , size_t * at ) ;
static bool isBounceStart ( mbox_ctx * mctx , const char * line ) ;
static bool exportBinhexMessage ( mbox_ctx * mctx , message * m ) ;
static int exportBounceMessage ( mbox_ctx * ctx , text * start ) ;
static const char * getMimeTypeStr ( mime_type mimetype ) ;
static const char * getEncTypeStr ( encoding_type enctype ) ;
static message * do_multipart ( message * mainMessage , message * * messages , int i , mbox_status * rc , mbox_ctx * mctx , message * messageIn , text * * tptr , unsigned int recursion_level ) ;
static int count_quotes ( const char * buf ) ;
static bool next_is_folded_header ( const text * t ) ;
static bool newline_in_header ( const char * line ) ;
static blob * getHrefs ( message * m , tag_arguments_t * hrefs ) ;
static void hrefs_done ( blob * b , tag_arguments_t * hrefs ) ;
static void checkURLs ( message * m , mbox_ctx * mctx , mbox_status * rc , int is_html ) ;
static bool haveTooManyMIMEPartsPerMessage ( size_t mimePartCnt , cli_ctx * ctx , mbox_status * rc ) ;
static bool hitLineFoldCnt ( const char * const line , size_t * lineFoldCnt , cli_ctx * ctx , bool * heuristicFound ) ;
static bool haveTooManyHeaderBytes ( size_t totalLen , cli_ctx * ctx , bool * heuristicFound ) ;
static bool haveTooManyEmailHeaders ( size_t totalHeaderCnt , cli_ctx * ctx , bool * heuristicFound ) ;
static bool haveTooManyMIMEArguments ( size_t argCnt , cli_ctx * ctx , bool * heuristicFound ) ;
/* Maximum line length according to RFC2821 */
# define RFC2821LENGTH 1000
/* Hashcodes for our hash tables */
# define CONTENT_TYPE 1
# define CONTENT_TRANSFER_ENCODING 2
# define CONTENT_DISPOSITION 3
/* Mime sub types */
# define PLAIN 1
# define ENRICHED 2
# define HTML 3
# define RICHTEXT 4
# define MIXED 5
# define ALTERNATIVE 6 /* RFC1521*/
# define DIGEST 7
# define SIGNED 8
# define PARALLEL 9
# define RELATED 10 /* RFC2387 */
# define REPORT 11 /* RFC1892 */
# define APPLEDOUBLE 12 /* Handling of this in only noddy for now */
# define FAX MIXED / * \
* RFC3458 \
* Drafts stated to treat is as mixed if it is \
* not known . This disappeared in the final \
* version ( except when talking about \
* voice - message ) , but it is good enough for us \
* since we do no validation of coversheet \
* presence etc . ( which also has disappeared \
* in the final version ) \
*/
# define ENCRYPTED 13 / * \
* e . g . RFC2015 \
* Content - Type : multipart / encrypted ; \
* boundary = " nextPart1383049.XCRrrar2yq " ; \
* protocol = " application/pgp-encrypted " \
*/
#define X_BFILE RELATED /*                                  \
                         * BeOS, expect two parts: the file and its \
                         * attributes. The attributes part comes as \
                         *	Content-Type: application/x-be_attribute \
                         *		name="foo"                  \
                         * I can't find where it is defined, any    \
                         * pointers would be appreciated. For now   \
                         * we treat it as multipart/related         \
                         */
# define KNOWBOT 14 /* Unknown and undocumented format? */
# define HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER (256 * 1024)
# define HEURISTIC_EMAIL_MAX_HEADER_BYTES (1024 * 256)
# define HEURISTIC_EMAIL_MAX_HEADERS 1024
# define HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE 1024
# define HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER 256
/*
 * Static key/value tables. Every array ends with a { NULL , 0 } sentinel.
 *
 * rfc821headers: header names mapped to the CONTENT_* hashcodes
 * mimeSubtypes:  MIME subtype names mapped to the subtype constants
 *                (FAX and X_BFILE alias MIXED and RELATED respectively)
 * mimeTypeStr:   printable names for the mime_type values
 * encTypeStr:    printable names for the encoding_type values
 *
 * NOTE(review): presumably consumed by initialiseTables(), getMimeTypeStr()
 * and getEncTypeStr(); those definitions are not visible here - confirm.
 */
static const struct tableinit {
const char * key ;
int value ;
} rfc821headers [ ] = {
/* TODO: make these regular expressions */
{ " Content-Type " , CONTENT_TYPE } ,
{ " Content-Transfer-Encoding " , CONTENT_TRANSFER_ENCODING } ,
{ " Content-Disposition " , CONTENT_DISPOSITION } ,
{ NULL , 0 } } ,
mimeSubtypes [ ] = { /* see RFC2045 */
/* subtypes of Text */
{ " plain " , PLAIN } ,
{ " enriched " , ENRICHED } ,
{ " html " , HTML } ,
{ " richtext " , RICHTEXT } ,
/* subtypes of Multipart */
{ " mixed " , MIXED } ,
{ " alternative " , ALTERNATIVE } ,
{ " digest " , DIGEST } ,
{ " signed " , SIGNED } ,
{ " parallel " , PARALLEL } ,
{ " related " , RELATED } ,
{ " report " , REPORT } ,
{ " appledouble " , APPLEDOUBLE } ,
{ " fax-message " , FAX } ,
{ " encrypted " , ENCRYPTED } ,
{ " x-bfile " , X_BFILE } , /* BeOS */
{ " knowbot " , KNOWBOT } , /* ??? */
{ " knowbot-metadata " , KNOWBOT } , /* ??? */
{ " knowbot-code " , KNOWBOT } , /* ??? */
{ " knowbot-state " , KNOWBOT } , /* ??? */
{ NULL , 0 } } ,
/* Printable names for mime_type, followed by printable names for encoding_type */
mimeTypeStr [ ] = { { " NOMIME " , NOMIME } , { " APPLICATION " , APPLICATION } , { " AUDIO " , AUDIO } , { " IMAGE " , IMAGE } , { " MESSAGE " , MESSAGE } , { " MULTIPART " , MULTIPART } , { " TEXT " , TEXT } , { " VIDEO " , VIDEO } , { " MEXTENSION " , MEXTENSION } , { NULL , 0 } } , encTypeStr [ ] = { { " NOENCODING " , NOENCODING } , { " QUOTEDPRINTABLE " , QUOTEDPRINTABLE } , { " BASE64 " , BASE64 } , { " EIGHTBIT " , EIGHTBIT } , { " BINARY " , BINARY } , { " UUENCODE " , UUENCODE } , { " YENCODE " , YENCODE } , { " EEXTENSION " , EEXTENSION } , { " BINHEX " , BINHEX } , { NULL , 0 } } ;
/* Held by cli_parse_mbox() around its call to initialiseTables() in thread-safe builds */
# ifdef CL_THREAD_SAFE
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER ;
# endif
/*
 * File-scope lookup tables. cli_parse_mbox() passes their addresses to
 * initialiseTables() and then hands the pointers to the parsers.
 */
static table_t * rfc821 = NULL ;
static table_t * subtype = NULL ;
/*
 * Public entry point: validate the directory argument and hand the work to
 * cli_parse_mbox().
 *
 * Returns CL_ENULLARG when dir is NULL, otherwise whatever cli_parse_mbox()
 * returns.
 */
int cli_mbox(const char *dir, cli_ctx *ctx)
{
    if (NULL != dir) {
        return cli_parse_mbox(dir, ctx);
    }

    cli_dbgmsg("cli_mbox called with NULL dir\n");
    return CL_ENULLARG;
}
/*
* TODO : when signal handling is added , need to remove temp files when a
* signal is received
* TODO : add option to scan in memory not via temp files , perhaps with a
* named pipe or memory mapped file , though this won ' t work on big e - mails
* containing many levels of encapsulated messages - it ' d just take too much
* RAM
* TODO : parse . msg format files
* TODO : fully handle AppleDouble format , see
* http : //www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
* TODO : ensure parseEmailHeaders is always called before parseEmailBody
* TODO : create parseEmail which calls parseEmailHeaders then parseEmailBody
* TODO : Handle unexpected NUL bytes in header lines which stop strcmp ( ) s :
* e . g . \ 0 Content - Type : application / binary ;
*/
/*
 * Scan the input mapped at ctx->fmap, which is either a UNIX style mbox
 * (several messages separated by "From " lines) or a single e-mail.
 *
 * dir: stored in the mbox_ctx and passed to uudecodeFile()/parseEmailFile()
 * ctx: scan context; ctx->fmap supplies the data
 *
 * Returns:
 *   CL_CLEAN     if not even one line could be read
 *   CL_EMEM      if initialiseTables() or messageCreate() fails, or if the
 *                last parsed message is flagged isTruncated
 *   CL_VIRUS     if a heuristic fired or parseEmailBody() reported VIRUS
 *   CL_EFORMAT / CL_EMAXREC / CL_EMAXFILES for the matching mbox_status of
 *                the last message
 *   CL_SUCCESS   otherwise
 */
static int
cli_parse_mbox ( const char * dir , cli_ctx * ctx )
{
int retcode ;
message * body ;
char buffer [ RFC2821LENGTH + 1 ] ;
mbox_ctx mctx ;
size_t at = 0 ;
fmap_t * map = ctx - > fmap ;
cli_dbgmsg ( " in mbox() \n " ) ;
/* The first line decides which of the two formats below is parsed */
if ( ! fmap_gets ( map , buffer , & at , sizeof ( buffer ) - 1 ) ) {
/* empty message */
return CL_CLEAN ;
}
/* Set up the shared lookup tables; the mutex covers only this call */
# ifdef CL_THREAD_SAFE
pthread_mutex_lock ( & tables_mutex ) ;
# endif
if ( initialiseTables ( & rfc821 , & subtype ) < 0 ) {
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & tables_mutex ) ;
# endif
return CL_EMEM ;
}
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & tables_mutex ) ;
# endif
retcode = CL_SUCCESS ;
body = NULL ;
/* Context shared by every parseEmailBody() call made from here */
mctx . dir = dir ;
mctx . rfc821Table = rfc821 ;
mctx . subtypeTable = subtype ;
mctx . ctx = ctx ;
mctx . files = 0 ;
# if HAVE_JSON
mctx . wrkobj = ctx - > wrkproperty ;
# endif
/*
* Is it a UNIX style mbox with more than one
* mail message , or just a single mail message ?
*
* TODO : It would be better if we called cli_magic_scan_dir here rather than
* in cli_scanmail . Then we could improve the way mailboxes with more
* than one message is handled , e . g . giving a better indication of
* which message within the mailbox is infected
*/
/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
if ( strncmp ( buffer , " From " , 5 ) = = 0 ) {
/*
* Have been asked to check a UNIX style mbox file , which
* may contain more than one e - mail message to decode
*
* It would be far better for scanners . c to do this splitting
* and do this
* FOR EACH mail in the mailbox
* DO
* pass this mail to cli_mbox - -
* scan this file
* IF this file has a virus quit
* THEN
* return CL_VIRUS
* FI
* END
* This would remove a problem with this code that it can
* fill up the tmp directory before it starts scanning
*/
bool lastLineWasEmpty ;
int messagenumber ;
/* m accumulates the raw lines of the message currently being read */
message * m = messageCreate ( ) ; /*Create an empty email */
2023-01-14 18:28:39 +08:00
if ( m = = NULL ) {
2022-10-22 18:41:00 +08:00
return CL_EMEM ;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
lastLineWasEmpty = false ;
messagenumber = 1 ;
messageSetCTX ( m , ctx ) ;
do {
cli_chomp ( buffer ) ;
/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) */
/* An empty line followed by a separator line closes the message held in m */
if ( lastLineWasEmpty & & ( strncmp ( buffer , " From " , 5 ) = = 0 ) ) {
cli_dbgmsg ( " Deal with message number %d \n " , messagenumber + + ) ;
/*
* End of a message in the mail box
*/
bool heuristicFound = false ;
body = parseEmailHeaders ( m , rfc821 , & heuristicFound ) ;
if ( body = = NULL ) {
/* No body came back: recycle m for the next message, or stop if a heuristic fired */
messageReset ( m ) ;
messageSetCTX ( m , ctx ) ;
if ( heuristicFound ) {
retcode = CL_VIRUS ;
break ;
}
continue ;
}
messageSetCTX ( body , ctx ) ;
/* From here on body replaces m; m is re-pointed at body on every path below */
messageDestroy ( m ) ;
if ( messageGetBody ( body ) ) {
mbox_status rc = parseEmailBody ( body , NULL , & mctx , 0 ) ;
if ( rc = = FAIL ) {
m = body ;
messageReset ( m ) ;
messageSetCTX ( m , ctx ) ;
continue ;
} else if ( rc = = VIRUS ) {
cli_dbgmsg ( " Message number %d is infected \n " ,
messagenumber - 1 ) ;
retcode = CL_VIRUS ;
/* m was destroyed above; NULL keeps the cleanup after the loop from destroying it again */
m = NULL ;
break ;
}
}
/*
* Starting a new message , throw away all the
* information about the old one . It would
* be best to be able to scan this message
* now , but cli_magic_scan_file needs arguments
* that haven ' t been passed here so it can ' t be
* called
*/
m = body ;
messageReset ( m ) ;
messageSetCTX ( m , ctx ) ;
cli_dbgmsg ( " Finished processing message \n " ) ;
2023-01-14 18:28:39 +08:00
} else {
2022-10-22 18:41:00 +08:00
lastLineWasEmpty = ( bool ) ( buffer [ 0 ] = = ' \0 ' ) ;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
if ( isuuencodebegin ( buffer ) ) {
/*
* Fast track visa to uudecode .
* TODO : binhex , yenc
*/
/* If uudecodeFile() fails, the line is stored like any other line */
2023-01-14 18:28:39 +08:00
if ( uudecodeFile ( m , buffer , dir , map , & at ) < 0 ) {
if ( messageAddStr ( m , buffer ) < 0 ) {
2022-10-22 18:41:00 +08:00
break ;
2023-01-14 18:28:39 +08:00
}
}
} else {
2022-10-22 18:41:00 +08:00
/* at this point, the \n has been removed */
2023-01-14 18:28:39 +08:00
if ( messageAddStr ( m , buffer ) < 0 ) {
break ;
}
}
2022-10-22 18:41:00 +08:00
} while ( fmap_gets ( map , buffer , & at , sizeof ( buffer ) - 1 ) ) ;
/* Parse the headers of whatever is left in m; the resulting body is scanned further down */
if ( retcode = = CL_SUCCESS ) {
cli_dbgmsg ( " Extract attachments from email %d \n " , messagenumber ) ;
bool heuristicFound = false ;
body = parseEmailHeaders ( m , rfc821 , & heuristicFound ) ;
if ( heuristicFound ) {
retcode = CL_VIRUS ;
}
}
2023-01-14 18:28:39 +08:00
if ( m ) {
2022-10-22 18:41:00 +08:00
messageDestroy ( m ) ;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
} else {
/*
* It ' s a single message , parse the headers then the body
*/
if ( strncmp ( buffer , " P I " , 4 ) = = 0 )
/*
* CommuniGate Pro format : ignore headers until
* blank line
*/
while ( fmap_gets ( map , buffer , & at , sizeof ( buffer ) - 1 ) & &
2023-01-14 18:28:39 +08:00
( strchr ( " \r \n " , buffer [ 0 ] ) = = NULL ) ) {
2022-10-22 18:41:00 +08:00
;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
/* getline_from_mbox could be using unlocked_stdio(3),
* so lock file here */
/*
* Ignore any blank lines at the top of the message
*/
/* Note: strchr() also matches the string terminator, so an empty buffer passes this test too */
while ( strchr ( " \r \n " , buffer [ 0 ] ) & &
2023-01-14 18:28:39 +08:00
( getline_from_mbox ( buffer , sizeof ( buffer ) - 1 , map , & at ) ! = NULL ) ) {
2022-10-22 18:41:00 +08:00
;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
/* Force termination before the buffer is handed over as the first line */
buffer [ sizeof ( buffer ) - 1 ] = ' \0 ' ;
bool heuristicFound = false ;
body = parseEmailFile ( map , & at , rfc821 , buffer , dir , ctx , & heuristicFound ) ;
if ( heuristicFound ) {
retcode = CL_VIRUS ;
}
}
if ( body ) {
/*
* Write out the last entry in the mailbox
*/
if ( ( retcode = = CL_SUCCESS ) & & messageGetBody ( body ) ) {
messageSetCTX ( body , ctx ) ;
/* Translate the parser status into the return code of this function */
switch ( parseEmailBody ( body , NULL , & mctx , 0 ) ) {
case OK :
case OK_ATTACHMENTS_NOT_SAVED :
break ;
case FAIL :
/*
* beware : cli_magic_scan_desc ( ) ,
* changes this into CL_CLEAN , so only
* use it to inform the higher levels
* that we couldn ' t decode it because
* it isn ' t an mbox , not to signal
* decoding errors on what * is * a valid
* mbox
*/
retcode = CL_EFORMAT ;
break ;
case MAXREC :
retcode = CL_EMAXREC ;
2023-01-14 18:28:39 +08:00
cli_append_potentially_unwanted_if_heur_exceedsmax ( ctx , " Heuristics.Limits.Exceeded.MaxRecursion " ) ; // Doing this now because it's actually tracking email recursion,-
// not fmap recursion, but it still is aborting with stuff not scanned.
// Also, we didn't have access to the ctx when this happened earlier.
2022-10-22 18:41:00 +08:00
break ;
case MAXFILES :
retcode = CL_EMAXFILES ;
2023-01-14 18:28:39 +08:00
cli_append_potentially_unwanted_if_heur_exceedsmax ( ctx , " Heuristics.Limits.Exceeded.MaxFiles " ) ; // Doing this now because it's actually tracking email parts,-
// not actual files, but it still is aborting with stuff not scanned.
// Also, we didn't have access to the ctx when this happened earlier.
2022-10-22 18:41:00 +08:00
break ;
case VIRUS :
retcode = CL_VIRUS ;
break ;
}
}
/* A truncated message only downgrades an otherwise successful result */
2023-01-14 18:28:39 +08:00
if ( body - > isTruncated & & retcode = = CL_SUCCESS ) {
2022-10-22 18:41:00 +08:00
retcode = CL_EMEM ;
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
/*
* Tidy up and quit
*/
messageDestroy ( body ) ;
}
cli_dbgmsg ( " cli_mbox returning %d \n " , retcode ) ;
return retcode ;
}
/* Payload capacity of one ReadStruct chunk, not counting the terminator byte */
# define READ_STRUCT_BUFFER_LEN 1024
/*
 * One chunk of a singly linked list used to accumulate a header that may
 * span several input lines.
 */
typedef struct _ReadStruct {
char buffer [ READ_STRUCT_BUFFER_LEN + 1 ] ; /* chunk data; one spare byte beyond the payload capacity */
size_t bufferLen ; /* number of bytes of buffer in use */
struct _ReadStruct * next ; /* following chunk, if any */
} ReadStruct ;
/*
 * Append the NUL-terminated string `buffer` to a chunk list.
 *
 * rs:     the current tail chunk of the list (its `next` is overwritten
 *         when a new chunk has to be linked in)
 * buffer: text to append; any length is accepted, new chunks are allocated
 *         as often as needed
 *
 * Returns the tail chunk after the append (either `rs` or a newly allocated
 * chunk), or NULL when `rs` is NULL or a chunk could not be allocated. On
 * allocation failure the part of `buffer` that still fitted has already been
 * stored; the list itself stays valid and is still owned by the caller.
 */
static ReadStruct *
appendReadStruct(ReadStruct *rs, const char *const buffer)
{
    ReadStruct *tail = NULL;
    ReadStruct *node = rs;
    const char *src  = buffer;
    size_t remaining = 0;

    if (NULL == rs) {
        cli_dbgmsg("appendReadStruct: Invalid argument\n");
        goto done;
    }

    remaining = strlen(buffer);

    for (;;) {
        ReadStruct *next = NULL;
        size_t spaceLeft = READ_STRUCT_BUFFER_LEN - node->bufferLen;
        size_t part      = (remaining < spaceLeft) ? remaining : spaceLeft;

        /* Fill what is left of the current chunk and keep it terminated */
        memcpy(&(node->buffer[node->bufferLen]), src, part);
        node->bufferLen += part;
        node->buffer[node->bufferLen] = '\0';

        src += part;
        remaining -= part;
        if (0 == remaining) {
            break;
        }

        /*
         * More data than fits: chain a fresh chunk. CLI_CALLOC jumps to
         * `done` on failure, where `tail` is still NULL so that the caller
         * sees the error instead of silently losing the rest of the line.
         */
        CLI_CALLOC(next, 1, sizeof(ReadStruct));
        node->next = next;
        node       = next;
    }

    tail = node;

done:
    return tail;
}
static char *
getMallocedBufferFromList ( const ReadStruct * head )
{
const ReadStruct * rs = head ;
int bufferLen = 1 ;
char * working = NULL ;
char * ret = NULL ;
while ( rs ) {
bufferLen + = rs - > bufferLen ;
rs = rs - > next ;
}
2023-01-14 18:28:39 +08:00
MALLOC ( working , bufferLen ) ;
2022-10-22 18:41:00 +08:00
rs = head ;
bufferLen = 0 ;
while ( rs ) {
memcpy ( & ( working [ bufferLen ] ) , rs - > buffer , rs - > bufferLen ) ;
bufferLen + = rs - > bufferLen ;
working [ bufferLen ] = 0 ;
rs = rs - > next ;
}
ret = working ;
done :
if ( NULL = = ret ) {
2023-01-14 18:28:39 +08:00
FREE ( working ) ;
2022-10-22 18:41:00 +08:00
}
return ret ;
}
/*
 * Release every chunk of the list starting at `head`. A NULL head is a
 * no-op.
 */
static void
freeList(ReadStruct *head)
{
    ReadStruct *node      = NULL;
    ReadStruct *following = NULL;

    for (node = head; NULL != node; node = following) {
        /* Remember the successor before the chunk itself goes away */
        following = node->next;
        FREE(node);
    }
}
/*
 * Reset a chunk list for reuse: when the tail pointer `curr` is not the
 * head, release every chunk after the head; then mark the head empty and
 * point `curr` back at it. The head chunk itself is kept.
 * `curr` must be an lvalue; both arguments are evaluated more than once.
 */
#ifndef FREELIST_REALLOC
#define FREELIST_REALLOC(head, curr) \
    do {                             \
        if ((curr) != (head)) {      \
            freeList((head)->next);  \
        }                            \
        (head)->bufferLen = 0;       \
        (head)->next      = NULL;    \
        (curr)            = (head);  \
    } while (0)
#endif /* FREELIST_REALLOC */
/*
 * Detect repeated header lines that consist solely of blanks and
 * semicolons. A semicolon separates parameters, but a line without any data
 * carries no parameter. The first such line is tolerated because it may be
 * the continuation of a previous line that did carry data.
 *
 * line:            the current line, or NULL for an empty line (NULL leaves
 *                  the state untouched)
 * lastWasOnlySemi: in/out state, true when the previous non-NULL line
 *                  contained nothing but blanks and semicolons
 *
 * Returns true when this line should be skipped, i.e. when it is the second
 * or a later consecutive "only blanks and semicolons" line.
 */
static bool
doContinueMultipleEmptyOptions(const char *const line, bool *lastWasOnlySemi)
{
    const char *p;

    if (NULL == line) {
        return false;
    }

    for (p = line; '\0' != *p; p++) {
        /* Cast: handing a negative plain char to isblank() is undefined */
        if (!isblank((unsigned char)*p) && (';' != *p)) {
            *lastWasOnlySemi = false;
            return false;
        }
    }

    if (*lastWasOnlySemi) {
        return true;
    }

    *lastWasOnlySemi = true;
    return false;
}
/*
 * Count consecutive folded header lines (lines starting with a blank) and
 * report when the per-header limit is reached.
 *
 * line:           the current line, or NULL (NULL leaves the counter alone)
 * lineFoldCnt:    in/out counter; incremented for a folded line, reset to 0
 *                 by any other non-NULL line
 * ctx:            scan context, consulted through SCAN_HEURISTIC_EXCEEDS_MAX
 * heuristicFound: set to true when the limit is hit and the exceeds-max
 *                 heuristic is enabled
 *
 * Returns true once the counter has reached
 * HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER, false otherwise.
 */
static bool
hitLineFoldCnt(const char *const line, size_t *lineFoldCnt, cli_ctx *ctx, bool *heuristicFound)
{
    if (NULL == line) {
        return false;
    }

    /* Cast: handing a negative plain char to isblank() is undefined */
    if (isblank((unsigned char)line[0])) {
        (*lineFoldCnt)++;
    } else {
        *lineFoldCnt = 0;
    }

    if (*lineFoldCnt < HEURISTIC_EMAIL_MAX_LINE_FOLDS_PER_HEADER) {
        return false;
    }

    if (SCAN_HEURISTIC_EXCEEDS_MAX) {
        cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailLineFoldCnt");
        *heuristicFound = true;
    }
    return true;
}
/*
 * Check the accumulated header size against
 * HEURISTIC_EMAIL_MAX_HEADER_BYTES.
 *
 * Returns true when totalLen is above the limit. In that case, and only if
 * the exceeds-max heuristic is enabled for ctx, an alert is appended and
 * *heuristicFound is set to true.
 */
static bool
haveTooManyHeaderBytes(size_t totalLen, cli_ctx *ctx, bool *heuristicFound)
{
    if (totalLen <= HEURISTIC_EMAIL_MAX_HEADER_BYTES) {
        return false;
    }

    if (SCAN_HEURISTIC_EXCEEDS_MAX) {
        cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailHeaderBytes");
        *heuristicFound = true;
    }
    return true;
}
/*
 * Check the number of headers seen so far against
 * HEURISTIC_EMAIL_MAX_HEADERS.
 *
 * Returns true when totalHeaderCnt is above the limit. In that case, and
 * only if the exceeds-max heuristic is enabled for ctx, an alert is appended
 * and *heuristicFound is set to true.
 */
static bool
haveTooManyEmailHeaders(size_t totalHeaderCnt, cli_ctx *ctx, bool *heuristicFound)
{
    if (totalHeaderCnt <= HEURISTIC_EMAIL_MAX_HEADERS) {
        return false;
    }

    if (SCAN_HEURISTIC_EXCEEDS_MAX) {
        cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailHeaders");
        *heuristicFound = true;
    }
    return true;
}
/*
 * Check the number of MIME parts of a message against
 * HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE.
 *
 * Returns true when mimePartCnt has reached the limit. In that case, and
 * only if the exceeds-max heuristic is enabled for ctx, an alert is appended
 * and *rc is set to VIRUS.
 */
static bool
haveTooManyMIMEPartsPerMessage(size_t mimePartCnt, cli_ctx *ctx, mbox_status *rc)
{
    if (mimePartCnt < HEURISTIC_EMAIL_MAX_MIME_PARTS_PER_MESSAGE) {
        return false;
    }

    if (SCAN_HEURISTIC_EXCEEDS_MAX) {
        cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailMIMEPartsPerMessage");
        *rc = VIRUS;
    }
    return true;
}
/*
 * Check the number of arguments of one header against
 * HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER.
 *
 * Returns true when argCnt has reached the limit. In that case, and only if
 * the exceeds-max heuristic is enabled for ctx, an alert is appended and
 * *heuristicFound is set to true.
 */
static bool
haveTooManyMIMEArguments(size_t argCnt, cli_ctx *ctx, bool *heuristicFound)
{
    if (argCnt < HEURISTIC_EMAIL_MAX_ARGUMENTS_PER_HEADER) {
        return false;
    }

    if (SCAN_HEURISTIC_EXCEEDS_MAX) {
        cli_append_potentially_unwanted(ctx, "Heuristics.Limits.Exceeded.EmailMIMEArguments");
        *heuristicFound = true;
    }
    return true;
}
/*
 * Read an e-mail message from the fmap, parse its headers and collect its
 * body lines into a new message.
 *
 * map, at:        input map and current read offset; further lines are
 *                 fetched with getline_from_mbox()
 * rfc821:         table used to recognise the Content-* headers
 * firstLine:      the line the caller has already read
 * dir:            passed on to uudecodeFile()
 * ctx:            scan context used by the limit heuristics
 * heuristicFound: cleared on entry, set to true when a limit heuristic fires
 *
 * Returns the new message, or NULL when messageCreate() fails, when no
 * header of interest was found, or when a heuristic fired. On the error
 * path through `done` the message is flagged isTruncated.
 *
 * FIXME : files full of new lines and nothing else are
 * handled ungracefully . . .
 */
static message *
parseEmailFile ( fmap_t * map , size_t * at , const table_t * rfc821 , const char * firstLine , const char * dir , cli_ctx * ctx , bool * heuristicFound )
{
bool inHeader = true ;
bool bodyIsEmpty = true ;
bool lastWasBlank = false , lastBodyLineWasBlank = false ;
message * ret ;
bool anyHeadersFound = false ;
int commandNumber = - 1 ;
char * boundary = NULL ;
char buffer [ RFC2821LENGTH + 1 ] ;
bool lastWasOnlySemi = false ;
/* err stays 1 on every goto done; it is cleared only after the read loop has finished */
int err = 1 ;
size_t totalHeaderBytes = 0 ;
size_t totalHeaderCnt = 0 ;
size_t lineFoldCnt = 0 ;
* heuristicFound = false ;
/* head is the first chunk of the header being assembled, curr its current tail */
ReadStruct * head = NULL ;
ReadStruct * curr = NULL ;
cli_dbgmsg ( " parseEmailFile \n " ) ;
ret = messageCreate ( ) ;
if ( ret = = NULL )
return NULL ;
2023-01-14 18:28:39 +08:00
CLI_CALLOC ( head , 1 , sizeof ( ReadStruct ) ) ;
curr = head ;
2022-10-22 18:41:00 +08:00
/* NOTE(review): strncpy() leaves buffer unterminated if firstLine holds sizeof(buffer) - 1 or more characters - confirm the caller's length bound */
strncpy ( buffer , firstLine , sizeof ( buffer ) - 1 ) ;
do {
const char * line ;
( void ) cli_chomp ( buffer ) ;
/* From here on an empty line is represented by line == NULL */
if ( buffer [ 0 ] = = ' \0 ' )
line = NULL ;
else
line = buffer ;
if ( doContinueMultipleEmptyOptions ( line , & lastWasOnlySemi ) ) {
continue ;
}
if ( hitLineFoldCnt ( line , & lineFoldCnt , ctx , heuristicFound ) ) {
break ;
}
/*
* Don ' t blank lines which are only spaces from headers ,
* otherwise they ' ll be treated as the end of header marker
*/
if ( lastWasBlank ) {
lastWasBlank = false ;
if ( boundaryStart ( buffer , boundary ) ) {
cli_dbgmsg ( " Found a header line with space that should be blank \n " ) ;
inHeader = false ;
}
}
if ( inHeader ) {
cli_dbgmsg ( " parseEmailFile: check '%s' \n " , buffer ) ;
/*
* Ensure wide characters are handled where
* sizeof ( char ) > 1
*/
if ( line & & isspace ( line [ 0 ] & 0xFF ) ) {
char copy [ sizeof ( buffer ) ] ;
strcpy ( copy , buffer ) ;
strstrip ( copy ) ;
if ( copy [ 0 ] = = ' \0 ' ) {
/*
* The header line contains only white
* space . This is not the end of the
* headers according to RFC2822 , but
* some MUAs will handle it as though
* it were , and virus writers exploit
* this bug . We can ' t just break from
* the loop here since that would allow
* other exploits such as inserting a
* white space line before the
* content - type line . So we just have
* to make a best guess . Sigh .
*/
/* Flush the header collected so far before deciding how to treat this line */
if ( head - > bufferLen ) {
char * header = getMallocedBufferFromList ( head ) ;
int needContinue = 0 ;
2023-01-14 18:28:39 +08:00
VERIFY_POINTER ( header ) ;
2022-10-22 18:41:00 +08:00
totalHeaderCnt + + ;
if ( haveTooManyEmailHeaders ( totalHeaderCnt , ctx , heuristicFound ) ) {
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
break ;
}
needContinue = ( parseEmailHeader ( ret , header , rfc821 , ctx , heuristicFound ) < 0 ) ;
if ( * heuristicFound ) {
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
break ;
}
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
FREELIST_REALLOC ( head , curr ) ;
if ( needContinue ) {
continue ;
}
}
/* With a boundary known, remember the blank so the next line is tested with boundaryStart() */
if ( boundary | |
( ( boundary = ( char * ) messageFindArgument ( ret , " boundary " ) ) ! = NULL ) ) {
lastWasBlank = true ;
continue ;
}
}
}
if ( ( line = = NULL ) & & ( 0 = = head - > bufferLen ) ) { /* empty line */
/*
* A blank line signifies the end of
* the header and the start of the text
*/
if ( ! anyHeadersFound )
/* Ignore the junk at the top */
continue ;
cli_dbgmsg ( " End of header information \n " ) ;
inHeader = false ;
bodyIsEmpty = true ;
} else {
char * ptr ;
const char * lookahead ;
bool lineAdded = true ;
/* Nothing collected yet: this line has to start a new header */
if ( 0 = = head - > bufferLen ) {
char cmd [ RFC2821LENGTH + 1 ] , out [ RFC2821LENGTH + 1 ] ;
/*
* Continuation of line we ' re ignoring ?
*/
if ( isblank ( line [ 0 ] ) )
continue ;
/*
* Is this a header we ' re interested in ?
*/
if ( ( strchr ( line , ' : ' ) = = NULL ) | |
( cli_strtokbuf ( line , 0 , " : " , cmd ) = = NULL ) ) {
if ( strncmp ( line , " From " , 5 ) = = 0 )
anyHeadersFound = true ;
continue ;
}
ptr = rfc822comments ( cmd , out ) ;
commandNumber = tableFind ( rfc821 , ptr ? ptr : cmd ) ;
/* Only the three Content-* headers are collected; every other header is skipped */
switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
anyHeadersFound = true ;
break ;
default :
if ( ! anyHeadersFound )
anyHeadersFound = usefulHeader ( commandNumber , cmd ) ;
continue ;
}
curr = appendReadStruct ( curr , line ) ;
/* A NULL tail means the line could not be stored: flag the message and stop */
if ( NULL = = curr ) {
if ( ret ) {
ret - > isTruncated = true ;
}
break ;
}
} else if ( line ! = NULL ) {
/* Continuation of the header being collected */
curr = appendReadStruct ( curr , line ) ;
} else {
lineAdded = false ;
}
if ( lineAdded ) {
totalHeaderBytes + = strlen ( line ) ;
if ( haveTooManyHeaderBytes ( totalHeaderBytes , ctx , heuristicFound ) ) {
break ;
}
}
/* Peek at the first byte of the next line without consuming it */
if ( ( lookahead = fmap_need_off_once ( map , * at , 1 ) ) ) {
/*
* Section B .2 of RFC822 says TAB or
* SPACE means a continuation of the
* previous entry .
*
* Add all the arguments on the line
*/
if ( isblank ( * lookahead ) )
continue ;
}
/*
* Handle broken headers , where the next
* line isn ' t indented by whitespace
*/
{
char * header = getMallocedBufferFromList ( head ) ; /*This is the issue */
int needContinue = 0 ;
2023-01-14 18:28:39 +08:00
VERIFY_POINTER ( header ) ;
2022-10-22 18:41:00 +08:00
/* A trailing semicolon or an odd number of quotes means the header carries on in the next line */
needContinue = ( header [ strlen ( header ) - 1 ] = = ' ; ' ) ;
if ( 0 = = needContinue ) {
needContinue = ( line & & ( count_quotes ( header ) & 1 ) ) ;
}
if ( 0 = = needContinue ) {
totalHeaderCnt + + ;
if ( haveTooManyEmailHeaders ( totalHeaderCnt , ctx , heuristicFound ) ) {
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
break ;
}
needContinue = ( parseEmailHeader ( ret , header , rfc821 , ctx , heuristicFound ) < 0 ) ;
if ( * heuristicFound ) {
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
break ;
}
/*Check total headers here;*/
}
2023-01-14 18:28:39 +08:00
FREE ( header ) ;
2022-10-22 18:41:00 +08:00
/* When continuing, the collected chunks are kept so the next line is appended to them */
if ( needContinue ) {
continue ;
}
FREELIST_REALLOC ( head , curr ) ;
}
}
} else if ( line & & isuuencodebegin ( line ) ) {
/*
* Fast track visa to uudecode .
* TODO : binhex , yenc
*/
bodyIsEmpty = false ;
if ( uudecodeFile ( ret , line , dir , map , at ) < 0 )
if ( messageAddStr ( ret , line ) < 0 )
break ;
} else {
if ( line = = NULL ) {
/*
* Although this would save time and RAM , some
* phish signatures have been built which need
* the blank lines
*/
if ( lastBodyLineWasBlank & &
( messageGetMimeType ( ret ) ! = TEXT ) ) {
cli_dbgmsg ( " Ignoring consecutive blank lines in the body \n " ) ;
continue ;
}
lastBodyLineWasBlank = true ;
} else {
if ( bodyIsEmpty ) {
/*
* Broken message : new line in the
* middle of the headers , so the first
* line of the body is in fact
* the last lines of the header
*/
if ( newline_in_header ( line ) )
continue ;
bodyIsEmpty = false ;
}
lastBodyLineWasBlank = false ;
}
if ( messageAddStr ( ret , line ) < 0 )
break ;
}
} while ( getline_from_mbox ( buffer , sizeof ( buffer ) - 1 , map , at ) ! = NULL ) ;
/* Reached on normal loop exit and on every break; only goto done skips this */
err = 0 ;
done :
if ( err ) {
cli_errmsg ( " parseEmailFile: ERROR parsing file \n " ) ;
ret - > isTruncated = true ;
}
2023-01-14 18:28:39 +08:00
FREE ( boundary ) ;
2022-10-22 18:41:00 +08:00
freeList ( head ) ;
if ( ! anyHeadersFound ) {
/*
* False positive in believing we have an e - mail when we don ' t
*/
messageDestroy ( ret ) ;
cli_dbgmsg ( " parseEmailFile: no headers found, assuming it isn't an email \n " ) ;
return NULL ;
}
if ( * heuristicFound ) {
messageDestroy ( ret ) ;
cli_dbgmsg ( " parseEmailFile: found heuristic \n " ) ;
return NULL ;
}
cli_dbgmsg ( " parseEmailFile: return \n " ) ;
return ret ;
}
/*
 * The given message contains a raw e-mail.
 *
 * Walks the lines of m's body. Until the first blank line they are treated as
 * headers: Content-Transfer-Encoding, Content-Disposition and Content-Type
 * lines are collected (together with their folded continuation lines) and
 * handed to parseEmailHeader(), which records them in a newly created message.
 * Once the headers are over, the first real body line causes the remaining
 * text to be moved into the new message with messageMoveText().
 *
 * Returns the message's body with the correct arguments set, empties the
 * given message's contents (note that it isn't destroyed).
 *
 * Returns NULL when m is NULL, when the new message cannot be allocated, when
 * no usable header was seen, or when *heuristicFound has been set by one of
 * the limit/heuristic checks. *heuristicFound is always reset to false on
 * entry.
 *
 * TODO: remove the duplication with parseEmailFile
 */
static message *
parseEmailHeaders(message *m, const table_t *rfc821, bool *heuristicFound)
{
    bool inHeader         = true;
    bool bodyIsEmpty      = true;
    bool anyHeadersFound  = false;
    bool lastWasOnlySemi  = false;
    int commandNumber     = -1;
    char *fullline        = NULL; /* header currently being assembled */
    size_t fulllinelength = 0;    /* bytes accounted for in fullline */
    size_t lineFoldCnt    = 0;
    size_t totalHeaderCnt = 0;
    text *t;
    message *ret;

    cli_dbgmsg("parseEmailHeaders\n");

    *heuristicFound = false;

    if (m == NULL)
        return NULL;

    ret = messageCreate();
    if (ret == NULL) {
        /* messageCreate() can fail; nothing below can work without it */
        cli_errmsg("parseEmailHeaders: failed to allocate message\n");
        return NULL;
    }

    for (t = messageGetBody(m); t; t = t->t_next) {
        const char *line = t->t_line ? lineGetData(t->t_line) : NULL;

        if (doContinueMultipleEmptyOptions(line, &lastWasOnlySemi)) {
            continue;
        }
        if (hitLineFoldCnt(line, &lineFoldCnt, m->ctx, heuristicFound)) {
            break;
        }

        if (inHeader) {
            cli_dbgmsg("parseEmailHeaders: check '%s'\n", line ? line : "");

            if (line == NULL) {
                /*
                 * A blank line signifies the end of
                 * the header and the start of the text
                 */
                cli_dbgmsg("End of header information\n");
                if (!anyHeadersFound) {
                    cli_dbgmsg("Nothing interesting in the header\n");
                    break;
                }
                inHeader    = false;
                bodyIsEmpty = true;
            } else {
                char *ptr;

                if (fullline == NULL) {
                    char cmd[RFC2821LENGTH + 1];

                    /*
                     * Continuation of line we're ignoring?
                     * The cast keeps bytes >= 0x80 from being passed to
                     * isblank() as negative values.
                     */
                    if (isblank((unsigned char)line[0]))
                        continue;

                    /*
                     * Is this a header we're interested in?
                     */
                    if ((strchr(line, ':') == NULL) ||
                        (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
                        if (strncmp(line, "From ", 5) == 0)
                            anyHeadersFound = true;
                        continue;
                    }

                    ptr           = rfc822comments(cmd, NULL);
                    commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
                    free(ptr);

                    switch (commandNumber) {
                        case CONTENT_TRANSFER_ENCODING:
                        case CONTENT_DISPOSITION:
                        case CONTENT_TYPE:
                            anyHeadersFound = true;
                            break;
                        default:
                            if (!anyHeadersFound)
                                anyHeadersFound = usefulHeader(commandNumber, cmd);
                            continue;
                    }

                    fullline = cli_strdup(line);
                    if (fullline == NULL) {
                        /*
                         * continue doesn't seem right here, but that is
                         * what is done everywhere else when a malloc fails.
                         */
                        continue;
                    }
                    fulllinelength = strlen(line) + 1;
                } else {
                    /*
                     * Append this line to the header being assembled. The
                     * length is only committed once the buffer has really
                     * grown, so it never gets ahead of the allocation.
                     */
                    size_t grown = fulllinelength + strlen(line) + 1;

                    ptr = cli_realloc(fullline, grown);
                    if (ptr == NULL)
                        continue;
                    fullline       = ptr;
                    fulllinelength = grown;
                    cli_strlcat(fullline, line, fulllinelength);
                }

                if (haveTooManyHeaderBytes(fulllinelength, m->ctx, heuristicFound)) {
                    break;
                }

                if (next_is_folded_header(t))
                    /* Add arguments to this line */
                    continue;

                lineUnlink(t->t_line);
                t->t_line = NULL;

                /* An odd number of quotes: keep accumulating lines */
                if (count_quotes(fullline) & 1)
                    continue;

                ptr = rfc822comments(fullline, NULL);
                if (ptr) {
                    free(fullline);
                    fullline = ptr;
                }

                totalHeaderCnt++;
                if (haveTooManyEmailHeaders(totalHeaderCnt, m->ctx, heuristicFound)) {
                    break;
                }

                /*
                 * On failure fullline is deliberately kept, so following
                 * lines are appended to it (unchanged from the original
                 * behaviour).
                 */
                if (parseEmailHeader(ret, fullline, rfc821, m->ctx, heuristicFound) < 0) {
                    continue;
                }
                if (*heuristicFound) {
                    break;
                }

                free(fullline);
                fullline = NULL;
            }
        } else {
            if (bodyIsEmpty) {
                if (line == NULL)
                    /* throw away leading blank lines */
                    continue;
                /*
                 * Broken message: new line in the
                 * middle of the headers, so the first
                 * line of the body is in fact
                 * the last lines of the header
                 */
                if (newline_in_header(line))
                    continue;
                bodyIsEmpty = false;
            }
            /* TODO: add a uuencode fast track here, as parseEmailFile has */
            cli_dbgmsg("parseEmailHeaders: finished with headers, moving body\n");
            messageMoveText(ret, t, m);
            break;
        }
    }

    if (fullline) {
        /* A header was still being assembled when the loop ended */
        if (*fullline) {
            switch (commandNumber) {
                case CONTENT_TRANSFER_ENCODING:
                case CONTENT_DISPOSITION:
                case CONTENT_TYPE:
                    cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
                    break;
                default:
                    break;
            }
        }
        free(fullline);
    }

    if (!anyHeadersFound) {
        /*
         * False positive in believing we have an e-mail when we don't
         */
        messageDestroy(ret);
        cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
        return NULL;
    }

    if (*heuristicFound) {
        messageDestroy(ret);
        cli_dbgmsg("parseEmailHeaders: found a heuristic, delete message and stop parsing.\n");
        return NULL;
    }

    cli_dbgmsg("parseEmailHeaders: return\n");

    return ret;
}
/*
 * Handle a header line of an email message.
 *
 * The line is split at its key/value separator; the key and the rest of the
 * line are then passed to parseMimeHeader(). Returns the value from
 * parseMimeHeader(), or -1 when the line has no separator, cannot be copied,
 * has an empty key, or has nothing after the separator.
 */
static int
parseEmailHeader(message *m, const char *line, const table_t *rfc821, cli_ctx *ctx, bool *heuristicFound)
{
    int status = -1;
#ifdef CL_THREAD_SAFE
    char *saveptr;
#endif
    char delim[2];
    char *decoded, *key, *value;

    cli_dbgmsg("parseEmailHeader '%s'\n", line);

    /*
     * In RFC822 the separator between the key a value is a colon,
     * e.g.	Content-Transfer-Encoding: base64
     * However some MUA's are lapse about this and virus writers exploit
     * this hole, so we need to check all known possibilities.
     * A colon anywhere in the line takes precedence over an equals sign.
     */
    if (strchr(line, ':') != NULL)
        delim[0] = ':';
    else if (strchr(line, '=') != NULL)
        delim[0] = '=';
    else
        return -1;
    delim[1] = '\0';

    decoded = rfc2047(line);
    if (decoded == NULL) {
        /* an RFC checker would return -1 here */
        decoded = cli_strdup(line);
    }

    if (decoded != NULL) {
#ifdef CL_THREAD_SAFE
        key = strtok_r(decoded, delim, &saveptr);
#else
        key = strtok(decoded, delim);
#endif

        if ((key != NULL) && (strstrip(key) > 0)) {
            /* An empty delimiter set yields everything after the key */
#ifdef CL_THREAD_SAFE
            value = strtok_r(NULL, "", &saveptr);
#else
            value = strtok(NULL, "");
#endif

            if (value != NULL) {
                /*
                 * Found a header such as
                 * Content-Type: multipart/mixed;
                 * value is now "multipart/mixed" and key is
                 * "Content-Type"
                 */
                status = parseMimeHeader(m, key, rfc821, value, ctx, heuristicFound);
            }
        }
    }

    FREE(decoded);

    return status;
}
#if HAVE_LIBXML2
/*
 * Key tables passed to cli_msxml_parse_document() by the MHTML helpers below.
 * Each entry pairs an element name with the name it is reported under and a
 * set of MSXML_* flags.
 */

/* root html tags for microsoft office document */
static const struct key_entry mhtml_keys[] = {
    {"html", "RootHTML", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},
    {"head", "Head", MSXML_JSON_WRKPTR | MSXML_COMMENT_CB},
    {"meta", "Meta", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
    {"link", "Link", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_ATTRIB},
    {"script", "Script", MSXML_JSON_WRKPTR | MSXML_JSON_MULTI | MSXML_JSON_VALUE}};
static size_t num_mhtml_keys = sizeof(mhtml_keys) / sizeof(mhtml_keys[0]);

/* embedded xml tags (comment) for microsoft office document */
static const struct key_entry mhtml_comment_keys[] = {
    {"o:documentproperties", "DocumentProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB},
    {"o:author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lastauthor", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lastsaved", "LastSaved", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:version", "Version", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE},
    {"o:officedocumentsettings", "DocumentSettings", MSXML_IGNORE_ELEM},
    {"w:worddocument", "WordDocument", MSXML_IGNORE_ELEM},
    {"w:latentstyles", "LatentStyles", MSXML_IGNORE_ELEM}};
static size_t num_mhtml_comment_keys = sizeof(mhtml_comment_keys) / sizeof(mhtml_comment_keys[0]);
#endif
/*
 * The related multipart root HTML file comment parsing wrapper.
 *
 * Scans the comment text for each "<xml>" ... "</xml>" span and feeds every
 * span to cli_msxml_parse_document() using the mhtml_comment_keys table.
 * Stops at the first span with no closing tag, and returns early on the first
 * reader or parser failure.
 *
 * Attempts to leverage msxml parser, cannot operate without LIBXML2.
 * This function is only used for Preclassification JSON.
 */
static cl_error_t parseMHTMLComment(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata)
{
    cl_error_t ret = CL_SUCCESS;

#if HAVE_LIBXML2
    const char *cursor = comment;

    UNUSEDPARAM(cbdata);
    UNUSEDPARAM(wrkjobj);

    for (;;) {
        xmlTextReaderPtr reader;
        const char *spanEnd;
        const char *spanStart = strstr(cursor, "<xml>");

        if (spanStart == NULL)
            break;

        spanEnd = strstr(spanStart, "</xml>");
        if (spanEnd == NULL) {
            cli_dbgmsg("parseMHTMLComment: unbounded xml tag\n");
            break;
        }
        /* The next search resumes at this span's closing tag */
        cursor = spanEnd;

        /* + 6 so that the buffer includes the "</xml>" closing tag itself */
        reader = xmlReaderForMemory(spanStart, spanEnd - spanStart + 6, "comment.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
        if (!reader) {
            cli_dbgmsg("parseMHTMLComment: cannot initialize xmlReader\n");

#if HAVE_JSON
            if (ctx->wrkproperty != NULL)
                ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_MEM");
#endif
            return ret; // libxml2 failed!
        }

        /* comment callback is not set to prevent recursion */
        /* TODO: should we separate the key dictionaries? */
        /* TODO: should we use the json object pointer? */
        ret = cli_msxml_parse_document(ctx, reader, mhtml_comment_keys, num_mhtml_comment_keys, MSXML_FLAG_JSON, NULL);

        xmlTextReaderClose(reader);
        xmlFreeTextReader(reader);

        if (ret != CL_SUCCESS)
            return ret;
    }
#else
    UNUSEDPARAM(comment);
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(wrkjobj);
    UNUSEDPARAM(cbdata);

    cli_dbgmsg("in parseMHTMLComment\n");
    cli_dbgmsg("parseMHTMLComment: parsing html xml-comments requires libxml2!\n");
#endif
    return ret;
}
/*
 * The related multipart root HTML file parsing wrapper.
 *
 * Converts m (or, when m is NULL, t) to a blob, parses it as HTML with
 * libxml2 and walks the resulting document with cli_msxml_parse_document()
 * using the mhtml_keys table; comments are handed to parseMHTMLComment().
 *
 * Returns OK when there is nothing to do (no ctx, no input, no blob) or when
 * libxml2 HTML support is unavailable; otherwise the msxml result mapped to
 * an mbox_status (OK, MAXREC, MAXFILES, VIRUS or FAIL).
 *
 * Attempts to leverage msxml parser, cannot operate without LIBXML2.
 * This function is only used for Preclassification JSON.
 */
static mbox_status
parseRootMHTML(mbox_ctx *mctx, message *m, text *t)
{
    cli_ctx *ctx = mctx->ctx;
#if HAVE_LIBXML2
#ifdef LIBXML_HTML_ENABLED
    struct msxml_ctx mxctx;
    blob *input             = NULL;
    htmlDocPtr htmlDoc      = NULL;
    xmlTextReaderPtr reader = NULL;
    int ret                 = CL_SUCCESS;
    mbox_status rc          = OK;
#if HAVE_JSON
    json_object *rhtml;
#endif

    cli_dbgmsg("in parseRootMHTML\n");

    if (ctx == NULL)
        return OK;

    if (m == NULL && t == NULL)
        return OK;

    if (m != NULL)
        input = messageToBlob(m, 0);
    else /* t != NULL */
        input = textToBlob(t, NULL, 0);

    if (input == NULL)
        return OK;

    htmlDoc = htmlReadMemory((char *)input->data, input->len, "mhtml.html", NULL, CLAMAV_MIN_XMLREADER_FLAGS | HTML_PARSE_NOWARNING);
    if (htmlDoc == NULL) {
        cli_dbgmsg("parseRootMHTML: cannot initialize read html document\n");
#if HAVE_JSON
        if (ctx->wrkproperty != NULL)
            ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_HTML_READ");
        if (ret != CL_SUCCESS)
            rc = FAIL;
#endif
        goto done;
    }

#if HAVE_JSON
    if (mctx->wrkobj) {
        rhtml = cli_jsonobj(mctx->wrkobj, "RootHTML");
        if (rhtml != NULL) {
            /* MHTML-specific properties */
            cli_jsonstr(rhtml, "Encoding", (const char *)htmlGetMetaEncoding(htmlDoc));
            cli_jsonint(rhtml, "CompressMode", xmlGetDocCompressMode(htmlDoc));
        }
    }
#endif

    reader = xmlReaderWalker(htmlDoc);
    if (reader == NULL) {
        cli_dbgmsg("parseRootMHTML: cannot initialize xmlTextReader\n");
#if HAVE_JSON
        if (ctx->wrkproperty != NULL)
            ret = cli_json_parse_error(ctx->wrkproperty, "MHTML_ERROR_XML_READER_IO");
        if (ret != CL_SUCCESS)
            rc = FAIL;
#endif
        /* htmlDoc is released at done; it used to be leaked on this path */
        goto done;
    }

    memset(&mxctx, 0, sizeof(mxctx));
    /* no scanning callback set */
    mxctx.comment_cb = parseMHTMLComment;
    ret              = cli_msxml_parse_document(ctx, reader, mhtml_keys, num_mhtml_keys, MSXML_FLAG_JSON | MSXML_FLAG_WALK, &mxctx);
    switch (ret) {
        case CL_SUCCESS:
        case CL_ETIMEOUT:
        case CL_BREAK:
            rc = OK;
            break;

        case CL_EMAXREC:
            rc = MAXREC;
            break;

        case CL_EMAXFILES:
            rc = MAXFILES;
            break;

        case CL_VIRUS:
            rc = VIRUS;
            break;

        default:
            rc = FAIL;
    }

done:
    /* Release in the same order as before: reader, document, then blob */
    if (reader != NULL) {
        xmlTextReaderClose(reader);
        xmlFreeTextReader(reader);
    }
    if (htmlDoc != NULL) {
        xmlFreeDoc(htmlDoc);
    }
    blobDestroy(input);
    return rc;
#else  /* LIBXML_HTML_ENABLED */
    UNUSEDPARAM(m);
    UNUSEDPARAM(t);
    cli_dbgmsg("in parseRootMHTML\n");
    cli_dbgmsg("parseRootMHTML: parsing html documents disabled in libxml2!\n");
    /* This branch used to fall off the end of a non-void function */
    return OK;
#endif /* LIBXML_HTML_ENABLED */
#else  /* HAVE_LIBXML2 */
    UNUSEDPARAM(m);
    UNUSEDPARAM(t);
    cli_dbgmsg("in parseRootMHTML\n");
    cli_dbgmsg("parseRootMHTML: parsing html documents requires libxml2!\n");
    return OK;
#endif /* HAVE_LIBXML2 */
}
/*
 * This is a recursive routine.
 *
 * This function parses the body of mainMessage and saves its attachments in dir.
 *
 * mainMessage is the buffer to be parsed; it contains an e-mail's body, without
 *	any headers. On the first call it will be the whole message. On later
 *	calls it will be one part of a multipart message.
 * textIn is the plain text message being built up so far.
 */
static mbox_status
parseEmailBody ( message * messageIn , text * textIn , mbox_ctx * mctx , unsigned int recursion_level )
{
mbox_status rc ;
text * aText = textIn ;
message * mainMessage = messageIn ;
fileblob * fb ;
bool infected = false ;
const struct cl_engine * engine = mctx - > ctx - > engine ;
const int doPhishingScan = engine - > dboptions & CL_DB_PHISHING_URLS & & ( DCONF_PHISHING & PHISHING_CONF_ENGINE ) ;
# if HAVE_JSON
json_object * saveobj = mctx - > wrkobj ;
# endif
bool heuristicFound = false ;
cli_dbgmsg ( " in parseEmailBody, %u files saved so far \n " ,
mctx - > files ) ;
/* FIXMELIMITS: this should be better integrated */
if ( engine - > max_recursion_level )
/*
* This is approximate
*/
if ( recursion_level > engine - > max_recursion_level ) {
// Note: engine->max_recursion_level is re-purposed here out of convenience.
// ole2 recursion does not leverage the ctx->recursion_stack stack.
cli_dbgmsg ( " parseEmailBody: hit maximum recursion level (%u) \n " , recursion_level ) ;
return MAXREC ;
}
if ( engine - > maxfiles & & ( mctx - > files > = engine - > maxfiles ) ) {
/*
* FIXME : This is only approx - it may have already
* been exceeded
*/
cli_dbgmsg ( " parseEmailBody: number of files exceeded %u \n " , engine - > maxfiles ) ;
return MAXFILES ;
}
rc = OK ;
/* Anything left to be parsed? */
if ( mainMessage & & ( messageGetBody ( mainMessage ) ! = NULL ) ) {
mime_type mimeType ;
int subtype , inhead , htmltextPart , inMimeHead , i ;
const char * mimeSubtype ;
char * boundary ;
const text * t_line ;
/*bool isAlternative;*/
message * aMessage ;
int multiparts = 0 ;
message * * messages = NULL ; /* parts of a multipart message */
cli_dbgmsg ( " Parsing mail file \n " ) ;
mimeType = messageGetMimeType ( mainMessage ) ;
mimeSubtype = messageGetMimeSubtype ( mainMessage ) ;
# if HAVE_JSON
if ( mctx - > wrkobj ! = NULL ) {
mctx - > wrkobj = cli_jsonobj ( mctx - > wrkobj , " Body " ) ;
cli_jsonstr ( mctx - > wrkobj , " MimeType " , getMimeTypeStr ( mimeType ) ) ;
cli_jsonstr ( mctx - > wrkobj , " MimeSubtype " , mimeSubtype ) ;
cli_jsonstr ( mctx - > wrkobj , " EncodingType " , getEncTypeStr ( messageGetEncoding ( mainMessage ) ) ) ;
cli_jsonstr ( mctx - > wrkobj , " Disposition " , messageGetDispositionType ( mainMessage ) ) ;
if ( messageHasFilename ( mainMessage ) ) {
char * filename = messageGetFilename ( mainMessage ) ;
cli_jsonstr ( mctx - > wrkobj , " Filename " , filename ) ;
free ( filename ) ;
} else {
cli_jsonstr ( mctx - > wrkobj , " Filename " , " (inline) " ) ;
}
}
# endif
/* pre-process */
subtype = tableFind ( mctx - > subtypeTable , mimeSubtype ) ;
if ( ( mimeType = = TEXT ) & & ( subtype = = PLAIN ) ) {
/*
* This is effectively no encoding , notice that we
* don ' t check that charset is us - ascii
*/
cli_dbgmsg ( " text/plain: Assume no attachments \n " ) ;
mimeType = NOMIME ;
messageSetMimeSubtype ( mainMessage , " " ) ;
} else if ( ( mimeType = = MESSAGE ) & &
( strcasecmp ( mimeSubtype , " rfc822-headers " ) = = 0 ) ) {
/*
* RFC1892 / RFC3462 : section 2 text / rfc822 - headers
* incorrectly sent as message / rfc822 - headers
*
* Parse as text / plain , i . e . no mime
*/
cli_dbgmsg ( " Changing message/rfc822-headers to text/rfc822-headers \n " ) ;
mimeType = NOMIME ;
messageSetMimeSubtype ( mainMessage , " " ) ;
} else
cli_dbgmsg ( " mimeType = %d \n " , ( int ) mimeType ) ;
switch ( mimeType ) {
case NOMIME :
cli_dbgmsg ( " Not a mime encoded message \n " ) ;
aText = textAddMessage ( aText , mainMessage ) ;
if ( ! doPhishingScan ) {
break ;
}
/*
* Fall through : some phishing mails claim they are
* text / plain , when they are in fact html
*/
/* fall through */
case TEXT :
/* text/plain has been preprocessed as no encoding */
if ( doPhishingScan ) {
/*
* It would be better to save and scan the
* file and only checkURLs if it ' s found to be
* clean
*/
checkURLs ( mainMessage , mctx , & rc , ( subtype = = HTML ) ) ;
/*
* There might be html sent without subtype
* html too , so scan them for phishing
*/
if ( rc = = VIRUS )
infected = true ;
}
break ;
case MULTIPART :
cli_dbgmsg ( " Content-type 'multipart' handler \n " ) ;
boundary = messageFindArgument ( mainMessage , " boundary " ) ;
# if HAVE_JSON
if ( mctx - > wrkobj ! = NULL )
cli_jsonstr ( mctx - > wrkobj , " Boundary " , boundary ) ;
# endif
if ( boundary = = NULL ) {
cli_dbgmsg ( " Multipart/%s MIME message contains no boundary header \n " ,
mimeSubtype ) ;
/* Broken e-mail message */
mimeType = NOMIME ;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break ;
}
cli_chomp ( boundary ) ;
/* Perhaps it should assume mixed? */
if ( mimeSubtype [ 0 ] = = ' \0 ' ) {
cli_dbgmsg ( " Multipart has no subtype assuming alternative \n " ) ;
mimeSubtype = " alternative " ;
messageSetMimeSubtype ( mainMessage , " alternative " ) ;
}
/*
* Get to the start of the first message
*/
t_line = messageGetBody ( mainMessage ) ;
if ( t_line = = NULL ) {
cli_dbgmsg ( " Multipart MIME message has no body \n " ) ;
free ( ( char * ) boundary ) ;
mimeType = NOMIME ;
break ;
}
do
if ( t_line - > t_line ) {
if ( boundaryStart ( lineGetData ( t_line - > t_line ) , boundary ) )
break ;
/*
* Found a binhex file before
* the first multipart
* TODO : check yEnc
*/
if ( binhexBegin ( mainMessage ) = = t_line ) {
if ( exportBinhexMessage ( mctx , mainMessage ) ) {
/* virus found */
rc = VIRUS ;
infected = true ;
break ;
}
} else if ( t_line - > t_next & &
( encodingLine ( mainMessage ) = = t_line - > t_next ) ) {
/*
* We look for the next line
* since later on we ' ll skip
* over the important line when
* we think it ' s a blank line
* at the top of the message -
* which it would have been in
* an RFC compliant world
*/
cli_dbgmsg ( " Found MIME attachment before the first MIME section \" %s \" \n " ,
lineGetData ( t_line - > t_next - > t_line ) ) ;
if ( messageGetEncoding ( mainMessage ) = = NOENCODING )
break ;
}
}
while ( ( t_line = t_line - > t_next ) ! = NULL ) ;
if ( t_line = = NULL ) {
cli_dbgmsg ( " Multipart MIME message contains no boundary lines (%s) \n " ,
boundary ) ;
free ( ( char * ) boundary ) ;
mimeType = NOMIME ;
/*
* The break means that we will still
* check if the file contains a yEnc / binhex file
*/
break ;
}
/*
* Build up a table of all of the parts of this
* multipart message . Remember , each part may itself
* be a multipart message .
*/
inhead = 1 ;
inMimeHead = 0 ;
/*
* Re - read this variable in case mimeSubtype has changed
*/
subtype = tableFind ( mctx - > subtypeTable , mimeSubtype ) ;
/*
* Parse the mainMessage object and create an array
* of objects called messages , one for each of the
* multiparts that mainMessage contains .
*
* This looks like parseEmailHeaders ( ) - maybe there ' s
* some duplication of code to be cleaned up
*
* We may need to create an array rather than just
* save each part as it is found because not all
* elements will need scanning , and we don ' t yet know
* which of those elements it will be , except in
* the case of mixed , when all parts need to be scanned .
*/
for ( multiparts = 0 ; t_line & & ! infected ; multiparts + + ) {
int lines = 0 ;
message * * m ;
mbox_status old_rc ;
m = cli_realloc ( messages , ( ( multiparts + 1 ) * sizeof ( message * ) ) ) ;
if ( m = = NULL )
break ;
messages = m ;
aMessage = messages [ multiparts ] = messageCreate ( ) ;
if ( aMessage = = NULL ) {
multiparts - - ;
/* if allocation failed the first time,
* there ' s no point in retrying , just
* break out */
break ;
}
messageSetCTX ( aMessage , mctx - > ctx ) ;
cli_dbgmsg ( " Now read in part %d \n " , multiparts ) ;
/*
* Ignore blank lines . There shouldn ' t be ANY
* but some viruses insert them
*/
while ( ( t_line = t_line - > t_next ) ! = NULL )
if ( t_line - > t_line & &
/*(cli_chomp(t_line->t_text) > 0))*/
( strlen ( lineGetData ( t_line - > t_line ) ) > 0 ) )
break ;
if ( t_line = = NULL ) {
cli_dbgmsg ( " Empty part \n " ) ;
/*
* Remove this part unless there ' s
* a binhex portion somewhere in
* the complete message that we may
* throw away by mistake if the MIME
* encoding information is incorrect
*/
if ( mainMessage & &
( binhexBegin ( mainMessage ) = = NULL ) ) {
messageDestroy ( aMessage ) ;
- - multiparts ;
}
continue ;
}
do {
const char * line = lineGetData ( t_line - > t_line ) ;
/*
cli_dbgmsg ( " multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s' \n " ,
multiparts , inMimeHead , inhead , boundary , line ,
t_line - > t_next & & t_line - > t_next - > t_line ? lineGetData ( t_line - > t_next - > t_line ) : " (null) " ) ;
*/
if ( inMimeHead ) { /* continuation line */
if ( line = = NULL ) {
/*inhead =*/ inMimeHead = 0 ;
continue ;
}
/*
* Handle continuation lines
* because the previous line
* ended with a ; or this line
* starts with a white space
*/
cli_dbgmsg ( " Multipart %d: About to add mime Argument '%s' \n " ,
multiparts , line ) ;
/*
* Handle the case when it
* isn ' t really a continuation
* line :
* Content - Type : application / octet - stream ;
* Content - Transfer - Encoding : base64
*/
parseEmailHeader ( aMessage , line , mctx - > rfc821Table , mctx - > ctx , & heuristicFound ) ;
if ( heuristicFound ) {
rc = VIRUS ;
break ;
}
while ( isspace ( ( int ) * line ) )
line + + ;
if ( * line = = ' \0 ' ) {
inhead = inMimeHead = 0 ;
continue ;
}
inMimeHead = false ;
messageAddArgument ( aMessage , line ) ;
} else if ( inhead ) { /* handling normal headers */
/*int quotes;*/
char * fullline , * ptr ;
if ( line = = NULL ) {
/*
* empty line , should the end of the headers ,
* but some base64 decoders , e . g . uudeview , are broken
* and will handle this type of entry , decoding the
* base64 content . . .
* Content - Type : application / octet - stream ; name = text . zip
* Content - Transfer - Encoding : base64
* Content - Disposition : attachment ; filename = " text.zip "
*
* Content - Disposition : attachment ;
* filename = text . zip
* Content - Type : application / octet - stream ;
* name = text . zip
* Content - Transfer - Encoding : base64
*
* UEsDBAoAAAAAAACgPjJ2RHw676gAAO + oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
*/
const text * next = t_line - > t_next ;
if ( next & & next - > t_line ) {
const char * data = lineGetData ( next - > t_line ) ;
if ( ( messageGetEncoding ( aMessage ) = = NOENCODING ) & &
( messageGetMimeType ( aMessage ) = = APPLICATION ) & &
data & & strstr ( data , " base64 " ) ) {
/*
* Handle this nightmare ( note the blank
* line in the header and the incorrect
* content - transfer - encoding header )
*
* Content - Type : application / octet - stream ; name = " zipped_files.EXEX-Spanska: Yes
*
* r - Encoding : base64
* Content - Disposition : attachment ; filename = " zipped_files.EXE "
*/
messageSetEncoding ( aMessage , " base64 " ) ;
cli_dbgmsg ( " Ignoring fake end of headers \n " ) ;
continue ;
}
if ( ( strncmp ( data , " Content " , 7 ) = = 0 ) | |
( strncmp ( data , " filename= " , 9 ) = = 0 ) ) {
cli_dbgmsg ( " Ignoring fake end of headers \n " ) ;
continue ;
}
}
cli_dbgmsg ( " Multipart %d: End of header information \n " ,
multiparts ) ;
inhead = 0 ;
continue ;
}
if ( isspace ( ( int ) * line ) ) {
/*
* The first line is
* continuation line .
* This is tricky
* to handle , but
* all we can do is our
* best
*/
cli_dbgmsg ( " Part %d starts with a continuation line \n " ,
multiparts ) ;
messageAddArgument ( aMessage , line ) ;
/*
* Give it a default
* MIME type since
* that may be the
* missing line
*
* Choose application to
* force a save
*/
if ( messageGetMimeType ( aMessage ) = = NOMIME )
messageSetMimeType ( aMessage , " application " ) ;
continue ;
}
inMimeHead = false ;
if ( strlen ( line ) > RFC2821LENGTH ) {
cli_dbgmsg ( " parseEmailBody: line length exceds RFC2821 maximum length (1000) \n " ) ;
// We must skip this line because functions like rfc822comments() may accept output buffers
// that [RFC2821LENGTH + 1] in and don't have any length checks to prevent exceeding that max.
// E.g. See `boundaryStart()`.
// TODO: A larger audit would be needed to remove this limitation, though frankly I recommend
// fully re-writing the email parser (in Rust).
continue ;
}
fullline = rfc822comments ( line , NULL ) ;
if ( fullline = = NULL )
fullline = cli_strdup ( line ) ;
/*quotes = count_quotes(fullline);*/
/*
* Fold next lines to the end of this
* if they start with a white space
* or if this line has an odd number of quotes :
* Content - Type : application / octet - stream ; name = " foo
* "
*/
while ( t_line & & next_is_folded_header ( t_line ) ) {
const char * data ;
size_t datasz ;
t_line = t_line - > t_next ;
data = lineGetData ( t_line - > t_line ) ;
if ( data [ 1 ] = = ' \0 ' ) {
/*
* Broken message : the
* blank line at the end
* of the headers isn ' t blank -
* it contains a space
*/
cli_dbgmsg ( " Multipart %d: headers not terminated by blank line \n " ,
multiparts ) ;
inhead = false ;
break ;
}
datasz = strlen ( fullline ) + strlen ( data ) + 1 ;
ptr = cli_realloc ( fullline , datasz ) ;
if ( ptr = = NULL )
break ;
fullline = ptr ;
cli_strlcat ( fullline , data , datasz ) ;
/*quotes = count_quotes(data);*/
}
cli_dbgmsg ( " Multipart %d: About to parse folded header '%s' \n " ,
multiparts , fullline ) ;
parseEmailHeader ( aMessage , fullline , mctx - > rfc821Table , mctx - > ctx , & heuristicFound ) ;
free ( fullline ) ;
if ( heuristicFound ) {
rc = VIRUS ;
}
} else if ( boundaryEnd ( line , boundary ) ) {
/*
* Some viruses put information
* * after * the end of message ,
* which presumably some broken
* mail clients find , so we
* can ' t assume that this
* is the end of the message
*/
/* t_line = NULL;*/
break ;
} else if ( boundaryStart ( line , boundary ) ) {
inhead = 1 ;
break ;
} else {
if ( messageAddLine ( aMessage , t_line - > t_line ) < 0 )
break ;
lines + + ;
}
} while ( ( t_line = t_line - > t_next ) ! = NULL ) ;
cli_dbgmsg ( " Part %d has %d lines, rc = %d \n " ,
multiparts , lines , ( int ) rc ) ;
/*
* Only save in the array of messages if some
* decision will be taken on whether to scan .
* If all parts will be scanned then save to
* file straight away
*/
switch ( subtype ) {
case MIXED :
case ALTERNATIVE :
case REPORT :
case DIGEST :
case APPLEDOUBLE :
case KNOWBOT :
case - 1 :
old_rc = rc ;
mainMessage = do_multipart ( mainMessage ,
messages , multiparts ,
& rc , mctx , messageIn ,
& aText , recursion_level ) ;
if ( ( rc = = OK_ATTACHMENTS_NOT_SAVED ) & & ( old_rc = = OK ) )
rc = OK ;
if ( messages [ multiparts ] ) {
messageDestroy ( messages [ multiparts ] ) ;
messages [ multiparts ] = NULL ;
}
- - multiparts ;
if ( rc = = VIRUS )
infected = true ;
break ;
case RELATED :
case ENCRYPTED :
case SIGNED :
case PARALLEL :
/* all the subtypes that we handle
* ( all from the switch ( tableFind . . . ) below )
* must be listed here */
break ;
default :
/* this is a subtype that we
* don ' t handle anyway ,
* don ' t store */
if ( messages [ multiparts ] ) {
messageDestroy ( messages [ multiparts ] ) ;
messages [ multiparts ] = NULL ;
}
- - multiparts ;
}
}
free ( ( char * ) boundary ) ;
if ( haveTooManyMIMEPartsPerMessage ( multiparts , mctx - > ctx , & rc ) ) {
if ( messages ) {
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
}
free ( messages ) ;
messages = NULL ;
}
break ;
}
/*
* Preprocess . Anything special to be done before
* we handle the multiparts ?
*/
switch ( subtype ) {
case KNOWBOT :
/* TODO */
cli_dbgmsg ( " multipart/knowbot parsed as multipart/mixed for now \n " ) ;
mimeSubtype = " mixed " ;
break ;
case - 1 :
/*
* According to section 7.2 .6 of
* RFC1521 , unrecognized multiparts
* should be treated as multipart / mixed .
*/
cli_dbgmsg ( " Unsupported multipart format `%s', parsed as mixed \n " , mimeSubtype ) ;
mimeSubtype = " mixed " ;
break ;
}
/*
* We ' ve finished message we ' re parsing
*/
if ( mainMessage & & ( mainMessage ! = messageIn ) ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
}
cli_dbgmsg ( " The message has %d parts \n " , multiparts ) ;
if ( infected | | ( ( multiparts = = 0 ) & & ( aText = = NULL ) ) ) {
if ( messages ) {
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
}
free ( messages ) ;
messages = NULL ;
}
if ( aText & & ( textIn = = NULL ) )
textDestroy ( aText ) ;
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
/*
* Nothing to do
*/
switch ( rc ) {
case VIRUS :
return VIRUS ;
case MAXREC :
return MAXREC ;
default :
return OK_ATTACHMENTS_NOT_SAVED ;
}
}
cli_dbgmsg ( " Find out the multipart type (%s) \n " , mimeSubtype ) ;
/*
* We now have all the parts of the multipart message
* in the messages array :
* message * messages [ multiparts ]
* Let ' s decide what to do with them all
*/
switch ( tableFind ( mctx - > subtypeTable , mimeSubtype ) ) {
case RELATED :
cli_dbgmsg ( " Multipart related handler \n " ) ;
/*
* Have a look to see if there ' s HTML code
* which will need scanning
*/
// It's okay if multiparts == 0
htmltextPart = getTextPart ( messages , multiparts ) ;
if ( htmltextPart > = 0 & & messages ) {
if ( messageGetBody ( messages [ htmltextPart ] ) ) {
aText = textAddMessage ( aText , messages [ htmltextPart ] ) ;
}
} else {
/*
* There isn ' t an HTML bit . If there ' s a
* multipart bit , it ' ll may be in there
* somewhere
*/
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messageGetMimeType ( messages [ i ] ) = = MULTIPART ) {
htmltextPart = i ;
break ;
}
}
}
if ( htmltextPart = = - 1 ) {
cli_dbgmsg ( " No HTML code found to be scanned \n " ) ;
} else {
# if HAVE_JSON
/* Send root HTML file for preclassification */
if ( mctx - > ctx - > wrkproperty )
( void ) parseRootMHTML ( mctx , messages [ htmltextPart ] , aText ) ;
# endif
rc = parseEmailBody ( messages [ htmltextPart ] , aText , mctx , recursion_level + 1 ) ;
if ( ( rc = = OK ) & & messages [ htmltextPart ] ) {
messageDestroy ( messages [ htmltextPart ] ) ;
messages [ htmltextPart ] = NULL ;
} else if ( rc = = VIRUS ) {
infected = true ;
break ;
}
}
/*
* The message is confused about the difference
* between alternative and related . Badtrans . B
* suffers from this problem .
*
* Fall through in this case :
* Content - Type : multipart / related ;
* type = " multipart/alternative "
*/
/* fall through */
case DIGEST :
/*
* According to section 5.1 .5 RFC2046 , the
* default mime type of multipart / digest parts
* is message / rfc822
*
* We consider them as alternative , wrong in
* the strictest sense since they aren ' t
* alternatives - all parts a valid - but it ' s
* OK for our needs since it means each part
* will be scanned
*/
case ALTERNATIVE :
cli_dbgmsg ( " Multipart alternative handler \n " ) ;
/*
* Fall through - some clients are broken and
* say alternative instead of mixed . The Klez
* virus is broken that way , and anyway we
* wish to scan all of the alternatives
*/
/* fall through */
case REPORT :
/*
* According to section 1 of RFC1892 , the
* syntax of multipart / report is the same
* as multipart / mixed . There are some required
* parameters , but there ' s no need for us to
* verify that they exist
*/
case ENCRYPTED :
/* MUAs without encryption plugins can display as multipart/mixed,
* just scan it */
case MIXED :
case APPLEDOUBLE : /* not really supported */
/*
* Look for attachments
*
* Not all formats are supported . If an
* unsupported format turns out to be
* common enough to implement , it is a simple
* matter to add it
*/
if ( aText ) {
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
}
cli_dbgmsg ( " Mixed message with %d parts \n " , multiparts ) ;
for ( i = 0 ; i < multiparts ; i + + ) {
mainMessage = do_multipart ( mainMessage ,
messages , i , & rc , mctx ,
messageIn , & aText , recursion_level + 1 ) ;
if ( rc = = VIRUS ) {
infected = true ;
break ;
}
if ( rc = = MAXREC )
break ;
if ( rc = = OK_ATTACHMENTS_NOT_SAVED )
rc = OK ;
}
/* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
break ;
case SIGNED :
case PARALLEL :
/*
* If we ' re here it could be because we have a
* multipart / mixed message , consisting of a
* message followed by an attachment . That
* message itself is a multipart / alternative
* message and we need to dig out the plain
* text part of that alternative
*/
if ( messages ) {
htmltextPart = getTextPart ( messages , multiparts ) ;
if ( htmltextPart = = - 1 )
htmltextPart = 0 ;
rc = parseEmailBody ( messages [ htmltextPart ] , aText , mctx , recursion_level + 1 ) ;
}
break ;
default :
2023-01-14 18:28:39 +08:00
cli_dbgmsg ( " Unepxected mime sub type \n " ) ;
rc = CL_EFORMAT ;
break ;
2022-10-22 18:41:00 +08:00
}
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
if ( aText & & ( textIn = = NULL ) ) {
if ( ( ! infected ) & & ( fb = fileblobCreate ( ) ) ! = NULL ) {
cli_dbgmsg ( " Save non mime and/or text/plain part \n " ) ;
fileblobSetFilename ( fb , mctx - > dir , " textpart " ) ;
/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
fileblobSetCTX ( fb , mctx - > ctx ) ;
( void ) textToFileblob ( aText , fb , 1 ) ;
fileblobDestroy ( fb ) ;
mctx - > files + + ;
}
textDestroy ( aText ) ;
}
if ( messages ) {
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
}
free ( messages ) ;
messages = NULL ;
}
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
return rc ;
case MESSAGE :
/*
* Check for forbidden encodings
*/
switch ( messageGetEncoding ( mainMessage ) ) {
case NOENCODING :
case EIGHTBIT :
case BINARY :
break ;
default :
cli_dbgmsg ( " MIME type 'message' cannot be decoded \n " ) ;
break ;
}
rc = FAIL ;
if ( ( strcasecmp ( mimeSubtype , " rfc822 " ) = = 0 ) | |
( strcasecmp ( mimeSubtype , " delivery-status " ) = = 0 ) ) {
message * m = parseEmailHeaders ( mainMessage , mctx - > rfc821Table , & heuristicFound ) ;
if ( m ) {
cli_dbgmsg ( " Decode rfc822 \n " ) ;
messageSetCTX ( m , mctx - > ctx ) ;
if ( mainMessage & & ( mainMessage ! = messageIn ) ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else
messageReset ( mainMessage ) ;
if ( messageGetBody ( m ) )
rc = parseEmailBody ( m , NULL , mctx , recursion_level + 1 ) ;
messageDestroy ( m ) ;
} else if ( heuristicFound ) {
rc = VIRUS ;
}
break ;
} else if ( strcasecmp ( mimeSubtype , " disposition-notification " ) = = 0 ) {
/* RFC 2298 - handle like a normal email */
rc = OK ;
break ;
} else if ( strcasecmp ( mimeSubtype , " partial " ) = = 0 ) {
if ( mctx - > ctx - > options - > mail & CL_SCAN_MAIL_PARTIAL_MESSAGE ) {
/* RFC1341 message split over many emails */
if ( rfc1341 ( mctx , mainMessage ) > = 0 )
rc = OK ;
} else {
cli_warnmsg ( " Partial message received from MUA/MTA - message cannot be scanned \n " ) ;
}
} else if ( strcasecmp ( mimeSubtype , " external-body " ) = = 0 )
/* TODO */
cli_warnmsg ( " Attempt to send Content-type message/external-body trapped \n " ) ;
else
cli_warnmsg ( " Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net \n " , mimeSubtype ) ;
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
if ( messages ) {
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
}
free ( messages ) ;
messages = NULL ;
}
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
return rc ;
default :
cli_dbgmsg ( " Message received with unknown mime encoding - assume application \n " ) ;
/*
* Some Yahoo emails attach as
* Content - Type : X - unknown / unknown ;
* instead of
* Content - Type : application / unknown ;
* so let ' s try our best to salvage something
*/
/* fall through */
case APPLICATION :
/*cptr = messageGetMimeSubtype(mainMessage);
if ( ( strcasecmp ( cptr , " octet-stream " ) = = 0 ) | |
( strcasecmp ( cptr , " x-msdownload " ) = = 0 ) ) { */
{
fb = messageToFileblob ( mainMessage , mctx - > dir , 1 ) ;
if ( fb ) {
cli_dbgmsg ( " Saving main message as attachment \n " ) ;
if ( fileblobScanAndDestroy ( fb ) = = CL_VIRUS )
rc = VIRUS ;
mctx - > files + + ;
if ( mainMessage ! = messageIn ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else
messageReset ( mainMessage ) ;
}
} /*else
cli_warnmsg ( " Discarded application not sent as attachment \n " ) ; */
break ;
case AUDIO :
case VIDEO :
case IMAGE :
break ;
}
if ( messages ) {
/* "can't happen" */
cli_warnmsg ( " messages != NULL \n " ) ;
for ( i = 0 ; i < multiparts ; i + + ) {
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
}
free ( messages ) ;
messages = NULL ;
}
}
if ( aText & & ( textIn = = NULL ) ) {
/* Look for a bounce in the text (non mime encoded) portion */
const text * t ;
/* isBounceStart() is expensive, reduce the number of calls */
bool lookahead_definately_is_bounce = false ;
for ( t = aText ; t & & ( rc ! = VIRUS ) ; t = t - > t_next ) {
const line_t * l = t - > t_line ;
const text * lookahead , * topofbounce ;
const char * s ;
bool inheader ;
if ( l = = NULL ) {
continue ;
}
if ( lookahead_definately_is_bounce )
lookahead_definately_is_bounce = false ;
else if ( ! isBounceStart ( mctx , lineGetData ( l ) ) )
continue ;
lookahead = t - > t_next ;
if ( lookahead ) {
if ( isBounceStart ( mctx , lineGetData ( lookahead - > t_line ) ) ) {
lookahead_definately_is_bounce = true ;
/* don't save worthless header lines */
continue ;
}
} else /* don't save a single liner */
break ;
/*
* We ' ve found what looks like the start of a bounce
* message . Only bother saving if it really is a bounce
* message , this helps to speed up scanning of ping - pong
* messages that have lots of bounces within bounces in
* them
*/
for ( ; lookahead ; lookahead = lookahead - > t_next ) {
l = lookahead - > t_line ;
if ( l = = NULL )
break ;
s = lineGetData ( l ) ;
if ( strncasecmp ( s , " Content-Type: " , 13 ) = = 0 ) {
/*
* Don ' t bother with text / plain or
* text / html
*/
if ( CLI_STRCASESTR ( s , " text/plain " ) ! = NULL )
/*
* Don ' t bother to save the
* unuseful part , read past
* the headers then we ' ll go
* on to look for the next
* bounce message
*/
continue ;
if ( ( ! doPhishingScan ) & &
( CLI_STRCASESTR ( s , " text/html " ) ! = NULL ) )
continue ;
break ;
}
}
if ( lookahead & & ( lookahead - > t_line = = NULL ) ) {
cli_dbgmsg ( " Non mime part bounce message is not mime encoded, so it will not be scanned \n " ) ;
t = lookahead ;
/* look for next bounce message */
continue ;
}
/*
* Prescan the bounce message to see if there ' s likely
* to be anything nasty .
* This algorithm is hand crafted and may be breakable
* so all submissions are welcome . It ' s best NOT to
* remove this however you may be tempted , because it
* significantly speeds up the scanning of multiple
* bounces ( i . e . bounces within many bounces )
*/
for ( ; lookahead ; lookahead = lookahead - > t_next ) {
l = lookahead - > t_line ;
if ( l ) {
s = lineGetData ( l ) ;
if ( ( strncasecmp ( s , " Content-Type: " , 13 ) = = 0 ) & &
( strstr ( s , " multipart/ " ) = = NULL ) & &
( strstr ( s , " message/rfc822 " ) = = NULL ) & &
( strstr ( s , " text/plain " ) = = NULL ) )
break ;
}
}
if ( lookahead = = NULL ) {
cli_dbgmsg ( " cli_mbox: I believe it's plain text which must be clean \n " ) ;
/* nothing here, move along please */
break ;
}
if ( ( fb = fileblobCreate ( ) ) = = NULL )
break ;
cli_dbgmsg ( " Save non mime part bounce message \n " ) ;
fileblobSetFilename ( fb , mctx - > dir , " bounce " ) ;
fileblobAddData ( fb , ( const unsigned char * ) " Received: by clamd (bounce) \n " , 28 ) ;
fileblobSetCTX ( fb , mctx - > ctx ) ;
inheader = true ;
topofbounce = NULL ;
do {
l = t - > t_line ;
if ( l = = NULL ) {
if ( inheader ) {
inheader = false ;
topofbounce = t ;
}
} else {
s = lineGetData ( l ) ;
fileblobAddData ( fb , ( const unsigned char * ) s , strlen ( s ) ) ;
}
fileblobAddData ( fb , ( const unsigned char * ) " \n " , 1 ) ;
lookahead = t - > t_next ;
if ( lookahead = = NULL )
break ;
t = lookahead ;
l = t - > t_line ;
if ( ( ! inheader ) & & l ) {
s = lineGetData ( l ) ;
if ( isBounceStart ( mctx , s ) ) {
cli_dbgmsg ( " Found the start of another bounce candidate (%s) \n " , s ) ;
lookahead_definately_is_bounce = true ;
break ;
}
}
} while ( ! fileblobInfected ( fb ) ) ;
if ( fileblobScanAndDestroy ( fb ) = = CL_VIRUS )
rc = VIRUS ;
mctx - > files + + ;
if ( topofbounce )
t = topofbounce ;
}
textDestroy ( aText ) ;
aText = NULL ;
}
/*
* No attachments - scan the text portions , often files
* are hidden in HTML code
*/
if ( mainMessage & & ( rc ! = VIRUS ) ) {
text * t_line ;
/*
* Look for uu - encoded main file
*/
if ( mainMessage - > body_first ! = NULL & &
( encodingLine ( mainMessage ) ! = NULL ) & &
( ( t_line = bounceBegin ( mainMessage ) ) ! = NULL ) )
rc = ( exportBounceMessage ( mctx , t_line ) = = CL_VIRUS ) ? VIRUS : OK ;
else {
bool saveIt ;
if ( messageGetMimeType ( mainMessage ) = = MESSAGE )
/*
* Quick peek , if the encapsulated
* message has no
* content encoding statement don ' t
* bother saving to scan , it ' s safe
*/
saveIt = ( bool ) ( encodingLine ( mainMessage ) ! = NULL ) ;
else if ( mainMessage - > body_last ! = NULL & & ( t_line = encodingLine ( mainMessage ) ) ! = NULL ) {
/*
* Some bounces include the message
* body without the headers .
* FIXME : Unfortunately this generates a
* lot of false positives that a bounce
* has been found when it hasn ' t .
*/
if ( ( fb = fileblobCreate ( ) ) ! = NULL ) {
cli_dbgmsg ( " Found a bounce message with no header at '%s' \n " ,
lineGetData ( t_line - > t_line ) ) ;
fileblobSetFilename ( fb , mctx - > dir , " bounce " ) ;
fileblobAddData ( fb ,
( const unsigned char * ) " Received: by clamd (bounce) \n " ,
28 ) ;
fileblobSetCTX ( fb , mctx - > ctx ) ;
if ( fileblobScanAndDestroy ( textToFileblob ( t_line , fb , 1 ) ) = = CL_VIRUS )
rc = VIRUS ;
mctx - > files + + ;
}
saveIt = false ;
} else
/*
* Save the entire text portion ,
* since it it may be an HTML file with
* a JavaScript virus or a phish
*/
saveIt = true ;
if ( saveIt ) {
cli_dbgmsg ( " Saving text part to scan, rc = %d \n " ,
( int ) rc ) ;
if ( saveTextPart ( mctx , mainMessage , 1 ) = = CL_VIRUS )
rc = VIRUS ;
if ( mainMessage ! = messageIn ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else
messageReset ( mainMessage ) ;
}
}
} /*else
rc = OK_ATTACHMENTS_NOT_SAVED ; */
/* nothing saved */
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
if ( ( rc ! = FAIL ) & & infected )
rc = VIRUS ;
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
cli_dbgmsg ( " parseEmailBody() returning %d \n " , ( int ) rc ) ;
return rc ;
}
/*
 * Is the current line the start of a new section?
 *
 * New sections start with --boundary
 *
 * line:     the candidate line; NULL or empty never matches
 * boundary: the boundary string taken from the Content-Type header;
 *           NULL never matches
 *
 * Returns 1 if the line is judged to open a new part, 0 otherwise.
 *
 * The line is copied so that trailing spaces and the line terminator can be
 * removed without touching the caller's buffer. If the copy cannot be
 * allocated the untrimmed line is examined instead.
 */
static int
boundaryStart(const char *line, const char *boundary)
{
    const char *ptr;
    char *out = NULL; /* heap result of rfc822comments(), if any */
    char *newline;    /* trimmed private copy of line (or line itself) */
    char buf[RFC2821LENGTH + 1];
    int rc = 0;

    if (line == NULL || *line == '\0')
        return 0; /* empty line */
    if (boundary == NULL)
        return 0;

    newline = strdup(line);
    if (newline == NULL) {
        /* Out of memory: fall back to inspecting the caller's string */
        newline = (char *)line;
    } else {
        /* Trim trailing spaces, by index so we never step before the buffer */
        size_t n = strlen(newline);

        while ((n > 0) && (newline[n - 1] == ' '))
            newline[--n] = '\0';
        cli_chomp(newline);
    }

    /* cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary); */

    /* A boundary line starts with '-', or with '(' if it opens with a comment */
    if ((*newline != '-') && (*newline != '('))
        goto done;

    if (strchr(newline, '-') == NULL)
        goto done;

    /*
     * Strip RFC822 comments. The stripped text can be as long as the input
     * (e.g. when the '(' sits inside quotes), so it needs strlen() + 1 bytes:
     * only use the stack buffer when the terminator is guaranteed to fit.
     */
    if (strlen(newline) < sizeof(buf))
        ptr = rfc822comments(newline, buf);
    else
        ptr = out = rfc822comments(newline, NULL);

    /* NULL means "no comment present" (or an error): use the line as is */
    if (ptr == NULL)
        ptr = newline;

    if ((*ptr++ != '-') || (*ptr == '\0'))
        goto done;

    /*
     * Gibe.B3 is broken, it has:
     *	boundary="---- =_NextPart_000_01C31177.9DC7C000"
     * but its boundaries look like
     *	------ =_NextPart_000_01C31177.9DC7C000
     * notice the one too few '-'.
     * Presumably this is a deliberate exploitation of a bug in some mail
     * clients.
     *
     * The trouble is that this creates a lot of false positives for
     * boundary conditions, if we're too lax about matches. We do our level
     * best to avoid these false positives. For example if we have
     * boundary="1" we want to ensure that we don't break out of every line
     * that has -1 in it instead of starting --1. This needs some more work.
     *
     * Look with and without RFC822 comments stripped, I've seen some
     * samples where () are taken as comments in boundaries and some where
     * they're not. Irrespective of whatever RFC2822 says, we need to find
     * viruses in both types of mails.
     */
    if ((strstr(&ptr[1], boundary) != NULL) || (strstr(newline, boundary) != NULL)) {
        const char *k = ptr;

        /*
         * We need to ensure that we don't match --11=-=-=11 when
         * looking for --1=-=-=1 in well behaved headers, that's a
         * false positive problem mentioned above.
         * Only the text that follows the run of leading '-' may match.
         */
        do
            if (strcmp(++k, boundary) == 0) {
                rc = 1;
                break;
            }
        while (*k == '-');

        if (rc == 0) {
            /*
             * Second chance with the comments left in place. Use the
             * trimmed copy so trailing spaces cannot defeat strcmp().
             */
            k = &newline[1];
            do
                if (strcmp(++k, boundary) == 0) {
                    rc = 1;
                    break;
                }
            while (*k == '-');
        }
    } else if (*ptr++ != '-')
        rc = 0;
    else
        rc = (strcasecmp(ptr, boundary) == 0);

    if (rc == 1)
        cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);

done:
    free(out);
    if (newline != line)
        free(newline);

    return rc;
}
/*
 * Is the current line the end?
 *
 * The message ends with --boundary--
 *
 * line:     the candidate line; NULL or empty never matches
 * boundary: the boundary string; NULL never matches
 *
 * Returns 1 if the line starts with "--", followed by the boundary (compared
 * without regard to case), followed by "--"; 0 otherwise. Anything after the
 * closing "--" is ignored because some broken mails have white space after
 * the boundary.
 *
 * Only the leading characters of the line decide the result and a match needs
 * a '-' in the last inspected position, so trailing spaces can never change
 * the outcome. The line is therefore inspected in place, with no copy made.
 */
static int
boundaryEnd(const char *line, const char *boundary)
{
    const char *p;
    size_t len;

    if (line == NULL || *line == '\0')
        return 0;
    if (boundary == NULL)
        return 0;

    /* cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary); */

    /* line[0] is not NUL here, so reading line[1] is safe */
    if ((line[0] != '-') || (line[1] != '-'))
        return 0;
    p = &line[2];

    len = strlen(boundary);
    if (strncasecmp(p, boundary, len) != 0)
        return 0;

    /*
     * strncasecmp() matched len non-NUL characters, so p[len] exists (it may
     * be the terminator). p[len + 1] is only read when p[len] is '-'.
     */
    p = &p[len];

    return (p[0] == '-') && (p[1] == '-');
}
/*
 * Initialise the various lookup tables
 *
 * Each table is only built when the pointer handed in is still NULL, so an
 * already populated table is left alone.
 *
 * rfc821Table:  filled from rfc821headers
 * subtypeTable: filled from mimeSubtypes
 *
 * Returns 0 on success, -1 on failure. A failure while building the header
 * table destroys only that table; a failure while building the subtype table
 * destroys the header table as well. Destroyed tables are reset to NULL.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
    const struct tableinit *entry;

    /* Header name -> command number */
    if (*rfc821Table == NULL) {
        if ((*rfc821Table = tableCreate()) == NULL)
            return -1;

        entry = rfc821headers;
        while (entry->key) {
            if (tableInsert(*rfc821Table, entry->key, entry->value) < 0)
                goto drop_headers;
            entry++;
        }
    }

    /* MIME subtype name -> subtype number */
    if (*subtypeTable == NULL) {
        if ((*subtypeTable = tableCreate()) == NULL)
            goto drop_headers;

        entry = mimeSubtypes;
        while (entry->key) {
            if (tableInsert(*subtypeTable, entry->key, entry->value) < 0)
                goto drop_both;
            entry++;
        }
    }

    return 0;

drop_both:
    tableDestroy(*subtypeTable);
    *subtypeTable = NULL;
drop_headers:
    tableDestroy(*rfc821Table);
    *rfc821Table = NULL;
    return -1;
}
/*
 * Pick the part of a multipart message that is most worth treating as text.
 *
 * If there's a text/html part, the index of the first one is returned: HTML
 * text is most likely to include a scripting worm. Otherwise the index of the
 * last part whose MIME type is TEXT is returned.
 *
 * messages: array of parts; entries may be NULL and are skipped. A NULL array
 *           is treated as having no parts.
 * size:     number of entries in messages
 *
 * If we can't find one, return -1
 */
static int
getTextPart(message *const messages[], size_t size)
{
    size_t i;
    int textpart = -1;

    /* Callers may have no array at all when there are no parts */
    if (messages == NULL)
        return -1;

    for (i = 0; i < size; i++) {
        if (messages[i] == NULL)
            continue;
        if (messageGetMimeType(messages[i]) != TEXT)
            continue;

        /* HTML wins outright */
        if (strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
            return (int)i;

        /* Remember this one, a later text part replaces it */
        textpart = (int)i;
    }

    return textpart;
}
/*
 * strip -
 *	Remove the trailing spaces from a buffer. Don't call this directly,
 * always call strstrip() which is a wrapper to this routine to be used with
 * NUL terminated strings. This code looks a bit strange because of its
 * heritage from code that worked on strings that weren't necessarily NUL
 * terminated.
 * TODO: rewrite for clamAV
 *
 * buf: NUL terminated string, modified in place; NULL yields 0
 * len: size of the buffer, i.e. sizeof(buf) not strlen(buf); it must be int
 *	not size_t because of the >= 0 test. A value <= 0 yields 0, a value
 *	larger than strlen(buf) + 1 leaves buf untouched.
 *
 * Returns its new length (a la strlen)
 *
 * Which characters count as strippable depends on the build: with UNIX,
 * C_LINUX or C_DARWIN defined everything that is not isgraph() goes, except
 * that '\n' and '\r' stop the scan; otherwise NULs and isspace() characters
 * are removed.
 */
static size_t
strip(char *buf, int len)
{
    char *ptr;
    size_t i;

    if ((buf == NULL) || (len <= 0))
        return 0;

    i = strlen(buf);
    if (len > (int)(i + 1))
        return i;
    ptr = &buf[--len];

    /*
     * Walk backwards from the last byte of the buffer, overwriting with NUL
     * until a character worth keeping is met. --len is tested first so ptr
     * is never moved in front of buf.
     */
#if defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) /* watch - it may be in shared text area */
    do
        if (*ptr)
            *ptr = '\0';
    /* <ctype.h> needs an unsigned char value: plain char may be negative */
    while ((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else /* more characters can be displayed on DOS */
    do
#ifndef REAL_MODE_DOS
        if (*ptr) /* C8.0 puts into a text area */
#endif
            *ptr = '\0';
    while ((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
#endif
    return ((size_t)(len + 1));
}
/*
 * strstrip:
 *	Strip a given NUL terminated string in place by handing it to strip()
 *	together with the size of the buffer it occupies (length plus the
 *	terminator).
 *
 * Returns what strip() returns, or 0 when s is NULL.
 */
size_t
strstrip(char *s)
{
    size_t bufsize;

    if (s == NULL)
        return 0;

    bufsize = strlen(s) + 1;

    return strip(s, (int)bufsize);
}
/*
 * Apply one MIME header line to a message.
 *
 * m:              message being built
 * cmd:            header name, looked up in rfc821Table
 * rfc821Table:    header name -> command number table
 * arg:            header value; may be NULL
 * ctx:            scan context, passed to haveTooManyMIMEArguments()
 * heuristicFound: reset to false on entry, and handed to
 *                 haveTooManyMIMEArguments() while Content-Type arguments
 *                 are being added
 *
 * Content-Type, Content-Transfer-Encoding and Content-Disposition are acted
 * upon, any other header is ignored. RFC822 comments are removed from both
 * the name and the value before they are used.
 *
 * Returns 0 for OK, -1 for error (memory allocation failure)
 */
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg, cli_ctx *ctx, bool *heuristicFound)
{
    char *copy, *p, *buf;
    const char *ptr;
    int commandNumber;
    size_t argCnt = 0;

    *heuristicFound = false;

    cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);

    /* rfc822comments() gives NULL when there is nothing to strip */
    copy = rfc822comments(cmd, NULL);
    if (copy) {
        commandNumber = tableFind(rfc821Table, copy);
        free(copy);
    } else {
        commandNumber = tableFind(rfc821Table, cmd);
    }

    /* From here on copy, if set, owns the comment-free value */
    copy = rfc822comments(arg, NULL);
    ptr  = (copy != NULL) ? copy : arg;

    buf = NULL;

    switch (commandNumber) {
        case CONTENT_TYPE:
            /*
             * Fix for non RFC1521 compliant mailers
             * that send content-type: Text instead
             * of content-type: Text/Plain, or
             * just simply "Content-Type:"
             */
            if (arg == NULL)
                /*
                 * According to section 4 of RFC1521:
                 * "Note also that a subtype specification is
                 * MANDATORY. There are no default subtypes"
                 *
                 * We have to break this and make an assumption
                 * for the subtype because virus writers and
                 * email client writers don't get it right
                 */
                cli_dbgmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
            else if (strchr(ptr, '/') == NULL)
                /*
                 * Empty field, such as
                 *	Content-Type:
                 * which I believe is illegal according to
                 * RFC1521
                 */
                cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
            else {
                int i;

                /* Work area, large enough for any field of the value */
                buf = cli_malloc(strlen(ptr) + 1);
                if (buf == NULL) {
                    cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
                    if (copy)
                        free(copy);
                    return -1;
                }
                /*
                 * Some clients are broken and
                 * put white space after the ;
                 */
                if (*arg == '/') {
                    cli_dbgmsg("Content-type '/' received, assuming application/octet-stream\n");
                    messageSetMimeType(m, "application");
                    messageSetMimeSubtype(m, "octet-stream");
                } else {
                    /*
                     * The content type could be in quotes:
                     *	Content-Type: "multipart/mixed"
                     * FIXME: this is a hack in that ignores
                     *	the quotes, it doesn't handle
                     *	them properly
                     */
                    while (isspace((const unsigned char)*ptr))
                        ptr++;
                    if (ptr[0] == '\"')
                        ptr++;

                    if (ptr[0] != '/') {
                        char *s;
#ifdef CL_THREAD_SAFE
                        char *strptr = NULL;
#endif

                        s = cli_strtokbuf(ptr, 0, ";", buf);
                        /*
                         * Handle
                         * Content-Type: foo/bar multipart/mixed
                         * and
                         * Content-Type: multipart/mixed foo/bar
                         */
                        if (s && *s) {
                            /* Scratch space for cutting a subtype at a space */
                            char *buf2 = cli_strdup(buf);

                            if (buf2 == NULL) {
                                if (copy)
                                    free(copy);
                                free(buf);
                                return -1;
                            }
                            for (;;) {
#ifdef CL_THREAD_SAFE
                                int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
#else
                                int set = messageSetMimeType(m, strtok(s, "/"));
#endif

#ifdef CL_THREAD_SAFE
                                s = strtok_r(NULL, ";", &strptr);
#else
                                s = strtok(NULL, ";");
#endif
                                if (s == NULL)
                                    break;
                                if (set) {
                                    /*
                                     * strstrip() gives 0 for a blank
                                     * token, so only look at the last
                                     * character when there is one.
                                     */
                                    size_t len = strstrip(s);

                                    if ((len > 0) && (s[len - 1] == '\"')) {
                                        /* Drop the closing quote */
                                        s[len - 1] = '\0';
                                        len        = strstrip(s);
                                    }
                                    if (len) {
                                        if (strchr(s, ' '))
                                            messageSetMimeSubtype(m,
                                                                  cli_strtokbuf(s, 0, " ", buf2));
                                        else
                                            messageSetMimeSubtype(m, s);
                                    }
                                }

                                /* Move on to a second type/subtype pair, if any */
                                while (*s && !isspace((unsigned char)*s))
                                    s++;
                                if (*s++ == '\0')
                                    break;
                                if (*s == '\0')
                                    break;
                            }
                            free(buf2);
                        }
                    }
                }

                /*
                 * Add in all rest of the the arguments.
                 * e.g. if the header is this:
                 * Content-Type:', arg='multipart/mixed; boundary=foo
                 * we find the boundary argument set it
                 */
                i = 1;
                while (cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
                    cli_dbgmsg("mimeArgs = '%s'\n", buf);

                    argCnt++;
                    if (haveTooManyMIMEArguments(argCnt, ctx, heuristicFound)) {
                        break;
                    }
                    messageAddArguments(m, buf);
                }
            }
            break;
        case CONTENT_TRANSFER_ENCODING:
            messageSetEncoding(m, ptr);
            break;
        case CONTENT_DISPOSITION:
            /* A missing value is treated like an empty one */
            if (ptr != NULL) {
                buf = cli_malloc(strlen(ptr) + 1);
                if (buf == NULL) {
                    cli_errmsg("parseMimeHeader: Unable to allocate memory for buf %llu\n", (long long unsigned)(strlen(ptr) + 1));
                    if (copy)
                        free(copy);
                    return -1;
                }
                p = cli_strtokbuf(ptr, 0, ";", buf);
                if (p && *p) {
                    messageSetDispositionType(m, p);
                    messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
                }
            }
            if (!messageHasFilename(m))
                /*
                 * Handle this type of header, without
                 * a filename (e.g. some Worm.Torvil.D)
                 *	Content-ID: <nRfkHdrKsAxRU>
                 * Content-Transfer-Encoding: base64
                 * Content-Disposition: attachment
                 */
                messageAddArgument(m, "filename=unknown");
            break;
        default:
            /* Not a header we act upon */
            break;
    }
    if (copy)
        free(copy);
    if (buf)
        free(buf);

    return 0;
}
/*
 * Save the text portion of the message
 *
 * The message is given the argument "filename=textportion", turned into a
 * fileblob under mctx->dir and that fileblob is scanned and destroyed.
 * mctx->files is incremented when the fileblob could be created.
 *
 * destroy_text is passed straight on to messageToFileblob().
 *
 * Returns the result of fileblobScanAndDestroy(), or CL_ETMPFILE when no
 * fileblob could be created.
 */
static int
saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
{
    fileblob *textblob;

    messageAddArgument(m, "filename=textportion");

    textblob = messageToFileblob(m, mctx->dir, destroy_text);
    if (textblob == NULL)
        return CL_ETMPFILE;

    /*
     * Save main part to scan that
     */
    cli_dbgmsg("Saving main message\n");
    mctx->files++;

    return fileblobScanAndDestroy(textblob);
}
/*
 * Handle RFC822 comments in headers.
 *
 * in:  header text to clean; leading white space is skipped
 * out: where to put the result. It must not be the same pointer as in and
 *      must have room for strlen(in) + 1 bytes, since nothing may end up
 *      being removed. If out == NULL, return a buffer without the comments,
 *      the caller must free the returned buffer.
 *
 * Text between unquoted, possibly nested, parentheses is dropped. A backslash
 * escapes the character after it, and double quotes switch off comment
 * recognition until the closing quote.
 *
 * Return NULL on error or if the input has no comments (no '(' at all), in
 * which case out is left untouched.
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
    const char *iptr;
    char *optr;
    int backslash, inquote, commentlevel;

    if (in == NULL || out == in) {
        cli_errmsg("rfc822comments: Invalid parameters.\n");
        return NULL;
    }

    /* Nothing that could open a comment: tell the caller to use the input */
    if (strchr(in, '(') == NULL) {
        return NULL;
    }

    while (isspace((const unsigned char)*in)) {
        in++;
    }

    if (out == NULL) {
        out = cli_malloc(strlen(in) + 1);
        if (out == NULL) {
            cli_errmsg("rfc822comments: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
            return NULL;
        }
    }

    backslash = commentlevel = inquote = 0;
    optr                               = out;

    cli_dbgmsg("rfc822comments: contains a comment\n");

    for (iptr = in; *iptr; iptr++) {
        if (backslash) {
            /* Escaped character: copied as is unless inside a comment */
            if (commentlevel == 0)
                *optr++ = *iptr;
            backslash = 0;
            continue;
        }

        switch (*iptr) {
            case '\\':
                backslash = 1;
                break;
            case '\"':
                *optr++ = '\"';
                inquote = !inquote;
                break;
            case '(':
                if (inquote)
                    *optr++ = '(';
                else
                    commentlevel++;
                break;
            case ')':
                if (inquote)
                    *optr++ = ')';
                else if (commentlevel > 0)
                    commentlevel--;
                break;
            default:
                if (commentlevel == 0)
                    *optr++ = *iptr;
        }
    }

    if (backslash) /* last character was a single backslash */
        *optr++ = '\\';
    *optr = '\0';

    /*strstrip(out);*/

    cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

    return out;
}
/*
 * Handle RFC2047 encoding ("=?charset?encoding?text?=" words in headers).
 *
 * in: header text; NULL is treated as an error
 *
 * Text outside encoded words is copied unchanged. Each encoded word is
 * replaced by its decoded form, the charset is skipped and not acted upon.
 * Only the 'q' (quoted-printable) and 'b' (base64) encodings are understood,
 * in either case. A single '\n' at the end of a decoded word is overwritten
 * by whatever follows.
 *
 * Input that holds no "=?" or no "?=" is simply duplicated. Malformed encoded
 * words end the conversion early and whatever has been produced so far is
 * returned.
 *
 * Returns a malloc'd buffer that the caller must free, or NULL on error
 * (NULL input, out of memory or an unsupported encoding).
 */
static char *
rfc2047(const char *in)
{
    char *out, *pout;
    size_t len;

    if (in == NULL)
        return NULL;

    if ((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
        return cli_strdup(in);

    cli_dbgmsg("rfc2047 '%s'\n", in);
    out = cli_malloc(strlen(in) + 1);

    if (out == NULL) {
        cli_errmsg("rfc2047: Unable to allocate memory for out %llu\n", (long long unsigned)(strlen(in) + 1));
        return NULL;
    }

    pout = out;

    /* For each RFC2047 string */
    while (*in) {
        char encoding, *ptr, *enctext;
        message *m;
        blob *b;

        /* Find next RFC2047 string */
        while (*in) {
            if ((*in == '=') && (in[1] == '?')) {
                in += 2;
                break;
            }
            *pout++ = *in++;
        }
        /* Skip over charset, find encoding */
        while ((*in != '?') && *in)
            in++;
        if (*in == '\0')
            break;
        encoding = *++in;
        /* <ctype.h> needs an unsigned char value: plain char may be negative */
        encoding = (char)tolower((unsigned char)encoding);

        if ((encoding != 'q') && (encoding != 'b')) {
            cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
            free(out);
            out = NULL;
            break;
        }
        /* Skip to encoded text */
        if (*++in != '?')
            break;
        if (*++in == '\0')
            break;

        /* Private copy of the encoded text, cut at the closing "?=" below */
        enctext = cli_strdup(in);
        if (enctext == NULL) {
            free(out);
            out = NULL;
            break;
        }
        in = strstr(in, "?=");
        if (in == NULL) {
            free(enctext);
            break;
        }
        in += 2;
        ptr = strstr(enctext, "?=");
        if (NULL == ptr) {
            free(enctext);
            break;
        }
        *ptr = '\0';
        /*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/

        /* Decode by way of a throw-away message holding just this text */
        m = messageCreate();
        if (m == NULL) {
            free(enctext);
            break;
        }
        messageAddStr(m, enctext);
        free(enctext);
        enctext = NULL;
        switch (encoding) {
            case 'q':
                messageSetEncoding(m, "quoted-printable");
                break;
            case 'b':
                messageSetEncoding(m, "base64");
                break;
        }
        b = messageToBlob(m, 1);
        if (b == NULL) {
            messageDestroy(m);
            break;
        }
        len = blobGetDataSize(b);
        cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
                   (const char *)blobGetData(b));
        memcpy(pout, blobGetData(b), len);
        blobDestroy(b);
        messageDestroy(m);
        /* Step back over a trailing newline so that it gets overwritten */
        if (len > 0 && pout[len - 1] == '\n')
            pout += len - 1;
        else
            pout += len;
    }
    if (out == NULL)
        return NULL;

    *pout = '\0';

    cli_dbgmsg("rfc2047 returns '%s'\n", out);
    return out;
}
/*
 * Handle partial messages (Content-Type: message/partial, RFC 1341).
 *
 * The fragment held in m is stored in the "clamav-partial" directory below
 * the engine's temporary directory (falling back to cli_gettmpdir()), under a
 * name derived from the MD5 of the fragment's "id" argument and its "number".
 * When "number" equals "total", every fragment 1..total carrying that id is
 * concatenated, in order, into a file named after the sanitised id in
 * mctx->dir.
 *
 * Returns 0 on success, CL_EMEM when the id hash cannot be converted to hex,
 * and -1 on any other failure. A partially written output file is removed on
 * failure.
 *
 * FIXME: reassembly assumes that the last fragment is also the last one to
 * arrive.
 */
static int
rfc1341(mbox_ctx *mctx, message *m)
{
    char *arg, *id, *total, *oldfilename;
    char *number       = NULL;
    char *md5_hex      = NULL;
    const char *tmpdir = NULL;
    DIR *dd            = NULL;
    FILE *fout         = NULL;
    size_t arg_len;
    int n;
    int ret = -1;
    char pdir[PATH_MAX + 1];
    char outname[PATH_MAX + 1];
    unsigned char md5_val[16];

    if ((NULL == mctx) || (NULL == m)) {
        cli_dbgmsg("rfc1341: Invalid NULL arguments\n");
        return -1;
    }

    id = (char *)messageFindArgument(m, "id");
    if (id == NULL) {
        return -1;
    }

    if (NULL != mctx->ctx) {
        tmpdir = cl_engine_get_str((const struct cl_engine *)mctx->ctx->engine, CL_ENGINE_TMPDIR, NULL);
    }
    if (NULL == tmpdir) {
        tmpdir = cli_gettmpdir();
    }

    snprintf(pdir, sizeof(pdir) - 1, "%s" PATHSEP "clamav-partial", tmpdir);

    /*
     * The directory needs the owner's search (x) bit as well as read/write,
     * otherwise the fragments stored in it cannot be created or reopened.
     * errno is only meaningful when mkdir() has actually failed.
     */
    if (mkdir(pdir, S_IRWXU) < 0) {
        STATBUF statb;

        if (errno != EEXIST) {
            cli_errmsg("Can't create the directory '%s'\n", pdir);
            goto done;
        }

        /* Somebody else created it: make sure it isn't open to others */
        if (CLAMSTAT(pdir, &statb) < 0) {
            char err[128];

            cli_errmsg("Partial directory %s: %s\n", pdir,
                       cli_strerror(errno, err, sizeof(err)));
            goto done;
        }
        if (statb.st_mode & 077) {
#ifdef ACCESSPERMS
            const unsigned int mode = (unsigned int)(statb.st_mode & ACCESSPERMS);
#else
            const unsigned int mode = (unsigned int)(statb.st_mode & 0777);
#endif
            cli_warnmsg("Insecure partial directory %s (mode 0%o)\n", pdir, mode);
        }
    }

    number = (char *)messageFindArgument(m, "number");
    if (number == NULL) {
        goto done;
    }

    oldfilename = messageGetFilename(m);

    /* Name the fragment after its id and sequence number */
    arg_len = strlen("filename=") + strlen(id) + strlen(number) + 1;
    arg     = cli_malloc(arg_len);
    if (arg) {
        snprintf(arg, arg_len, "filename=%s%s", id, number);
        messageAddArgument(m, arg);
        free(arg);
    }

    if (oldfilename) {
        cli_dbgmsg("Must reset to %s\n", oldfilename);
        free(oldfilename);
    }

    n = atoi(number);
    cl_hash_data("md5", id, strlen(id), md5_val, NULL);
    md5_hex = cli_str2hex((const char *)md5_val, 16);
    if (!md5_hex) {
        ret = CL_EMEM;
        goto done;
    }

    if (messageSavePartial(m, pdir, md5_hex, n) < 0) {
        goto done;
    }

    total = (char *)messageFindArgument(m, "total");
    cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
    if (total) {
        int t = atoi(total);

        free(total);

        /*
         * If it's the last one - reassemble it
         * FIXME: this assumes that we receive the parts in order
         */
        if ((n == t) && ((dd = opendir(pdir)) != NULL)) {
            time_t now;

            sanitiseName(id);

            snprintf(outname, sizeof(outname) - 1, "%s" PATHSEP "%s", mctx->dir, id);

            cli_dbgmsg("outname: %s\n", outname);

            fout = fopen(outname, "wb");
            if (fout == NULL) {
                cli_errmsg("Can't open '%s' for writing", outname);
                goto done;
            }

            time(&now);
            for (n = 1; n <= t; n++) {
                char filename[NAME_MAX + 1];
                struct dirent *dent;

                /* The suffix that identifies fragment n of this message */
                snprintf(filename, sizeof(filename), "_%s-%u", md5_hex, (unsigned int)n);

                while ((dent = readdir(dd))) {
                    FILE *fin;
                    char buffer[BUFSIZ], fullname[PATH_MAX + 1 + 256 + 1];
                    int nblanks;
                    STATBUF statb;
                    const char *dentry_idpart;
                    int test_fd;

                    if (dent->d_ino == 0)
                        continue;

                    if (!strcmp(".", dent->d_name) ||
                        !strcmp("..", dent->d_name))
                        continue;

                    snprintf(fullname, sizeof(fullname) - 1,
                             "%s" PATHSEP "%s", pdir, dent->d_name);
                    dentry_idpart = strchr(dent->d_name, '_');

                    if (!dentry_idpart ||
                        strcmp(filename, dentry_idpart) != 0) {
                        /*
                         * Not the fragment we are after. With keeptmp set,
                         * take the opportunity to expire entries that are
                         * more than a week old.
                         */
                        if (!m->ctx->engine->keeptmp)
                            continue;

                        if ((test_fd = open(fullname, O_RDONLY | O_BINARY)) < 0)
                            continue;

                        if (FSTAT(test_fd, &statb) < 0) {
                            close(test_fd);
                            continue;
                        }

                        if (now - statb.st_mtime > (time_t)(7 * 24 * 3600)) {
                            if (cli_unlink(fullname)) {
                                close(test_fd);
                                goto done;
                            }
                        }
                        close(test_fd);
                        continue;
                    }

                    fin = fopen(fullname, "rb");
                    if (fin == NULL) {
                        cli_errmsg("Can't open '%s' for reading", fullname);
                        goto done;
                    }

                    /*
                     * Ensure that trailing newlines aren't copied: blank
                     * lines are only emitted once a non-blank line follows.
                     */
                    nblanks = 0;
                    while (fgets(buffer, sizeof(buffer) - 1, fin) != NULL) {
                        if (buffer[0] == '\n') {
                            nblanks++;
                            continue;
                        }
                        if (nblanks) {
                            do {
                                if (putc('\n', fout) == EOF)
                                    break;
                            } while (--nblanks > 0);
                        }
                        /* nblanks is still non-zero only if putc() failed */
                        if (nblanks || fputs(buffer, fout) == EOF) {
                            fclose(fin);
                            goto done;
                        }
                    }
                    fclose(fin);

                    /* don't unlink if leave temps */
                    if (!m->ctx->engine->keeptmp) {
                        if (cli_unlink(fullname)) {
                            goto done;
                        }
                    }
                    break;
                }
                rewinddir(dd);
            }
        }
    }

    ret = 0;

done:
    if (fout != NULL) {
        fclose(fout);
        /* Never leave a half-assembled message behind */
        if (ret != 0)
            cli_unlink(outname);
    }
    if (dd != NULL)
        closedir(dd);
    free(md5_hex);
    free(number);
    free(id);
    return ret;
}
/*
 * Destroy the blob b (when there is one) and free the tag arguments held in
 * hrefs.
 */
static void
hrefs_done(blob *b, tag_arguments_t *hrefs)
{
    if (b != NULL) {
        blobDestroy(b);
    }

    html_tag_arg_free(hrefs);
}
/*
 * Extract URLs from static text.
 *
 * Scans mem for "http:", "https:" and "ftp:" (scheme letters are matched
 * case-insensitively) and adds each URL found to hrefs as an "href" entry.
 * A URL runs up to the next space, newline or tab and is truncated to
 * sizeof(url) - 1 bytes.
 *
 * Candidates are only considered while at least 11 bytes remain, which keeps
 * every fixed-offset read below inside the buffer.
 */
static void extract_text_urls(const unsigned char *mem, size_t len, tag_arguments_t *hrefs)
{
    char url[1024];
    size_t off;

    for (off = 0; off + 10 < len; off++) {
        /* check whether this is the start of a URL */
        int32_t proto = cli_readint32(mem + off);
        bool is_http, is_ftp;
        size_t url_len;

        /* convert to lowercase */
        proto |= 0x20202020;

        /*
         * 'http' / 'ftp:' in little-endian. For http the scheme must be
         * terminated by ':' directly, or by 's' and then ':' (https).
         */
        is_http = (proto == 0x70747468) &&
                  ((mem[off + 4] == ':') ||
                   (((mem[off + 4] | 0x20) == 's') && (mem[off + 5] == ':')));
        is_ftp = (proto == 0x3a707466);

        if (!is_http && !is_ftp)
            continue;

        for (url_len = 4; off + url_len < len && url_len < (sizeof(url) - 1); url_len++) {
            unsigned char c = mem[off + url_len];
            /* smart compilers will compile this if into
             * a single bt+jb instruction */
            if (c == ' ' || c == '\n' || c == '\t')
                break;
        }
        memcpy(url, mem + off, url_len);
        url[url_len] = '\0';
        html_tag_arg_add(hrefs, "href", url);
        off += url_len;
    }
}
/*
 * Collect the hrefs found in message m.
 *
 * This used to be part of checkURLs; it was split out because phishingScan
 * needs it too, and phishingScan might be used in situations where checkURLs
 * is disabled (see ifdef).
 *
 * Returns the blob holding the message data, or NULL when the message is
 * empty, larger than 100KiB, cannot be converted to a blob, or cannot be
 * normalised. The blob is handed to the caller on success.
 */
static blob *
getHrefs(message *m, tag_arguments_t *hrefs)
{
    blob *body = messageToBlob(m, 0);
    unsigned char *data;
    size_t size;

    if (body == NULL)
        return NULL;

    size = blobGetDataSize(body);
    if (size == 0)
        goto discard;

    /* TODO: make this size customisable */
    if (size > 100 * 1024) {
        cli_dbgmsg("Viruses pointed to by URLs not scanned in large message\n");
        goto discard;
    }

    hrefs->count    = 0;
    hrefs->value    = NULL;
    hrefs->tag      = NULL;
    hrefs->contents = NULL;

    cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
    data = blobGetData(body);
    if (!html_normalise_mem(data, (off_t)size, NULL, hrefs, m->ctx->dconf))
        goto discard;
    cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

    /* Nothing found by the HTML parser: look for URLs in the raw text */
    if ((hrefs->count == 0) && hrefs->scanContents) {
        extract_text_urls(data, size, hrefs);
    }

    /* TODO: Do we need to call remove_html_comments? */
    return body;

discard:
    blobDestroy(body);
    return NULL;
}
/*
 * Validate the URLs of mainMessage for phishes.
 *
 * Does nothing when *rc is already VIRUS, or when phishing URL scanning is
 * not enabled. On a phishing hit the message is marked as infected and *rc
 * is set to VIRUS. is_html is currently unused.
 */
static void
checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
{
    tag_arguments_t hrefs;
    blob *b;

    UNUSEDPARAM(is_html);

    if (*rc == VIRUS)
        return;

    hrefs.scanContents = (mctx->ctx->engine->dboptions & CL_DB_PHISHING_URLS) &&
                         (DCONF_PHISHING & PHISHING_CONF_ENGINE);

    /*
     * Don't waste time extracting hrefs (parsing html), nobody
     * will need it
     */
    if (!hrefs.scanContents)
        return;

    hrefs.count    = 0;
    hrefs.value    = NULL;
    hrefs.tag      = NULL;
    hrefs.contents = NULL;

    b = getHrefs(mainMessage, &hrefs);

    if ((b != NULL) && hrefs.scanContents &&
        (phishingScan(mctx->ctx, &hrefs) == CL_VIRUS)) {
        /*
         * FIXME: message objects' contents are
         *	encapsulated so we should not access
         *	the members directly
         */
        mainMessage->isInfected = true;
        *rc                     = VIRUS;
        cli_dbgmsg("PH:Phishing found\n");
    }

    hrefs_done(b, &hrefs);
}
# ifdef HAVE_BACKTRACE
/*
 * SIGSEGV handler: restore the default disposition, dump a backtrace (also
 * to syslog) and exit with SIGSEGV as the status. sig is not used.
 */
static void
sigsegv(int sig)
{
    UNUSEDPARAM(sig);

    signal(SIGSEGV, SIG_DFL);
    print_trace(1);
    exit(SIGSEGV);
}
/*
 * Print a backtrace (at most 10 frames) of the current process via
 * cli_errmsg and, when use_syslog is non-zero, via syslog as well.
 *
 * backtrace_symbols() allocates the symbol table and may return NULL, in
 * which case only the header lines are printed.
 */
static void
print_trace(int use_syslog)
{
    void *array[10];
    size_t size;
    char **strings;
    size_t i;
    pid_t pid = getpid();

    cli_errmsg("Segmentation fault, attempting to print backtrace\n");

    size    = backtrace(array, 10);
    strings = backtrace_symbols(array, size);

    cli_errmsg("Backtrace of pid %d:\n", (int)pid);
    if (use_syslog)
        syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

    if (strings != NULL) {
        for (i = 0; i < size; i++) {
            cli_errmsg("%s\n", strings[i]);
            if (use_syslog)
                syslog(LOG_ERR, "bt[%llu]: %s", (unsigned long long)i, strings[i]);
        }
    }

#ifdef SAVE_TMP
    cli_errmsg("The errant mail file has been saved\n");
#endif
    /* #else TODO: dump the current email */

    free(strings);
}
# endif
/*
 * Is this a header we want to keep? True for the Content-Transfer-Encoding,
 * Content-Disposition and Content-Type commands, and for headers named
 * "From", "Received" or "De" (compared case-insensitively).
 *
 * See also clamav-milter
 */
static bool
usefulHeader(int commandNumber, const char *cmd)
{
    if ((commandNumber == CONTENT_TRANSFER_ENCODING) ||
        (commandNumber == CONTENT_DISPOSITION) ||
        (commandNumber == CONTENT_TYPE))
        return true;

    return (strcasecmp(cmd, "From") == 0) ||
           (strcasecmp(cmd, "Received") == 0) ||
           (strcasecmp(cmd, "De") == 0);
}
/*
 * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
 *
 * buffer:     destination; always NUL terminated when non-NULL is returned
 * buffer_len: size of buffer; at most buffer_len - 1 input bytes are examined
 * map:        the map to read from
 * at:         in/out offset into map; advanced by the number of bytes consumed
 *
 * NUL bytes in the input are consumed but not copied. Only the first
 * character of a line terminator is stored; the second character of a
 * two-character terminator is consumed and dropped.
 *
 * Returns buffer, or NULL when the map data cannot be obtained, when the
 * arguments are invalid, or when the input is exhausted before anything has
 * been stored.
 *
 * NOTE(review): map and at are dereferenced before any argument validation,
 * and the NULL return for exhausted input leaves *at untouched, even when
 * NUL bytes were consumed.
 */
static char *
getline_from_mbox ( char * buffer , size_t buffer_len , fmap_t * map , size_t * at )
{
const char * src , * cursrc ;
char * curbuf ;
size_t i ;
/* Ask for one byte more than will be stored, so that the second character
 * of a line terminator can be looked at */
size_t input_len = MIN ( map - > len - * at , buffer_len + 1 ) ;
src = cursrc = fmap_need_off_once ( map , * at , input_len ) ;
/* we check for eof from the result of GETC()
if ( feof ( fin ) )
return NULL ; */
if ( ! src ) {
cli_dbgmsg ( " getline_from_mbox: fmap need failed \n " ) ;
return NULL ;
}
if ( ( buffer_len = = 0 ) | | ( buffer = = NULL ) ) {
cli_errmsg ( " Invalid call to getline_from_mbox(). Refer to https://docs.clamav.net/manual/Installing.html \n " ) ;
return NULL ;
}
curbuf = buffer ;
for ( i = 0 ; i < buffer_len - 1 ; i + + ) {
char c ;
/* input_len counts the bytes still available at cursrc */
if ( ! input_len - - ) {
if ( curbuf = = buffer ) {
/* EOF on first char */
return NULL ;
}
break ;
}
switch ( ( c = * cursrc + + ) ) {
case ' \0 ' :
/* skipped, but still counted against buffer_len by the loop */
continue ;
case ' \n ' :
* curbuf + + = ' \n ' ;
/* "\n\r": swallow the trailing '\r' */
if ( input_len & & * cursrc = = ' \r ' ) {
i + + ;
cursrc + + ;
}
break ;
case ' \r ' :
* curbuf + + = ' \r ' ;
/* "\r\n": swallow the trailing '\n' */
if ( input_len & & * cursrc = = ' \n ' ) {
i + + ;
cursrc + + ;
}
break ;
default :
* curbuf + + = c ;
continue ;
}
/* only reached from a line terminator: the line is complete */
break ;
}
* at + = cursrc - src ;
* curbuf = ' \0 ' ;
return buffer ;
}
/*
 * Is this line a candidate for the start of a bounce message?
 *
 * Only lines of 6 to 71 characters are considered. A line starting with
 * "From " or ">From " qualifies when, from its fifth character onwards, it
 * holds at least 6 spaces and 11 digits. Any other line qualifies when
 * cli_compare_ftm_file() identifies it as mail.
 */
static bool
isBounceStart(mbox_ctx *mctx, const char *line)
{
    size_t len;

    if ((line == NULL) || (*line == '\0'))
        return false;

    len = strlen(line);
    if ((len < 6) || (len >= 72))
        return false;

    if ((memcmp(line, "From ", 5) == 0) ||
        (memcmp(line, ">From ", 6) == 0)) {
        int spaces = 0, digits = 0;
        const char *p;

        /* len >= 6, so line[4] is never the terminator */
        for (p = line + 4; *p != '\0'; p++) {
            if (*p == ' ')
                spaces++;
            else if (isdigit((*p) & 0xFF))
                digits++;
        }

        return (spaces >= 6) && (digits >= 11);
    }

    return (bool)(cli_compare_ftm_file((const unsigned char *)line, len, mctx->ctx->engine) == CL_TYPE_MAIL);
}
/*
 * Extract a binhex encoded message to mctx->dir and scan it. A message with
 * no encoding set is marked as "x-binhex" first.
 *
 * Returns true if the extracted file was found to be infected.
 */
static bool
exportBinhexMessage(mbox_ctx *mctx, message *m)
{
    fileblob *fb;
    bool infected;

    if (messageGetEncoding(m) == NOENCODING)
        messageSetEncoding(m, "x-binhex");

    fb = messageToFileblob(m, mctx->dir, 0);
    if (fb == NULL) {
        cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
        return false;
    }

    cli_dbgmsg("Binhex file decoded to %s\n",
               fileblobGetFilename(fb));

    infected = (fileblobScanAndDestroy(fb) == CL_VIRUS);
    mctx->files++;

    return infected;
}
/*
 * Locate any bounce message and extract it. Return cl_status
 *
 * Attempt to save the original (unbounced) message - clamscan will find
 * that in the directory and call us again (with any luck) having found an
 * e-mail message to handle.
 *
 * This finds a lot of false positives; the search that a content type is in
 * the bounce (i.e. it's after the bounce header) helps a bit.
 *
 * A messageAddLine optimization could help here, but needs careful thought.
 * Doing it with line numbers would be best, since the current method in
 * messageAddLine of checking encoding first must remain, otherwise non
 * bounce messages won't be scanned.
 */
static int
exportBounceMessage(mbox_ctx *mctx, text *start)
{
    int status = CL_CLEAN;
    text *cur;
    fileblob *fb;

    /*
     * Walk the header-like lines. The loop stops at the first line that
     * settles the question; cur is left NULL when there is nothing to save.
     */
    for (cur = start; cur != NULL; cur = cur->t_next) {
        const char *txt = lineGetData(cur->t_line);
        char cmd[RFC2821LENGTH + 1];
        int header;

        if (txt == NULL)
            continue;
        if (cli_strtokbuf(txt, 0, ":", cmd) == NULL)
            continue;

        header = tableFind(mctx->rfc821Table, cmd);

        if (header == CONTENT_TRANSFER_ENCODING) {
            /* 7bit/8bit says nothing useful, keep looking */
            if ((strstr(txt, "7bit") != NULL) ||
                (strstr(txt, "8bit") != NULL))
                continue;
            break;
        }

        if (header == CONTENT_DISPOSITION)
            break;

        if (header == CONTENT_TYPE) {
            /* a text/plain bounce is not worth saving */
            if (strstr(txt, "text/plain") != NULL)
                cur = NULL;
            break;
        }

        /* remember the most recent From/Received as the place to save from */
        if (strcasecmp(cmd, "From") == 0)
            start = cur;
        else if (strcasecmp(cmd, "Received") == 0)
            start = cur;
    }

    if ((cur != NULL) && ((fb = fileblobCreate()) != NULL)) {
        cli_dbgmsg("Found a bounce message\n");
        fileblobSetFilename(fb, mctx->dir, "bounce");
        fileblobSetCTX(fb, mctx->ctx);

        if (textToFileblob(start, fb, 1) != NULL) {
            status = fileblobScanAndDestroy(fb);
        } else {
            cli_dbgmsg("Nothing new to save in the bounce message\n");
            fileblobDestroy(fb);
        }
        mctx->files++;
    } else {
        cli_dbgmsg("Not found a bounce message\n");
    }

    return status;
}
/*
 * Get string representation of mimetype: the key of the first mimeTypeStr
 * entry whose value matches, or "UNKNOWN" when there is none.
 */
static const char *getMimeTypeStr(mime_type mimetype)
{
    const struct tableinit *entry;

    for (entry = mimeTypeStr; entry->key != NULL; entry++) {
        if ((mime_type)entry->value == mimetype)
            return entry->key;
    }

    return "UNKNOWN";
}
/*
 * Get string representation of encoding type: the key of the first
 * encTypeStr entry whose value matches, or "UNKNOWN" when there is none.
 */
static const char *getEncTypeStr(encoding_type enctype)
{
    const struct tableinit *entry;

    for (entry = encTypeStr; entry->key != NULL; entry++) {
        if ((encoding_type)entry->value == enctype)
            return entry->key;
    }

    return "UNKNOWN";
}
/*
 * Handle the ith element of a number of multiparts , e . g . multipart / alternative
 *
 * mainMessage:     the current main message; it is the return value, and is
 *                  replaced by NULL (and destroyed, unless it is messageIn)
 *                  on some paths
 * messages:        the parts; messages[i] is the one handled here and is
 *                  destroyed and set to NULL on most paths
 * i:               index of the part to handle
 * rc:              in/out status; the part is not examined unless *rc is OK
 *                  on entry, and *rc is set to VIRUS when something is found
 * mctx:            mbox context (scan context, output directory, tables)
 * messageIn:       the caller's message, which is never destroyed here
 * tptr:            *tptr is handed to parseEmailBody for a nested multipart
 * recursion_level: current depth; nested parsing uses recursion_level + 1
 *
 * Switch cases that return have dealt with the part completely. Cases that
 * break fall through to the code after the switch, which converts the part
 * to a fileblob in mctx->dir and scans it.
 */
static message *
do_multipart ( message * mainMessage , message * * messages , int i , mbox_status * rc , mbox_ctx * mctx , message * messageIn , text * * tptr , unsigned int recursion_level )
{
bool addToText = false ;
const char * dtype ;
# ifndef SAVE_TO_DISC
message * body ;
# endif
message * aMessage = messages [ i ] ;
const int doPhishingScan = mctx - > ctx - > engine - > dboptions & CL_DB_PHISHING_URLS & & ( DCONF_PHISHING & PHISHING_CONF_ENGINE ) ;
# if HAVE_JSON
/* saveobj remembers the working JSON object so that it can be put back
 * after the recursive calls below have run with thisobj in its place */
json_object * thisobj = NULL , * saveobj = mctx - > wrkobj ;
if ( mctx - > wrkobj ! = NULL ) {
json_object * multiobj = cli_jsonarray ( mctx - > wrkobj , " Multipart " ) ;
/* NOTE(review): messageGetJObj is given aMessage before aMessage has
 * been checked for NULL - confirm that it copes with NULL */
if ( multiobj = = NULL ) {
cli_errmsg ( " Cannot get multipart preclass array \n " ) ;
} else if ( NULL = = ( thisobj = messageGetJObj ( aMessage ) ) ) {
cli_dbgmsg ( " Cannot get message preclass object \n " ) ;
} else if ( CL_SUCCESS ! = cli_json_addowner ( multiobj , thisobj , NULL , - 1 ) ) {
cli_errmsg ( " Cannot assign message preclass object to multipart preclass array \n " ) ;
}
}
# endif
if ( aMessage = = NULL ) {
# if HAVE_JSON
if ( thisobj ! = NULL )
cli_jsonstr ( thisobj , " MimeType " , " NULL " ) ;
# endif
return mainMessage ;
}
/* Something has already gone wrong (or been found): leave the part alone */
if ( * rc ! = OK )
return mainMessage ;
cli_dbgmsg ( " Mixed message part %d is of type %d \n " ,
i , messageGetMimeType ( aMessage ) ) ;
# if HAVE_JSON
if ( thisobj ! = NULL ) {
cli_jsonstr ( thisobj , " MimeType " , getMimeTypeStr ( messageGetMimeType ( aMessage ) ) ) ;
cli_jsonstr ( thisobj , " MimeSubtype " , messageGetMimeSubtype ( aMessage ) ) ;
cli_jsonstr ( thisobj , " EncodingType " , getEncTypeStr ( messageGetEncoding ( aMessage ) ) ) ;
cli_jsonstr ( thisobj , " Disposition " , messageGetDispositionType ( aMessage ) ) ;
if ( messageHasFilename ( aMessage ) ) {
char * filename = messageGetFilename ( aMessage ) ;
cli_jsonstr ( thisobj , " Filename " , filename ) ;
free ( filename ) ;
} else {
cli_jsonstr ( thisobj , " Filename " , " (inline) " ) ;
}
}
# endif
switch ( messageGetMimeType ( aMessage ) ) {
case APPLICATION :
case AUDIO :
case IMAGE :
case VIDEO :
/* nothing special: saved and scanned after the switch */
break ;
case NOMIME :
cli_dbgmsg ( " No mime headers found in multipart part %d \n " , i ) ;
if ( mainMessage ) {
if ( binhexBegin ( aMessage ) ) {
cli_dbgmsg ( " Found binhex message in multipart/mixed mainMessage \n " ) ;
if ( exportBinhexMessage ( mctx , mainMessage ) )
* rc = VIRUS ;
}
/* messageIn belongs to the caller, so it is only forgotten here */
if ( mainMessage ! = messageIn )
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else if ( aMessage ) {
if ( binhexBegin ( aMessage ) ) {
cli_dbgmsg ( " Found binhex message in multipart/mixed non mime part \n " ) ;
if ( exportBinhexMessage ( mctx , aMessage ) )
* rc = VIRUS ;
messageReset ( messages [ i ] ) ;
}
}
addToText = true ;
if ( messageGetBody ( aMessage ) = = NULL )
/*
* No plain text version
*/
cli_dbgmsg ( " No plain text alternative \n " ) ;
break ;
case TEXT :
dtype = messageGetDispositionType ( aMessage ) ;
cli_dbgmsg ( " Mixed message text part disposition \" %s \" \n " ,
dtype ) ;
if ( strcasecmp ( dtype , " attachment " ) = = 0 )
break ;
if ( ( * dtype = = ' \0 ' ) | | ( strcasecmp ( dtype , " inline " ) = = 0 ) ) {
const char * cptr ;
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
cptr = messageGetMimeSubtype ( aMessage ) ;
cli_dbgmsg ( " Mime subtype \" %s \" \n " , cptr ) ;
if ( ( tableFind ( mctx - > subtypeTable , cptr ) = = PLAIN ) & &
( messageGetEncoding ( aMessage ) = = NOENCODING ) ) {
/*
* Strictly speaking , a text / plain part
* is not an attachment . We pretend it
* is so that we can decode and scan it
*/
if ( ! messageHasFilename ( aMessage ) ) {
cli_dbgmsg ( " Adding part to main message \n " ) ;
addToText = true ;
} else
cli_dbgmsg ( " Treating inline as attachment \n " ) ;
} else {
const int is_html = ( tableFind ( mctx - > subtypeTable , cptr ) = = HTML ) ;
if ( doPhishingScan )
checkURLs ( aMessage , mctx , rc , is_html ) ;
messageAddArgument ( aMessage ,
" filename=mixedtextportion " ) ;
}
break ;
}
/* neither attachment nor inline: the part is left in messages[i] */
cli_dbgmsg ( " Text type %s is not supported \n " , dtype ) ;
return mainMessage ;
case MESSAGE :
/* Content-Type: message/rfc822 */
cli_dbgmsg ( " Found message inside multipart (encoding type %d) \n " ,
messageGetEncoding ( aMessage ) ) ;
# ifndef SCAN_UNENCODED_BOUNCES
switch ( messageGetEncoding ( aMessage ) ) {
case NOENCODING :
case EIGHTBIT :
case BINARY :
if ( encodingLine ( aMessage ) = = NULL ) {
/*
* This means that the message
* has no attachments
*
* The test for
* messageGetEncoding is needed
* since encodingLine won ' t have
* been set if the message
* itself has been encoded
*/
cli_dbgmsg ( " Unencoded multipart/message will not be scanned \n " ) ;
messageDestroy ( messages [ i ] ) ;
messages [ i ] = NULL ;
return mainMessage ;
}
/* FALLTHROUGH */
default :
cli_dbgmsg ( " Encoded multipart/message will be scanned \n " ) ;
}
# endif
# ifdef SAVE_TO_DISC
/*
* Save this embedded message
* to a temporary file
*/
if ( saveTextPart ( mctx , aMessage , 1 ) = = CL_VIRUS )
* rc = VIRUS ;
messageDestroy ( messages [ i ] ) ;
messages [ i ] = NULL ;
# else
/*
* Scan in memory , faster but is open to DoS attacks
* when many nested levels are involved .
*/
body = parseEmailHeaders ( aMessage , mctx - > rfc821Table ) ;
/*
* We ' ve finished with the
* original copy of the message ,
* so throw that away and
* deal with the encapsulated
* message as a message .
* This can save a lot of memory
*/
messageDestroy ( messages [ i ] ) ;
messages [ i ] = NULL ;
# if HAVE_JSON
mctx - > wrkobj = thisobj ;
# endif
if ( body ) {
messageSetCTX ( body , mctx - > ctx ) ;
* rc = parseEmailBody ( body , NULL , mctx , recursion_level + 1 ) ;
if ( ( * rc = = OK ) & & messageContainsVirus ( body ) )
* rc = VIRUS ;
messageDestroy ( body ) ;
}
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
# endif
return mainMessage ;
case MULTIPART :
/*
* It ' s a multi part within a multi part
* Run the message parser on this bit , it won ' t
* be an attachment
*/
cli_dbgmsg ( " Found multipart inside multipart \n " ) ;
# if HAVE_JSON
mctx - > wrkobj = thisobj ;
# endif
/* NOTE(review): aMessage was checked for NULL above, so the else
 * branch below looks unreachable - confirm */
if ( aMessage ) {
/*
* The headers were parsed when reading in the
* whole multipart section
*/
* rc = parseEmailBody ( aMessage , * tptr , mctx , recursion_level + 1 ) ;
cli_dbgmsg ( " Finished recursion, rc = %d \n " , ( int ) * rc ) ;
messageDestroy ( messages [ i ] ) ;
messages [ i ] = NULL ;
} else {
* rc = parseEmailBody ( NULL , NULL , mctx , recursion_level + 1 ) ;
if ( mainMessage & & ( mainMessage ! = messageIn ) ) {
messageDestroy ( mainMessage ) ;
}
mainMessage = NULL ;
}
# if HAVE_JSON
mctx - > wrkobj = saveobj ;
# endif
return mainMessage ;
default :
cli_dbgmsg ( " Only text and application attachments are fully supported, type = %d \n " ,
messageGetMimeType ( aMessage ) ) ;
/* fall through - we may be able to salvage something */
}
/* Reached by every case that used break: save the part and scan it */
if ( * rc ! = VIRUS ) {
fileblob * fb = messageToFileblob ( aMessage , mctx - > dir , 1 ) ;
# if HAVE_JSON
json_object * arrobj ;
# if (JSON_C_MAJOR_VERSION == 0) && (JSON_C_MINOR_VERSION < 13)
int arrlen = 0 ;
# else
size_t arrlen = 0 ;
# endif
if ( thisobj ! = NULL ) {
/* attempt to determine container size - prevents incorrect type reporting */
if ( json_object_object_get_ex ( mctx - > ctx - > wrkproperty , " ContainedObjects " , & arrobj ) ) {
arrlen = json_object_array_length ( arrobj ) ;
}
}
# endif
if ( fb ) {
/* aMessage doesn't always have a ctx set */
fileblobSetCTX ( fb , mctx - > ctx ) ;
if ( fileblobScanAndDestroy ( fb ) = = CL_VIRUS ) {
* rc = VIRUS ;
}
/* parts that are added to the text are not counted as files */
if ( ! addToText ) {
mctx - > files + + ;
}
}
# if HAVE_JSON
if ( thisobj ! = NULL ) {
json_object * entry = NULL ;
/* note: this dtype shadows the function-level dtype */
const char * dtype = NULL ;
/* attempt to acquire container type */
/* arrlen is the array length from before the scan, so a longer array
 * now has a new entry at index arrlen */
if ( json_object_object_get_ex ( mctx - > ctx - > wrkproperty , " ContainedObjects " , & arrobj ) ) {
if ( json_object_array_length ( arrobj ) > arrlen ) {
entry = json_object_array_get_idx ( arrobj , arrlen ) ;
}
}
if ( entry ) {
json_object_object_get_ex ( entry , " FileType " , & entry ) ;
if ( entry ) {
dtype = json_object_get_string ( entry ) ;
}
}
cli_jsonint ( thisobj , " ContainedObjectsIndex " , ( int32_t ) arrlen ) ;
cli_jsonstr ( thisobj , " ClamAVFileType " , dtype ? dtype : " UNKNOWN " ) ;
}
# endif
if ( messageContainsVirus ( aMessage ) ) {
* rc = VIRUS ;
}
}
messageDestroy ( aMessage ) ;
messages [ i ] = NULL ;
return mainMessage ;
}
/*
 * Returns the number of double quote characters in the given string
 */
static int
count_quotes(const char *buf)
{
    const char *p;
    int total = 0;

    for (p = buf; *p != '\0'; p++) {
        if (*p == '"')
            total++;
    }

    return total;
}
/*
* Will the next line be a folded header ? See RFC2822 section 2.2 .3
*/
static bool
next_is_folded_header ( const text * t )
{
const text * next = t - > t_next ;
const char * data , * ptr ;
if ( next = = NULL )
return false ;
if ( next - > t_line = = NULL )
return false ;
data = lineGetData ( next - > t_line ) ;
/*
* Section B .2 of RFC822 says TAB or SPACE means a continuation of the
* previous entry .
*/
if ( isblank ( data [ 0 ] ) )
return true ;
if ( strchr ( data , ' = ' ) = = NULL )
/*
* Avoid false positives with
* Content - Type : text / html ;
* Content - Transfer - Encoding : quoted - printable
*/
return false ;
/*
* Some are broken and don ' t fold headers lines
* correctly as per section 2.2 .3 of RFC2822 .
* Generally they miss the white space at
* the start of the fold line :
* Content - Type : multipart / related ;
* type = " multipart/alternative " ;
* boundary = " ----=_NextPart_000_006A_01C6AC47.348CB550 "
* should read :
* Content - Type : multipart / related ;
* type = " multipart/alternative " ;
* boundary = " ----=_NextPart_000_006A_01C6AC47.348CB550 "
* Since we ' re a virus checker not an RFC
* verifier we need to handle these
*/
data = lineGetData ( t - > t_line ) ;
ptr = strchr ( data , ' \0 ' ) ;
while ( - - ptr > data )
switch ( * ptr ) {
case ' ; ' :
return true ;
case ' \n ' :
case ' ' :
case ' \r ' :
case ' \t ' :
continue ; /* white space at end of line */
default :
return false ;
}
return false ;
}
/*
 * This routine is called on the first line of the body of
 * an email to handle broken messages that have newlines
 * in the middle of its headers.
 *
 * Returns true when the line starts with "Message-Id: " or "Date: ".
 */
static bool
newline_in_header(const char *line)
{
    cli_dbgmsg("newline_in_header, check \"%s\"\n", line);

    if ((strncmp(line, "Message-Id: ", 12) == 0) ||
        (strncmp(line, "Date: ", 6) == 0))
        return true;

    cli_dbgmsg("newline_in_header, returning \"%s\"\n", line);

    return false;
}