denyhosts/clamav/libclamav/matcher.h

399 lines
15 KiB
C
Raw Permalink Normal View History

2022-10-22 18:41:00 +08:00
/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Tomasz Kojm
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __MATCHER_H
#define __MATCHER_H
#include <sys/types.h>
#include "clamav.h"
#include "filetypes.h"
#include "others.h"
#include "execs.h"
/**
 * File metadata passed to cli_exp_eval() and cli_caloff().
 *
 * Lifecycle: set up with cli_targetinfo_init(), optionally filled in by
 * cli_targetinfo(), and always released with cli_targetinfo_destroy().
 */
struct cli_target_info {
off_t fsize; /* NOTE(review): presumably the size of the file being scanned -- confirm in matcher.c */
struct cli_exe_info exeinfo; /* executable header info, populated by cli_targetinfo() */
int status; /* 0 == not initialised, 1 == initialised OK, -1 == error */
};
/**
 * @brief Initialize a struct cli_target_info.
 *
 * After this call the structure is ready to have its exeinfo populated by
 * cli_targetinfo() and/or to be destroyed by cli_targetinfo_destroy().
 * cli_targetinfo() requires that this has been called first.
 *
 * @param info a pointer to the struct cli_target_info to initialize
 */
void cli_targetinfo_init(struct cli_target_info *info);
/**
 * @brief Free resources associated with a struct cli_target_info that was
 * initialized via cli_targetinfo_init().
 *
 * Must be called for every initialized structure, whatever value
 * info->status ended up with after cli_targetinfo().
 *
 * @param info a pointer to the struct cli_target_info to destroy
 */
void cli_targetinfo_destroy(struct cli_target_info *info);
#include "matcher-ac.h"
#include "matcher-bm.h"
#include "matcher-hash.h"
#include "matcher-pcre.h"
#include "matcher-byte-comp.h"
#include "regex_pcre.h"
#include "fmap.h"
#include "mpool.h"
// clang-format off
/*
 * Pattern-element flag values.
 *
 * Every value below lies in the high byte selected by CLI_MATCH_METADATA
 * (0xff00). CLI_MATCH_WILDCARD (0x0f00) selects the nibble that carries the
 * IGNORE / SPECIAL / NIBBLE_HIGH / NIBBLE_LOW codes (0x0100 - 0x0400), while
 * CLI_MATCH_NOCASE (0x1000) is a separate bit outside that nibble.
 *
 * NOTE(review): presumably applied to 16-bit pattern words whose low byte is
 * the literal character (CLI_MATCH_CHAR == no flags) -- confirm against the
 * AC matcher.
 */
#define CLI_MATCH_METADATA 0xff00
#define CLI_MATCH_WILDCARD 0x0f00
#define CLI_MATCH_CHAR 0x0000
#define CLI_MATCH_NOCASE 0x1000
#define CLI_MATCH_IGNORE 0x0100
#define CLI_MATCH_SPECIAL 0x0200
#define CLI_MATCH_NIBBLE_HIGH 0x0300
#define CLI_MATCH_NIBBLE_LOW 0x0400
/**
 * Tag naming the kind of a TDB value, as suggested by the enumerator names
 * (unsigned integer, range, string, second range form, file type, file type
 * expression). The numeric values are spelled out so that they stay stable if
 * an entry is ever inserted.
 */
typedef enum tdb_type {
    CLI_TDB_UINT       = 0,
    CLI_TDB_RANGE      = 1,
    CLI_TDB_STR        = 2,
    CLI_TDB_RANGE2     = 3,
    CLI_TDB_FTYPE      = 4,
    CLI_TDB_FTYPE_EXPR = 5
} tdb_type_t;
/**
 * Target description block embedded in every struct cli_ac_lsig (member
 * `tdb`).
 *
 * `val`, `range` and `str` are the writable backing stores; the const
 * pointers that follow are read-only views, one per TDB attribute.
 * NOTE(review): presumably each view points into one of the backing stores
 * and is NULL when the attribute is absent -- confirm in the signature
 * loader.
 */
struct cli_lsig_tdb {
    uint32_t *val, *range;
    char *str;
    tdb_type_t cnt[3]; /* NOTE(review): typed tdb_type_t although the name suggests counters -- confirm */
    uint32_t subsigs;

    const uint32_t *target;
    const uint32_t *engine, *nos, *ep, *filesize;
    const uint32_t *container, *handlertype;
    const uint32_t *intermediates;
    /*
    const uint32_t *sectoff, *sectrva, *sectvsz, *sectraw, *sectrsz,
                   *secturva, *sectuvsz, *secturaw, *sectursz;
    */
    const char *icongrp1, *icongrp2;
    uint32_t *macro_ptids;
#ifdef USE_MPOOL
    mpool_t *mempool;
#else
    void *_padding_mempool; /* placeholder so a pointer member occupies this slot in both configurations */
#endif
};
// clang-format on
/* Logical-signature flag bit. NOTE(review): presumably stored in cli_ac_lsig.flag -- confirm. */
#define CLI_LSIG_FLAG_PRIVATE 0x01
/**
 * Kind of entry stored in a struct cli_ac_lsig (member `type`): a regular
 * logical signature or one of two YARA flavours, per the enumerator names.
 */
typedef enum lsig_type {
    CLI_LSIG_NORMAL = 0,
    CLI_YARA_NORMAL = 1,
    CLI_YARA_OFFSET = 2
} lsig_type_t;
/* Forward declaration only; struct cli_bc is not defined in this header. */
struct cli_bc;
struct cli_ac_lsig {
uint32_t id;
unsigned bc_idx;
lsig_type_t type;
uint8_t flag;
union {
char *logic;
uint8_t *code_start;
} u;
2023-01-14 18:28:39 +08:00
char *virname;
2022-10-22 18:41:00 +08:00
struct cli_lsig_tdb tdb;
};
2023-01-14 18:28:39 +08:00
/* Opaque handle type; struct cli_matcher stores one in its `fuzzy_hashmap` member. */
typedef void *fuzzyhashmap_t;
2022-10-22 18:41:00 +08:00
struct cli_matcher {
unsigned int type;
/* Extended Boyer-Moore */
uint8_t *bm_shift;
struct cli_bm_patt **bm_suffix, **bm_pattab;
uint32_t *soff, soff_len; /* for PE section sigs */
uint32_t bm_offmode, bm_patterns, bm_reloff_num, bm_absoff_num;
/* HASH */
struct cli_hash_patt hm;
struct cli_hash_wild hwild;
/* Extended Aho-Corasick */
uint32_t ac_partsigs, ac_nodes, ac_lists, ac_patterns, ac_lsigs;
struct cli_ac_lsig **ac_lsigtable;
struct cli_ac_node *ac_root, **ac_nodetable;
struct cli_ac_list **ac_listtable;
struct cli_ac_patt **ac_pattable;
struct cli_ac_patt **ac_reloff;
uint32_t ac_reloff_num, ac_absoff_num;
uint8_t ac_mindepth, ac_maxdepth;
struct filter *filter;
uint16_t maxpatlen;
uint8_t ac_only;
/* Perl-Compiled Regular Expressions */
#if HAVE_PCRE
uint32_t pcre_metas;
struct cli_pcre_meta **pcre_metatable;
uint32_t pcre_reloff_num, pcre_absoff_num;
#endif
/* Byte Compare */
uint32_t bcomp_metas;
struct cli_bcomp_meta **bcomp_metatable;
2023-01-14 18:28:39 +08:00
/* Fuzzy Image Hash */
fuzzyhashmap_t fuzzy_hashmap;
2022-10-22 18:41:00 +08:00
/* Bytecode Tracker */
uint32_t linked_bcs;
2023-01-14 18:28:39 +08:00
/*Store pointers to malloced trans values so that they can be more easily freed*/
struct cli_ac_node ***trans_array;
size_t trans_cnt;
size_t trans_capacity;
2022-10-22 18:41:00 +08:00
#ifdef USE_MPOOL
mpool_t *mempool;
2023-01-14 18:28:39 +08:00
#else
void *_padding_mempool;
2022-10-22 18:41:00 +08:00
#endif
};
/**
 * One container-metadata signature record. Records are chained into a singly
 * linked list through `next`. The size, encryption, position and reserved
 * fields correspond to the parameters of cli_matchmeta().
 */
struct cli_cdb {
char *virname; /* virus name */
cli_file_t ctype; /* container type */
regex_t name; /* filename regex */
size_t csize[2]; /* container size (min, max); if csize[0] != csize[1]
* then value of 0 makes the field ignored
*/
size_t fsizec[2]; /* file size in container */
size_t fsizer[2]; /* real file size */
int encrypted; /* file is encrypted; 2 == ignore */
unsigned int filepos[2]; /* file position in container */
int res1; /* reserved / format specific */
void *res2; /* reserved / format specific */
struct cli_cdb *next; /* next record in the list */
};
2023-01-14 18:28:39 +08:00
/**
 * Signature target numbers. Each value is the `idx` of the row with the
 * quoted name in the cli_mtargets table, and there are CLI_MTARGETS (15)
 * values in total.
 */
typedef enum {
    TARGET_GENERIC  = 0,  /* "GENERIC"  */
    TARGET_PE       = 1,  /* "PE"       */
    TARGET_OLE2     = 2,  /* "OLE2"     */
    TARGET_HTML     = 3,  /* "HTML"     */
    TARGET_MAIL     = 4,  /* "MAIL"     */
    TARGET_GRAPHICS = 5,  /* "GRAPHICS" */
    TARGET_ELF      = 6,  /* "ELF"      */
    TARGET_ASCII    = 7,  /* "ASCII"    */
    TARGET_NOT_USED = 8,  /* "NOT USED" */
    TARGET_MACHO    = 9,  /* "MACH-O"   */
    TARGET_PDF      = 10, /* "PDF"      */
    TARGET_FLASH    = 11, /* "FLASH"    */
    TARGET_JAVA     = 12, /* "JAVA"     */
    TARGET_INTERNAL = 13, /* "INTERNAL" */
    TARGET_OTHER    = 14  /* "OTHER"    */
} cli_target_t;
2022-10-22 18:41:00 +08:00
#define CLI_MAX_TARGETS 10 /* maximum filetypes for a specific target */
struct cli_mtarget {
cli_file_t target[CLI_MAX_TARGETS];
const char *name;
2023-01-14 18:28:39 +08:00
cli_target_t idx; /* idx of matcher */
2022-10-22 18:41:00 +08:00
uint8_t ac_only;
uint8_t enable_prefiltering;
uint8_t target_count; /* must be synced with non-zero values in the target array */
};
#define CLI_MTARGETS 15
static const struct cli_mtarget cli_mtargets[CLI_MTARGETS] = {
/* All types for target, name, idx, ac_only, pre-filtering?, # of types */
2023-01-14 18:28:39 +08:00
{{CL_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "GENERIC", TARGET_GENERIC, 0, 1, 1},
{{CL_TYPE_MSEXE, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "PE", TARGET_PE, 0, 1, 1},
{{CL_TYPE_MSOLE2, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "OLE2", TARGET_OLE2, 1, 0, 1},
{{CL_TYPE_HTML, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "HTML", TARGET_HTML, 1, 0, 1},
{{CL_TYPE_MAIL, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "MAIL", TARGET_MAIL, 1, 1, 1},
{{CL_TYPE_GRAPHICS, CL_TYPE_GIF, CL_TYPE_PNG, CL_TYPE_JPEG, CL_TYPE_TIFF, 0, 0, 0, 0, 0}, "GRAPHICS", TARGET_GRAPHICS, 1, 0, 5},
{{CL_TYPE_ELF, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "ELF", TARGET_ELF, 1, 0, 1},
{{CL_TYPE_TEXT_ASCII, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "ASCII", TARGET_ASCII, 1, 1, 1},
{{CL_TYPE_ERROR, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "NOT USED", TARGET_NOT_USED, 1, 0, 1},
{{CL_TYPE_MACHO, CL_TYPE_MACHO_UNIBIN, 0, 0, 0, 0, 0, 0, 0, 0}, "MACH-O", TARGET_MACHO, 1, 0, 2},
{{CL_TYPE_PDF, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "PDF", TARGET_PDF, 1, 0, 1},
{{CL_TYPE_SWF, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "FLASH", TARGET_FLASH, 1, 0, 1},
{{CL_TYPE_JAVA, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "JAVA", TARGET_JAVA, 1, 0, 1},
{{CL_TYPE_INTERNAL, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "INTERNAL", TARGET_INTERNAL, 1, 0, 1},
{{CL_TYPE_OTHER, 0, 0, 0, 0, 0, 0, 0, 0, 0}, "OTHER", TARGET_OTHER, 1, 0, 1}};
2022-10-22 18:41:00 +08:00
// clang-format off
/*
 * Signature offset type codes.
 *
 * CLI_OFF_ANY and CLI_OFF_NONE are the two largest 32-bit values and so
 * cannot collide with the small codes 1-9.
 * NOTE(review): presumably written by cli_caloff() into its offdata output;
 * the abbreviations (EP, SL, SX, SE) are not expanded anywhere in this
 * header -- confirm against the signature format documentation.
 */
#define CLI_OFF_ANY 0xffffffff
#define CLI_OFF_NONE 0xfffffffe
#define CLI_OFF_ABSOLUTE 1
#define CLI_OFF_EOF_MINUS 2
#define CLI_OFF_EP_PLUS 3
#define CLI_OFF_EP_MINUS 4
#define CLI_OFF_SL_PLUS 5
#define CLI_OFF_SX_PLUS 6
#define CLI_OFF_VERSION 7
#define CLI_OFF_MACRO 8
#define CLI_OFF_SE 9
// clang-format on
/**
 * @brief Non-magic scan matching using a file buffer for input. Older API
 *
 * This function is lower-level than the *magic_scan* functions from scanners.
 * This function does not perform file type magic identification and does not use
 * the file format scanners.
 *
 * Unlike the similar functions `cli_scan_desc()` and `cli_scan_fmap()` (below),
 * this function:
 *
 * - REQUIRES a call to `cli_exp_eval()` after the match to evaluate logical
 *   signatures and yara rules.
 *
 * - Does NOT support filetype detection.
 *
 * - Does NOT perform hash-based matching.
 *
 * - Does NOT support AC, BM, or PCRE relative-offset signature matching.
 *
 * - DOES support passing in externally initialized AC matcher data
 *
 * @param buffer The buffer to be matched.
 * @param length The length of the buffer or amount of bytes to match.
 * @param offset Offset into the buffer from which to start matching.
 * @param ctx The scanning context.
 * @param ftype If specified, may limit signature matching trie by target type corresponding with the specified CL_TYPE
 * @param[in,out] acdata (optional) A list of pattern matching data structs to contain match results, one for generic signatures and one for target-specific signatures.
 *                       If not provided, the matcher results are lost, outside of this function's return value.
 *                       Required if you want to evaluate logical expressions afterwards.
 * @return cl_error_t
 */
cl_error_t cli_scan_buff(const unsigned char *buffer, uint32_t length, uint32_t offset, cli_ctx *ctx, cli_file_t ftype, struct cli_ac_data **acdata);
/**
* @brief Non-magic scan matching using a file descriptor for input.
*
2023-01-14 18:28:39 +08:00
* This function is lower-level than the *magic_scan* functions from scanners.
2022-10-22 18:41:00 +08:00
* This function does not perform file type magic identification and does not use
* the file format scanners.
*
2023-01-14 18:28:39 +08:00
* This function does signature matching for generic signatures, target-specific
* signatures, and file type recognition signatures to detect embedded files or
* to correct the current file type.
*
* This function is just a wrapper for `cli_scan_fmap()` that converts the file
* to an fmap and scans it.
2022-10-22 18:41:00 +08:00
*
2023-01-14 18:28:39 +08:00
* @param desc File descriptor to be used for input
* @param ctx The scanning context.
* @param ftype If specified, may limit signature matching trie by target type corresponding with the specified CL_TYPE
* @param filetype_only Boolean indicating if the scan is for file-type detection only.
* @param[out] ftoffset (optional) A list of file type signature matches with their corresponding offsets. If provided, will output the file type signature matches.
* @param acmode Use AC_SCAN_VIR and AC_SCAN_FT to set scanning modes.
* @param[out] acres A list of cli_ac_result AC pattern matching results.
* @param name (optional) Original name of the file (to set fmap name metadata)
* @param attributes Layer attributes for the thing to be scanned.
2022-10-22 18:41:00 +08:00
* @return cl_error_t
*/
2023-01-14 18:28:39 +08:00
cl_error_t cli_scan_desc(int desc, cli_ctx *ctx, cli_file_t ftype, bool filetype_only, struct cli_matched_type **ftoffset, unsigned int acmode, struct cli_ac_result **acres, const char *name, uint32_t attributes);
2022-10-22 18:41:00 +08:00
/**
* @brief Non-magic scan matching of the current fmap in the scan context. Newer API.
*
2023-01-14 18:28:39 +08:00
* This function is lower-level than the *magic_scan* functions from scanners.
* This function does not perform file type magic identification and does not use
* the file format scanners.
*
* This function does signature matching for generic signatures, target-specific
* signatures, and file type recognition signatures to detect embedded files or
* to correct the current file type.
*
2022-10-22 18:41:00 +08:00
* This API will invoke cli_exp_eval() for you.
*
2023-01-14 18:28:39 +08:00
* @param ctx The scanning context.
* @param ftype If specified, may limit signature matching trie by target type corresponding with the specified CL_TYPE
* @param filetype_only Boolean indicating if the scan is for file-type detection only.
* @param[out] ftoffset (optional) A list of file type signature matches with their corresponding offsets. If provided, will output the file type signature matches.
* @param acmode Use AC_SCAN_VIR and AC_SCAN_FT to set scanning modes.
* @param[out] acres A list of cli_ac_result AC pattern matching results.
* @param refhash MD5 hash of the current file, used to save time creating hashes and to limit scan recursion for the HandlerType logical signature FTM feature.
2022-10-22 18:41:00 +08:00
* @return cl_error_t
*/
2023-01-14 18:28:39 +08:00
cl_error_t cli_scan_fmap(cli_ctx *ctx, cli_file_t ftype, bool filetype_only, struct cli_matched_type **ftoffset, unsigned int acmode, struct cli_ac_result **acres, unsigned char *refhash);
2022-10-22 18:41:00 +08:00
/**
 * @brief Evaluate logical signatures and yara rules given the AC matching results
 * from cli_scan_buff() / matcher_run().
 *
 * Callers of cli_scan_buff() must call this themselves after matching;
 * cli_scan_fmap() invokes it on the caller's behalf.
 *
 * @param ctx The scanning context.
 * @param root The AC trie root to match with.
 * @param acdata AC match results for a specific AC trie.
 * @param target_info File metadata used to evaluate logical sig and yara rule options.
 * @param hash Reference hash of the current file, used to limit recursion for the HandlerType logical signature FTM feature.
 * @return cl_error_t
 */
cl_error_t cli_exp_eval(cli_ctx *ctx, struct cli_matcher *root, struct cli_ac_data *acdata, struct cli_target_info *target_info, const char *hash);
/**
 * @brief Process a signature offset string.
 *
 * NOTE(review): this description is inferred from the prototype alone; confirm
 * every detail against the implementation before relying on it.
 *
 * @param offstr Offset string to process.
 * @param info Target metadata (read-only). TODO confirm whether NULL is accepted.
 * @param target Target type number; presumably one of the cli_target_t values.
 * @param[out] offdata Presumably receives the CLI_OFF_* type code and its arguments -- confirm layout and required size.
 * @param[out] offset_min Presumably receives the lowest matching offset -- confirm.
 * @param[out] offset_max Presumably receives the highest matching offset -- confirm.
 * @return cl_error_t
 */
cl_error_t cli_caloff(const char *offstr, const struct cli_target_info *info, unsigned int target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max);
/**
 * @brief Determine if an alert is a known false positive, using each fmap in the ctx->container stack to check MD5, SHA1, and SHA256 hashes.
 *
 * @param ctx The scanning context.
 * @param vname (Optional) The name of the signature alert.
 * @return cl_error_t CL_CLEAN If an allow-list hash matches with one of the fmap hashes in the scan recursion stack.
 *                    CL_VIRUS If no allow-list hash matches.
 */
cl_error_t cli_check_fp(cli_ctx *ctx, const char *vname);
/**
 * @brief Match the metadata of a file inside a container.
 *
 * The parameters after `fname` carry the same names as the fields of
 * struct cli_cdb.
 * NOTE(review): presumably the values are compared against the loaded
 * cli_cdb records -- confirm against the implementation.
 *
 * @param ctx The scanning context.
 * @param fname File name. Presumably tested against the cli_cdb `name` regex -- confirm.
 * @param fsizec File size in container.
 * @param fsizer Real file size.
 * @param encrypted Whether the file is encrypted.
 * @param filepos File position in container.
 * @param res1 Reserved / format specific.
 * @param res2 Reserved / format specific.
 * @return cl_error_t
 */
cl_error_t cli_matchmeta(cli_ctx *ctx, const char *fname, size_t fsizec, size_t fsizer, int encrypted, unsigned int filepos, int res1, void *res2);
/**
 * @brief Parse the executable headers and, if successful, populate exeinfo
 *
 * If target refers to a supported executable file type, the exe header
 * will be parsed and, if successful, info->status will be set to 1.
 * If parsing the exe header fails, info->status will be set to -1.
 * The caller MUST destroy info via a call to cli_targetinfo_destroy
 * regardless of what info->status is set to.
 *
 * @param info A structure to populate with info from the exe header. This
 *             MUST be initialized via cli_targetinfo_init prior to calling
 * @param target the target executable file type. Possible values are:
 *               - 1 (TARGET_PE)    - PE32 / PE32+
 *               - 6 (TARGET_ELF)   - ELF
 *               - 9 (TARGET_MACHO) - MachO
 * @param ctx The current scan context
 */
void cli_targetinfo(struct cli_target_info *info, unsigned int target, cli_ctx *ctx);
#endif