denyhosts/clamav/libclamav/ole2_extract.c

2627 lines
91 KiB
C
Raw Normal View History

2022-10-22 18:41:00 +08:00
/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Trog
*
* Summary: Extract component parts of OLE2 files (e.g. MS Office Documents).
*
* Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <conv.h>
#include <zlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdbool.h>
#include "clamav.h"
#include "others.h"
#include "hwp.h"
#include "ole2_extract.h"
2023-01-14 18:28:39 +08:00
#include "xlm_extract.h"
2022-10-22 18:41:00 +08:00
#include "scanners.h"
#include "fmap.h"
#include "json_api.h"
#if HAVE_JSON
#include "msdoc.h"
#endif
2023-01-14 18:28:39 +08:00
#include "rijndael.h"
#include "ole2_encryption.h"
2022-10-22 18:41:00 +08:00
/*
 * ole2_listmsg(): extra tracing used by the property-tree walker.
 * When DEBUG_OLE2_LIST is defined it forwards its arguments to cli_dbgmsg();
 * otherwise it expands to an empty statement.
 */
#ifdef DEBUG_OLE2_LIST
#define ole2_listmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define ole2_listmsg(...) ;
#endif
/*
 * Wrappers around le16_to_host()/le32_to_host() that first cast the argument
 * to the matching fixed-width unsigned type.
 */
#define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v))
#define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v))
2023-01-14 18:28:39 +08:00
/* Wrapper around le64_to_host() that first casts the argument to uint64_t. */
#define ole2_endian_convert_64(v) le64_to_host((uint64_t)(v))
2022-10-22 18:41:00 +08:00
/*
 * Structure packing setup for the on-disk layouts declared below.
 * Without HAVE_ATTRIB_PACKED every __attribute__((...)) annotation is
 * compiled away and packing relies on whichever pragma form is available.
 */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif
#ifdef HAVE_PRAGMA_PACK
#pragma pack(1)
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack 1
#endif
2023-01-14 18:28:39 +08:00
// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/05060311-bfce-4b12-874d-71fd4ce63aea
2022-10-22 18:41:00 +08:00
typedef struct ole2_header_tag {
unsigned char magic[8]; /* should be: 0xd0cf11e0a1b11ae1 */
unsigned char clsid[16];
uint16_t minor_version __attribute__((packed));
uint16_t dll_version __attribute__((packed));
int16_t byte_order __attribute__((packed)); /* -2=intel */
uint16_t log2_big_block_size __attribute__((packed)); /* usually 9 (2^9 = 512) */
uint32_t log2_small_block_size __attribute__((packed)); /* usually 6 (2^6 = 64) */
int32_t reserved[2] __attribute__((packed));
int32_t bat_count __attribute__((packed));
int32_t prop_start __attribute__((packed));
uint32_t signature __attribute__((packed));
uint32_t sbat_cutoff __attribute__((packed)); /* cutoff for files held
2023-01-14 18:28:39 +08:00
* in small blocks
* (4096) */
2022-10-22 18:41:00 +08:00
int32_t sbat_start __attribute__((packed));
int32_t sbat_block_count __attribute__((packed));
int32_t xbat_start __attribute__((packed));
int32_t xbat_count __attribute__((packed));
int32_t bat_array[109] __attribute__((packed));
/*
2023-01-14 18:28:39 +08:00
* The following is not part of the ole2 header, but stuff we need in
* order to decode.
*
* IMPORANT: These must take account of the size of variables below here
* when calculating hdr_size to read the header.
*
* See the top of cli_ole2_extract().
2022-10-22 18:41:00 +08:00
*/
int32_t sbat_root_start __attribute__((packed));
uint32_t max_block_no;
2023-01-14 18:28:39 +08:00
size_t m_length;
2022-10-22 18:41:00 +08:00
bitset_t *bitset;
struct uniq *U;
fmap_t *map;
2023-01-14 18:28:39 +08:00
bool has_vba;
bool has_xlm;
bool has_image;
hwp5_header_t *is_hwp; // This value MUST be last in this structure,
// otherwise you will get short file reads.
2022-10-22 18:41:00 +08:00
} ole2_header_t;
2023-01-14 18:28:39 +08:00
/*
* DirectoryEntry
*
* https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/60fe8611-66c3-496b-b70d-a504c94c9ace
*/
2022-10-22 18:41:00 +08:00
/*
 * One property (directory entry) as laid out in the file.
 * ole2_walk_property_tree() reads four of these per 512-byte block and
 * converts the multi-byte members to host byte order before using them.
 */
typedef struct property_tag {
char name[64]; /* in unicode */
uint16_t name_size __attribute__((packed));
unsigned char type; /* 1=dir 2=file 5=root */
unsigned char color; /* black or red */
/* prev/next/child are property indices; the walker treats -1 as "none" */
uint32_t prev __attribute__((packed));
uint32_t next __attribute__((packed));
uint32_t child __attribute__((packed));
unsigned char clsid[16];
uint32_t user_flags __attribute__((packed));
uint32_t create_lowdate __attribute__((packed));
uint32_t create_highdate __attribute__((packed));
uint32_t mod_lowdate __attribute__((packed));
uint32_t mod_highdate __attribute__((packed));
/* first block of the entry's data and its length in bytes */
uint32_t start_block __attribute__((packed));
uint32_t size __attribute__((packed));
unsigned char reserved[4];
} property_t;
/*
 * Minimal singly linked LIFO list of uint32_t values.
 * ole2_walk_property_tree() uses it as its work list of property indices.
 */
struct ole2_list_node;
/* One list element: the stored value and a link to the element below it. */
typedef struct ole2_list_node {
uint32_t Val;
struct ole2_list_node *Next;
} ole2_list_node_t;
/* List head plus a running element count. */
typedef struct ole2_list {
uint32_t Size;
ole2_list_node_t *Head;
} ole2_list_t;
/* Reset a list to empty (does not free anything). */
int ole2_list_init(ole2_list_t *list);
/* Non-zero when the list has no elements. */
int ole2_list_is_empty(ole2_list_t *list);
/* Number of elements currently stored. */
uint32_t ole2_list_size(ole2_list_t *list);
/* Push val on top of the list; CL_SUCCESS or CL_EMEM. */
int ole2_list_push(ole2_list_t *list, uint32_t val);
/* Remove and return the top value; returns (uint32_t)-1 when the list is empty. */
uint32_t ole2_list_pop(ole2_list_t *list);
/* Pop and free every element. */
int ole2_list_delete(ole2_list_t *list);
/* Put a list into the empty state: no head node and a zero element count. */
int ole2_list_init(ole2_list_t *list)
{
    list->Size = 0;
    list->Head = NULL;

    return CL_SUCCESS;
}
/* Return 1 when the list has no head node, 0 otherwise. */
int ole2_list_is_empty(ole2_list_t *list)
{
    if (list->Head != NULL) {
        return 0;
    }
    return 1;
}
/* Report the element count tracked in the list header. */
uint32_t
ole2_list_size(ole2_list_t *list)
{
    const uint32_t count = list->Size;

    return count;
}
/*
 * Push a value on top of the list.
 *
 * Returns CL_SUCCESS, or CL_EMEM when the node cannot be allocated
 * (CLI_MALLOC jumps to the done label in that case).
 */
int ole2_list_push(ole2_list_t *list, uint32_t val)
{
    int rc                 = CL_EMEM;
    ole2_list_node_t *node = NULL;

    CLI_MALLOC(node, sizeof(ole2_list_node_t),
               cli_dbgmsg("OLE2: could not allocate new node for worklist!\n"));

    /* Link the new node in front of the current head. */
    node->Next = list->Head;
    node->Val  = val;
    list->Head = node;
    list->Size += 1;

    rc = CL_SUCCESS;

done:
    return rc;
}
/*
 * Remove the top node and return its value.
 *
 * Popping an empty list logs a debug message and returns (uint32_t)-1.
 */
uint32_t
ole2_list_pop(ole2_list_t *list)
{
    ole2_list_node_t *top;
    uint32_t popped;

    if (ole2_list_is_empty(list)) {
        cli_dbgmsg("OLE2: work list is empty and ole2_list_pop() called!\n");
        return (uint32_t)-1;
    }

    top        = list->Head;
    popped     = top->Val;
    list->Head = top->Next;
    free(top);
    list->Size -= 1;

    return popped;
}
/* Free every node of the list, leaving it empty. Always returns CL_SUCCESS. */
int ole2_list_delete(ole2_list_t *list)
{
    ole2_list_node_t *node = list->Head;

    while (node != NULL) {
        ole2_list_node_t *below = node->Next;

        free(node);
        list->Head = below;
        list->Size -= 1;
        node = below;
    }

    return CL_SUCCESS;
}
#ifdef HAVE_PRAGMA_PACK
#pragma pack()
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack
#endif
static unsigned char magic_id[] = {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
/*
 * Build a printable, lower-cased name from a 2-bytes-per-character property name.
 *
 * For every 2-byte unit except the trailing one:
 *   - printable 7-bit character with a zero second byte: its lower-case form;
 *   - first byte 0..9 with a zero second byte: "_<digit>_";
 *   - anything else: "_" followed by five letters 'a'..'p' and a closing "_".
 *     The letters encode the nibbles of the 16-bit value at shifts
 *     0, 4, 8, 16 and 24. The last two shifts exceed the value's width and
 *     therefore always yield 'a'; they are kept so produced names stay the same.
 *
 * Returns a heap string the caller must free, or NULL when the name starts
 * with two zero bytes, size is outside 1..128, allocation fails, or the
 * result is empty.
 */
char *
cli_ole2_get_property_name2(const char *name, int size)
{
    static const unsigned int nibble_shift[5] = {0, 4, 8, 16, 24};
    char *out                                 = NULL;
    int in, pos;
    size_t k;

    if ((name[0] == 0 && name[1] == 0) || size <= 0 || size > 128) {
        return NULL;
    }

    /* Worst case is 7 output characters per 2 input bytes. */
    CLI_MALLOC(out, size * 7,
               cli_errmsg("OLE2 [cli_ole2_get_property_name2]: Unable to allocate memory for newname: %u\n", size * 7));

    pos = 0;
    /* stop two bytes early to ignore the trailing NULL */
    for (in = 0; in + 2 < size; in += 2) {
        const char first  = name[in];
        const char second = name[in + 1];

        if (!(first & 0x80) && isprint(first) && second == 0) {
            out[pos++] = tolower(first);
            continue;
        }

        if (first >= 0 && first < 10 && second == 0) {
            out[pos++] = '_';
            out[pos++] = first + '0';
        } else {
            const uint16_t x = (((uint16_t)first) << 8) | second;

            out[pos++] = '_';
            for (k = 0; k < sizeof(nibble_shift) / sizeof(nibble_shift[0]); k++) {
                out[pos++] = 'a' + ((x >> nibble_shift[k]) & 0xF);
            }
        }
        out[pos++] = '_';
    }
    out[pos] = '\0';

    if (out[0] == '\0') {
        free(out);
        out = NULL;
    }

done:
    return out;
}
/*
 * Decode a property name whose 16-bit units, after subtracting 0x3800, pack
 * up to two 6-bit indices into a 64-character alphabet.
 *
 * The last 16-bit unit of the name is not decoded. If any unit falls outside
 * the expected range (> 0x1040 after the subtraction) the partial result is
 * discarded and cli_ole2_get_property_name2() is used instead.
 *
 * Returns a heap string the caller must free, or NULL when size holds no
 * complete 16-bit unit or allocation fails.
 */
static char *
get_property_name(char *name, int size)
{
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz._";
    char *decoded                = NULL;
    char *out                    = NULL;
    char *in                     = name;
    int left;

    if ((size >> 1) <= 0) {
        return NULL;
    }

    CLI_MALLOC(decoded, size,
               cli_errmsg("OLE2 [get_property_name]: Unable to allocate memory for newname %u\n", size));
    out = decoded;

    /* 'left' counts the units still to decode, including the current one. */
    for (left = (size >> 1) - 1; left != 0; left--) {
        uint16_t u = cli_readint16(in) - 0x3800;

        in += 2;
        if (u > 0x1040) {
            FREE(decoded);
            return cli_ole2_get_property_name2(name, size);
        }

        *out++ = alphabet[u % 64];
        u >>= 6;
        /* On the final unit a high part of exactly 64 means "no second character". */
        if (left != 1 || u != 64) {
            *out++ = alphabet[u % 64];
        }
    }
    *out = '\0';

done:
    return decoded;
}
/*
 * Emit one debug line describing a property: decoded name, entry type,
 * colour, size and user flags. Properties whose name_size exceeds 64 only
 * produce an error note.
 */
static void
print_ole2_property(property_t *property)
{
    char line[128];
    const char *kind;
    const char *shade;
    char *decoded;

    if (property->name_size > 64) {
        cli_dbgmsg("[err name len: %d]\n", property->name_size);
        return;
    }

    if (property->type == 2) {
        kind = " [file] ";
    } else if (property->type == 1) {
        kind = " [dir ] ";
    } else if (property->type == 5) {
        kind = " [root] ";
    } else {
        kind = " [unkn] ";
    }

    if (property->color == 0) {
        shade = " r ";
    } else if (property->color == 1) {
        shade = " b ";
    } else {
        shade = " u ";
    }

    decoded = get_property_name(property->name, property->name_size);
    snprintf(line, sizeof(line), "OLE2: %s ", decoded ? decoded : "<noname>");
    line[sizeof(line) - 1] = '\0';
    if (decoded) {
        free(decoded);
    }

    /* Append only as much as still fits in the fixed-size line buffer. */
    strncat(line, kind, sizeof(line) - 1 - strlen(line));
    strncat(line, shade, sizeof(line) - 1 - strlen(line));
    line[sizeof(line) - 1] = '\0';

    cli_dbgmsg("%s size:0x%.8x flags:0x%.8x\n", line, property->size, property->user_flags);
}
/*
 * Dump the header fields through cli_dbgmsg().
 * Does nothing for a NULL header or when cli_debug_flag is not set.
 */
static void
print_ole2_header(ole2_header_t *hdr)
{
    const unsigned char *m;
    const unsigned char *c;

    if (hdr == NULL) {
        return;
    }
    if (!cli_debug_flag) {
        return;
    }

    m = hdr->magic;
    c = hdr->clsid;

    cli_dbgmsg("\n");
    cli_dbgmsg("Magic:\t\t\t0x%x%x%x%x%x%x%x%x\n",
               m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7]);
    cli_dbgmsg("CLSID:\t\t\t{%x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x}\n",
               c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7],
               c[8], c[9], c[10], c[11], c[12], c[13], c[14], c[15]);
    cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
    cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
    cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
    cli_dbgmsg("Big Block Size:\t%i\n", hdr->log2_big_block_size);
    cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
    cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
    cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
    cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
    cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
    cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
    cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
    cli_dbgmsg("XBat block count:\t%d\n", hdr->xbat_count);
    cli_dbgmsg("\n");
}
2023-01-14 18:28:39 +08:00
/*
 * Copy `size` bytes of big block `blockno` into `buff`.
 *
 * The block's file offset is (blockno << log2_big_block_size) plus the larger
 * of 512 (the header size) and one big block. Block numbers whose offset
 * would not fit below INT32_MAX are pinned to the very end of that range,
 * which then fails the length check unless the data really is that large.
 *
 * A block that starts inside the data but extends past its end is
 * zero-filled first and only the available bytes are copied.
 *
 * Returns true on success, false for a negative block number, an offset at
 * or beyond the data length, or a failed map read.
 */
static bool ole2_read_block(ole2_header_t *hdr, void *buff, size_t size, int32_t blockno)
{
    const void *src;
    size_t start, end;

    if (blockno < 0) {
        return false;
    }

    /* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */
    if (((uint64_t)blockno << hdr->log2_big_block_size) >= (INT32_MAX - MAX(512, (uint64_t)1 << hdr->log2_big_block_size))) {
        start = INT32_MAX - size;
        end   = INT32_MAX;
    } else {
        /* 512 is header size */
        start = (blockno << hdr->log2_big_block_size) + MAX(512, 1 << hdr->log2_big_block_size);
        end   = start + size;
    }

    if ((end == 0) || (start >= hdr->m_length)) {
        return false;
    }
    if (end > hdr->m_length) {
        /* bb#11369 - ole2 files may not be a block multiple in size */
        memset(buff, 0, size);
        size = hdr->m_length - start;
    }

    src = fmap_need_off_once(hdr->map, start, size);
    if (!src) {
        return false;
    }

    memcpy(buff, src, size);
    return true;
}
/*
 * Look up the successor of `current_block` in the BAT sectors listed directly
 * in the header's bat_array.
 *
 * Each BAT sector is read as 128 32-bit entries, so entry current_block lives
 * in BAT sector current_block / 128 at slot current_block % 128.
 *
 * Returns the next block number, -1 when current_block is negative or the
 * BAT sector cannot be read, or -10 when the BAT sector index is not below
 * hdr->bat_count (or would fall outside bat_array).
 */
static int32_t
ole2_get_next_bat_block(ole2_header_t *hdr, int32_t current_block)
{
    const int32_t bat_array_len = (int32_t)(sizeof(hdr->bat_array) / sizeof(hdr->bat_array[0]));
    int32_t bat_array_index;
    uint32_t bat[128];

    if (current_block < 0) {
        return -1;
    }

    bat_array_index = current_block / 128;
    /* Valid indices are 0 .. bat_count-1, and never beyond the header array. */
    if (bat_array_index >= hdr->bat_count || bat_array_index >= bat_array_len) {
        cli_dbgmsg("bat_array index error\n");
        return -10;
    }

    if (!ole2_read_block(hdr, &bat, 512,
                         ole2_endian_convert_32(hdr->bat_array[bat_array_index]))) {
        return -1;
    }

    return ole2_endian_convert_32(bat[current_block % 128]);
}
/*
 * Look up the successor of `current_block` when its BAT sector is referenced
 * from the XBAT chain rather than from the header's 109-entry array.
 *
 * Returns the next block number, or -1 when current_block is negative or any
 * of the required blocks cannot be read.
 */
static int32_t
ole2_get_next_xbat_block(ole2_header_t *hdr, int32_t current_block)
{
    uint32_t xbat[128], bat[128];
    int32_t bat_sector, hops, slot;

    if (current_block < 0) {
        return -1;
    }

    bat_sector = current_block / 128;

    /*
     * NB: The last entry in each XBAT points to the next XBAT block.
     * This reduces the number of entries in each block by 1.
     */
    hops = (bat_sector - 109) / 127;
    slot = (bat_sector - 109) % 127;

    if (!ole2_read_block(hdr, &xbat, 512, hdr->xbat_start)) {
        return -1;
    }

    /* Follow the chain of XBAT blocks */
    for (; hops > 0; hops--) {
        if (!ole2_read_block(hdr, &xbat, 512,
                             ole2_endian_convert_32(xbat[127]))) {
            return -1;
        }
    }

    if (!ole2_read_block(hdr, &bat, 512, ole2_endian_convert_32(xbat[slot]))) {
        return -1;
    }

    return ole2_endian_convert_32(bat[current_block % 128]);
}
/*
 * Return the block that follows `current_block` in its chain.
 *
 * BAT sectors 0..108 are resolved through the header array; higher ones go
 * through the XBAT chain. A negative input yields -1.
 */
static int32_t
ole2_get_next_block_number(ole2_header_t *hdr, int32_t current_block)
{
    if (current_block < 0) {
        return -1;
    }

    if ((current_block / 128) <= 108) {
        return ole2_get_next_bat_block(hdr, current_block);
    }

    return ole2_get_next_xbat_block(hdr, current_block);
}
/*
 * Return the small block that follows `current_block`.
 *
 * Starting at hdr->sbat_start, the big-block chain is followed
 * current_block / 128 times, and entry current_block % 128 of the block
 * reached is returned. Returns -1 for a negative input or a failed read.
 */
static int32_t
ole2_get_next_sbat_block(ole2_header_t *hdr, int32_t current_block)
{
    uint32_t sbat[128];
    int32_t hops, sector;

    if (current_block < 0) {
        return -1;
    }

    sector = hdr->sbat_start;
    for (hops = current_block / 128; hops > 0; hops--) {
        sector = ole2_get_next_block_number(hdr, sector);
    }

    if (!ole2_read_block(hdr, &sbat, 512, sector)) {
        return -1;
    }

    return ole2_endian_convert_32(sbat[current_block % 128]);
}
/* Retrieve the block containing the data for the given sbat index */
2023-01-14 18:28:39 +08:00
static bool ole2_get_sbat_data_block(ole2_header_t *hdr, void *buff, int32_t sbat_index)
2022-10-22 18:41:00 +08:00
{
int32_t block_count, current_block;
if (sbat_index < 0) {
2023-01-14 18:28:39 +08:00
return false;
2022-10-22 18:41:00 +08:00
}
if (hdr->sbat_root_start < 0) {
cli_dbgmsg("No root start block\n");
2023-01-14 18:28:39 +08:00
return false;
2022-10-22 18:41:00 +08:00
}
block_count = sbat_index / (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size));
current_block = hdr->sbat_root_start;
while (block_count > 0) {
current_block = ole2_get_next_block_number(hdr, current_block);
block_count--;
}
/*
* current_block now contains the block number of the sbat array
* containing the entry for the required small block
*/
return (ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block));
}
2023-01-14 18:28:39 +08:00
/**
 * @brief File handler for use when walking ole2 property trees.
 *
 * ole2_walk_property_tree() calls the handler once for each file (type 2)
 * property it accepts; any result other than CL_SUCCESS stops the walk and
 * is returned to the walker's caller.
 *
 * @param hdr         The ole2 header metadata
 * @param prop        The property
 * @param dir         (optional) directory to write temp files to.
 * @param ctx         The scan context
 * @param handler_ctx (optional) Context needed by the handler
 * @return cl_error_t
 */
typedef cl_error_t ole2_walk_property_tree_file_handler(ole2_header_t *hdr,
property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx);
/* Handlers defined later in this file. */
static cl_error_t handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx);
static cl_error_t handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx);
static cl_error_t handler_otf_encrypted(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx);
static cl_error_t handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx);
/**
* @brief Walk an ole2 property tree, calling the handler for each file found
*
* @param hdr The ole2 header metadata (an ole2-specific context struct)
* @param dir (optional) directory to write temp files to, passed to the handler.
* @param prop_index Index of the property being walked, to be recorded with a pointer to the root node in an ole2 node list.
* @param handler The file handler to call when a file is found.
* @param rec_level The recursion level. Max is 100.
* @param[in,out] file_count A running count of the total # of files. Max is 100000.
* @param ctx The scan context
* @param[in,out] scansize A running sum of the file sizes processed.
* @return int
*/
static int ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index,
ole2_walk_property_tree_file_handler handler,
unsigned int rec_level, unsigned int *file_count,
cli_ctx *ctx, unsigned long *scansize, void *handler_ctx)
2022-10-22 18:41:00 +08:00
{
property_t prop_block[4];
int32_t idx, current_block, i, curindex;
char *dirname;
ole2_list_t node_list;
2023-01-14 18:28:39 +08:00
cl_error_t ret;
2022-10-22 18:41:00 +08:00
#if HAVE_JSON
char *name;
int toval = 0;
#endif
ole2_listmsg("ole2_walk_property_tree() called\n");
ole2_list_init(&node_list);
ole2_listmsg("rec_level: %d\n", rec_level);
ole2_listmsg("file_count: %d\n", *file_count);
if ((rec_level > 100) || (*file_count > 100000)) {
return CL_SUCCESS;
}
if (ctx && ctx->engine->max_recursion_level && (rec_level > ctx->engine->max_recursion_level)) {
// Note: engine->max_recursion_level is re-purposed here out of convenience.
// ole2 recursion does not leverage the ctx->recursion_stack stack.
cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", ctx->engine->max_recursion_level);
2023-01-14 18:28:39 +08:00
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion");
2022-10-22 18:41:00 +08:00
return CL_EMAXREC;
}
2023-01-14 18:28:39 +08:00
// push the 'root' node for the level onto the local list
2022-10-22 18:41:00 +08:00
if ((ret = ole2_list_push(&node_list, prop_index)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
while (!ole2_list_is_empty(&node_list)) {
ole2_listmsg("within working loop, worklist size: %d\n", ole2_list_size(&node_list));
#if HAVE_JSON
if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return CL_ETIMEOUT;
}
#endif
current_block = hdr->prop_start;
2023-01-14 18:28:39 +08:00
// pop off a node to work on
2022-10-22 18:41:00 +08:00
curindex = ole2_list_pop(&node_list);
ole2_listmsg("current index: %d\n", curindex);
if ((curindex < 0) || (curindex > (int32_t)hdr->max_block_no)) {
continue;
}
2023-01-14 18:28:39 +08:00
// read in the sector referenced by the current index
2022-10-22 18:41:00 +08:00
idx = curindex / 4;
for (i = 0; i < idx; i++) {
current_block = ole2_get_next_block_number(hdr, current_block);
if (current_block < 0) {
continue;
}
}
idx = curindex % 4;
if (!ole2_read_block(hdr, prop_block, 512, current_block)) {
continue;
}
if (prop_block[idx].type <= 0) {
continue;
}
ole2_listmsg("reading prop block\n");
prop_block[idx].name_size = ole2_endian_convert_16(prop_block[idx].name_size);
prop_block[idx].prev = ole2_endian_convert_32(prop_block[idx].prev);
prop_block[idx].next = ole2_endian_convert_32(prop_block[idx].next);
prop_block[idx].child = ole2_endian_convert_32(prop_block[idx].child);
prop_block[idx].user_flags = ole2_endian_convert_32(prop_block[idx].user_flags);
prop_block[idx].create_lowdate = ole2_endian_convert_32(prop_block[idx].create_lowdate);
prop_block[idx].create_highdate = ole2_endian_convert_32(prop_block[idx].create_highdate);
prop_block[idx].mod_lowdate = ole2_endian_convert_32(prop_block[idx].mod_lowdate);
prop_block[idx].mod_highdate = ole2_endian_convert_32(prop_block[idx].mod_highdate);
prop_block[idx].start_block = ole2_endian_convert_32(prop_block[idx].start_block);
prop_block[idx].size = ole2_endian_convert_32(prop_block[idx].size);
ole2_listmsg("printing ole2 property\n");
if (dir)
print_ole2_property(&prop_block[idx]);
ole2_listmsg("checking bitset\n");
/* Check we aren't in a loop */
if (cli_bitset_test(hdr->bitset, (unsigned long)curindex)) {
/* Loop in property tree detected */
cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", curindex);
ole2_list_delete(&node_list);
return CL_BREAK;
}
ole2_listmsg("setting bitset\n");
if (!cli_bitset_set(hdr->bitset, (unsigned long)curindex)) {
continue;
}
ole2_listmsg("prev: %d next %d child %d\n", prop_block[idx].prev, prop_block[idx].next, prop_block[idx].child);
ole2_listmsg("node type: %d\n", prop_block[idx].type);
switch (prop_block[idx].type) {
case 5: /* Root Entry */
ole2_listmsg("root node\n");
if ((curindex != 0) || (rec_level != 0) ||
(*file_count != 0)) {
/* Can only have RootEntry as the top */
cli_dbgmsg("ERROR: illegal Root Entry\n");
continue;
}
hdr->sbat_root_start = prop_block[idx].start_block;
if ((int)(prop_block[idx].child) != -1) {
2023-01-14 18:28:39 +08:00
ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize, handler_ctx);
2022-10-22 18:41:00 +08:00
if (ret != CL_SUCCESS) {
2023-01-14 18:28:39 +08:00
ole2_list_delete(&node_list);
return ret;
2022-10-22 18:41:00 +08:00
}
}
if ((int)(prop_block[idx].prev) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
if ((int)(prop_block[idx].next) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
break;
case 2: /* File */
ole2_listmsg("file node\n");
if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) {
cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles);
2023-01-14 18:28:39 +08:00
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
2022-10-22 18:41:00 +08:00
ole2_list_delete(&node_list);
return CL_EMAXFILES;
}
if (!ctx || !(ctx->engine->maxfilesize) || prop_block[idx].size <= ctx->engine->maxfilesize || prop_block[idx].size <= *scansize) {
(*file_count)++;
*scansize -= prop_block[idx].size;
ole2_listmsg("running file handler\n");
2023-01-14 18:28:39 +08:00
ret = handler(hdr, &prop_block[idx], dir, ctx, handler_ctx);
2022-10-22 18:41:00 +08:00
if (ret != CL_SUCCESS) {
2023-01-14 18:28:39 +08:00
ole2_listmsg("file handler returned %d\n", ret);
ole2_list_delete(&node_list);
return ret;
2022-10-22 18:41:00 +08:00
}
} else {
cli_dbgmsg("OLE2: filesize exceeded\n");
}
if ((int)(prop_block[idx].child) != -1) {
2023-01-14 18:28:39 +08:00
ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level, file_count, ctx, scansize, handler_ctx);
2022-10-22 18:41:00 +08:00
if (ret != CL_SUCCESS) {
2023-01-14 18:28:39 +08:00
ole2_list_delete(&node_list);
return ret;
2022-10-22 18:41:00 +08:00
}
}
if ((int)(prop_block[idx].prev) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
if ((int)(prop_block[idx].next) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
break;
case 1: /* Directory */
ole2_listmsg("directory node\n");
if (dir) {
#if HAVE_JSON
if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
if (!json_object_object_get_ex(ctx->wrkproperty, "DigitalSignatures", NULL)) {
name = cli_ole2_get_property_name2(prop_block[idx].name, prop_block[idx].name_size);
if (name) {
if (!strcmp(name, "_xmlsignatures") || !strcmp(name, "_signatures")) {
cli_jsonbool(ctx->wrkproperty, "HasDigitalSignatures", 1);
}
free(name);
}
}
}
#endif
dirname = (char *)cli_malloc(strlen(dir) + 8);
if (!dirname) {
ole2_listmsg("OLE2: malloc failed for dirname\n");
ole2_list_delete(&node_list);
return CL_EMEM;
}
snprintf(dirname, strlen(dir) + 8, "%s" PATHSEP "%.6d", dir, curindex);
if (mkdir(dirname, 0700) != 0) {
ole2_listmsg("OLE2: mkdir failed for directory %s\n", dirname);
free(dirname);
ole2_list_delete(&node_list);
return CL_BREAK;
}
cli_dbgmsg("OLE2 dir entry: %s\n", dirname);
} else
dirname = NULL;
if ((int)(prop_block[idx].child) != -1) {
2023-01-14 18:28:39 +08:00
ret = ole2_walk_property_tree(hdr, dirname, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize, handler_ctx);
2022-10-22 18:41:00 +08:00
if (ret != CL_SUCCESS) {
2023-01-14 18:28:39 +08:00
ole2_list_delete(&node_list);
if (dirname) {
free(dirname);
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
return ret;
2022-10-22 18:41:00 +08:00
}
}
if (dirname) {
free(dirname);
dirname = NULL;
}
if ((int)(prop_block[idx].prev) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
if ((int)(prop_block[idx].next) != -1) {
if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
ole2_list_delete(&node_list);
return ret;
}
}
break;
default:
cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[idx].type);
break;
}
ole2_listmsg("loop ended: %d %d\n", ole2_list_size(&node_list), ole2_list_is_empty(&node_list));
}
ole2_list_delete(&node_list);
2023-01-14 18:28:39 +08:00
return CL_SUCCESS;
2022-10-22 18:41:00 +08:00
}
/* Write file Handler - write the contents of the entry to a file */
2023-01-14 18:28:39 +08:00
static cl_error_t handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t ret = CL_BREAK;
char newname[1024];
char *name = NULL;
unsigned char *buff = NULL;
int32_t current_block = 0;
size_t len = 0, offset = 0;
int ofd = -1;
char *hash = NULL;
bitset_t *blk_bitset = NULL;
uint32_t cnt = 0;
2022-10-22 18:41:00 +08:00
UNUSEDPARAM(ctx);
2023-01-14 18:28:39 +08:00
UNUSEDPARAM(handler_ctx);
2022-10-22 18:41:00 +08:00
if (prop->type != 2) {
/* Not a file */
2023-01-14 18:28:39 +08:00
ret = CL_SUCCESS;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (prop->name_size > 64) {
cli_dbgmsg("OLE2 [handler_writefile]: property name too long: %d\n", prop->name_size);
2023-01-14 18:28:39 +08:00
ret = CL_SUCCESS;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
if (name) {
cli_dbgmsg("Storing %s in uniq\n", name);
if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) {
cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
2023-01-14 18:28:39 +08:00
goto done;
2022-10-22 18:41:00 +08:00
}
} else {
if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) {
cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
2023-01-14 18:28:39 +08:00
goto done;
2022-10-22 18:41:00 +08:00
}
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt);
newname[sizeof(newname) - 1] = '\0';
cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "<empty>", newname);
ofd = open(newname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (ofd < 0) {
cli_errmsg("OLE2 [handler_writefile]: failed to create file: %s\n", newname);
2023-01-14 18:28:39 +08:00
ret = CL_SUCCESS;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
current_block = prop->start_block;
len = prop->size;
2023-01-14 18:28:39 +08:00
CLI_MALLOC(buff, 1 << hdr->log2_big_block_size,
cli_errmsg("OLE2 [handler_writefile]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
ret = CL_EMEM);
2022-10-22 18:41:00 +08:00
blk_bitset = cli_bitset_init();
if (!blk_bitset) {
cli_errmsg("OLE2 [handler_writefile]: init bitset failed\n");
2023-01-14 18:28:39 +08:00
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
while ((current_block >= 0) && (len > 0)) {
if (current_block > (int32_t)hdr->max_block_no) {
cli_dbgmsg("OLE2 [handler_writefile]: Max block number for file size exceeded: %d\n", current_block);
2023-01-14 18:28:39 +08:00
break;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
/* Check we aren't in a loop */
if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
/* Loop in block list */
cli_dbgmsg("OLE2 [handler_writefile]: Block list loop detected\n");
2023-01-14 18:28:39 +08:00
break;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
2023-01-14 18:28:39 +08:00
break;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (prop->size < (int64_t)hdr->sbat_cutoff) {
/* Small block file */
if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
cli_dbgmsg("OLE2 [handler_writefile]: ole2_get_sbat_data_block failed\n");
2023-01-14 18:28:39 +08:00
break;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
/* buff now contains the block with N small blocks in it */
2023-01-14 18:28:39 +08:00
offset = (((size_t)1) << hdr->log2_small_block_size) * (((size_t)current_block) % (((size_t)1) << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
2022-10-22 18:41:00 +08:00
if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
2023-01-14 18:28:39 +08:00
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
len -= MIN(len, 1 << hdr->log2_small_block_size);
current_block = ole2_get_next_sbat_block(hdr, current_block);
} else {
/* Big block file */
if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
2023-01-14 18:28:39 +08:00
break;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
2023-01-14 18:28:39 +08:00
ret = CL_EWRITE;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
current_block = ole2_get_next_block_number(hdr, current_block);
len -= MIN(len, (1 << hdr->log2_big_block_size));
}
}
2023-01-14 18:28:39 +08:00
/*
* Unlike w/ handler_otf(), the ole2 summary JSON will be recorded
* when we re-ingest the files we wrote above when we scan the directory.
* See cli_ole2_tempdir_scan_vba()
*/
ret = CL_SUCCESS;
done:
FREE(name);
if (-1 != ofd) {
close(ofd);
}
FREE(buff);
if (NULL != blk_bitset) {
cli_bitset_free(blk_bitset);
}
return ret;
2022-10-22 18:41:00 +08:00
}
/*
 * States of the byte-at-a-time BIFF record scanner.
 *
 * A record header is consumed one byte per state (two opcode bytes, then two
 * length bytes); the opcode then selects which record state the scanner
 * enters for the record's data.
 */
enum biff_parser_states {
    BIFF_PARSER_INITIAL,                   /* expecting the first opcode byte */
    BIFF_PARSER_EXPECTING_2ND_TAG_BYTE,    /* expecting the second opcode byte */
    BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE, /* expecting the first length byte */
    BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE, /* expecting the second length byte */
    BIFF_PARSER_NAME_RECORD,               /* entered for opcode OPC_NAME */
    BIFF_PARSER_BOUNDSHEET_RECORD,         /* entered for opcode OPC_BOUNDSHEET */
    BIFF_PARSER_MSODRAWINGGROUP_RECORD,    /* entered for opcode OPC_MSODRAWINGGROUP */
    BIFF_PARSER_DATA,                      /* entered for every other opcode */
};
/*
 * Scanner state carried between calls so that a BIFF record may be split
 * across buffers.
 */
struct biff_parser_state {
/* current position in the record state machine */
enum biff_parser_states state;
/* record opcode, assembled low byte first */
uint16_t opcode;
/* record data length, assembled low byte first */
uint16_t length;
/* reset to 0 once the record header is complete */
uint16_t data_offset;
/* NOTE(review): purpose not visible in this part of the file */
uint8_t tmp;
};
/**
* Scan through a buffer of BIFF records and find PARSERNAME, BOUNDSHEET records (Which indicate XLM macros).
* BIFF streams follow the format OOLLDDDDDDDDD..., where OO is the opcode (little endian 16 bit value),
* LL is the data length (little endian 16 bit value), followed by LL bytes of data. Records are defined in
* the MICROSOFT OFFICE EXCEL 97-2007 BINARY FILE FORMAT SPECIFICATION.
*
* \param state The parser state.
* \param buff The buffer.
* \param len The buffer's size in bytes.
* \param ctx The ClamAV context for emitting JSON about the document.
* \returns true if a macro has been found, false otherwise.
*/
2023-01-14 18:28:39 +08:00
static cl_error_t scan_biff_for_xlm_macros_and_images(
struct biff_parser_state *state,
unsigned char *buff,
size_t len,
cli_ctx *ctx,
bool *found_macro,
bool *found_image)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t status = CL_EFORMAT;
2022-10-22 18:41:00 +08:00
size_t i;
for (i = 0; i < len; ++i) {
switch (state->state) {
case BIFF_PARSER_INITIAL:
state->opcode = buff[i];
state->state = BIFF_PARSER_EXPECTING_2ND_TAG_BYTE;
break;
case BIFF_PARSER_EXPECTING_2ND_TAG_BYTE:
state->opcode |= buff[i] << 8;
state->state = BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE;
break;
case BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE:
state->length = buff[i];
state->state = BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE;
break;
case BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE:
state->length |= buff[i] << 8;
state->data_offset = 0;
switch (state->opcode) {
2023-01-14 18:28:39 +08:00
case OPC_BOUNDSHEET:
2022-10-22 18:41:00 +08:00
state->state = BIFF_PARSER_BOUNDSHEET_RECORD;
break;
2023-01-14 18:28:39 +08:00
case OPC_NAME:
2022-10-22 18:41:00 +08:00
state->state = BIFF_PARSER_NAME_RECORD;
break;
2023-01-14 18:28:39 +08:00
case OPC_MSODRAWINGGROUP:
state->state = BIFF_PARSER_MSODRAWINGGROUP_RECORD;
break;
2022-10-22 18:41:00 +08:00
default:
state->state = BIFF_PARSER_DATA;
break;
}
if (state->length == 0) {
state->state = BIFF_PARSER_INITIAL;
}
break;
default:
switch (state->state) {
case BIFF_PARSER_NAME_RECORD:
#if HAVE_JSON
if (state->data_offset == 0) {
state->tmp = buff[i] & 0x20;
} else if ((state->data_offset == 14 || state->data_offset == 15) && state->tmp) {
if (buff[i] == 1 || buff[i] == 2) {
if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
if (indicators) {
cli_jsonstr(indicators, NULL, "autorun");
} else {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"autorun\" entry to MacroIndicators JSON array\n");
2022-10-22 18:41:00 +08:00
}
}
}
if (buff[i] != 0) {
state->tmp = 0;
}
}
#endif
break;
case BIFF_PARSER_BOUNDSHEET_RECORD:
if (state->data_offset == 4) {
state->tmp = buff[i];
2023-01-14 18:28:39 +08:00
} else if (state->data_offset == 5 && buff[i] == 1) { // Excel 4.0 macro sheet
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Found XLM macro sheet\n");
2022-10-22 18:41:00 +08:00
#if HAVE_JSON
if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
if (macro_languages) {
cli_jsonstr(macro_languages, NULL, "XLM");
} else {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"XLM\" entry to MacroLanguages JSON array\n");
2022-10-22 18:41:00 +08:00
}
if (state->tmp == 1 || state->tmp == 2) {
json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
if (indicators) {
cli_jsonstr(indicators, NULL, "hidden");
} else {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Failed to add \"hidden\" entry to MacroIndicators JSON array\n");
2022-10-22 18:41:00 +08:00
}
}
}
#endif
2023-01-14 18:28:39 +08:00
*found_macro = true;
2022-10-22 18:41:00 +08:00
}
break;
case BIFF_PARSER_DATA:
break;
2023-01-14 18:28:39 +08:00
case BIFF_PARSER_MSODRAWINGGROUP_RECORD:
// Embedded image found
if (true != *found_image) {
*found_image = true;
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Found image in sheet\n");
}
break;
2022-10-22 18:41:00 +08:00
default:
2023-01-14 18:28:39 +08:00
// Should never arrive here
cli_dbgmsg("[scan_biff_for_xlm_macros_and_images] Unexpected state value %d\n", (int)state->state);
2022-10-22 18:41:00 +08:00
break;
}
state->data_offset += 1;
if (state->data_offset >= state->length) {
state->state = BIFF_PARSER_INITIAL;
}
}
}
2023-01-14 18:28:39 +08:00
status = CL_SUCCESS;
return status;
2022-10-22 18:41:00 +08:00
}
/**
2023-01-14 18:28:39 +08:00
* @brief Scan for XLM (Excel 4.0) macro sheets and images in an OLE2 Workbook stream.
*
2022-10-22 18:41:00 +08:00
* The stream should be encoded with <= BIFF8.
2023-01-14 18:28:39 +08:00
* The found_macro and found_image out-params should be checked even if an error occured.
*
* @param hdr
* @param prop
* @param ctx
* @param found_macro [out] If any macros were found
* @param found_image [out] If any images were found
* @return cl_error_t CL_EPARSE if an error was encountered
* @return cl_error_t CL_EMEM if a memory issue was encountered.
* @return cl_error_t CL_SUCCESS if no errors were encountered.
2022-10-22 18:41:00 +08:00
*/
2023-01-14 18:28:39 +08:00
static cl_error_t scan_for_xlm_macros_and_images(ole2_header_t *hdr, property_t *prop, cli_ctx *ctx, bool *found_macro, bool *found_image)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t status = CL_EPARSE;
unsigned char *buff = NULL;
int32_t current_block = 0;
size_t len = 0, offset = 0;
bitset_t *blk_bitset = NULL;
struct biff_parser_state state = {0};
2022-10-22 18:41:00 +08:00
if (prop->type != 2) {
/* Not a file */
goto done;
}
2023-01-14 18:28:39 +08:00
memset(&state, 0, sizeof(state));
2022-10-22 18:41:00 +08:00
state.state = BIFF_PARSER_INITIAL;
current_block = prop->start_block;
len = prop->size;
2023-01-14 18:28:39 +08:00
CLI_MALLOC(buff, 1 << hdr->log2_big_block_size,
cli_errmsg("OLE2 [scan_for_xlm_macros_and_images]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
status = CL_EMEM);
2022-10-22 18:41:00 +08:00
blk_bitset = cli_bitset_init();
if (!blk_bitset) {
2023-01-14 18:28:39 +08:00
cli_errmsg("OLE2 [scan_for_xlm_macros_and_images]: init bitset failed\n");
2022-10-22 18:41:00 +08:00
goto done;
}
while ((current_block >= 0) && (len > 0)) {
if (current_block > (int32_t)hdr->max_block_no) {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: Max block number for file size exceeded: %d\n", current_block);
2022-10-22 18:41:00 +08:00
goto done;
}
/* Check we aren't in a loop */
if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
/* Loop in block list */
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: Block list loop detected\n");
2022-10-22 18:41:00 +08:00
goto done;
}
if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
goto done;
}
if (prop->size < (int64_t)hdr->sbat_cutoff) {
/* Small block file */
if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [scan_for_xlm_macros_and_images]: ole2_get_sbat_data_block failed\n");
2022-10-22 18:41:00 +08:00
goto done;
}
/* buff now contains the block with N small blocks in it */
offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
2023-01-14 18:28:39 +08:00
(void)scan_biff_for_xlm_macros_and_images(&state, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size), ctx, found_macro, found_image);
2022-10-22 18:41:00 +08:00
len -= MIN(len, 1 << hdr->log2_small_block_size);
current_block = ole2_get_next_sbat_block(hdr, current_block);
} else {
/* Big block file */
if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
goto done;
}
2023-01-14 18:28:39 +08:00
(void)scan_biff_for_xlm_macros_and_images(&state, buff, MIN(len, (1 << hdr->log2_big_block_size)), ctx, found_macro, found_image);
2022-10-22 18:41:00 +08:00
current_block = ole2_get_next_block_number(hdr, current_block);
len -= MIN(len, (1 << hdr->log2_big_block_size));
}
}
2023-01-14 18:28:39 +08:00
status = CL_SUCCESS;
2022-10-22 18:41:00 +08:00
done:
2023-01-14 18:28:39 +08:00
FREE(buff);
2022-10-22 18:41:00 +08:00
if (blk_bitset) {
cli_bitset_free(blk_bitset);
}
2023-01-14 18:28:39 +08:00
return status;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
/**
* @brief enum file Handler - checks for VBA presence
*
* @param hdr
* @param prop
* @param dir
* @param ctx the scan context
* @return cl_error_t
*/
static cl_error_t handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t status = CL_EREAD;
char *name = NULL;
unsigned char *hwp_check = NULL;
int32_t offset = 0;
2022-10-22 18:41:00 +08:00
#if HAVE_JSON
2023-01-14 18:28:39 +08:00
json_object *arrobj = NULL;
json_object *strmobj = NULL;
UNUSEDPARAM(handler_ctx);
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
if (name) {
if (SCAN_COLLECT_METADATA && ctx->wrkproperty != NULL) {
arrobj = cli_jsonarray(ctx->wrkproperty, "Streams");
if (NULL == arrobj) {
cli_warnmsg("ole2: no memory for streams list or streams is not an array\n");
} else {
strmobj = json_object_new_string(name);
json_object_array_add(arrobj, strmobj);
}
if (!strcmp(name, "powerpoint document")) {
cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSPPT");
}
if (!strcmp(name, "worddocument")) {
cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSWORD");
}
if (!strcmp(name, "workbook")) {
cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSXL");
}
}
}
#else
UNUSEDPARAM(ctx);
#endif
UNUSEDPARAM(dir);
if (!hdr->has_vba) {
if (!name)
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
if (name) {
if (!strcmp(name, "_vba_project") || !strcmp(name, "powerpoint document") || !strcmp(name, "worddocument") || !strcmp(name, "_1_ole10native"))
hdr->has_vba = 1;
}
}
/*
* if we can find a root entry fileheader, it may be a HWP file
* identify the HWP signature "HWP Document File" at offset 0 stream
*/
if (!hdr->is_hwp) {
2023-01-14 18:28:39 +08:00
if (!name) {
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
if (name) {
if (!strcmp(name, "fileheader")) {
2023-01-14 18:28:39 +08:00
CLI_CALLOC(hwp_check, 1, 1 << hdr->log2_big_block_size, status = CL_EMEM);
2022-10-22 18:41:00 +08:00
/* reading safety checks; do-while used for breaks */
do {
if (prop->size == 0)
break;
if (prop->start_block > hdr->max_block_no)
break;
/* read the header block (~256 bytes) */
offset = 0;
if (prop->size < (int64_t)hdr->sbat_cutoff) {
if (!ole2_get_sbat_data_block(hdr, hwp_check, prop->start_block)) {
break;
}
offset = (1 << hdr->log2_small_block_size) *
(prop->start_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
/* reading safety */
if (offset + 40 >= 1 << hdr->log2_big_block_size)
break;
} else {
if (!ole2_read_block(hdr, hwp_check, 1 << hdr->log2_big_block_size, prop->start_block)) {
break;
}
}
/* compare against HWP signature; we could add the 15 padding NULLs too */
if (!memcmp(hwp_check + offset, "HWP Document File", 17)) {
hwp5_header_t *hwp_new;
#if HAVE_JSON
cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_HWP5");
#endif
2023-01-14 18:28:39 +08:00
CLI_CALLOC(hwp_new, 1, sizeof(hwp5_header_t), status = CL_EMEM);
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
/*
* Copy the header information into our header struct.
*/
2022-10-22 18:41:00 +08:00
memcpy(hwp_new, hwp_check + offset, sizeof(hwp5_header_t));
hwp_new->version = ole2_endian_convert_32(hwp_new->version);
hwp_new->flags = ole2_endian_convert_32(hwp_new->flags);
hdr->is_hwp = hwp_new;
}
} while (0);
}
}
}
2023-01-14 18:28:39 +08:00
/* If we've already found a macro and an image, we can skip this initial check.
This scan step is to save a little time so we don't have to fully parse it
later if never find anything.. */
if (!hdr->has_xlm || !hdr->has_image) {
2022-10-22 18:41:00 +08:00
if (!name) {
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
}
if (name && (strcmp(name, "workbook") == 0 || strcmp(name, "book") == 0)) {
2023-01-14 18:28:39 +08:00
(void)scan_for_xlm_macros_and_images(hdr, prop, ctx, &hdr->has_xlm, &hdr->has_image);
2022-10-22 18:41:00 +08:00
}
}
2023-01-14 18:28:39 +08:00
status = CL_SUCCESS;
done:
FREE(name);
FREE(hwp_check);
return status;
2022-10-22 18:41:00 +08:00
}
/*
 * Heuristic check for an MSO stream: the file must be at least 6 bytes long
 * and bytes 4-5 must be 0x78 0x9C, i.e. a zlib header directly after a
 * 4-byte prefix.
 *
 * Returns 1 when the pattern matches and 0 otherwise (including on any
 * seek/read failure). The file position of fd is moved by this call.
 */
static int
likely_mso_stream(int fd)
{
    unsigned char zlib_hdr[2];
    off_t stream_len;
    int looks_like_mso = 0;

    do {
        stream_len = lseek(fd, 0, SEEK_END);
        if (-1 == stream_len) {
            cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
            break;
        }
        if (stream_len < 6) {
            /* Too short to hold the prefix plus the two header bytes. */
            break;
        }

        if (-1 == lseek(fd, 4, SEEK_SET)) {
            cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
            break;
        }

        if (2 != cli_readn(fd, zlib_hdr, 2)) {
            cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
            break;
        }

        looks_like_mso = (0x78 == zlib_hdr[0]) && (0x9C == zlib_hdr[1]);
    } while (0);

    return looks_like_mso;
}
/*
 * Inflate an MSO stream (4-byte little-endian size prefix followed by zlib
 * data) from fd into a temporary file and scan the result.
 *
 * Decompression is best effort: if inflation fails after some output was
 * produced, or a scan limit is hit, whatever was decompressed so far is still
 * scanned. A mismatch between the prefix and the inflated size is only warned
 * about.
 *
 * Returns the scan result, or CL_ENULLARG / CL_ESTAT / CL_EMAP / CL_EUNPACK /
 * CL_EREAD / CL_EWRITE / CL_EUNLINK (or the cli_gentempfd error) on failure.
 * Note that a failing inflateEnd() or unlink during cleanup replaces any
 * earlier result.
 */
static cl_error_t scan_mso_stream(int fd, cli_ctx *ctx)
{
    int zret, ofd;
    cl_error_t ret = CL_SUCCESS;
    fmap_t *input;
    off_t off_in = 0;
    size_t count, outsize = 0;
    z_stream zstrm;
    char *tmpname;
    uint32_t prefix;
    unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];

    /* fmap the input file for easier manipulation */
    if (fd < 0) {
        cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
        return CL_ENULLARG;
    } else {
        STATBUF statbuf;

        if (FSTAT(fd, &statbuf) == -1) {
            cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
            return CL_ESTAT;
        }

        input = fmap(fd, 0, statbuf.st_size, NULL);
        if (!input) {
            cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
            return CL_EMAP;
        }
    }

    /* reserve tempfile for output and scanning */
    if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
        cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
        funmap(input);
        return ret;
    }

    /* initialize zlib inflation stream */
    memset(&zstrm, 0, sizeof(zstrm));
    zstrm.zalloc    = Z_NULL;
    zstrm.zfree     = Z_NULL;
    zstrm.opaque    = Z_NULL;
    zstrm.next_in   = inbuf;
    zstrm.next_out  = outbuf;
    zstrm.avail_in  = 0;
    zstrm.avail_out = FILEBUFF;

    zret = inflateInit(&zstrm);
    if (zret != Z_OK) {
        cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
        ret = CL_EUNPACK;
        goto mso_end;
    }

    /* extract 32-bit prefix */
    if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
        cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
        ret = CL_EREAD;
        goto mso_end;
    }

    /* RFC1952 says numbers are stored with least significant byte first */
    prefix = le32_to_host(prefix);

    off_in += sizeof(uint32_t);
    /* prefix is unsigned, so print it with %u rather than %d */
    cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%u)\n", prefix, prefix);

    /* inflation loop */
    do {
        if (zstrm.avail_in == 0) {
            size_t bytes_read;

            /* Input exhausted: refill from the map. */
            zstrm.next_in = inbuf;
            bytes_read    = fmap_readn(input, inbuf, off_in, FILEBUFF);
            if (bytes_read == (size_t)-1) {
                cli_errmsg("scan_mso_stream: Error reading MSO file\n");
                ret = CL_EUNPACK;
                goto mso_end;
            }
            if (bytes_read == 0)
                break;

            zstrm.avail_in = bytes_read;
            off_in += bytes_read;
        }
        zret  = inflate(&zstrm, Z_SYNC_FLUSH);
        count = FILEBUFF - zstrm.avail_out;
        if (count) {
            /* Stop inflating (but still scan the output so far) once limits are hit. */
            if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
                break;
            if (cli_writen(ofd, outbuf, count) != count) {
                cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
                ret = CL_EWRITE;
                goto mso_end;
            }
            outsize += count;
        }
        zstrm.next_out  = outbuf;
        zstrm.avail_out = FILEBUFF;
    } while (zret == Z_OK);

    /* post inflation checks */
    if (zret != Z_STREAM_END && zret != Z_OK) {
        if (outsize == 0) {
            cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
            ret = CL_EUNPACK;
            goto mso_end;
        }

        cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
    }
    cli_dbgmsg("scan_mso_stream: Decompressed %llu bytes to %s\n", (long long unsigned)outsize, tmpname);

    if (outsize != prefix) {
        cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
                    (long long unsigned)prefix, (long long unsigned)outsize);
    } else {
        cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
                   (long long unsigned)prefix, (long long unsigned)outsize);
    }

    /* scanning inflated stream */
    ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);

    /* clean-up */
mso_end:
    zret = inflateEnd(&zstrm);
    if (zret != Z_OK)
        ret = CL_EUNPACK;
    close(ofd);
    if (!ctx->engine->keeptmp)
        if (cli_unlink(tmpname))
            ret = CL_EUNLINK;
    free(tmpname);
    funmap(input);
    return ret;
}
2023-01-14 18:28:39 +08:00
static cl_error_t handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t ret = CL_BREAK;
char *tempfile = NULL;
char *name = NULL;
unsigned char *buff = NULL;
int32_t current_block = 0;
size_t len = 0, offset = 0;
int ofd = -1;
int is_mso = 0;
bitset_t *blk_bitset = NULL;
2022-10-22 18:41:00 +08:00
UNUSEDPARAM(dir);
2023-01-14 18:28:39 +08:00
UNUSEDPARAM(handler_ctx);
2022-10-22 18:41:00 +08:00
if (prop->type != 2) {
/* Not a file */
2023-01-14 18:28:39 +08:00
ret = CL_SUCCESS;
goto done;
2022-10-22 18:41:00 +08:00
}
print_ole2_property(prop);
2023-01-14 18:28:39 +08:00
if (!(tempfile = cli_gentemp(ctx->sub_tmpdir))) {
ret = CL_EMEM;
goto done;
}
2022-10-22 18:41:00 +08:00
if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [handler_otf]: Can't create file %s\n", tempfile);
ret = CL_ECREAT;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
current_block = prop->start_block;
len = prop->size;
if (cli_debug_flag) {
2023-01-14 18:28:39 +08:00
if (!name) {
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile);
}
2023-01-14 18:28:39 +08:00
CLI_MALLOC(buff, 1 << hdr->log2_big_block_size, ret = CL_EMEM);
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
blk_bitset = cli_bitset_init();
2022-10-22 18:41:00 +08:00
if (!blk_bitset) {
2023-01-14 18:28:39 +08:00
cli_errmsg("OLE2 [handler_otf]: init bitset failed\n");
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
while ((current_block >= 0) && (len > 0)) {
if (current_block > (int32_t)hdr->max_block_no) {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [handler_otf]: Max block number for file size exceeded: %d\n", current_block);
2022-10-22 18:41:00 +08:00
break;
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
/* Check we aren't in a loop */
if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
/* Loop in block list */
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [handler_otf]: Block list loop detected\n");
2022-10-22 18:41:00 +08:00
break;
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
break;
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (prop->size < (int64_t)hdr->sbat_cutoff) {
/* Small block file */
if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
2023-01-14 18:28:39 +08:00
cli_dbgmsg("OLE2 [handler_otf]: ole2_get_sbat_data_block failed\n");
2022-10-22 18:41:00 +08:00
break;
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
/* buff now contains the block with N small blocks in it */
offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
2023-01-14 18:28:39 +08:00
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
len -= MIN(len, 1 << hdr->log2_small_block_size);
current_block = ole2_get_next_sbat_block(hdr, current_block);
} else {
/* Big block file */
if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
break;
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
2023-01-14 18:28:39 +08:00
ret = CL_EWRITE;
goto done;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
current_block = ole2_get_next_block_number(hdr, current_block);
len -= MIN(len, (1 << hdr->log2_big_block_size));
}
}
/* defragmenting of ole2 stream complete */
is_mso = likely_mso_stream(ofd);
if (lseek(ofd, 0, SEEK_SET) == -1) {
2023-01-14 18:28:39 +08:00
ret = CL_ESEEK;
goto done;
2022-10-22 18:41:00 +08:00
}
#if HAVE_JSON
/* JSON Output Summary Information */
if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) {
2023-01-14 18:28:39 +08:00
if (!name) {
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
if (name) {
if (!strncmp(name, "_5_summaryinformation", 21)) {
cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n");
/* JSONOLE2 - what to do if something breaks? */
if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) {
2023-01-14 18:28:39 +08:00
ret = CL_ETIMEOUT;
goto done;
2022-10-22 18:41:00 +08:00
}
}
2023-01-14 18:28:39 +08:00
2022-10-22 18:41:00 +08:00
if (!strncmp(name, "_5_documentsummaryinformation", 29)) {
cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n");
/* JSONOLE2 - what to do if something breaks? */
if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) {
2023-01-14 18:28:39 +08:00
ret = CL_ETIMEOUT;
goto done;
2022-10-22 18:41:00 +08:00
}
}
}
}
#endif
if (hdr->is_hwp) {
2023-01-14 18:28:39 +08:00
if (!name) {
2022-10-22 18:41:00 +08:00
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
2023-01-14 18:28:39 +08:00
}
2022-10-22 18:41:00 +08:00
ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile);
} else if (is_mso < 0) {
ret = CL_ESEEK;
} else if (is_mso) {
/* MSO Stream Scan */
ret = scan_mso_stream(ofd, ctx);
} else {
/* Normal File Scan */
2023-01-14 18:28:39 +08:00
ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL, LAYER_ATTRIBUTES_NONE);
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
ret = ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS;
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
done:
FREE(name);
if (-1 != ofd) {
close(ofd);
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
FREE(buff);
if (NULL != blk_bitset) {
cli_bitset_free(blk_bitset);
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
if (NULL != tempfile) {
if (!ctx->engine->keeptmp) {
if (cli_unlink(tempfile)) {
ret = CL_EUNLINK;
}
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
free(tempfile);
tempfile = NULL;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
return ret;
2022-10-22 18:41:00 +08:00
}
2023-01-14 18:28:39 +08:00
/*
* @brief Extracts encrypted files.
* @param hdr ole2_header_t structure
* @param prop property_t structure (DirectoryEntry)
* @param dir dir pointer. Unused by this function
* @param ctx cli_ctx
* @param handler_ctx handler context. For this function, it is the encryption key
* initialized by 'initialize_encryption_key'
* @return Success or failure depending on whether validation was successful.
*
* For more information, see below
* https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/e5ad39b8-9bc1-4a19-bad3-44e6246d21e6
*/
static cl_error_t handler_otf_encrypted(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx, void *handler_ctx)
2022-10-22 18:41:00 +08:00
{
2023-01-14 18:28:39 +08:00
cl_error_t ret = CL_BREAK;
char *tempfile = NULL;
char *name = NULL;
uint8_t *buff = NULL;
int32_t current_block = 0;
size_t len = 0;
size_t offset = 0;
int ofd = -1;
int is_mso = 0;
bitset_t *blk_bitset = NULL;
int nrounds = 0;
uint8_t *decryptDst = NULL;
encryption_key_t *key = (encryption_key_t *)handler_ctx;
uint64_t *rk = NULL;
uint32_t bytesRead = 0;
uint64_t actualFileLength;
uint64_t bytesWritten = 0;
uint32_t leftover = 0;
uint32_t readIdx = 0;
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
UNUSEDPARAM(dir);
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
if (NULL == key) {
cli_errmsg("%s::%d::key NULL\n", __FUNCTION__, __LINE__);
goto done;
}
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
if (prop->type != 2) {
/* Not a file */
ret = CL_SUCCESS;
goto done;
}
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
CLI_MALLOC(rk, RKLENGTH(key->key_length_bits) * sizeof(uint64_t), ret = CL_EMEM);
2022-10-22 18:41:00 +08:00
2023-01-14 18:28:39 +08:00
print_ole2_property(prop);
nrounds = rijndaelSetupDecrypt(rk, key->key, key->key_length_bits);
if (!(tempfile = cli_gentemp(ctx->sub_tmpdir))) {
ret = CL_EMEM;
goto done;
}
if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
cli_dbgmsg("OLE2 [handler_otf]: Can't create file %s\n", tempfile);
ret = CL_ECREAT;
goto done;
}
current_block = prop->start_block;
len = prop->size;
if (cli_debug_flag) {
if (!name) {
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
}
cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile);
}
uint32_t blockSize = 1 << hdr->log2_big_block_size;
CLI_MALLOC(buff, blockSize + sizeof(uint64_t), ret = CL_EMEM);
CLI_MALLOC(decryptDst, blockSize, ret = CL_EMEM);
blk_bitset = cli_bitset_init();
if (!blk_bitset) {
cli_errmsg("OLE2 [handler_otf]: init bitset failed\n");
goto done;
}
while (bytesRead < len) {
if (current_block > (int32_t)hdr->max_block_no) {
cli_dbgmsg("OLE2 [handler_otf]: Max block number for file size exceeded: %d\n", current_block);
break;
}
/* Check we aren't in a loop */
if (cli_bitset_test(blk_bitset, (uint64_t)current_block)) {
/* Loop in block list */
cli_dbgmsg("OLE2 [handler_otf]: Block list loop detected\n");
break;
}
if (!cli_bitset_set(blk_bitset, (uint64_t)current_block)) {
break;
}
if (prop->size < (int64_t)hdr->sbat_cutoff) {
/* Small block file */
if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
cli_dbgmsg("OLE2 [handler_otf]: ole2_get_sbat_data_block failed\n");
break;
}
/* buff now contains the block with N small blocks in it */
offset = (((size_t)1) << hdr->log2_small_block_size) * (((size_t)current_block) % (((size_t)1) << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
goto done;
}
len -= MIN(len, 1 << hdr->log2_small_block_size);
current_block = ole2_get_next_sbat_block(hdr, current_block);
// These small block files don't seem to be encrypted.
} else {
uint32_t bytesToWrite = MIN(len - bytesRead, blockSize);
uint32_t writeIdx = 0;
uint32_t decryptDstIdx = 0;
if (!ole2_read_block(hdr, &(buff[readIdx]), blockSize, current_block)) {
break;
}
if (0 == bytesRead) {
// first block. account for size of file.
writeIdx += sizeof(uint64_t);
memcpy(&actualFileLength, buff, sizeof(actualFileLength));
actualFileLength = ole2_endian_convert_64(actualFileLength);
}
bytesRead += blockSize;
for (; writeIdx <= (leftover + bytesToWrite) - 16; writeIdx += 16, decryptDstIdx += 16) {
rijndaelDecrypt(rk, nrounds, &(buff[writeIdx]), &(decryptDst[decryptDstIdx]));
}
/*Since our buffer size is a power of 2, leftover should always be
* either 0 or 8, but we have to decrypt in multiples of 16.*/
if (((leftover + bytesToWrite) - writeIdx) > 8) {
goto done;
}
/*Make sure we don't write more data than the file is actually supposed to be.*/
if ((decryptDstIdx + bytesWritten) > actualFileLength) {
decryptDstIdx = actualFileLength - bytesWritten;
}
if (cli_writen(ofd, decryptDst, decryptDstIdx) != decryptDstIdx) {
cli_errmsg("ole2: Error writing to file '%s'\n", tempfile);
goto done;
}
bytesWritten += decryptDstIdx;
leftover = (leftover + bytesToWrite) - writeIdx;
if (leftover) {
memmove(buff, &(buff[writeIdx]), leftover);
}
readIdx = leftover;
current_block = ole2_get_next_block_number(hdr, current_block);
}
}
/* defragmenting of ole2 stream complete */
is_mso = likely_mso_stream(ofd);
if (lseek(ofd, 0, SEEK_SET) == -1) {
ret = CL_ESEEK;
goto done;
}
#if HAVE_JSON
/* JSON Output Summary Information */
if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) {
if (!name) {
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
}
if (name) {
if (!strncmp(name, "_5_summaryinformation", 21)) {
cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n");
/* JSONOLE2 - what to do if something breaks? */
if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) {
ret = CL_ETIMEOUT;
goto done;
}
}
if (!strncmp(name, "_5_documentsummaryinformation", 29)) {
cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n");
/* JSONOLE2 - what to do if something breaks? */
if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) {
ret = CL_ETIMEOUT;
goto done;
}
}
}
}
#endif
if (hdr->is_hwp) {
if (!name) {
name = cli_ole2_get_property_name2(prop->name, prop->name_size);
}
ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile);
} else if (is_mso < 0) {
ret = CL_ESEEK;
} else if (is_mso) {
/* MSO Stream Scan */
ret = scan_mso_stream(ofd, ctx);
} else {
/* Normal File Scan */
ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL, LAYER_ATTRIBUTES_NONE);
}
ret = ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS;
done:
FREE(name);
if (-1 != ofd) {
close(ofd);
}
FREE(buff);
if (NULL != blk_bitset) {
cli_bitset_free(blk_bitset);
}
if (NULL != tempfile) {
if (!ctx->engine->keeptmp) {
if (cli_unlink(tempfile)) {
ret = CL_EUNLINK;
}
}
free(tempfile);
tempfile = NULL;
}
FREE(decryptDst);
FREE(rk);
return ret;
}
#if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA)
/*
 * Read the OLE2 header from fd one field at a time, in on-disk order.
 * Only compiled when no struct-packing mechanism is available.
 *
 * Returns true when every field was read completely, false on the first
 * short read (fields read up to that point stay filled in).
 */
static bool ole2_read_header(int fd, ole2_header_t *hdr)
{
    /* Destination and on-disk byte count of each field preceding bat_array. */
    const struct {
        void *dst;
        size_t nbytes;
    } leading_fields[] = {
        {&hdr->magic, 8},
        {&hdr->clsid, 16},
        {&hdr->minor_version, 2},
        {&hdr->dll_version, 2},
        {&hdr->byte_order, 2},
        {&hdr->log2_big_block_size, 2},
        {&hdr->log2_small_block_size, 4},
        {&hdr->reserved, 8},
        {&hdr->bat_count, 4},
        {&hdr->prop_start, 4},
        {&hdr->signature, 4},
        {&hdr->sbat_cutoff, 4},
        {&hdr->sbat_start, 4},
        {&hdr->sbat_block_count, 4},
        {&hdr->xbat_start, 4},
        {&hdr->xbat_count, 4},
    };
    const size_t n_leading = sizeof(leading_fields) / sizeof(leading_fields[0]);
    size_t idx;

    for (idx = 0; idx < n_leading; idx++) {
        if (cli_readn(fd, leading_fields[idx].dst, leading_fields[idx].nbytes) != leading_fields[idx].nbytes) {
            return false;
        }
    }

    /* The header ends with 109 four-byte bat_array entries. */
    for (idx = 0; idx < 109; idx++) {
        if (cli_readn(fd, &hdr->bat_array[idx], 4) != 4) {
            return false;
        }
    }

    return true;
}
#endif
/*
 * Copy a raw encryption-info stream header from src into dst and convert
 * each multi-byte integer field listed below from little endian to host
 * byte order.
 * src must provide at least sizeof(encryption_info_stream_standard_t) bytes.
 */
void copy_encryption_info_stream_standard(encryption_info_stream_standard_t *dst, const uint8_t *src)
{
/* Convert one field of *dst in place. */
#define EIS_TO_HOST_16(field) dst->field = ole2_endian_convert_16(dst->field)
#define EIS_TO_HOST_32(field) dst->field = ole2_endian_convert_32(dst->field)

    memcpy(dst, src, sizeof(*dst));

    /* stream header */
    EIS_TO_HOST_16(version_major);
    EIS_TO_HOST_16(version_minor);
    EIS_TO_HOST_32(flags);
    EIS_TO_HOST_32(size);

    /* embedded encryption header */
    EIS_TO_HOST_32(encryptionInfo.flags);
    EIS_TO_HOST_32(encryptionInfo.sizeExtra);
    EIS_TO_HOST_32(encryptionInfo.algorithmID);
    EIS_TO_HOST_32(encryptionInfo.algorithmIDHash);
    EIS_TO_HOST_32(encryptionInfo.keySize);
    EIS_TO_HOST_32(encryptionInfo.providerType);
    EIS_TO_HOST_32(encryptionInfo.reserved1);
    EIS_TO_HOST_32(encryptionInfo.reserved2);

#undef EIS_TO_HOST_32
#undef EIS_TO_HOST_16
}
/**
 * @brief Load an encryption verifier from raw bytes into a host-order struct.
 *
 * @param dst [out] Structure to populate.
 * @param src       Raw bytes; at least sizeof(encryption_verifier_t) bytes are
 *                  read from this pointer.
 */
void copy_encryption_verifier(encryption_verifier_t *dst, const uint8_t *src)
{
    /* Raw image first, then convert the two length fields from little-endian. */
    memcpy(dst, src, sizeof(*dst));

    dst->salt_size          = ole2_endian_convert_32(dst->salt_size);
    dst->verifier_hash_size = ole2_endian_convert_32(dst->verifier_hash_size);
}
/**
 * @brief Check whether a key length (in bits) is one of the AES key sizes
 *        named by the SE_HEADER_EI_AES*_KEYSIZE constants.
 *
 * @param keyLength Key length in bits.
 * @return true if keyLength equals the AES128, AES192 or AES256 key size constant.
 */
static inline bool key_length_valid_aes_bits(const uint32_t keyLength)
{
    return (SE_HEADER_EI_AES128_KEYSIZE == keyLength) ||
           (SE_HEADER_EI_AES192_KEYSIZE == keyLength) ||
           (SE_HEADER_EI_AES256_KEYSIZE == keyLength);
}
/*Definitions for generate_key_aes*/
#define GENERATE_KEY_AES_ITERATIONS 50000

/*
 * @brief Generate the key for aes encryption based on the password
 *
 * Steps, as implemented below:
 *   1. H0 = SHA1(salt || password), with the password widened to 2 bytes per
 *      character (low byte = character, high byte = 0).
 *   2. GENERATE_KEY_AES_ITERATIONS times: H = SHA1(LE32(iteration) || H).
 *   3. Hfinal = SHA1(H || 0x00000000).
 *   4. X1 = SHA1((0x36 * 64) XOR Hfinal), X2 = SHA1((0x5c * 64) XOR Hfinal).
 *   5. key = first (key_length_bits / 8) bytes of X1 || X2.
 *
 * @param password  NUL-terminated password to generate the key from
 * @param key       [in/out] key->key_length_bits selects the key size; key->key receives the key
 * @param verifier  encryption_verifier_t from the header. Supplies the salt.
 *
 * @return CL_SUCCESS if a key was generated, CL_EMEM on allocation failure,
 *         CL_ERROR if the key length is not a valid AES key size. This function
 *         does NOT validate the key, you must call 'verify_key' for that.
 */
static cl_error_t generate_key_aes(const char *const password, encryption_key_t *key,
                                   encryption_verifier_t *verifier)
{
    uint8_t *buffer    = NULL;
    size_t bufLen      = 0;
    size_t passwordLen = 0;
    size_t saltLen     = 0;
    size_t keyLen      = 0;
    size_t c           = 0;
    cl_error_t ret     = CL_ERROR;
    uint32_t i         = 0;

    /* Layout: [4-byte iteration counter][SHA1 digest][4-byte block number].
     * sha1Dst points at the digest so the counter can be prepended (step 2)
     * or the block number appended (step 3) without copying. */
    uint8_t sha1[sizeof(uint32_t) + SHA1_HASH_SIZE + sizeof(uint32_t)] = {0};
    uint8_t *sha1Dst = &(sha1[sizeof(uint32_t)]);
    uint8_t buf1[64];
    uint8_t buf2[64];
    uint8_t doubleSha[SHA1_HASH_SIZE * 2];

    if (!key_length_valid_aes_bits(key->key_length_bits)) {
        cli_errmsg("ole2: Invalid key length '0x%x'\n", key->key_length_bits / 8);
        goto done;
    }

    /* Clamp the key length BEFORE touching key->key, so neither the memset
     * here nor the final memcpy can run past the destination or the source. */
    keyLen = key->key_length_bits / 8;
    if (keyLen > sizeof(key->key)) {
        cli_warnmsg("ole2: Invalid key length 0x%x\n", key->key_length_bits / 8);
        keyLen = sizeof(key->key);
    }
    if (keyLen > sizeof(doubleSha)) {
        keyLen = sizeof(doubleSha);
    }
    memset(key->key, 0, keyLen);

    /* salt_size comes straight from the file. Clamp it to the real salt
     * buffer and use the clamped value everywhere (allocation size, copy
     * length and password offset). Using the raw value for the allocation
     * and offset lets a crafted file request a huge allocation, or wrap the
     * length on 32-bit targets and write outside the buffer. */
    saltLen = verifier->salt_size;
    if (saltLen > sizeof(verifier->salt)) {
        cli_warnmsg("ole2: Invalid salt length '0x%x'\n", verifier->salt_size);
        saltLen = sizeof(verifier->salt);
    }

    passwordLen = strlen(password);
    bufLen      = saltLen + (passwordLen * 2);

    buffer = calloc(bufLen, 1);
    if (NULL == buffer) {
        cli_errmsg("ole2: calloc failed\n");
        ret = CL_EMEM;
        goto done;
    }

    memcpy(buffer, verifier->salt, saltLen);

    /*Convert to UTF16-LE*/
    /* The high byte of every code unit stays 0 from calloc. */
    for (c = 0; c < passwordLen; c++) {
        buffer[saltLen + (c * 2)] = password[c];
    }

    /* Step 1: H0 */
    (void)cl_sha1(buffer, bufLen, sha1Dst, NULL);

    /* Step 2: iterate, prepending the little-endian iteration counter. */
    for (i = 0; i < GENERATE_KEY_AES_ITERATIONS; i++) {
        uint32_t eye = ole2_endian_convert_32(i);
        memcpy(sha1, &eye, sizeof(eye));
        (void)cl_sha1(sha1, SHA1_HASH_SIZE + sizeof(uint32_t), sha1Dst, NULL);
    }

    /* Step 3: append a zero block number and hash once more. */
    memset(&(sha1Dst[SHA1_HASH_SIZE]), 0, sizeof(uint32_t));
    (void)cl_sha1(sha1Dst, SHA1_HASH_SIZE + sizeof(uint32_t), sha1Dst, NULL);

    /* Step 4: derive the two halves of the key material. */
    memset(buf1, 0x36, sizeof(buf1));
    for (i = 0; i < SHA1_HASH_SIZE; i++) {
        buf1[i] = buf1[i] ^ sha1Dst[i];
    }
    (void)cl_sha1(buf1, sizeof(buf1), doubleSha, NULL);

    memset(buf2, 0x5c, sizeof(buf2));
    for (i = 0; i < SHA1_HASH_SIZE; i++) {
        buf2[i] = buf2[i] ^ sha1Dst[i];
    }
    (void)cl_sha1(buf2, sizeof(buf2), &(doubleSha[SHA1_HASH_SIZE]), NULL);

    /* Step 5: truncate to the requested key size. */
    memcpy(key->key, doubleSha, keyLen);

    ret = CL_SUCCESS;

done:
    FREE(buffer);
    return ret;
}
/**
 * @brief Decrypt a buffer with AES-128, one independent 16-byte block at a time.
 *
 * The loop advances in 16-byte steps while the offset is below `length`, so
 * when `length` is not a multiple of 16 the last step still reads and writes
 * a full 16 bytes. Callers must size `in` and `out` accordingly.
 *
 * @param in      Ciphertext.
 * @param length  Number of ciphertext bytes to process.
 * @param out     [out] Plaintext destination.
 * @param key     Key to decrypt with; its key_length_bits must be SE_HEADER_EI_AES128_KEYSIZE.
 * @return true on success, false if the key size is unsupported or the key schedule could not be set up.
 */
static bool aes_128ecb_decrypt(const unsigned char *in, size_t length, unsigned char *out, const encryption_key_t *const key)
{
    uint64_t rk[RKLENGTH(128)];
    int nrounds;
    size_t offset;

    if (SE_HEADER_EI_AES128_KEYSIZE != key->key_length_bits) {
        cli_dbgmsg("ole2: Unsupported AES key length in aes_128ecb_decrypt\n");
        return false;
    }

    nrounds = rijndaelSetupDecrypt(rk, (const unsigned char *)key->key, key->key_length_bits);
    if (0 == nrounds) {
        cli_errmsg("ole2: Unable to initialize decryption.\n");
        return false;
    }

    for (offset = 0; offset < length; offset += 16) {
        rijndaelDecrypt(rk, nrounds, in + offset, out + offset);
    }

    return true;
}
/*Definitions for verify_key_aes*/
#define AES_VERIFIER_HASH_LEN 32

/*
 * @brief Check a candidate key against the verifier stored in the document.
 *
 * The encrypted verifier is decrypted and hashed with SHA1; the encrypted
 * verifier hash is decrypted as well; the key is accepted when the first
 * SHA1_HASH_SIZE bytes of both agree. Only AES-128 keys are handled, every
 * other key size is rejected.
 *
 * @param key       encryption_key_t to attempt validation
 * @param verifier  encryption_verifier_t to attempt validation.
 * @return true if the key decrypts the verifier consistently, false otherwise.
 *
 * For more information, see below
 * https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/e5ad39b8-9bc1-4a19-bad3-44e6246d21e6
 */
static bool verify_key_aes(const encryption_key_t *const key, encryption_verifier_t *verifier)
{
    uint8_t sha[SHA1_HASH_SIZE];
    uint8_t decrypted[AES_VERIFIER_HASH_LEN] = {0};
    uint32_t actual_hash_size                = verifier->verifier_hash_size;

    // The hash is expected to be 20 bytes even though the field holding it is larger.
    // A claimed size beyond the field itself cannot be honoured, so clamp it.
    if (actual_hash_size > sizeof(verifier->encrypted_verifier_hash)) {
        cli_warnmsg("ole2: Invalid encrypted verifier hash length 0x%x\n", verifier->verifier_hash_size);
        actual_hash_size = sizeof(verifier->encrypted_verifier_hash);
    }

    // AES-192, AES-256 and anything unrecognised are not implemented.
    if (SE_HEADER_EI_AES128_KEYSIZE != key->key_length_bits) {
        return false;
    }

    // Recover the randomly generated Verifier value.
    if (!aes_128ecb_decrypt(verifier->encrypted_verifier, sizeof(verifier->encrypted_verifier), decrypted, key)) {
        return false;
    }

    // Hash the recovered verifier. The header names the hash type, but only SHA1 is supported here.
    (void)cl_sha1(decrypted, sizeof(verifier->encrypted_verifier), sha, NULL);

    // Recover the stored hash of the Verifier value, reusing the scratch buffer.
    if (!aes_128ecb_decrypt(verifier->encrypted_verifier_hash, actual_hash_size, decrypted, key)) {
        return false;
    }

    // Only SHA1 is supported, so exactly SHA1_HASH_SIZE bytes are compared
    // regardless of the hash size the file claims.
    return 0 == memcmp(sha, decrypted, SHA1_HASH_SIZE);
}
/*Definitions for initialize_encryption_key*/
#define SE_HEADER_FCRYPTOAPI (1 << 2)
#define SE_HEADER_FEXTERNAL (1 << 4)
#define SE_HEADER_FDOCPROPS (1 << 3)
#define SE_HEADER_FAES (1 << 5)
#define SE_HEADER_EI_AES128 0x0000660e
#define SE_HEADER_EI_AES192 0x0000660f
#define SE_HEADER_EI_AES256 0x00006610
#define SE_HEADER_EI_RC4 0x00006801
#define SE_HEADER_EI_SHA1 0x00008004
#define SE_HEADER_EI_AES_PROVIDERTYPE 0x00000018

/**
 * @brief Initialize encryption key, if the encryption validation passes.
 *
 * Validates the flags and encryption header fields, locates the encryption
 * verifier that follows the NUL-terminated (2 bytes per character) CSP name,
 * derives a key from the standard password and checks it against the verifier.
 * Only AES-128 is carried through to key generation; AES-192, AES-256 and RC4
 * are recognised but rejected as not implemented.
 *
 * @param headerPtr         Pointer to the encryption header (already converted to host byte order).
 * @param encryptionKey     [out] Pointer to encryption_key_t structure to be initialized by this function.
 *                          Zeroed on entry; only filled in when the function returns true.
 * @return                  Success or failure depending on whether or not the
 *                          encryption verifier was successful with the
 *                          standard password (VelvetSweatshop).
 *
 * Information about the encryption keys is here
 * https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/dca653b5-b93b-48df-8e1e-0fb9e1c83b0f
 * https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/2895eba1-acb1-4624-9bde-2cdad3fea015
 *
 */
static bool initialize_encryption_key(const encryption_info_stream_standard_t *headerPtr,
                                      encryption_key_t *encryptionKey)
{
    bool bRet  = false;
    size_t idx = 0;
    encryption_key_t key;
    encryption_verifier_t ev;
    bool bAES = false;

    /* Capacity of the in-struct CSP name buffer; the verifier must live inside it. */
    const size_t cspNameCapacity = sizeof(headerPtr->encryptionInfo.cspName);

    memset(encryptionKey, 0, sizeof(encryption_key_t));
    memset(&key, 0, sizeof(encryption_key_t));

    cli_dbgmsg("Major Version   = 0x%x\n", headerPtr->version_major);
    cli_dbgmsg("Minor Version   = 0x%x\n", headerPtr->version_minor);
    cli_dbgmsg("Flags           = 0x%x\n", headerPtr->flags);

    /*Bit 0 and 1 must be 0*/
    if (1 & headerPtr->flags) {
        cli_dbgmsg("ole2: Invalid first bit, must be 0\n");
        goto done;
    }

    if ((1 << 1) & headerPtr->flags) {
        cli_dbgmsg("ole2: Invalid second bit, must be 0\n");
        goto done;
    }

    // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/200a3d61-1ab4-4402-ae11-0290b28ab9cb
    if ((SE_HEADER_FDOCPROPS & headerPtr->flags)) {
        cli_dbgmsg("ole2: Unsupported document properties encrypted\n");
        goto done;
    }

    if ((SE_HEADER_FEXTERNAL & headerPtr->flags) &&
        (SE_HEADER_FEXTERNAL != headerPtr->flags)) {
        cli_dbgmsg("ole2: Invalid fExternal flags.  If fExternal bit is set, nothing else can be\n");
        goto done;
    }

    if (SE_HEADER_FAES & headerPtr->flags) {
        if (!(SE_HEADER_FCRYPTOAPI & headerPtr->flags)) {
            cli_dbgmsg("ole2: Invalid combo of fAES and fCryptoApi flags\n");
            goto done;
        }

        cli_dbgmsg("Flags: AES\n");
    }

    cli_dbgmsg("Size            = 0x%x\n", headerPtr->size);

    if (headerPtr->flags != headerPtr->encryptionInfo.flags) {
        cli_dbgmsg("ole2: Flags must match\n");
        goto done;
    }

    if (0 != headerPtr->encryptionInfo.sizeExtra) {
        cli_dbgmsg("ole2: Size Extra must be 0\n");
        goto done;
    }

    switch (headerPtr->encryptionInfo.algorithmID) {
        case SE_HEADER_EI_AES128:
            if (SE_HEADER_EI_AES128_KEYSIZE != headerPtr->encryptionInfo.keySize) {
                cli_dbgmsg("ole2: Key length does not match algorithm id\n");
                goto done;
            }
            bAES = true;
            break;
        case SE_HEADER_EI_AES192:
            // not implemented
            if (SE_HEADER_EI_AES192_KEYSIZE != headerPtr->encryptionInfo.keySize) {
                cli_dbgmsg("ole2: Key length does not match algorithm id\n");
                goto done;
            }
            bAES = true;
            goto done;
        case SE_HEADER_EI_AES256:
            // not implemented
            if (SE_HEADER_EI_AES256_KEYSIZE != headerPtr->encryptionInfo.keySize) {
                cli_dbgmsg("ole2: Key length does not match algorithm id\n");
                goto done;
            }
            bAES = true;
            goto done;
        case SE_HEADER_EI_RC4:
            // not implemented
            goto done;
        default:
            cli_dbgmsg("ole2: Invalid Algorithm ID: 0x%x\n", headerPtr->encryptionInfo.algorithmID);
            goto done;
    }

    if (SE_HEADER_EI_SHA1 != headerPtr->encryptionInfo.algorithmIDHash) {
        cli_dbgmsg("ole2: Invalid Algorithm ID Hash: 0x%x\n", headerPtr->encryptionInfo.algorithmIDHash);
        goto done;
    }

    if (!key_length_valid_aes_bits(headerPtr->encryptionInfo.keySize)) {
        cli_dbgmsg("ole2: Invalid key size: 0x%x\n", headerPtr->encryptionInfo.keySize);
        goto done;
    }

    cli_dbgmsg("KeySize         = 0x%x\n", headerPtr->encryptionInfo.keySize);

    if (SE_HEADER_EI_AES_PROVIDERTYPE != headerPtr->encryptionInfo.providerType) {
        cli_dbgmsg("ole2: WARNING: Provider Type should be '0x%x', is '0x%x'\n",
                   SE_HEADER_EI_AES_PROVIDERTYPE, headerPtr->encryptionInfo.providerType);
        goto done;
    }

    cli_dbgmsg("Reserved1:      0x%x\n", headerPtr->encryptionInfo.reserved1);

    if (0 != headerPtr->encryptionInfo.reserved2) {
        cli_dbgmsg("ole2: Reserved 2 must be zero, is 0x%x\n", headerPtr->encryptionInfo.reserved2);
        goto done;
    }

    /*The encryption info is at the end of the CPSName string.
     * Find the end, and we'll have the index of the EncryptionVerifier.
     * The CPSName string *should* always be either
     * 'Microsoft Enhanced RSA and AES Cryptographic Provider'
     * or
     * 'Microsoft Enhanced RSA and AES Cryptographic Provider (Prototype)'
     *
     * CSP_NAME_LENGTH() is derived from a size field read from the file, so it
     * cannot be trusted on its own: the scan is additionally bounded by the
     * real capacity of the cspName buffer. The 2-byte terminator is tested
     * byte-wise to avoid a type-punned (and possibly unaligned) 16-bit load.
     */
    for (idx = 0; (idx < CSP_NAME_LENGTH(headerPtr) - 1) && ((idx + 1) < cspNameCapacity); idx += 2) {
        if ((0 == headerPtr->encryptionInfo.cspName[idx]) &&
            (0 == headerPtr->encryptionInfo.cspName[idx + 1])) {
            break;
        }
    }

    /* Step over the terminator (or past the last character scanned). */
    idx += 2;

    /* Check idx first: without it, an idx beyond the buffer makes the
     * subtraction wrap around and the size check below would pass. */
    if ((idx >= cspNameCapacity) ||
        ((cspNameCapacity - idx) <= sizeof(encryption_verifier_t))) {
        cli_dbgmsg("ole2: No encryption_verifier_t\n");
        goto done;
    }

    copy_encryption_verifier(&ev, &(headerPtr->encryptionInfo.cspName[idx]));

    key.key_length_bits = headerPtr->encryptionInfo.keySize;

    if (!bAES) {
        cli_dbgmsg("ole2: Unsupported encryption algorithm\n");
        goto done;
    }

    if (CL_SUCCESS != generate_key_aes("VelvetSweatshop", &key, &ev)) {
        /*Error message printed by generate_key_aes*/
        goto done;
    }

    if (!verify_key_aes(&key, &ev)) {
        cli_dbgmsg("ole2: Key verification for '%s' failed, unable to decrypt.\n", "VelvetSweatshop");
        goto done;
    }

    /* Only hand the key back once it has been verified. */
    memcpy(encryptionKey, &key, sizeof(encryption_key_t));

    bRet = true;
done:

    return bRet;
}
/**
 * @brief Extract macros and images from an ole2 file
 *
 * Reads and validates the OLE2 header from ctx->fmap, checks whether the
 * document is encrypted with the default "VelvetSweatshop" password, then
 * walks the property tree twice: once to count entries and detect
 * VBA/XLM/images, and once to either write streams to `dirname` (when any of
 * those were found) or scan the streams on the fly.
 *
 * @param dirname           A temp directory where we should store extracted content
 * @param ctx               The scan context
 * @param files             [out] A store of file names of extracted things to be processed later.
 *                          Only assigned when VBA, XLM or images were found.
 * @param has_vba           [out] If the ole2 contained 1 or more VBA macros. May be NULL.
 * @param has_xlm           [out] If the ole2 contained 1 or more XLM macros. May be NULL.
 * @param has_image         [out] If the ole2 contained 1 or more images. May be NULL.
 * @return cl_error_t       CL_ENULLARG if ctx is NULL, CL_EMAXSIZE if the scan size budget is
 *                          already used up, CL_EMEM on allocation failure, CL_EFORMAT on bad
 *                          magic or an oversized map, otherwise the result of the tree walk
 *                          (with CL_BREAK reported as CL_CLEAN).
 */
cl_error_t cli_ole2_extract(const char *dirname, cli_ctx *ctx, struct uniq **files, int *has_vba, int *has_xlm, int *has_image)
{
    ole2_header_t hdr;
    cl_error_t ret = CL_CLEAN;
    size_t hdr_size;
    unsigned int file_count = 0;
    unsigned long scansize, scansize2;
    const void *phdr;
    encryption_key_t key;
    bool bEncrypted          = false;
    size_t encryption_offset = 0;

    cli_dbgmsg("in cli_ole2_extract()\n");

    if (!ctx) {
        return CL_ENULLARG;
    }

    /* Initialised up front so the cleanup at 'done' is safe on every path. */
    hdr.is_hwp = NULL;
    hdr.bitset = NULL;

    if (ctx->engine->maxscansize) {
        if (ctx->engine->maxscansize > ctx->scansize) {
            scansize = ctx->engine->maxscansize - ctx->scansize;
        } else {
            return CL_EMAXSIZE;
        }
    } else {
        /* No limit configured. */
        scansize = -1;
    }

    /* Each pass gets its own copy of the remaining budget. */
    scansize2 = scansize;

    /* size of header - size of other values in struct */
    hdr_size = sizeof(struct ole2_header_tag) -
               sizeof(int32_t) -        // sbat_root_start
               sizeof(uint32_t) -       // max_block_no
               sizeof(off_t) -          // m_length
               sizeof(bitset_t *) -     // bitset
               sizeof(struct uniq *) -  // U
               sizeof(fmap_t *) -       // map
               sizeof(bool) -           // has_vba
               sizeof(bool) -           // has_xlm
               sizeof(bool) -           // has_image
               sizeof(hwp5_header_t *); // is_hwp

    if ((size_t)(ctx->fmap->len) < (size_t)(hdr_size)) {
        return CL_CLEAN;
    }

    hdr.map      = ctx->fmap;
    hdr.m_length = hdr.map->len;

    phdr = fmap_need_off_once(hdr.map, 0, hdr_size);
    if (phdr) {
        memcpy(&hdr, phdr, hdr_size);
    } else {
        cli_dbgmsg("cli_ole2_extract: failed to read header\n");
        goto done;
    }

    hdr.minor_version         = ole2_endian_convert_16(hdr.minor_version);
    hdr.dll_version           = ole2_endian_convert_16(hdr.dll_version);
    hdr.byte_order            = ole2_endian_convert_16(hdr.byte_order);
    hdr.log2_big_block_size   = ole2_endian_convert_16(hdr.log2_big_block_size);
    hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size);
    hdr.bat_count             = ole2_endian_convert_32(hdr.bat_count);
    hdr.prop_start            = ole2_endian_convert_32(hdr.prop_start);
    hdr.sbat_cutoff           = ole2_endian_convert_32(hdr.sbat_cutoff);
    hdr.sbat_start            = ole2_endian_convert_32(hdr.sbat_start);
    hdr.sbat_block_count      = ole2_endian_convert_32(hdr.sbat_block_count);
    hdr.xbat_start            = ole2_endian_convert_32(hdr.xbat_start);
    hdr.xbat_count            = ole2_endian_convert_32(hdr.xbat_count);

    hdr.sbat_root_start = -1;

    hdr.bitset = cli_bitset_init();
    if (!hdr.bitset) {
        ret = CL_EMEM;
        goto done;
    }

    if (memcmp(hdr.magic, magic_id, 8) != 0) {
        cli_dbgmsg("OLE2 magic failed!\n");
        ret = CL_EFORMAT;
        goto done;
    }

    if (hdr.log2_big_block_size < 6 || hdr.log2_big_block_size > 28) {
        // The big block size (aka Sector Shift) is expected to be:
        // - 9 for Major Version 3
        // - 12 for Major Version 4
        // - TBD for Major Version 5?
        // To allow for future changes, and prevent overflowing an int32_t, we're limiting to 28.
        cli_dbgmsg("CAN'T PARSE: Invalid big block size (2^%u)\n", hdr.log2_big_block_size);
        goto done;
    }

    if (!hdr.log2_small_block_size || hdr.log2_small_block_size > hdr.log2_big_block_size) {
        cli_dbgmsg("CAN'T PARSE: Invalid small block size (2^%u)\n", hdr.log2_small_block_size);
        goto done;
    }

    if (hdr.sbat_cutoff != 4096) {
        cli_dbgmsg("WARNING: Untested sbat cutoff (%u); data may not extract correctly\n", hdr.sbat_cutoff);
    }

    if (hdr.map->len > INT32_MAX) {
        cli_dbgmsg("OLE2 extract: Overflow detected\n");
        ret = CL_EFORMAT;
        goto done;
    }

    /* determine if encrypted with VelvetSweatshop password */
    encryption_offset = 4 * (1 << hdr.log2_big_block_size);
    if ((encryption_offset + sizeof(encryption_info_stream_standard_t)) <= hdr.m_length) {
        /* 'phdr' was only requested for hdr_size bytes, so it must not be
         * indexed out to encryption_offset. Request the encryption info range
         * from the map explicitly. 'phdr' is not used after this point. */
        const void *pEncryptionInfo = fmap_need_off_once(hdr.map, encryption_offset,
                                                         sizeof(encryption_info_stream_standard_t));

        if (NULL != pEncryptionInfo) {
            encryption_info_stream_standard_t encryption_info_stream_standard;

            copy_encryption_info_stream_standard(&encryption_info_stream_standard,
                                                 (const uint8_t *)pEncryptionInfo);
            bEncrypted = initialize_encryption_key(&encryption_info_stream_standard, &key);
        } else {
            cli_dbgmsg("cli_ole2_extract: failed to read encryption info stream\n");
        }

        /* Report the actual outcome rather than claiming encryption unconditionally. */
        cli_dbgmsg("Encrypted with VelvetSweatshop: %s\n", bEncrypted ? "true" : "false");

#if HAVE_JSON
        if (ctx->wrkproperty == ctx->properties) {
            cli_jsonint(ctx->wrkproperty, "EncryptedWithVelvetSweatshop", bEncrypted);
        }
#endif /* HAVE_JSON */
    }

    /* 8 SBAT blocks per file block */
    hdr.max_block_no = (hdr.map->len - MAX(512, 1 << hdr.log2_big_block_size)) / (1 << hdr.log2_small_block_size);

    print_ole2_header(&hdr);
    cli_dbgmsg("Max block number: %lu\n", (unsigned long int)hdr.max_block_no);

    /* PASS 1 : Count files and check for VBA */
    hdr.has_vba   = false;
    hdr.has_xlm   = false;
    hdr.has_image = false;

    ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_enum, 0, &file_count, ctx, &scansize, NULL);

    /* The second pass needs a fresh bitset. */
    cli_bitset_free(hdr.bitset);
    hdr.bitset = NULL;

    if (!file_count || !(hdr.bitset = cli_bitset_init())) {
        goto done;
    }

    if (hdr.is_hwp) {
        cli_dbgmsg("OLE2: identified HWP document\n");
        cli_dbgmsg("OLE2: HWP version: 0x%08x\n", hdr.is_hwp->version);
        cli_dbgmsg("OLE2: HWP flags:   0x%08x\n", hdr.is_hwp->flags);

        ret = cli_hwp5header(ctx, hdr.is_hwp);
        if (ret != CL_SUCCESS) {
            goto done;
        }
    }

    /* If there's no VBA we scan OTF */
    if (hdr.has_vba || hdr.has_xlm || hdr.has_image) {
        /* PASS 2/A : VBA scan */
        cli_dbgmsg("OLE2: VBA project found\n");
        if (!(hdr.U = uniq_init(file_count))) {
            cli_dbgmsg("OLE2: uniq_init() failed\n");
            ret = CL_EMEM;
            goto done;
        }

        file_count = 0;
        /* The result of the extraction walk is deliberately not propagated;
         * this path always reports CL_CLEAN and leaves the verdict to the
         * later processing of the extracted files. */
        ole2_walk_property_tree(&hdr, dirname, 0, handler_writefile, 0, &file_count, ctx, &scansize2, NULL);
        ret    = CL_CLEAN;
        *files = hdr.U;

        if (has_vba) {
            *has_vba = hdr.has_vba;
        }
        if (has_xlm) {
            *has_xlm = hdr.has_xlm;
        }
        if (has_image) {
            *has_image = hdr.has_image;
        }
    } else {
        cli_dbgmsg("OLE2: no VBA projects found\n");
        /* PASS 2/B : OTF scan */
        file_count = 0;
        if (bEncrypted) {
            ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf_encrypted, 0, &file_count, ctx, &scansize2, &key);
        } else {
            ret = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf, 0, &file_count, ctx, &scansize2, NULL);
        }
    }

done:
    if (hdr.bitset) {
        cli_bitset_free(hdr.bitset);
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free(hdr.is_hwp);

    return ret == CL_BREAK ? CL_CLEAN : ret;
}