denyhosts/clamscan/libclamav/macho.c
2022-10-22 18:41:00 +08:00

629 lines
21 KiB
C

/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2009-2013 Sourcefire, Inc.
*
* Authors: Tomasz Kojm <tkojm@clamav.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "clamav.h"
#include "others.h"
#include "macho.h"
#include "execs.h"
#include "scanners.h"
#define CLI_TMPUNLK() \
if (!ctx->engine->keeptmp) { \
if (cli_unlink(tempfile)) { \
free(tempfile); \
return CL_EUNLINK; \
} \
}
#define EC32(v, conv) (conv ? cbswap32(v) : v)
#define EC64(v, conv) (conv ? cbswap64(v) : v)
struct macho_hdr {
uint32_t magic;
uint32_t cpu_type;
uint32_t cpu_subtype;
uint32_t filetype;
uint32_t ncmds;
uint32_t sizeofcmds;
uint32_t flags;
};
struct macho_load_cmd {
uint32_t cmd;
uint32_t cmdsize;
};
struct macho_segment_cmd {
char segname[16];
uint32_t vmaddr;
uint32_t vmsize;
uint32_t fileoff;
uint32_t filesize;
uint32_t maxprot;
uint32_t initprot;
uint32_t nsects;
uint32_t flags;
};
struct macho_segment_cmd64 {
char segname[16];
uint64_t vmaddr;
uint64_t vmsize;
uint64_t fileoff;
uint64_t filesize;
uint32_t maxprot;
uint32_t initprot;
uint32_t nsects;
uint32_t flags;
};
struct macho_section {
char sectname[16];
char segname[16];
uint32_t addr;
uint32_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t res1;
uint32_t res2;
};
struct macho_section64 {
char sectname[16];
char segname[16];
uint64_t addr;
uint64_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t res1;
uint32_t res2;
};
struct macho_thread_state_ppc {
uint32_t srr0; /* PC */
uint32_t srr1;
uint32_t reg[32];
uint32_t cr;
uint32_t xer;
uint32_t lr;
uint32_t ctr;
uint32_t mq;
uint32_t vrsave;
};
struct macho_thread_state_ppc64 {
uint64_t srr0; /* PC */
uint64_t srr1;
uint64_t reg[32];
uint32_t cr;
uint64_t xer;
uint64_t lr;
uint64_t ctr;
uint32_t vrsave;
};
struct macho_thread_state_x86 {
uint32_t eax;
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
uint32_t edi;
uint32_t esi;
uint32_t ebp;
uint32_t esp;
uint32_t ss;
uint32_t eflags;
uint32_t eip;
uint32_t cs;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
};
struct macho_fat_header {
uint32_t magic;
uint32_t nfats;
};
struct macho_fat_arch {
uint32_t cputype;
uint32_t cpusubtype;
uint32_t offset;
uint32_t size;
uint32_t align;
};
#define RETURN_BROKEN \
if (matcher) \
return -1; \
if (SCAN_HEURISTIC_BROKEN) { \
if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Broken.Executable")) \
return CL_VIRUS; \
} \
return CL_EFORMAT
static uint32_t cli_rawaddr(uint32_t vaddr, struct cli_exe_section *sects, uint16_t nsects, unsigned int *err)
{
unsigned int i, found = 0;
for (i = 0; i < nsects; i++) {
if (sects[i].rva <= vaddr && sects[i].rva + sects[i].vsz > vaddr) {
found = 1;
break;
}
}
if (!found) {
*err = 1;
return 0;
}
*err = 0;
return vaddr - sects[i].rva + sects[i].raw;
}
int cli_scanmacho(cli_ctx *ctx, struct cli_exe_info *fileinfo)
{
struct macho_hdr hdr;
struct macho_load_cmd load_cmd;
struct macho_segment_cmd segment_cmd;
struct macho_segment_cmd64 segment_cmd64;
struct macho_section section;
struct macho_section64 section64;
unsigned int i, j, sect = 0, conv, m64, nsects, matcher = 0;
unsigned int arch = 0, ep = 0, err;
struct cli_exe_section *sections = NULL;
char name[16];
fmap_t *map = ctx->fmap;
ssize_t at;
if (fileinfo) {
matcher = 1;
// TODO This code assumes fileinfo->offset == 0, which might not always
// be the case. For now just print this debug message and continue on
if (0 != fileinfo->offset) {
cli_dbgmsg("cli_scanmacho: Assumption Violated: fileinfo->offset != 0\n");
}
}
if (fmap_readn(map, &hdr, 0, sizeof(hdr)) != sizeof(hdr)) {
cli_dbgmsg("cli_scanmacho: Can't read header\n");
return matcher ? -1 : CL_EFORMAT;
}
at = sizeof(hdr);
if (hdr.magic == 0xfeedface) {
conv = 0;
m64 = 0;
} else if (hdr.magic == 0xcefaedfe) {
conv = 1;
m64 = 0;
} else if (hdr.magic == 0xfeedfacf) {
conv = 0;
m64 = 1;
} else if (hdr.magic == 0xcffaedfe) {
conv = 1;
m64 = 1;
} else {
cli_dbgmsg("cli_scanmacho: Incorrect magic\n");
return matcher ? -1 : CL_EFORMAT;
}
switch (EC32(hdr.cpu_type, conv)) {
case 7:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: Intel 32-bit\n");
arch = 1;
break;
case 7 | 0x1000000:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: Intel 64-bit\n");
break;
case 12:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: ARM\n");
break;
case 14:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: SPARC\n");
break;
case 18:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: POWERPC 32-bit\n");
arch = 2;
break;
case 18 | 0x1000000:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: POWERPC 64-bit\n");
arch = 3;
break;
default:
if (!matcher)
cli_dbgmsg("MACHO: CPU Type: ** UNKNOWN ** (%u)\n", EC32(hdr.cpu_type, conv));
break;
}
if (!matcher) switch (EC32(hdr.filetype, conv)) {
case 0x1: /* MH_OBJECT */
cli_dbgmsg("MACHO: Filetype: Relocatable object file\n");
break;
case 0x2: /* MH_EXECUTE */
cli_dbgmsg("MACHO: Filetype: Executable\n");
break;
case 0x3: /* MH_FVMLIB */
cli_dbgmsg("MACHO: Filetype: Fixed VM shared library file\n");
break;
case 0x4: /* MH_CORE */
cli_dbgmsg("MACHO: Filetype: Core file\n");
break;
case 0x5: /* MH_PRELOAD */
cli_dbgmsg("MACHO: Filetype: Preloaded executable file\n");
break;
case 0x6: /* MH_DYLIB */
cli_dbgmsg("MACHO: Filetype: Dynamically bound shared library\n");
break;
case 0x7: /* MH_DYLINKER */
cli_dbgmsg("MACHO: Filetype: Dynamic link editor\n");
break;
case 0x8: /* MH_BUNDLE */
cli_dbgmsg("MACHO: Filetype: Dynamically bound bundle file\n");
break;
case 0x9: /* MH_DYLIB_STUB */
cli_dbgmsg("MACHO: Filetype: Shared library stub for static\n");
break;
default:
cli_dbgmsg("MACHO: Filetype: ** UNKNOWN ** (0x%x)\n", EC32(hdr.filetype, conv));
}
if (!matcher) {
cli_dbgmsg("MACHO: Number of load commands: %u\n", EC32(hdr.ncmds, conv));
cli_dbgmsg("MACHO: Size of load commands: %u\n", EC32(hdr.sizeofcmds, conv));
}
if (m64)
at += 4;
hdr.ncmds = EC32(hdr.ncmds, conv);
if (!hdr.ncmds || hdr.ncmds > 1024) {
cli_dbgmsg("cli_scanmacho: Invalid number of load commands (%u)\n", hdr.ncmds);
RETURN_BROKEN;
}
for (i = 0; i < hdr.ncmds; i++) {
if (fmap_readn(map, &load_cmd, at, sizeof(load_cmd)) != sizeof(load_cmd)) {
cli_dbgmsg("cli_scanmacho: Can't read load command\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(load_cmd);
/*
if((m64 && EC32(load_cmd.cmdsize, conv) % 8) || (!m64 && EC32(load_cmd.cmdsize, conv) % 4)) {
cli_dbgmsg("cli_scanmacho: Invalid command size (%u)\n", EC32(load_cmd.cmdsize, conv));
free(sections);
RETURN_BROKEN;
}
*/
load_cmd.cmd = EC32(load_cmd.cmd, conv);
if ((m64 && load_cmd.cmd == 0x19) || (!m64 && load_cmd.cmd == 0x01)) { /* LC_SEGMENT */
if (m64) {
if (fmap_readn(map, &segment_cmd64, at, sizeof(segment_cmd64)) != sizeof(segment_cmd64)) {
cli_dbgmsg("cli_scanmacho: Can't read segment command\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(segment_cmd64);
nsects = EC32(segment_cmd64.nsects, conv);
strncpy(name, segment_cmd64.segname, sizeof(name));
name[sizeof(name) - 1] = '\0';
} else {
if (fmap_readn(map, &segment_cmd, at, sizeof(segment_cmd)) != sizeof(segment_cmd)) {
cli_dbgmsg("cli_scanmacho: Can't read segment command\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(segment_cmd);
nsects = EC32(segment_cmd.nsects, conv);
strncpy(name, segment_cmd.segname, sizeof(name));
name[sizeof(name) - 1] = '\0';
}
if (!matcher) {
cli_dbgmsg("MACHO: Segment name: %s\n", name);
cli_dbgmsg("MACHO: Number of sections: %u\n", nsects);
}
if (nsects > 255) {
cli_dbgmsg("cli_scanmacho: Invalid number of sections\n");
free(sections);
RETURN_BROKEN;
}
if (!nsects) {
if (!matcher)
cli_dbgmsg("MACHO: ------------------\n");
continue;
}
sections = (struct cli_exe_section *)cli_realloc2(sections, (sect + nsects) * sizeof(struct cli_exe_section));
if (!sections) {
cli_errmsg("cli_scanmacho: Can't allocate memory for 'sections'\n");
return matcher ? -1 : CL_EMEM;
}
for (j = 0; j < nsects; j++) {
if (m64) {
if (fmap_readn(map, &section64, at, sizeof(section64)) != sizeof(section64)) {
cli_dbgmsg("cli_scanmacho: Can't read section\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(section64);
sections[sect].rva = EC64(section64.addr, conv);
sections[sect].vsz = EC64(section64.size, conv);
sections[sect].raw = EC32(section64.offset, conv);
section64.align = 1 << EC32(section64.align, conv);
sections[sect].rsz = sections[sect].vsz + (section64.align - (sections[sect].vsz % section64.align)) % section64.align; /* most likely we can assume it's the same as .vsz */
strncpy(name, section64.sectname, sizeof(name));
name[sizeof(name) - 1] = '\0';
} else {
if (fmap_readn(map, &section, at, sizeof(section)) != sizeof(section)) {
cli_dbgmsg("cli_scanmacho: Can't read section\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(section);
sections[sect].rva = EC32(section.addr, conv);
sections[sect].vsz = EC32(section.size, conv);
sections[sect].raw = EC32(section.offset, conv);
if ((uint64_t)1 << EC32(section.align, conv) > INT32_MAX) {
cli_dbgmsg("cli_scanmacho: Section aligned is malformed\n");
free(sections);
RETURN_BROKEN;
}
section.align = 1 << EC32(section.align, conv);
sections[sect].rsz = sections[sect].vsz + (section.align - (sections[sect].vsz % section.align)) % section.align;
strncpy(name, section.sectname, sizeof(name));
name[sizeof(name) - 1] = '\0';
}
if (!matcher) {
cli_dbgmsg("MACHO: --- Section %u ---\n", sect);
cli_dbgmsg("MACHO: Name: %s\n", name);
cli_dbgmsg("MACHO: Virtual address: 0x%x\n", (unsigned int)sections[sect].rva);
cli_dbgmsg("MACHO: Virtual size: %u\n", (unsigned int)sections[sect].vsz);
cli_dbgmsg("MACHO: Raw size: %u\n", (unsigned int)sections[sect].rsz);
if (sections[sect].raw)
cli_dbgmsg("MACHO: File offset: %u\n", (unsigned int)sections[sect].raw);
}
sect++;
}
if (!matcher)
cli_dbgmsg("MACHO: ------------------\n");
} else if (arch && (load_cmd.cmd == 0x4 || load_cmd.cmd == 0x5)) { /* LC_(UNIX)THREAD */
at += 8;
switch (arch) {
case 1: /* x86 */
{
struct macho_thread_state_x86 thread_state_x86;
if (fmap_readn(map, &thread_state_x86, at, sizeof(thread_state_x86)) != sizeof(thread_state_x86)) {
cli_dbgmsg("cli_scanmacho: Can't read thread_state_x86\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(thread_state_x86);
break;
}
case 2: /* PPC */
{
struct macho_thread_state_ppc thread_state_ppc;
if (fmap_readn(map, &thread_state_ppc, at, sizeof(thread_state_ppc)) != sizeof(thread_state_ppc)) {
cli_dbgmsg("cli_scanmacho: Can't read thread_state_ppc\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(thread_state_ppc);
ep = EC32(thread_state_ppc.srr0, conv);
break;
}
case 3: /* PPC64 */
{
struct macho_thread_state_ppc64 thread_state_ppc64;
if (fmap_readn(map, &thread_state_ppc64, at, sizeof(thread_state_ppc64)) != sizeof(thread_state_ppc64)) {
cli_dbgmsg("cli_scanmacho: Can't read thread_state_ppc64\n");
free(sections);
RETURN_BROKEN;
}
at += sizeof(thread_state_ppc64);
ep = EC64(thread_state_ppc64.srr0, conv);
break;
}
default:
cli_errmsg("cli_scanmacho: Invalid arch setting!\n");
free(sections);
return matcher ? -1 : CL_EARG;
}
} else {
if (EC32(load_cmd.cmdsize, conv) > sizeof(load_cmd))
at += EC32(load_cmd.cmdsize, conv) - sizeof(load_cmd);
}
}
if (ep) {
if (!matcher)
cli_dbgmsg("Entry Point: 0x%x\n", ep);
if (sections) {
ep = cli_rawaddr(ep, sections, sect, &err);
if (err) {
cli_dbgmsg("cli_scanmacho: Can't calculate EP offset\n");
free(sections);
return matcher ? -1 : CL_EFORMAT;
}
if (!matcher)
cli_dbgmsg("Entry Point file offset: %u\n", ep);
}
}
if (matcher) {
fileinfo->ep = ep;
fileinfo->nsections = sect;
fileinfo->sections = sections;
return 0;
} else {
free(sections);
return CL_SUCCESS;
}
}
int cli_machoheader(cli_ctx *ctx, struct cli_exe_info *fileinfo)
{
return cli_scanmacho(ctx, fileinfo);
}
int cli_scanmacho_unibin(cli_ctx *ctx)
{
struct macho_fat_header fat_header;
struct macho_fat_arch fat_arch;
unsigned int conv, i, matcher = 0;
int ret = CL_CLEAN;
fmap_t *map = ctx->fmap;
ssize_t at;
if (fmap_readn(map, &fat_header, 0, sizeof(fat_header)) != sizeof(fat_header)) {
cli_dbgmsg("cli_scanmacho_unibin: Can't read fat_header\n");
return CL_EFORMAT;
}
at = sizeof(fat_header);
if (fat_header.magic == 0xcafebabe) {
conv = 0;
} else if (fat_header.magic == 0xbebafeca) {
conv = 1;
} else {
cli_dbgmsg("cli_scanmacho_unibin: Incorrect magic\n");
return CL_EFORMAT;
}
fat_header.nfats = EC32(fat_header.nfats, conv);
if ((fat_header.nfats & 0xffff) >= 39) /* Java Bytecode */
return CL_CLEAN;
if (fat_header.nfats > 32) {
cli_dbgmsg("cli_scanmacho_unibin: Invalid number of architectures\n");
return CL_EFORMAT;
}
cli_dbgmsg("UNIBIN: Number of architectures: %u\n", (unsigned int)fat_header.nfats);
for (i = 0; i < fat_header.nfats; i++) {
if (fmap_readn(map, &fat_arch, at, sizeof(fat_arch)) != sizeof(fat_arch)) {
cli_dbgmsg("cli_scanmacho_unibin: Can't read fat_arch\n");
RETURN_BROKEN;
}
at += sizeof(fat_arch);
fat_arch.offset = EC32(fat_arch.offset, conv);
fat_arch.size = EC32(fat_arch.size, conv);
cli_dbgmsg("UNIBIN: Binary %u of %u\n", i + 1, fat_header.nfats);
cli_dbgmsg("UNIBIN: File offset: %u\n", fat_arch.offset);
cli_dbgmsg("UNIBIN: File size: %u\n", fat_arch.size);
/* The offset must be greater than the location of the header or we risk
re-scanning the same data over and over again. The scan recursion max
will save us, but it will still cause other problems and waste CPU. */
if (fat_arch.offset < at) {
cli_dbgmsg("Invalid fat offset: %d\n", fat_arch.offset);
RETURN_BROKEN;
}
ret = cli_magic_scan_nested_fmap_type(map, fat_arch.offset, fat_arch.size, ctx, CL_TYPE_ANY, NULL);
if (ret == CL_VIRUS)
break;
}
return ret; /* result from the last binary */
}
int cli_unpackmacho(cli_ctx *ctx)
{
char *tempfile;
int ndesc;
struct cli_bc_ctx *bc_ctx;
int ret;
/* Bytecode BC_MACHO_UNPACKER hook */
bc_ctx = cli_bytecode_context_alloc();
if (!bc_ctx) {
cli_errmsg("cli_scanelf: can't allocate memory for bc_ctx\n");
return CL_EMEM;
}
cli_bytecode_context_setctx(bc_ctx, ctx);
ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_MACHO_UNPACKER, ctx->fmap);
switch (ret) {
case CL_VIRUS:
cli_bytecode_context_destroy(bc_ctx);
return CL_VIRUS;
case CL_SUCCESS:
ndesc = cli_bytecode_context_getresult_file(bc_ctx, &tempfile);
cli_bytecode_context_destroy(bc_ctx);
if (ndesc != -1 && tempfile) {
if (ctx->engine->keeptmp)
cli_dbgmsg("cli_scanmacho: Unpacked and rebuilt executable saved in %s\n", tempfile);
else
cli_dbgmsg("cli_scanmacho: Unpacked and rebuilt executable\n");
lseek(ndesc, 0, SEEK_SET);
cli_dbgmsg("***** Scanning rebuilt Mach-O file *****\n");
if (cli_magic_scan_desc(ndesc, tempfile, ctx, NULL) == CL_VIRUS) {
close(ndesc);
CLI_TMPUNLK();
free(tempfile);
return CL_VIRUS;
}
close(ndesc);
CLI_TMPUNLK();
free(tempfile);
return CL_SUCCESS;
}
break;
default:
cli_bytecode_context_destroy(bc_ctx);
}
return CL_CLEAN;
}