/* denyhosts/clamav/libclamav/pdf.h -- 207 lines, 8.2 KiB, C */

/*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Nigel Horne
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __PDF_H
#define __PDF_H
#include "others.h"
/* Number of slots in the filterlist[] array of struct pdf_obj. */
#define PDF_FILTERLIST_MAX         64

/* NOTE(review): presumably the ceiling for pdf_struct.parse_recursion_depth;
 * the check itself is not visible in this header -- confirm. */
#define PDF_OBJECT_RECURSION_LIMIT 25
/*
 * Bookkeeping for one object stream: offsets of the objects and of the
 * id/location pairs that describe them, expected and actual object counts,
 * and the buffer holding the stream contents.
 */
struct objstm_struct {
    uint32_t first;         /* offset of the first object */
    uint32_t current;       /* offset of the object currently being handled */
    uint32_t current_pair;  /* offset of the current (id, location) pair */
    uint32_t length;        /* combined length of all objects, counted from `first` */
    uint32_t n;             /* how many objects the stream is supposed to contain */
    uint32_t nobjs_found;   /* how many objects were actually found in the stream */
    char *streambuf;        /* stream buffer; starts at the first object pair */
    size_t streambuf_len;   /* buffer length: the pairs plus the objects that follow */
};
/*
 * Descriptor for a single PDF object.
 *
 * NOTE(review): only `stream`, `stream_size` and `objstm` are documented in
 * the original header; the remaining field notes are inferred from names and
 * types and should be confirmed against the implementation.
 */
struct pdf_obj {
    uint32_t start;      /* presumably the object's starting offset -- confirm */
    size_t size;         /* presumably the object's size in bytes -- confirm */
    uint32_t id;
    uint32_t flags;
    uint32_t statsflags;
    uint32_t numfilters; /* presumably the count of used filterlist[] slots -- confirm */
    uint32_t filterlist[PDF_FILTERLIST_MAX];
    const char *stream;           /* points at the stream contained in the object */
    size_t stream_size;           /* size of that contained stream */
    struct objstm_struct *objstm; /* NULL unless the object lives inside an object stream (separate buffer) */
    char *path;
};
/* Tag stored in the `type` field of struct pdf_array_node. */
enum pdf_array_type {
    PDF_ARR_UNKNOWN = 0,
    PDF_ARR_STRING  = 1,
    PDF_ARR_ARRAY   = 2,
    PDF_ARR_DICT    = 3
};
/* Tag stored in the `type` field of struct pdf_dict_node. */
enum pdf_dict_type {
    PDF_DICT_UNKNOWN = 0,
    PDF_DICT_STRING  = 1,
    PDF_DICT_ARRAY   = 2,
    PDF_DICT_DICT    = 3
};
/*
 * One element of a struct pdf_array; elements are chained in both
 * directions through `prev` and `next`.
 */
struct pdf_array_node {
    void *data;               /* element payload; presumably interpreted according to `type` -- confirm */
    size_t datasz;            /* presumably the size of `data` in bytes -- confirm */
    enum pdf_array_type type; /* tag describing the payload */
    struct pdf_array_node *prev;
    struct pdf_array_node *next;
};
/*
 * Container for a chain of struct pdf_array_node elements.
 * NOTE(review): `nodes` is presumably the first node and `tail` the last --
 * confirm against the list-building code.
 */
struct pdf_array {
    struct pdf_array_node *nodes;
    struct pdf_array_node *tail;
};
/*
 * One key/value entry of a struct pdf_dict; entries are chained in both
 * directions through `prev` and `next`.
 */
struct pdf_dict_node {
    char *key;               /* entry key */
    void *value;             /* entry value; presumably interpreted according to `type` -- confirm */
    size_t valuesz;          /* presumably the size of `value` in bytes -- confirm */
    enum pdf_dict_type type; /* tag describing the value */
    struct pdf_dict_node *prev;
    struct pdf_dict_node *next;
};
/*
 * Container for a chain of struct pdf_dict_node entries.
 * NOTE(review): `nodes` is presumably the first entry and `tail` the last --
 * confirm against the list-building code.
 */
struct pdf_dict {
    struct pdf_dict_node *nodes;
    struct pdf_dict_node *tail;
};
/*
 * A single metadata value recorded in struct pdf_stats (author, title, ...),
 * together with the bookkeeping filled in while the string was parsed.
 */
struct pdf_stats_entry {
    char *data;

    /* Filled in by pdf_parse_string(). */
    struct pdf_stats_metadata {
        int length;
        struct pdf_obj *obj;
        int success; /* set if finalize succeeds */
    } meta;
};
/*
 * Per-document statistics: a block of counters, one per kind of object or
 * filter, followed by the document's metadata strings.
 */
struct pdf_stats {
    /* ---- counters ---- */
    int32_t ninvalidobjs;     /* invalid objects */
    int32_t njs;              /* javascript objects */
    int32_t nflate;           /* flate-encoded objects */
    int32_t nactivex;         /* ActiveX objects */
    int32_t nflash;           /* flash objects */
    int32_t ncolors;          /* colors */
    int32_t nasciihexdecode;  /* objects filtered with ASCIIHexDecode */
    int32_t nascii85decode;   /* objects filtered with ASCII85Decode */
    int32_t nembeddedfile;    /* embedded files */
    int32_t nimage;           /* image objects */
    int32_t nlzw;             /* objects filtered with LZW */
    int32_t nrunlengthdecode; /* objects filtered with RunLengthDecode */
    int32_t nfaxdecode;       /* objects filtered with CCITT */
    int32_t njbig2decode;     /* objects filtered with JBIG2Decode */
    int32_t ndctdecode;       /* objects filtered with DCTDecode */
    int32_t njpxdecode;       /* objects filtered with JPXDecode */
    int32_t ncrypt;           /* objects filtered with Crypt */
    int32_t nstandard;        /* objects filtered with Standard */
    int32_t nsigned;          /* Signed objects */
    int32_t nopenaction;      /* OpenAction objects */
    int32_t nlaunch;          /* Launch objects */
    int32_t npage;            /* Page objects */
    int32_t nrichmedia;       /* RichMedia objects */
    int32_t nacroform;        /* AcroForm objects */
    int32_t nxfa;             /* XFA objects */

    /* ---- document metadata ---- */
    struct pdf_stats_entry *author;           /* who authored the PDF */
    struct pdf_stats_entry *creator;          /* application that created the PDF */
    struct pdf_stats_entry *producer;         /* application that produced the PDF */
    struct pdf_stats_entry *creationdate;     /* when the PDF was created */
    struct pdf_stats_entry *modificationdate; /* when the PDF was modified */
    struct pdf_stats_entry *title;            /* PDF title */
    struct pdf_stats_entry *subject;          /* PDF subject */
    struct pdf_stats_entry *keywords;         /* PDF keywords */
};
/*
 * Encryption method identifiers, as accepted and returned by decrypt_any(),
 * get_enc_method() and parse_enc_method().
 */
enum enc_method {
    ENC_UNKNOWN  = 0,
    ENC_NONE     = 1,
    ENC_IDENTITY = 2,
    ENC_V2       = 3,
    ENC_AESV2    = 4,
    ENC_AESV3    = 5
};
/*
 * Shared state handed as the first argument to the pdf_* functions declared
 * in this header.
 *
 * NOTE(review): the original header documents none of these fields; every
 * note below is inferred from names and types and must be confirmed against
 * the implementation.
 */
struct pdf_struct {
    /* objects */
    struct pdf_obj **objs; /* pointers to the known objects */
    unsigned nobjs;        /* presumably the number of entries in objs -- confirm */
    unsigned flags;

    /* encryption; the enc_method_* fields presumably hold enum enc_method values -- confirm */
    unsigned enc_method_stream;
    unsigned enc_method_string;
    unsigned enc_method_embeddedfile;
    const char *CF;
    long CF_n;

    /* input */
    const char *map;
    size_t size;
    off_t offset;
    off_t startoff;

    /* scan context and output */
    cli_ctx *ctx;
    const char *dir;
    unsigned files;

    /* key material; each *len field presumably gives the length of the buffer before it -- confirm */
    uint32_t enc_objid;
    char *fileID;
    unsigned fileIDlen;
    char *key;
    unsigned keylen;

    /* statistics */
    struct pdf_stats stats;

    /* object streams */
    struct objstm_struct **objstms; /* pointers to the known object streams */
    uint32_t nobjstms;              /* presumably the number of entries in objstms -- confirm */

    /* presumably compared against PDF_OBJECT_RECURSION_LIMIT -- confirm */
    uint32_t parse_recursion_depth;
};
/* NOTE(review): the users of these two values are not visible in this
 * header; the names suggest a "PDF name handled / not handled" state --
 * confirm. */
#define OBJ_FLAG_PDFNAME_NONE 0x0
#define OBJ_FLAG_PDFNAME_DONE 0x1

/* NOTE(review): presumably the values accepted by the `flags` argument of
 * pdf_extract_obj() -- confirm. */
#define PDF_EXTRACT_OBJ_NONE  0x0
#define PDF_EXTRACT_OBJ_SCAN  0x1
/*
 * Object discovery, parsing and extraction.
 *
 * Only the declarations are visible in this header. The per-function notes
 * are inferred from names and signatures and must be confirmed against the
 * implementation.
 */

/* NOTE(review): presumably the top-level PDF entry point -- confirm. */
cl_error_t cli_pdf(const char *dir,
                   cli_ctx *ctx,
                   off_t offset);

/* NOTE(review): presumably parses `obj` within the document `pdf` -- confirm. */
void pdf_parseobj(struct pdf_struct *pdf,
                  struct pdf_obj *obj);

/* NOTE(review): `flags` presumably takes a PDF_EXTRACT_OBJ_* value -- confirm. */
cl_error_t pdf_extract_obj(struct pdf_struct *pdf,
                           struct pdf_obj *obj,
                           uint32_t flags);

/* NOTE(review): presumably locates the next object in `pdf` -- confirm. */
cl_error_t pdf_findobj(struct pdf_struct *pdf);

/* NOTE(review): presumably looks up an object by `objid`; what is returned
 * when nothing matches is not visible here -- confirm. */
struct pdf_obj *find_obj(struct pdf_struct *pdf,
                         struct pdf_obj *obj,
                         uint32_t objid);

/* NOTE(review): presumably processes the document's encryption settings -- confirm. */
void pdf_handle_enc(struct pdf_struct *pdf);
/*
 * Decryption, object flagging and string handling.
 *
 * Only the declarations are visible in this header. The per-function notes
 * are inferred from names and signatures and must be confirmed against the
 * implementation (including who owns the returned char * buffers).
 */

/* NOTE(review): `length` is a pointer, so it is presumably both input and
 * output -- confirm. */
char *decrypt_any(struct pdf_struct *pdf,
                  uint32_t id,
                  const char *in,
                  size_t *length,
                  enum enc_method enc_method);

enum enc_method get_enc_method(struct pdf_struct *pdf,
                               struct pdf_obj *obj);

/* NOTE(review): `def` is presumably the value returned when `key` is not
 * found in `dict` -- confirm. */
enum enc_method parse_enc_method(const char *dict,
                                 unsigned len,
                                 const char *key,
                                 enum enc_method def);

/* NOTE(review): `enum pdf_flag` is not declared in the visible part of this
 * header; it has to be in scope (via others.h or the including file) before
 * this prototype -- confirm. */
void pdfobj_flag(struct pdf_struct *pdf,
                 struct pdf_obj *obj,
                 enum pdf_flag flag);

char *pdf_finalize_string(struct pdf_struct *pdf,
                          struct pdf_obj *obj,
                          const char *in,
                          size_t len);

/* `meta` has the type of the `meta` member of struct pdf_stats_entry, which
 * the header notes is populated by this function. */
char *pdf_parse_string(struct pdf_struct *pdf,
                       struct pdf_obj *obj,
                       const char *objstart,
                       size_t objsize,
                       const char *str,
                       char **endchar,
                       struct pdf_stats_metadata *meta);
/*
 * Array / dictionary parsing, teardown and printing, plus object-stream
 * processing.
 *
 * Only the declarations are visible in this header. The per-function notes
 * are inferred from names and signatures and must be confirmed against the
 * implementation.
 */

/* NOTE(review): `endchar` is presumably set to where parsing stopped -- confirm. */
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf,
                                  struct pdf_obj *obj,
                                  size_t objsize,
                                  char *begin,
                                  char **endchar);

/* NOTE(review): `endchar` is presumably set to where parsing stopped -- confirm. */
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf,
                                struct pdf_obj *obj,
                                size_t objsize,
                                char *begin,
                                char **endchar);

/* NOTE(review): presumably reports whether `begin` starts an object
 * reference and stores its id through `id`; the meaning of the int result is
 * not visible here -- confirm. */
int is_object_reference(char *begin,
                        char **endchar,
                        uint32_t *id);

void pdf_free_dict(struct pdf_dict *dict);

void pdf_free_array(struct pdf_array *array);

void pdf_print_dict(struct pdf_dict *dict,
                    unsigned long depth);

void pdf_print_array(struct pdf_array *array,
                     unsigned long depth);

cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf,
                                             struct objstm_struct *objstm);
#endif