denyhosts/clamav/libclamav/pdfng.c

1206 lines
32 KiB
C

/*
* Copyright (C) 2014-2022 Cisco and/or its affiliates. All rights reserved.
*
* Author: Shawn Webb
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of portions of this program with the
* OpenSSL library under certain conditions as described in each
* individual source file, and distribute linked combinations
* including the two.
*
* You must obey the GNU General Public License in all respects
* for all of the code used other than OpenSSL. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you
* do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source
* files in the program, then also delete it here.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <zlib.h>
#if HAVE_ICONV
#include <iconv.h>
#endif
#include "clamav.h"
#include "others.h"
#include "pdf.h"
#include "scanners.h"
#include "fmap.h"
#include "str.h"
#include "bytecode.h"
#include "bytecode_api.h"
#include "arc4.h"
#include "rijndael.h"
#include "textnorm.h"
#include "json_api.h"
#include "conv.h"
#include "entconv.h"
char *pdf_convert_utf(char *begin, size_t sz);

/*
 * Convert `sz` bytes starting at `begin` from UTF-16 into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * With iconv available, each encoding in the candidate list is tried in turn
 * and the first one that produces any output wins; the output is capped at
 * `sz` bytes. Without iconv, cli_utf16_to_utf8() does the conversion.
 *
 * Returns the converted string (release it with free()), or NULL when an
 * allocation fails or nothing could be converted.
 */
char *pdf_convert_utf(char *begin, size_t sz)
{
    char *converted = NULL;
    char *input, *output;
#if HAVE_ICONV
    char *candidates[] = {
        "UTF-16",
        NULL};
    size_t idx;
#endif

    /* Work on a private, NUL-padded copy of the input */
    input = cli_calloc(1, sz + 1);
    if (input == NULL)
        return NULL;
    memcpy(input, begin, sz);

#if HAVE_ICONV
    output = cli_calloc(1, sz + 1);
    if (output == NULL) {
        free(input);
        return NULL;
    }

    for (idx = 0; candidates[idx] != NULL; idx++) {
        char *src       = input;
        char *dst       = output;
        size_t src_left = sz;
        size_t dst_left = sz;
        iconv_t cd      = iconv_open("UTF-8", candidates[idx]);

        if (cd == (iconv_t)(-1)) {
            char errbuf[128];

            cli_strerror(errno, errbuf, sizeof(errbuf));
            cli_errmsg("pdf_convert_utf: could not initialize iconv for encoding %s: %s\n", candidates[idx], errbuf);
            continue;
        }

        /* Best effort: the return value is ignored, only the amount of
         * output produced decides whether this encoding is accepted. */
        iconv(cd, (char **)(&src), &src_left, &dst, &dst_left);
        iconv_close(cd);

        if (dst_left != sz) {
            /* Something was decoded: terminate it and hand back a copy */
            output[sz - dst_left] = '\0';
            converted             = strdup(output);
            break;
        }
        /* Otherwise decoding failed right from the start; try the next one */
    }
#else
    output = cli_utf16_to_utf8(input, sz, E_UTF16);
    if (output == NULL) {
        free(input);
        return NULL;
    }

    converted = strdup(output);
#endif

    free(input);
    free(output);

    return converted;
}
/*
 * Helper for is_object_reference(): skip leading whitespace at *cursor and
 * parse one unsigned decimal number that is followed by whitespace before
 * `end`. On success stores the number in *value, advances *cursor past the
 * digits and returns 1; otherwise returns 0 and leaves *cursor untouched.
 */
static int objref_read_number(char **cursor, char *end, unsigned long *value)
{
    char *p = *cursor;
    char *stop;
    unsigned long n;

    /* Skip whitespace. Bytes are cast to unsigned char because the ctype
     * functions have undefined behavior for negative values, and PDF data
     * may contain bytes >= 0x80. */
    while (p < end && isspace((unsigned char)p[0]))
        p++;

    if (p >= end)
        return 0;

    if (!isdigit((unsigned char)p[0]))
        return 0;

    /* Ensure strtoul() isn't going to go past our buffer: there must be a
     * whitespace byte after the token and before `end`. */
    stop = p + 1;
    while (stop < end && !isspace((unsigned char)stop[0]))
        stop++;

    if (stop >= end)
        return 0;

    /* strtoul() only sets errno on failure, so clear it first; otherwise a
     * stale errno would make a legitimate ULONG_MAX look like an overflow. */
    errno = 0;
    n     = strtoul(p, &stop, 10);
    if (n == ULONG_MAX && errno)
        return 0;

    *value  = n;
    *cursor = stop;
    return 1;
}

/*
 * Check whether the text at `begin` is a PDF indirect object reference.
 *
 * Object references are always this format:
 *     XXXX YYYY R
 * Where XXXX is the object ID and YYYY is the revision ID of the object.
 * The letter R signifies that this is a reference.
 *
 * In between each item can be an arbitrary amount of whitespace.
 *
 * begin   - start of the text to examine.
 * endchar - in: one past the last readable byte; out (only when a reference
 *           was found): the byte right after the 'R'.
 * id      - optional out: (object ID << 8) | (revision ID & 0xff).
 *
 * Returns 1 if a reference was recognized, 0 otherwise.
 */
int is_object_reference(char *begin, char **endchar, uint32_t *id)
{
    char *end = *endchar;
    char *p   = begin;
    unsigned long objnum, revnum;
    uint32_t t;

    if (!objref_read_number(&p, end, &objnum))
        return 0;

    if (!objref_read_number(&p, end, &revnum))
        return 0;

    t = (uint32_t)(objnum << 8);
    t |= (uint32_t)(revnum & 0xff);

    /* Skip even more whitespace */
    while (p < end && isspace((unsigned char)p[0]))
        p++;

    if (p >= end)
        return 0;

    if (p[0] != 'R')
        return 0;

    *endchar = p + 1;
    if (id)
        *id = t;

    return 1;
}
static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t *length)
{
enum enc_method enc;
/* handled only once in cli_pdf() */
// pdf_handle_enc(pdf);
if (pdf->flags & (1 << DECRYPTABLE_PDF)) {
enc = get_enc_method(pdf, obj);
return decrypt_any(pdf, obj->id, in, length, enc);
}
return NULL;
}
/*
 * Turn the raw bytes of a PDF string into a heap-allocated, NUL-terminated
 * string, in three steps:
 *   1. decode backslash escape sequences (three-digit octal and \n, \r, ...),
 *   2. decrypt the result when the document is flagged ENCRYPTED_PDF,
 *   3. if any byte is NUL or > 0x7f, run it through pdf_convert_utf().
 *
 * pdf - document state (flags are consulted for encryption).
 * obj - object the string belongs to (used for decryption).
 * in  - raw string bytes; may contain embedded NULs.
 * len - number of bytes at `in`.
 *
 * Returns the finished string (release it with free()), or NULL when `in` is
 * NULL, an allocation fails, decryption fails, or the UTF conversion fails.
 */
char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
{
    char *wrkstr, *output = NULL;
    size_t wrklen = len, outlen, i;
    unsigned int likelyutf = 0;

    if (!in)
        return NULL;

    /* get a working copy */
    wrkstr = cli_calloc(len + 1, sizeof(char));
    if (!wrkstr)
        return NULL;
    memcpy(wrkstr, in, len);

    /* convert PDF specific escape sequences, like octal sequences */
    /* TODO: replace the escape sequences directly in the wrkstr */
    /* memchr() rather than strchr(): the data may contain embedded NUL bytes
     * (e.g. UTF-16), and a backslash located after one must still be seen. */
    if (memchr(wrkstr, '\\', wrklen)) {
        output = cli_calloc(wrklen + 1, sizeof(char));
        if (!output) {
            free(wrkstr);
            return NULL;
        }

        outlen = 0;
        for (i = 0; i < wrklen; ++i) {
            if ((i + 1 < wrklen) && wrkstr[i] == '\\') {
                /* ctype functions require values representable as unsigned char */
                if ((i + 3 < wrklen) &&
                    (isdigit((unsigned char)wrkstr[i + 1]) &&
                     isdigit((unsigned char)wrkstr[i + 2]) &&
                     isdigit((unsigned char)wrkstr[i + 3]))) {
                    /* octal sequence */
                    char octal[4], *check;
                    unsigned long value;

                    memcpy(octal, &wrkstr[i + 1], 3);
                    octal[3] = '\0';

                    value = (char)strtoul(octal, &check, 8);
                    /* check if all characters were converted (digits 8 and 9
                     * are not octal; such a sequence produces no output) */
                    if (check == &octal[3])
                        output[outlen++] = value;

                    i += 3; /* 4 with for loop [\ddd] */
                } else {
                    /* other sequences */
                    switch (wrkstr[i + 1]) {
                        case 'n':
                            output[outlen++] = 0x0a;
                            break;
                        case 'r':
                            output[outlen++] = 0x0d;
                            break;
                        case 't':
                            output[outlen++] = 0x09;
                            break;
                        case 'b':
                            output[outlen++] = 0x08;
                            break;
                        case 'f':
                            output[outlen++] = 0x0c;
                            break;
                        case '(':
                            output[outlen++] = 0x28;
                            break;
                        case ')':
                            output[outlen++] = 0x29;
                            break;
                        case '\\':
                            output[outlen++] = 0x5c;
                            break;
                        default:
                            /* IGNORE THE REVERSE SOLIDUS - ISO 32000-1:2008 */
                            /* NOTE(review): the character following the
                             * backslash is dropped as well; confirm whether
                             * that is intended. */
                            break;
                    }

                    i += 1; /* 2 with for loop [\c] */
                }
            } else {
                output[outlen++] = wrkstr[i];
            }
        }

        /* `output` was zero-filled with room for wrklen + 1 bytes and
         * outlen <= wrklen, so it is already NUL-terminated: adopt it as the
         * working string instead of copying it once more. */
        free(wrkstr);
        wrkstr = output;
        output = NULL;
        wrklen = outlen;
    }

    /* check for encryption and decrypt */
    if (pdf->flags & (1 << ENCRYPTED_PDF)) {
        size_t tmpsz = wrklen;

        output = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
        outlen = tmpsz;
        free(wrkstr);

        if (!output)
            return NULL;

        /* Copy into a buffer with a guaranteed terminating NUL */
        wrkstr = cli_calloc(outlen + 1, sizeof(char));
        if (!wrkstr) {
            free(output);
            return NULL;
        }
        memcpy(wrkstr, output, outlen);
        free(output);
        wrklen = outlen;
    }

    /* check for UTF-* and convert to UTF-8 */
    for (i = 0; i < wrklen; ++i) {
        if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
            likelyutf = 1;
            break;
        }
    }

    if (likelyutf) {
        output = pdf_convert_utf(wrkstr, wrklen);
        free(wrkstr);
        wrkstr = output;
    }

    return wrkstr;
}
/* Delete the temporary file an object was extracted to and forget its path. */
static void pdf_parse_string_drop_extracted(struct pdf_obj *extracted)
{
    cli_unlink(extracted->path);
    free(extracted->path);
    extracted->path = NULL;
}

/*
 * Extract a string value from an object.
 *
 * pdf      - document state.
 * obj      - object being parsed.
 * objstart - start of the data to search.
 * objsize  - number of bytes available at objstart.
 * str      - optional key; when non-NULL the value following the first
 *            occurrence of this key is parsed, otherwise parsing starts at
 *            objstart.
 * endchar  - optional out: set on success to the last byte consumed (the
 *            closing '>' for hex strings, the byte before ')' for literal
 *            strings, the byte after 'R' for references).
 * meta     - optional out: length, owning object and whether the string
 *            could be finalized (success = 1) or was copied raw (success = 0).
 *
 * The value may be an indirect reference (the referenced object is extracted
 * to a temporary file and read back), a hex string <...> or a literal
 * string (...).
 *
 * Returns a heap-allocated string (release it with free()) or NULL.
 */
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
{
    const char *q = objstart;
    char *p1, *p2;
    size_t len, checklen;
    char *res = NULL;
    uint32_t objid;

    if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
        cli_dbgmsg("pdf_parse_string: Recursion limit reached.\n");
        return NULL;
    }

    if (obj->objstm) {
        if (objsize > (size_t)(obj->objstm->streambuf_len - (objstart - obj->objstm->streambuf))) {
            /* Possible attempt to exploit bb11980 */
            cli_dbgmsg("Malformed PDF: Alleged size of obj in object stream in PDF would extend further than the object stream data.\n");
            return NULL;
        }
    } else {
        if (objsize > (size_t)(pdf->size - (objstart - pdf->map))) {
            /* Possible attempt to exploit bb11980 */
            cli_dbgmsg("Malformed PDF: Alleged size of obj in PDF would extend further than the PDF data.\n");
            return NULL;
        }
    }

    /*
     * Yes, all of this is required to find the start and end of a potentially UTF-* string
     *
     * First, find the key of the key/value pair we're looking for in this object.
     * Second, determine whether the value points to another object (NOTE: this is sketchy behavior)
     * Third, attempt to determine if we're ASCII or UTF-*
     * If we're ASCII, just copy the ASCII string into a new heap-allocated string and return that
     * Fourth, Attempt to decode from UTF-* to UTF-8
     */

    if (str) {
        checklen = strlen(str);

        if (objsize < checklen + 3)
            return NULL;

        for (p1 = (char *)q; (size_t)(p1 - q) < objsize - checklen; p1++)
            if (!strncmp(p1, str, checklen))
                break;

        if ((size_t)(p1 - q) == objsize - checklen)
            return NULL;

        p1 += checklen;
    } else {
        p1 = (char *)q;
    }

    /* ctype functions require values representable as unsigned char */
    while ((size_t)(p1 - q) < objsize && isspace((unsigned char)p1[0]))
        p1++;

    if ((size_t)(p1 - q) == objsize)
        return NULL;

    /*
     * If str is non-null:
     *     We should be at the start of the string, minus 1
     * Else:
     *     We should be at the start of the string
     */

    p2 = (char *)(q + objsize);
    if (is_object_reference(p1, &p2, &objid)) {
        cl_error_t ret;
        struct pdf_obj *newobj;
        char *begin, *p3;
        STATBUF sb;
        uint32_t objflags;
        int fd;
        size_t objsize2;

        newobj = find_obj(pdf, obj, objid);
        if (!(newobj))
            return NULL;

        /* A self-reference would recurse forever */
        if (newobj == obj)
            return NULL;

        /*
         * If pdf_handlename hasn't been called for this object,
         * then parse the object prior to extracting it
         */
        if (!(newobj->statsflags & OBJ_FLAG_PDFNAME_DONE))
            pdf_parseobj(pdf, newobj);

        /* Extract the object. Force pdf_extract_obj() to dump this object. */
        objflags = newobj->flags;
        newobj->flags |= (1 << OBJ_FORCEDUMP);

        pdf->parse_recursion_depth++;
        ret = pdf_extract_obj(pdf, newobj, PDF_EXTRACT_OBJ_NONE);
        pdf->parse_recursion_depth--;

        /* Restore the flags on every path, so that a failed extraction does
         * not leave OBJ_FORCEDUMP set on the object. */
        newobj->flags = objflags;

        if (ret != CL_SUCCESS) {
            return NULL;
        }

        if (!(newobj->path))
            return NULL;

        fd = open(newobj->path, O_RDONLY | O_BINARY);
        if (fd == -1) {
            pdf_parse_string_drop_extracted(newobj);
            return NULL;
        }

        if (FSTAT(fd, &sb)) {
            close(fd);
            pdf_parse_string_drop_extracted(newobj);
            return NULL;
        }

        if (sb.st_size) {
            begin = calloc(1, sb.st_size + 1);
            if (!(begin)) {
                close(fd);
                pdf_parse_string_drop_extracted(newobj);
                return NULL;
            }

            if (read(fd, begin, sb.st_size) != sb.st_size) {
                close(fd);
                pdf_parse_string_drop_extracted(newobj);
                free(begin);
                return NULL;
            }

            /* Skip leading whitespace in the extracted data */
            p3       = begin;
            objsize2 = sb.st_size;
            while ((size_t)(p3 - begin) < objsize2 && isspace((unsigned char)p3[0])) {
                p3++;
                objsize2--;
            }

            switch (*p3) {
                case '(':
                case '<':
                    /* The extracted data is itself a string object */
                    pdf->parse_recursion_depth++;
                    res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
                    pdf->parse_recursion_depth--;
                    break;
                default:
                    res = pdf_finalize_string(pdf, obj, begin, objsize2);
                    if (!res) {
                        /* Could not finalize: fall back to a raw copy */
                        res = cli_calloc(1, objsize2 + 1);
                        if (!(res)) {
                            close(fd);
                            pdf_parse_string_drop_extracted(newobj);
                            free(begin);
                            return NULL;
                        }

                        memcpy(res, begin, objsize2);
                        res[objsize2] = '\0';

                        if (meta) {
                            meta->length  = objsize2;
                            meta->obj     = obj;
                            meta->success = 0;
                        }
                    } else if (meta) {
                        meta->length  = strlen(res);
                        meta->obj     = obj;
                        meta->success = 1;
                    }
                    break;
            }

            free(begin);
        }

        close(fd);
        pdf_parse_string_drop_extracted(newobj);

        if (endchar)
            *endchar = p2;

        return res;
    }

    if (*p1 == '<') {
        /* Hex string */

        p2 = p1 + 1;
        while ((size_t)(p2 - objstart) < objsize && *p2 != '>')
            p2++;

        if ((size_t)(p2 - objstart) == objsize) {
            return NULL;
        }

        /* The angle brackets are part of the returned data */
        res = pdf_finalize_string(pdf, obj, p1, (p2 - p1) + 1);
        if (!res) {
            res = cli_calloc(1, (p2 - p1) + 2);
            if (!(res))
                return NULL;

            memcpy(res, p1, (p2 - p1) + 1);
            res[(p2 - p1) + 1] = '\0';

            if (meta) {
                meta->length  = (p2 - p1) + 1;
                meta->obj     = obj;
                meta->success = 0;
            }
        } else if (meta) {
            meta->length  = strlen(res);
            meta->obj     = obj;
            meta->success = 1;
        }

        if (res && endchar)
            *endchar = p2;

        return res;
    }

    /* We should be at the start of a string literal (...) here */
    if (*p1 != '(')
        return NULL;

    /* Make a best effort to find the end of the string and determine if UTF-* */
    p2 = ++p1;

    while (p2 < objstart + objsize) {
        int shouldbreak = 0;

        switch (*p2) {
            case '\\':
                /* Skip the escaped character too */
                p2++;
                break;
            case ')':
                shouldbreak = 1;
                break;
            default:
                break;
        }

        if (shouldbreak) {
            /* Step back onto the last byte of the string contents */
            p2--;
            break;
        }

        p2++;
    }

    if (p2 >= objstart + objsize)
        return NULL;

    len = (size_t)(p2 - p1) + 1;

    res = pdf_finalize_string(pdf, obj, p1, len);
    if (!res) {
        res = cli_calloc(1, len + 1);
        if (!(res))
            return NULL;

        memcpy(res, p1, len);
        res[len] = '\0';

        if (meta) {
            meta->length  = len;
            meta->obj     = obj;
            meta->success = 0;
        }
    } else if (meta) {
        meta->length  = strlen(res);
        meta->obj     = obj;
        meta->success = 1;
    }

    if (res && endchar)
        *endchar = p2;

    return res;
}
/*
 * Parse a PDF dictionary (<< /Key value ... >>) into a linked list of nodes.
 *
 * pdf     - document state.
 * obj     - object that contains the dictionary.
 * objsize - number of bytes, counted from the start of the object, that may
 *           be examined.
 * begin   - must point at the opening "<<" inside the object.
 * endchar - optional out: set to the closing ">>" of the dictionary.
 *
 * Values may be strings, arrays or nested dictionaries; anything else is
 * stored as a raw string. On a parse error or allocation failure in the
 * middle of the dictionary, the entries collected so far are returned.
 *
 * Returns the dictionary (release it with pdf_free_dict()) or NULL when the
 * arguments are invalid, the recursion limit is exceeded, or no well-formed
 * dictionary is found at `begin`.
 */
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
{
    struct pdf_dict *res       = NULL;
    struct pdf_dict_node *node = NULL;
    const char *objstart;
    char *end;
    unsigned int in_string = 0, ninner = 0;

    /* Sanity checking */
    if (!(pdf) || !(obj) || !(begin))
        return NULL;

    if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
        cli_dbgmsg("pdf_parse_dict: Recursion limit reached\n");
        return NULL;
    }

    /* Every bounds check below computes `objsize - 2`; for objsize < 2 that
     * would wrap around and disable the checks. Such a region cannot hold
     * "<<" anyway. */
    if (objsize < 2)
        return NULL;

    objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                             : (const char *)(obj->start + pdf->map);

    if (begin < objstart || (size_t)(begin - objstart) >= objsize - 2)
        return NULL;

    if (begin[0] != '<' || begin[1] != '<')
        return NULL;

    /* Find the end of the dictionary */
    end = begin;
    while ((size_t)(end - objstart) < objsize) {
        int increment = 1;

        if (in_string) {
            if (*end == '\\') {
                /* Skip the escaped character */
                end += 2;
                continue;
            }

            if (*end == ')')
                in_string = 0;

            end++;
            continue;
        }

        switch (*end) {
            case '(':
                in_string = 1;
                break;
            case '<':
                if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '<')
                    ninner++;
                increment = 2;
                break;
            case '>':
                if ((size_t)(end - objstart) <= objsize - 2 && end[1] == '>')
                    ninner--;
                increment = 2;
                break;
            default:
                break;
        }

        /* Stop on the ">>" that closes the outermost dictionary */
        if ((size_t)(end - objstart) <= objsize - 2)
            if (end[0] == '>' && end[1] == '>' && ninner == 0)
                break;

        end += increment;
    }

    /* More sanity checking */
    if ((size_t)(end - objstart) >= objsize - 2)
        return NULL;

    if (end[0] != '>' || end[1] != '>')
        return NULL;

    res = cli_calloc(1, sizeof(struct pdf_dict));
    if (!(res))
        return NULL;

    /* Loop through each element of the dictionary */
    begin += 2;
    while (begin < end) {
        char *val = NULL, *key = NULL, *p1, *p2;
        struct pdf_dict *dict = NULL;
        struct pdf_array *arr = NULL;
        unsigned int nhex     = 0, i;

        /* Skip any whitespaces (ctype functions need unsigned char values) */
        while (begin < end && isspace((unsigned char)begin[0]))
            begin++;

        if (begin == end)
            break;

        /* Get the key */
        p1 = begin + 1;
        while (p1 < end && !isspace((unsigned char)p1[0])) {
            int breakout = 0;

            switch (*p1) {
                case '<':
                case '[':
                case '(':
                case '/':
                case '\r':
                case '\n':
                case ' ':
                case '\t':
                    breakout = 1;
                    break;
                case '#':
                    /* Key name obfuscated with hex characters */
                    nhex++;
                    if (p1 > end - 3) {
                        /* Not enough room for two hex digits */
                        if (endchar) {
                            *endchar = end;
                        }
                        return res;
                    }
                    break;
                default:
                    break;
            }

            if (breakout)
                break;

            p1++;
        }

        if (p1 == end)
            break;

        key = cli_calloc((p1 - begin) + 2, 1);
        if (!(key))
            break;

        if (nhex == 0) {
            /* Key isn't obfuscated with hex. Just copy the string */
            strncpy(key, begin, p1 - begin);
            key[p1 - begin] = '\0';
        } else {
            /* Decode each #XX into the byte it stands for */
            for (i = 0, p2 = begin; p2 < p1; p2++, i++) {
                if (*p2 == '#') {
                    cli_hex2str_to(p2 + 1, key + i, 2);
                    p2 += 2;
                } else {
                    key[i] = *p2;
                }
            }
        }

        /* Now for the value */
        begin = p1;

        /* Skip any whitespaces */
        while (begin < end && isspace((unsigned char)begin[0]))
            begin++;

        if (begin == end) {
            free(key);
            break;
        }

        switch (begin[0]) {
            case '(':
                pdf->parse_recursion_depth++;
                val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
                pdf->parse_recursion_depth--;
                begin = p1 + 2;
                break;
            case '[':
                pdf->parse_recursion_depth++;
                arr = pdf_parse_array(pdf, obj, end - objstart, begin, &p1);
                pdf->parse_recursion_depth--;
                begin = p1 + 1;
                break;
            case '<':
                if ((size_t)(begin - objstart) < objsize - 2) {
                    if (begin[1] == '<') {
                        /* Nested dictionary */
                        pdf->parse_recursion_depth++;
                        dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &p1);
                        pdf->parse_recursion_depth--;
                        begin = p1 + 2;
                        break;
                    }
                }

                /* Single '<': hex string */
                pdf->parse_recursion_depth++;
                val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &p1, NULL);
                pdf->parse_recursion_depth--;
                begin = p1 + 2;
                break;
            default:
                /* Name, number, reference, ...: runs up to the next '/' or '>' */
                p1 = (begin[0] == '/') ? begin + 1 : begin;
                while (p1 < end) {
                    int shouldbreak = 0;

                    switch (p1[0]) {
                        case '>':
                        case '/':
                            shouldbreak = 1;
                            break;
                        default:
                            break;
                    }

                    if (shouldbreak)
                        break;

                    p1++;
                }

                /* If the value is an object reference, p1 is moved to just after its 'R' */
                is_object_reference(begin, &p1, NULL);

                val = cli_calloc((p1 - begin) + 2, 1);
                if (!(val))
                    break;

                strncpy(val, begin, p1 - begin);
                val[p1 - begin] = '\0';

                if (p1[0] != '/')
                    begin = p1 + 1;
                else
                    begin = p1;

                break;
        }

        /* Parse error, just return what we could */
        if (!(val) && !(dict) && !(arr)) {
            free(key);
            break;
        }

        node = cli_calloc(1, sizeof(struct pdf_dict_node));
        if (!(node)) {
            free(key);
            if (dict)
                pdf_free_dict(dict);
            if (val)
                free(val);
            if (arr)
                pdf_free_array(arr);
            break;
        }

        /* Append the node to the list */
        if (!(res->nodes)) {
            res->nodes = res->tail = node;
        } else {
            node->prev = res->tail;
            if (res->tail)
                res->tail->next = node;
            res->tail = node;
        }

        node->key = key;
        if ((val)) {
            node->value   = val;
            node->valuesz = strlen(val);
            node->type    = PDF_DICT_STRING;
        } else if ((arr)) {
            node->value   = arr;
            node->valuesz = sizeof(struct pdf_array);
            node->type    = PDF_DICT_ARRAY;
        } else if ((dict)) {
            node->value   = dict;
            node->valuesz = sizeof(struct pdf_dict);
            node->type    = PDF_DICT_DICT;
        }
    }

    if (endchar)
        *endchar = end;

    return res;
}
/*
 * Parse a PDF array ([ item item ... ]) into a linked list of nodes.
 *
 * pdf     - document state.
 * obj     - object that contains the array.
 * objsize - number of bytes, counted from the start of the object, that may
 *           be examined.
 * begin   - must point at the opening '[' inside the object.
 * endchar - optional out: set to the closing ']' of the array.
 *
 * Items may be strings, nested arrays or dictionaries; anything else is
 * stored as a raw whitespace-delimited string (object references are kept
 * whole). On a parse error or allocation failure in the middle of the array,
 * the items collected so far are returned.
 *
 * Returns the array (release it with pdf_free_array()) or NULL when the
 * arguments are invalid, the recursion limit is exceeded, or no well-formed
 * array is found at `begin`.
 */
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsize, char *begin, char **endchar)
{
    struct pdf_array *res       = NULL;
    struct pdf_array_node *node = NULL;
    const char *objstart;
    char *end;
    int in_string = 0, ninner = 0;

    /* Sanity checking */
    if (!(pdf) || !(obj) || !(begin))
        return NULL;

    if (PDF_OBJECT_RECURSION_LIMIT < pdf->parse_recursion_depth) {
        cli_dbgmsg("pdf_parse_array: Recursion limit reached\n");
        return NULL;
    }

    objstart = (obj->objstm) ? (const char *)(obj->start + obj->objstm->streambuf)
                             : (const char *)(obj->start + pdf->map);

    if (begin < objstart || (size_t)(begin - objstart) >= objsize)
        return NULL;

    if (begin[0] != '[')
        return NULL;

    /* Find the end of the array */
    end = begin;
    while ((size_t)(end - objstart) < objsize) {
        if (in_string) {
            if (*end == '\\') {
                /* Skip the escaped character */
                end += 2;
                continue;
            }

            if (*end == ')')
                in_string = 0;

            end++;
            continue;
        }

        switch (*end) {
            case '(':
                in_string = 1;
                break;
            case '[':
                ninner++;
                break;
            case ']':
                ninner--;
                break;
            default:
                break;
        }

        /* Stop on the ']' that closes the outermost array */
        if (*end == ']' && ninner == 0)
            break;

        end++;
    }

    /* More sanity checking */
    if ((size_t)(end - objstart) >= objsize)
        return NULL;

    if (*end != ']')
        return NULL;

    res = cli_calloc(1, sizeof(struct pdf_array));
    if (!(res))
        return NULL;

    begin++;
    while (begin < end) {
        char *val = NULL, *p1;
        struct pdf_array *arr = NULL;
        struct pdf_dict *dict = NULL;

        /* Skip whitespace. The cast is needed because ctype functions have
         * undefined behavior for negative values and PDF data may contain
         * bytes >= 0x80. */
        while (begin < end && isspace((unsigned char)begin[0]))
            begin++;

        if (begin == end)
            break;

        switch (begin[0]) {
            case '<':
                if ((size_t)(begin - objstart) < objsize - 2 && begin[1] == '<') {
                    pdf->parse_recursion_depth++;
                    dict = pdf_parse_dict(pdf, obj, end - objstart, begin, &begin);
                    pdf->parse_recursion_depth--;
                    begin += 2;
                    break;
                }

                /* Not a dictionary. Intentionally fall through. */
                /* fall-through */
            case '(':
                pdf->parse_recursion_depth++;
                val = pdf_parse_string(pdf, obj, begin, end - objstart, NULL, &begin, NULL);
                pdf->parse_recursion_depth--;
                begin += 2;
                break;
            case '[':
                pdf->parse_recursion_depth++;
                arr = pdf_parse_array(pdf, obj, end - objstart, begin, &begin);
                pdf->parse_recursion_depth--;
                begin += 1;
                break;
            default:
                /* Either an object reference (kept whole) or a token that
                 * runs up to the next whitespace */
                p1 = end;
                if (!is_object_reference(begin, &p1, NULL)) {
                    p1 = begin + 1;
                    while (p1 < end && !isspace((unsigned char)p1[0]))
                        p1++;
                }

                val = cli_calloc((p1 - begin) + 2, 1);
                if (!(val))
                    break;

                strncpy(val, begin, p1 - begin);
                val[p1 - begin] = '\0';

                begin = p1;
                break;
        }

        /* Parse error, just return what we could */
        if (!(val) && !(arr) && !(dict))
            break;

        {
            struct pdf_array_node *fresh = calloc(1, sizeof(struct pdf_array_node));

            if (!(fresh)) {
                if (dict)
                    pdf_free_dict(dict);
                if (val)
                    free(val);
                if (arr)
                    pdf_free_array(arr);
                break;
            }

            /* `node` still refers to the previously appended node, if any */
            if (!(node)) {
                res->nodes = res->tail = fresh;
            } else {
                fresh->prev = res->tail;
                if (res->tail)
                    res->tail->next = fresh;
                res->tail = fresh;
            }

            node = fresh;
        }

        if (val != NULL) {
            node->type   = PDF_ARR_STRING;
            node->data   = val;
            node->datasz = strlen(val);
        } else if (dict != NULL) {
            node->type   = PDF_ARR_DICT;
            node->data   = dict;
            node->datasz = sizeof(struct pdf_dict);
        } else {
            node->type   = PDF_ARR_ARRAY;
            node->data   = arr;
            node->datasz = sizeof(struct pdf_array);
        }
    }

    if (endchar)
        *endchar = end;

    return res;
}
/*
 * Release a dictionary produced by pdf_parse_dict(): every node, its key,
 * its value (recursing into nested arrays and dictionaries) and finally the
 * dictionary itself. Passing NULL is a no-op, matching pdf_free_array().
 */
void pdf_free_dict(struct pdf_dict *dict)
{
    struct pdf_dict_node *node, *next;

    if (!(dict))
        return;

    node = dict->nodes;
    while (node != NULL) {
        free(node->key);

        if (node->type == PDF_DICT_STRING)
            free(node->value);
        else if (node->type == PDF_DICT_ARRAY)
            pdf_free_array((struct pdf_array *)(node->value));
        else if (node->type == PDF_DICT_DICT)
            pdf_free_dict((struct pdf_dict *)(node->value));

        /* Read the link before the node is released */
        next = node->next;
        free(node);
        node = next;
    }

    free(dict);
}
/*
 * Release an array produced by pdf_parse_array(): every node, the data it
 * holds (recursing into nested arrays and dictionaries) and finally the
 * array itself. Passing NULL is a no-op.
 */
void pdf_free_array(struct pdf_array *array)
{
    struct pdf_array_node *cur, *following;

    if (array == NULL)
        return;

    for (cur = array->nodes; cur != NULL; cur = following) {
        if (cur->type == PDF_ARR_ARRAY) {
            pdf_free_array((struct pdf_array *)(cur->data));
        } else if (cur->type == PDF_ARR_DICT) {
            pdf_free_dict((struct pdf_dict *)(cur->data));
        } else {
            /* Plain string data */
            free(cur->data);
        }

        /* Remember the successor before this node goes away */
        following = cur->next;
        free(cur);
    }

    free(array);
}
void pdf_print_array(struct pdf_array *array, unsigned long depth)
{
struct pdf_array_node *node;
unsigned long i;
for (i = 0, node = array->nodes; node != NULL; node = node->next, i++) {
if (node->type == PDF_ARR_STRING)
cli_errmsg("array[%lu][%lu]: %s\n", depth, i, (char *)(node->data));
else
pdf_print_array((struct pdf_array *)(node->data), depth + 1);
}
}
/*
 * Debug helper: log the contents of a parsed dictionary through cli_errmsg().
 *
 * String values are printed with their key, array values are announced and
 * then printed with pdf_print_array() at the same depth, and nested
 * dictionaries are printed recursively with depth + 1. Nodes of any other
 * type are skipped.
 */
void pdf_print_dict(struct pdf_dict *dict, unsigned long depth)
{
    struct pdf_dict_node *entry = dict->nodes;

    while (entry != NULL) {
        if (entry->type == PDF_DICT_DICT) {
            pdf_print_dict((struct pdf_dict *)(entry->value), depth + 1);
        } else if (entry->type == PDF_DICT_ARRAY) {
            cli_errmsg("dict[%lu][%s]: Array =>\n", depth, entry->key);
            pdf_print_array((struct pdf_array *)(entry->value), depth);
        } else if (entry->type == PDF_DICT_STRING) {
            cli_errmsg("dict[%lu][%s]: %s\n", depth, entry->key, (char *)(entry->value));
        }

        entry = entry->next;
    }
}