denyhosts/clamscan/libclamav/dlp.c

666 lines
22 KiB
C
Raw Normal View History

2022-10-22 18:41:00 +08:00
/*
* Simple library to detect and validate SSN and Credit Card numbers.
*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Martin Roesch <roesch@sourcefire.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include "dlp.h"
#include "others.h"
#include "str.h"
/* detection mode macros for the contains_* functions */
/* DETECT_MODE_DETECT: return 1 as soon as the first valid match is found */
#define DETECT_MODE_DETECT 0
/* DETECT_MODE_COUNT: keep scanning and return the number of valid matches */
#define DETECT_MODE_COUNT 1
/* number of leading card digits that are looked up in the IIN table */
#define IIN_SIZE 6
/* maximum number of ' ' or '-' separator bytes tolerated inside one card number */
#define MAX_CC_BREAKS 8
/* group number mapping is here */
/* http://www.socialsecurity.gov/employer/highgroup.txt */
/* here's a perl script to convert the raw data from the highgroup.txt
* file to the data set in ssn_max_group[]:
--
local $/;
my $i = <>;
my $count = 0;
while ($i =~ s/(\d{3}) (\d{2})//) {
print int($2) .", ";
if ($count == 18)
{
print "\n";
$count = 0;
}
else
{
$count++;
}
}
--
*
* run 'perl convert.pl < highgroup.txt' to generate the data
*
*/
/* MAX_AREA is the maximum assigned area number. This can be derived from
* the data in the highgroup.txt file by looking at the last area->group
* mapping from that file.
*
* dlp_is_valid_ssn() rejects any candidate whose area number exceeds it.
*/
#define MAX_AREA 772
/* array of max group numbers for a given area number */
/*
static int ssn_max_group[MAX_AREA+1] = { 0,
6, 6, 4, 8, 8, 8, 6, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 88, 88, 88, 88, 72, 72, 72, 72,
70, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
94, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 82, 82, 82, 82, 82, 82, 82, 82,
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
82, 82, 79, 79, 79, 79, 79, 79, 79, 79, 77, 6, 4, 99, 99, 99, 99, 99, 99,
99, 99, 99, 53, 53, 53, 53, 53, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 33, 33,
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4,
35, 35, 35, 35, 35, 35, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 29, 29, 29, 29, 29, 29, 29, 29, 27, 27, 27, 27, 27,
67, 67, 67, 67, 67, 67, 67, 67, 99, 99, 99, 99, 99, 99, 99, 99, 63, 61, 61,
61, 61, 61, 61, 61, 61, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 23, 23, 23, 23, 23, 23, 23, 21, 21, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 51, 51, 51, 51, 49, 49, 49, 49,
49, 49, 37, 37, 37, 37, 37, 37, 37, 37, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 23, 23, 23, 33, 33, 41, 39, 53, 51, 51, 51, 27, 27, 27, 27, 27,
27, 27, 45, 43, 79, 77, 55, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 63, 63,
63, 61, 61, 61, 61, 61, 61, 75, 73, 73, 73, 73, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 51, 99, 99, 45, 45, 43, 37, 99, 99, 99, 99, 99, 61, 99, 3, 99,
99, 99, 99, 99, 99, 99, 84, 84, 84, 84, 99, 99, 67, 67, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96,
96, 44, 44, 46, 46, 46, 44, 28, 26, 26, 26, 26, 16, 16, 16, 14, 14, 14, 14,
36, 34, 34, 34, 34, 34, 34, 34, 34, 14, 14, 12, 12, 90, 14, 14, 14, 14, 12,
12, 12, 12, 12, 12, 9, 9, 7, 7, 7, 7, 7, 7, 7, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
28, 18, 18, 10, 14, 10, 10, 10, 10, 10, 9, 9, 3, 1, 5, 5, 5, 5, 5,
5, 3, 3, 82, 82, 66, 66, 64, 64, 64, 64, 64
};
*/
/*
Following is a table of payment card "issuer identification number" ranges
and additional info such as card number length.
*/
/*
 * One row of the issuer identification number (IIN) table: a contiguous
 * range of 6-digit card prefixes plus the constraints that apply to card
 * numbers starting with a prefix from that range.
 */
struct iin_map_struct {
/* first IIN of the range (inclusive); 0 marks the end-of-table sentinel */
uint32_t iin_start;
/* last IIN of the range (inclusive) */
uint32_t iin_end;
/* minimum number of digits a card number in this range must have */
uint8_t card_min;
/* maximum number of digits collected for a card number in this range */
uint8_t card_max;
/* 1 if the range is still matched when the caller passes cc_only != 0 */
uint8_t is_cc;
/* NOTE(review): presumably marks ranges that use the Luhn checksum; */
/* dlp_is_valid_cc() does not read it and always runs the check - confirm */
uint8_t luhn;
/* issuer name, printed in debug messages */
const char *iin_name;
};
/*
 * Table of known IIN ranges.
 *
 * The table is read-only and is therefore declared const.
 *
 * Ordering matters: get_iin() walks the rows from the top and stops at the
 * first row whose iin_start is greater than the prefix being looked up, so
 * rows MUST be kept sorted by ascending iin_start. The all-zero row at the
 * end is the terminator (iin_start == 0).
 */
static const struct iin_map_struct iin_map[] = {
    {100000, 199999, 13, 15, 0, 1, "UATP"},
    {222100, 272099, 16, 16, 1, 1, "Mastercard 2016"},
    {300000, 305999, 14, 16, 1, 1, "Diner's Club - Carte Blanche"},
    {309500, 309599, 14, 16, 1, 1, "Diner's Club International"},
    {340000, 349999, 15, 15, 1, 1, "American Express"},
    {352800, 358999, 16, 16, 1, 1, "JCB"},
    {360000, 369999, 14, 16, 1, 1, "Diner's Club International"},
    {370000, 379999, 15, 15, 1, 1, "American Express"},
    {380000, 399999, 16, 16, 1, 1, "Diner's Club International"},
    {400000, 499999, 16, 16, 1, 1, "Visa"},
    {500000, 509999, 16, 16, 0, 1, "Maestro"},
    {510000, 559999, 16, 16, 1, 1, "Master Card"},
    {601100, 601199, 16, 16, 1, 1, "Discover"},
    {622126, 622926, 16, 16, 1, 1, "China Union Pay"},
    {624000, 626999, 16, 16, 1, 1, "China Union Pay"},
    {628200, 628899, 16, 16, 1, 1, "China Union Pay"},
    {644000, 659999, 16, 16, 1, 1, "Discover 2009"},
    {0}};
static const struct iin_map_struct *get_iin(char *digits, int cc_only)
{
uint32_t iin = atoi(digits);
int i = 0;
while (iin_map[i].iin_start != 0) {
if (iin < iin_map[i].iin_start)
break;
if (iin <= iin_map[i].iin_end && (cc_only == 0 || iin_map[i].is_cc == 1)) {
cli_dbgmsg("Credit card IIN %s matched range for %s\n", digits, iin_map[i].iin_name);
return &iin_map[i];
}
i++;
}
cli_dbgmsg("Credit card %s did not match an IIN range\n", digits);
return NULL;
}
/*
 * Decide whether the bytes at the start of `buffer` form a plausible
 * payment card number.
 *
 * buffer  - candidate bytes; the number must start at buffer[0]
 * length  - number of readable bytes in buffer
 * cc_only - passed to get_iin(); non-zero restricts matching to IIN ranges
 *           flagged as credit cards
 *
 * The check proceeds in three steps:
 *   1. collect the first IIN_SIZE digits and look them up in the IIN table;
 *   2. collect the remaining digits, up to the card_max of the matched
 *      range, and require at least card_min digits that are not directly
 *      followed by another digit;
 *   3. run the Luhn checksum over the collected digits.
 * Up to MAX_CC_BREAKS ' ' or '-' bytes in total may appear between digits.
 *
 * Returns 1 when all steps succeed, 0 otherwise.
 */
int dlp_is_valid_cc(const unsigned char *buffer, size_t length, int cc_only)
{
    char cc_digits[20];
    size_t digits        = 0;
    size_t pad_allowance = MAX_CC_BREAKS;
    size_t i;
    size_t k;
    int mult = 0;
    int sum  = 0;
    int val;
    const struct iin_map_struct *iin;

    if (buffer == NULL || length < 13)
        return 0;

    /* if the first digit is greater than 6 it isn't one of the major
     * credit cards
     * reference => http://www.beachnet.com/~hstiles/cardtype.html
     *
     * (A leading '2' is deliberately accepted: the IIN table contains a
     * range that starts with 2.)
     */
    if (!isdigit(buffer[0]) || buffer[0] > '6')
        return 0;

    /* max credit card length is 19, with allowance for punctuation */
    if (length > 19 + pad_allowance)
        length = 19 + pad_allowance;

    /* Look for possible 6 digit IIN */
    for (i = 0; i < length && digits < IIN_SIZE; i++) {
        if (!isdigit(buffer[i])) {
            /* tolerate a separator while the allowance lasts */
            if ((buffer[i] == ' ' || buffer[i] == '-') && pad_allowance > 0) {
                pad_allowance--;
                continue;
            }
            break;
        }
        cc_digits[digits++] = (char)buffer[i];
    }

    if (digits != IIN_SIZE)
        return 0;
    cc_digits[digits] = '\0';

    /* See if it is a valid IIN. */
    iin = get_iin(cc_digits, cc_only);
    if (iin == NULL)
        return 0;

    /* Look for the remaining needed digits, continuing from the same 'i'.
     * The extra bound keeps one byte of cc_digits free for the terminator
     * regardless of what card_max a table row declares. */
    for (; i < length && digits < iin->card_max && digits < sizeof(cc_digits) - 1; i++) {
        if (!isdigit(buffer[i])) {
            if ((buffer[i] == ' ' || buffer[i] == '-') && pad_allowance > 0) {
                pad_allowance--;
                continue;
            }
            break;
        }
        cc_digits[digits++] = (char)buffer[i];
    }

    /* Terminate again: the loop above overwrote the terminator written
     * after the IIN, and cc_digits is printed with %s below. */
    cc_digits[digits] = '\0';

    /* too few digits, or the digit run continues past what we collected */
    if (digits < iin->card_min || (i < length && isdigit(buffer[i])))
        return 0;

    /* Luhn checksum: walk from the rightmost digit, doubling every second
     * digit and folding two-digit products back into one digit. */
    for (k = digits; k-- > 0;) {
        val = cc_digits[k] - '0';
        if (mult) {
            val *= 2;
            if (val > 9)
                val -= 9;
        }
        mult = !mult;
        sum += val;
    }

    if (sum % 10)
        return 0;

    cli_dbgmsg("Luhn algorithm successful for %s\n", cc_digits);
    return 1;
}
/*
 * Scan a buffer for payment card numbers.
 *
 * buffer  - bytes to scan
 * length  - number of bytes in buffer
 * detmode - DETECT_MODE_DETECT: return 1 at the first valid number;
 *           anything else: count valid numbers
 * cc_only - forwarded to dlp_is_valid_cc()
 *
 * A candidate is only tried at a digit that is not preceded by another
 * digit. Returns 0 for a NULL buffer or one shorter than 13 bytes;
 * otherwise 1/0 in detect mode, or the number of matches in count mode.
 */
static int contains_cc(const unsigned char *buffer, size_t length, int detmode, int cc_only)
{
    const unsigned char *idx;
    const unsigned char *end;
    size_t remaining;
    int count = 0;

    if (buffer == NULL || length < 13)
        return 0;

    end = buffer + length;
    for (idx = buffer; idx < end; idx++) {
        if (!isdigit(*idx))
            continue;

        /* only start at the first digit of a run */
        if (idx != buffer && isdigit(idx[-1]))
            continue;

        remaining = (size_t)(end - idx);
        if (dlp_is_valid_cc(idx, remaining, cc_only) != 1)
            continue;

        if (detmode == DETECT_MODE_DETECT)
            return 1;

        count++;

        /* if we got a valid match we should increment the idx ptr
         * to gain a little performance.
         * Together with the loop increment this hops 16 bytes, clamped to
         * the bytes that are left so idx never moves beyond `end`. */
        idx += (remaining > 15) ? 15 : (remaining - 1);
    }

    return count;
}
/*
 * Count the payment card numbers found in `buffer` (`length` bytes).
 * `cc_only` is forwarded unchanged to the scanner.
 */
int dlp_get_cc_count(const unsigned char *buffer, size_t length, int cc_only)
{
    const int matches = contains_cc(buffer, length, DETECT_MODE_COUNT, cc_only);

    return matches;
}
/*
 * Report whether `buffer` (`length` bytes) contains at least one payment
 * card number. The scan stops at the first match.
 * `cc_only` is forwarded unchanged to the scanner.
 */
int dlp_has_cc(const unsigned char *buffer, size_t length, int cc_only)
{
    const int found = contains_cc(buffer, length, DETECT_MODE_DETECT, cc_only);

    return found;
}
/*
 * Decide whether the bytes at the start of `buffer` form a valid SSN.
 *
 * buffer - candidate bytes; the SSN must start at buffer[0]
 * length - number of readable bytes in buffer
 * format - SSN_FORMAT_HYPHENS  ("AAA-GG-SSSS", 11 bytes) or
 *          SSN_FORMAT_STRIPPED ("AAAGGSSSS",    9 bytes)
 *
 * Every digit position must hold a decimal digit and, in the hyphenated
 * layout, positions 3 and 6 must hold '-'. The positions are checked one
 * by one instead of being handed to sscanf(), because %d also accepts
 * leading whitespace and a sign and would let strings such as
 * "123-+5-6789" through.
 *
 * Returns 1 for a valid SSN, 0 otherwise (including an unknown format).
 */
int dlp_is_valid_ssn(const unsigned char *buffer, size_t length, int format)
{
    int area_number   = 0;
    int group_number  = 0;
    int serial_number = 0;
    int hyphenated;
    int value;
    size_t minlength;
    size_t pos;
    size_t ndigits = 0;
    char numbuf[12];

    if (buffer == NULL)
        return 0;

    hyphenated = (format == SSN_FORMAT_HYPHENS);
    minlength  = hyphenated ? 11 : 9;

    if (length < minlength)
        return 0;

    /* a digit directly after the candidate means it is part of a longer number */
    if (length > minlength && isdigit(buffer[minlength]))
        return 0;

    if (!hyphenated && format != SSN_FORMAT_STRIPPED) {
        cli_dbgmsg("dlp_is_valid_ssn: unknown format type %d \n", format);
        return 0;
    }

    /* Validate the layout and accumulate the three numeric fields:
     * digits 0-2 are the area, 3-4 the group, 5-8 the serial number. */
    for (pos = 0; pos < minlength; pos++) {
        numbuf[pos] = (char)buffer[pos];

        if (hyphenated && (pos == 3 || pos == 6)) {
            if (buffer[pos] != '-')
                return 0;
            continue;
        }

        if (!isdigit(buffer[pos]))
            return 0;

        value = buffer[pos] - '0';
        if (ndigits < 3)
            area_number = area_number * 10 + value;
        else if (ndigits < 5)
            group_number = group_number * 10 + value;
        else
            serial_number = serial_number * 10 + value;
        ndigits++;
    }
    numbuf[minlength] = '\0';

    /* start validating */
    /* validation data taken from
     * http://en.wikipedia.org/wiki/Social_Security_number_%28United_States%29
     */
    if (area_number <= 0 ||
        area_number > MAX_AREA ||
        area_number == 666 ||
        group_number <= 0 ||
        group_number > 99 ||
        serial_number <= 0 ||
        serial_number > 9999)
        return 0;

    /* Explicitly excluded block 987-65-4320 .. 987-65-4329. While MAX_AREA
     * is below 987 the range check above already rejects these; the test
     * is kept so it still applies if MAX_AREA is ever raised. */
    if (area_number == 987 && group_number == 65 &&
        serial_number >= 4320 && serial_number <= 4329)
        return 0;

    cli_dbgmsg("dlp_is_valid_ssn: SSN_%s: %s\n", hyphenated ? "HYPHENS" : "STRIPPED", numbuf);
    return 1;
}
/*
 * Scan a buffer for SSNs written in one specific format.
 *
 * buffer  - bytes to scan
 * length  - number of bytes in buffer
 * format  - SSN_FORMAT_HYPHENS or SSN_FORMAT_STRIPPED, forwarded to
 *           dlp_is_valid_ssn()
 * detmode - DETECT_MODE_COUNT: count valid SSNs;
 *           anything else: return 1 at the first valid SSN
 *
 * A candidate is only tried at a digit that is not preceded by another
 * digit. Returns 0 for a NULL buffer or one shorter than 9 bytes;
 * otherwise the match count (count mode) or 1/0 (detect mode).
 */
static int contains_ssn(const unsigned char *buffer, size_t length, int format, int detmode)
{
    const unsigned char *idx;
    const unsigned char *end;
    const size_t matched_len = (format == SSN_FORMAT_HYPHENS) ? 11 : 9;
    int count                = 0;

    if (buffer == NULL || length < 9)
        return 0;

    end = buffer + length;
    for (idx = buffer; idx < end; idx++) {
        if (!isdigit(*idx))
            continue;

        /* only start at the first digit of a run */
        if (idx != buffer && isdigit(idx[-1]))
            continue;

        if (dlp_is_valid_ssn(idx, (size_t)(end - idx), format) != 1)
            continue;

        if (detmode != DETECT_MODE_COUNT)
            return 1;

        count++;

        /* Hop over the matched bytes. A valid match guarantees that at
         * least matched_len bytes remain, so after the loop increment idx
         * sits right behind the match and never moves beyond `end`. The
         * byte there (if any) is known not to be a digit, so no candidate
         * is lost compared with skipping it as well. */
        idx += matched_len - 1;
    }

    return count;
}
/*
 * Count the SSNs in `buffer` (`length` bytes) that are written without
 * separators (SSN_FORMAT_STRIPPED).
 */
int dlp_get_stripped_ssn_count(const unsigned char *buffer, size_t length)
{
    const int matches = contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_COUNT);

    return matches;
}
/*
 * Count the SSNs in `buffer` (`length` bytes) that are written with
 * hyphens (SSN_FORMAT_HYPHENS).
 */
int dlp_get_normal_ssn_count(const unsigned char *buffer, size_t length)
{
    const int matches = contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_COUNT);

    return matches;
}
/*
 * Count the SSNs in `buffer` (`length` bytes) in either format.
 *
 * The buffer is scanned twice, once per format, and the two counts are
 * added; this is slow but finds SSNs in both notations.
 */
int dlp_get_ssn_count(const unsigned char *buffer, size_t length)
{
    const int stripped   = dlp_get_stripped_ssn_count(buffer, length);
    const int hyphenated = dlp_get_normal_ssn_count(buffer, length);

    return stripped + hyphenated;
}
/*
 * Report whether `buffer` (`length` bytes) contains at least one SSN in
 * either the hyphenated or the stripped format.
 *
 * The stripped scan is only run when the hyphenated scan found nothing;
 * the result (1 or 0) is the same as running both scans unconditionally.
 */
int dlp_has_ssn(const unsigned char *buffer, size_t length)
{
    if (contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_DETECT))
        return 1;

    return contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_DETECT) ? 1 : 0;
}
/*
 * Report whether `buffer` (`length` bytes) contains at least one SSN
 * written without separators (SSN_FORMAT_STRIPPED).
 */
int dlp_has_stripped_ssn(const unsigned char *buffer, size_t length)
{
    const int found = contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_DETECT);

    return found;
}
/*
 * Report whether `buffer` (`length` bytes) contains at least one SSN
 * written with hyphens (SSN_FORMAT_HYPHENS).
 */
int dlp_has_normal_ssn(const unsigned char *buffer, size_t length)
{
    const int found = contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_DETECT);

    return found;
}
/* The code below detects Canadian bank routing (transit) */
/* numbers, Canadian EFT numbers, and U.S. MICR bank */
/* routing numbers. */
/* Author: Bill Parker */
/* Date: February 17, 2013 */
/* Last Modified: February 25, 2013 */
/* Purpose: To provide Snort and ClamAV the ability to */
/* detect Canadian and U.S. bank routing transaction */
/* numbers via the DLP module in ClamAV or the SDF */
/* preprocessor in the Snort IDS. */
/*
 * Check a three-digit Canadian institution (bank) code against the list
 * of known codes. The code is taken from the first three or the last
 * three digits of a routing number, depending on the notation.
 *
 * Returns 1 when bank_code is in the list, 0 otherwise.
 */
int is_bank_code_valid(int bank_code)
{
    static const int known_codes[] = {
        1,   /* Bank of Montreal */
        2,   /* Bank of Nova Scotia */
        3,   /* Royal Bank of Canada */
        4,   /* Toronto-Dominion Bank */
        6,   /* National Bank of Canada */
        10,  /* Canadian Imperial Bank of Commerce */
        16,  /* HSBC Canada */
        30,  /* Canadian Western Bank */
        39,  /* Laurentian Bank of Canada */
        117, /* Government of Canada */
        127, /* Canada Post (Money Orders) */
        177, /* Bank of Canada */
        219, /* ATB Financial */
        260, /* Citibank Canada */
        290, /* UBS Bank (Canada) */
        308, /* Bank of China (Canada) */
        309, /* Citizens Bank of Canada */
        326, /* Presidents Choice Financial */
        338, /* Canadian Tire Bank */
        340, /* ICICI Bank Canada */
        509, /* Canada Trust */
        540, /* Manulife Bank */
        614, /* ING Direct Canada */
        809, /* Central 1 [Credit Union] BC Region */
        815, /* Caisses Desjardins du Québec */
        819, /* Caisses populaires Desjardins du Manitoba */
        828, /* Central 1 [Credit Union] ON Region */
        829, /* Caisses populaires Desjardins de l'Ontario */
        837, /* Meridian Credit Union */
        839, /* Credit Union Heritage (Nova Scotia) */
        865, /* Caisses populaires Desjardins acadiennes */
        879, /* Credit Union Central of Manitoba */
        889, /* Credit Union Central of Saskatchewan */
        899, /* Credit Union Central Alberta */
        900  /* Unknown??? */
    };
    size_t k;

    for (k = 0; k < sizeof(known_codes) / sizeof(known_codes[0]); k++) {
        if (known_codes[k] == bank_code)
            return 1;
    }

    return 0; /* NO MATCH...FAIL */
}
/*
 * Check whether `buffer` starts with a Canadian transit number.
 *
 * The expected layout is XXXXX-YYY, where XXXXX is a five-digit branch
 * number and YYY is the three-digit institution number.
 *
 * buffer - candidate text; need not be NUL-terminated
 * length - number of readable bytes in buffer (at least 9 are required)
 *
 * Returns 1 when the layout matches and YYY is a known bank code,
 * 0 otherwise.
 *
 * Note: it does NOT appear that the Canadian RTN or EFT number formats
 * contain any type of checksum algorithm or a check digit.
 */
int cdn_ctn_is_valid(const char *buffer, size_t length)
{
    /* <ctype.h> functions require values representable as unsigned char */
    const unsigned char *p = (const unsigned char *)buffer;
    int bank_code          = 0; /* last three digits of Canada RTN/MICR is Bank I.D. */
    size_t i;

    if (buffer == NULL || length < 9)
        return 0;

    /* the 6th character must be the '-' separator */
    if (p[5] != '-')
        return 0;

    /* branch number: five digits */
    for (i = 0; i < 5; i++) {
        if (!isdigit(p[i]))
            return 0;
    }

    /* institution number: three digits, converted to a numeric value */
    for (i = 6; i < 9; i++) {
        if (!isdigit(p[i]))
            return 0;
        bank_code = (bank_code * 10) + (p[i] - '0');
    }

    /* Only the listed bank codes are accepted; there may be more valid
     * codes which could be added as well. */
    return is_bank_code_valid(bank_code);
}
/*
 * Check whether `buffer` starts with a Canadian EFT (Electronic Fund
 * Transaction) routing number.
 *
 * The expected layout is 0YYYXXXXX: a mandatory leading zero, the
 * three-digit institution number YYY, then the five-digit branch number
 * XXXXX.
 *
 * buffer - candidate text; need not be NUL-terminated
 * length - number of readable bytes in buffer (at least 9 are required)
 *
 * Returns 1 when the layout matches and YYY is a known bank code,
 * 0 otherwise.
 *
 * Note: it does NOT appear that the Canadian RTN or EFT number formats
 * contain any type of checksum algorithm or a check digit.
 */
int cdn_eft_is_valid(const char *buffer, size_t length)
{
    /* <ctype.h> functions require values representable as unsigned char */
    const unsigned char *p = (const unsigned char *)buffer;
    int bank_code          = 0;
    size_t i;

    if (buffer == NULL || length < 9)
        return 0;

    /* the 1st character must be '0' */
    if (p[0] != '0')
        return 0;

    /* institution number: three digits, converted to a numeric value */
    for (i = 1; i < 4; i++) {
        if (!isdigit(p[i]))
            return 0;
        bank_code = (bank_code * 10) + (p[i] - '0');
    }

    /* Only the listed bank codes are accepted; there may be more valid
     * codes which could be added as well. */
    if (!is_bank_code_valid(bank_code))
        return 0;

    /* branch number: five digits */
    for (i = 4; i < 9; i++) {
        if (!isdigit(p[i]))
            return 0;
    }

    return 1;
}
/*
 * Check whether `buffer` starts with a valid nine-digit U.S. MICR bank
 * routing number.
 *
 * buffer - candidate text; need not be NUL-terminated
 * length - number of readable bytes in buffer (at least 9 are required)
 *
 * With d[0]..d[8] being the nine digits, the number is valid when
 *
 *   (7 * (d[0] + d[3] + d[6]) +
 *    3 * (d[1] + d[4] + d[7]) +
 *    9 * (d[2] + d[5])) % 10 == d[8]
 *
 * i.e. the last digit is the check digit of the first eight.
 *
 * Returns 1 when the number is valid, 0 otherwise.
 */
int us_micr_is_valid(const char *buffer, size_t length)
{
    /* weights of the first eight digits, in position order */
    static const int weight[8] = {7, 3, 9, 7, 3, 9, 7, 3};
    /* <ctype.h> functions require values representable as unsigned char */
    const unsigned char *p = (const unsigned char *)buffer;
    int sum                = 0;
    size_t i;

    if (buffer == NULL || length < 9)
        return 0;

    /* make sure all nine characters are actually digits */
    for (i = 0; i < 9; i++) {
        if (!isdigit(p[i]))
            return 0;
    }

    for (i = 0; i < 8; i++)
        sum += weight[i] * (p[i] - '0');

    /* the weighted sum modulo 10 must equal the check digit */
    return (sum % 10) == (p[8] - '0') ? 1 : 0;
}