/* Source path: denyhosts/clamav/libclamav/dlp.c */
/*
* Simple library to detect and validate SSN and Credit Card numbers.
*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Martin Roesch <roesch@sourcefire.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include "dlp.h"
#include "others.h"
#include "str.h"
/* detection mode macros for the contains_* functions */
#define DETECT_MODE_DETECT 0
#define DETECT_MODE_COUNT 1
#define IIN_SIZE 6
#define MAX_CC_BREAKS 8
/* group number mapping is here */
/* http://www.socialsecurity.gov/employer/highgroup.txt */
/* here's a perl script to convert the raw data from the highgroup.txt
* file to the data set in ssn_max_group[]:
--
local $/;
my $i = <>;
my $count = 0;
while ($i =~ s/(\d{3}) (\d{2})//) {
print int($2) .", ";
if ($count == 18)
{
print "\n";
$count = 0;
}
else
{
$count++;
}
}
--
*
* run 'perl convert.pl < highgroup.txt' to generate the data
*
*/
/* MAX_AREA is the maximum assigned area number. This can be derived from
* the data in the highgroup.txt file by looking at the last area->group
* mapping from that file.
*/
#define MAX_AREA 772
/* array of max group numbers for a given area number */
/*
static int ssn_max_group[MAX_AREA+1] = { 0,
6, 6, 4, 8, 8, 8, 6, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 88, 88, 88, 88, 72, 72, 72, 72,
70, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
94, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 82, 82, 82, 82, 82, 82, 82, 82,
82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82,
82, 82, 79, 79, 79, 79, 79, 79, 79, 79, 77, 6, 4, 99, 99, 99, 99, 99, 99,
99, 99, 99, 53, 53, 53, 53, 53, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 33, 33,
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4,
35, 35, 35, 35, 35, 35, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 29, 29, 29, 29, 29, 29, 29, 29, 27, 27, 27, 27, 27,
67, 67, 67, 67, 67, 67, 67, 67, 99, 99, 99, 99, 99, 99, 99, 99, 63, 61, 61,
61, 61, 61, 61, 61, 61, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 23, 23, 23, 23, 23, 23, 23, 21, 21, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 51, 51, 51, 51, 49, 49, 49, 49,
49, 49, 37, 37, 37, 37, 37, 37, 37, 37, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 23, 23, 23, 33, 33, 41, 39, 53, 51, 51, 51, 27, 27, 27, 27, 27,
27, 27, 45, 43, 79, 77, 55, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 63, 63,
63, 61, 61, 61, 61, 61, 61, 75, 73, 73, 73, 73, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 51, 99, 99, 45, 45, 43, 37, 99, 99, 99, 99, 99, 61, 99, 3, 99,
99, 99, 99, 99, 99, 99, 84, 84, 84, 84, 99, 99, 67, 67, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96,
96, 44, 44, 46, 46, 46, 44, 28, 26, 26, 26, 26, 16, 16, 16, 14, 14, 14, 14,
36, 34, 34, 34, 34, 34, 34, 34, 34, 14, 14, 12, 12, 90, 14, 14, 14, 14, 12,
12, 12, 12, 12, 12, 9, 9, 7, 7, 7, 7, 7, 7, 7, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
28, 18, 18, 10, 14, 10, 10, 10, 10, 10, 9, 9, 3, 1, 5, 5, 5, 5, 5,
5, 3, 3, 82, 82, 66, 66, 64, 64, 64, 64, 64
};
*/
/*
Following is a table of payment card "issuer identification number" ranges
and additional info such as card number length.
*/
/* One row of the issuer identification number (IIN) range table. */
struct iin_map_struct {
    uint32_t iin_start;   /* first IIN of the range (inclusive) */
    uint32_t iin_end;     /* last IIN of the range (inclusive) */
    uint8_t card_min;     /* minimum number of digits in a card number of this range */
    uint8_t card_max;     /* maximum number of digits in a card number of this range */
    uint8_t is_cc;        /* 1 if the range is still matched by cc_only lookups */
    uint8_t luhn;         /* NOTE(review): presumably "Luhn check applies"; no code in this file reads it */
    const char *iin_name; /* issuer name, only used in debug messages */
};

/*
 * Read-only lookup table, hence const.
 *
 * Rows must stay sorted by iin_start: get_iin() stops scanning at the first
 * row whose iin_start is greater than the value being looked up. The
 * all-zero row terminates the table.
 */
static const struct iin_map_struct iin_map[] = {
    {100000, 199999, 13, 15, 0, 1, "UATP"},
    {222100, 272099, 16, 16, 1, 1, "Mastercard 2016"},
    {300000, 305999, 14, 16, 1, 1, "Diner's Club - Carte Blanche"},
    {309500, 309599, 14, 16, 1, 1, "Diner's Club International"},
    {340000, 349999, 15, 15, 1, 1, "American Express"},
    {352800, 358999, 16, 16, 1, 1, "JCB"},
    {360000, 369999, 14, 16, 1, 1, "Diner's Club International"},
    {370000, 379999, 15, 15, 1, 1, "American Express"},
    {380000, 399999, 16, 16, 1, 1, "Diner's Club International"},
    {400000, 499999, 16, 16, 1, 1, "Visa"},
    {500000, 509999, 16, 16, 0, 1, "Maestro"},
    {510000, 559999, 16, 16, 1, 1, "Master Card"},
    {601100, 601199, 16, 16, 1, 1, "Discover"},
    {622126, 622926, 16, 16, 1, 1, "China Union Pay"},
    {624000, 626999, 16, 16, 1, 1, "China Union Pay"},
    {628200, 628899, 16, 16, 1, 1, "China Union Pay"},
    {644000, 659999, 16, 16, 1, 1, "Discover 2009"},
    {0}};
static const struct iin_map_struct *get_iin(char *digits, int cc_only)
{
uint32_t iin = atoi(digits);
int i = 0;
while (iin_map[i].iin_start != 0) {
if (iin < iin_map[i].iin_start)
break;
if (iin <= iin_map[i].iin_end && (cc_only == 0 || iin_map[i].is_cc == 1)) {
cli_dbgmsg("Credit card IIN %s matched range for %s\n", digits, iin_map[i].iin_name);
return &iin_map[i];
}
i++;
}
cli_dbgmsg("Credit card %s did not match an IIN range\n", digits);
return NULL;
}
/*
 * Decide whether a payment card number starts at buffer[0].
 *
 * Up to MAX_CC_BREAKS ' ' or '-' separators may be interleaved with the
 * digits. The first IIN_SIZE digits are looked up with get_iin(); the number
 * must then hold between card_min and card_max digits for the matched range,
 * must not be directly followed by another digit, and must pass the Luhn
 * checksum.
 *
 * buffer   bytes to inspect; does not need to be NUL-terminated
 * length   number of readable bytes in buffer
 * cc_only  forwarded to get_iin(); non-zero restricts the lookup to rows
 *          flagged is_cc
 *
 * Returns 1 when a valid number is found, 0 otherwise (including a NULL
 * buffer or length < 13).
 */
int dlp_is_valid_cc(const unsigned char *buffer, size_t length, int cc_only)
{
    int mult             = 0;
    int sum              = 0;
    int val              = 0;
    size_t i             = 0;
    size_t j             = 0;
    size_t digits        = 0;
    size_t pad_allowance = MAX_CC_BREAKS;
    char cc_digits[20];
    const struct iin_map_struct *iin;

    if (buffer == NULL || length < 13)
        return 0;

    /* if the first digit is greater than 6 it isn't one of the major
     * credit cards
     * reference => http://www.beachnet.com/~hstiles/cardtype.html
     */
    if (!isdigit(buffer[0]) || buffer[0] > '6')
        return 0;

    /* max credit card length is 19, with allowance for punctuation */
    if (length > 19 + pad_allowance)
        length = 19 + pad_allowance;

    /* Look for possible 6 digit IIN */
    for (i = 0; i < length && digits < IIN_SIZE; i++) {
        if (isdigit(buffer[i]) == 0) {
            if ((buffer[i] == ' ' || buffer[i] == '-') && pad_allowance > 0) {
                pad_allowance--;
                continue;
            }
            break;
        }
        cc_digits[digits++] = (char)buffer[i];
    }

    if (digits != IIN_SIZE)
        return 0;
    cc_digits[digits] = '\0';

    /* See if it is a valid IIN. */
    iin = get_iin(cc_digits, cc_only);
    if (iin == NULL)
        return 0;

    /* Look for the remaining needed digits (same 'i' as the loop above).
     * The sizeof bound keeps room for the terminator even if a table row
     * ever declares a card_max larger than the local buffer. */
    for (; i < length && digits < iin->card_max && digits < sizeof(cc_digits) - 1; i++) {
        if (isdigit(buffer[i]) == 0) {
            if ((buffer[i] == ' ' || buffer[i] == '-') && pad_allowance > 0) {
                pad_allowance--;
                continue;
            }
            break;
        }
        cc_digits[digits++] = (char)buffer[i];
    }

    /* The second loop overwrote the terminator written after the IIN;
     * terminate again so the debug message below prints a proper string. */
    cc_digits[digits] = '\0';

    /* Too few digits, or the run of digits continues past the card number. */
    if (digits < iin->card_min || (i < length && isdigit(buffer[i])))
        return 0;

    /* Luhn checksum: walk from the rightmost digit, doubling every second one. */
    for (j = digits; j-- > 0;) {
        val = cc_digits[j] - '0';
        if (mult) {
            val *= 2;
            if (val > 9)
                val -= 9;
        }
        mult = !mult;
        sum += val;
    }

    if (sum % 10)
        return 0;

    cli_dbgmsg("Luhn algorithm successful for %s\n", cc_digits);
    return 1;
}
/*
 * Scan buffer for payment card numbers.
 *
 * A candidate is tested with dlp_is_valid_cc() at every digit that is not
 * preceded by another digit.
 *
 * detmode  DETECT_MODE_DETECT returns 1 at the first match; any other value
 *          counts matches
 * cc_only  forwarded to dlp_is_valid_cc()
 *
 * Returns 1/0 in detect mode, otherwise the number of matches. A NULL buffer
 * or length < 13 yields 0.
 *
 * The scan is index based so that the skip after a match can never form a
 * pointer beyond one-past-the-end of the buffer.
 */
static int contains_cc(const unsigned char *buffer, size_t length, int detmode, int cc_only)
{
    size_t pos = 0;
    int count  = 0;

    if (buffer == NULL || length < 13) {
        return 0;
    }

    while (pos < length) {
        if (isdigit(buffer[pos]) &&
            (pos == 0 || !isdigit(buffer[pos - 1])) &&
            dlp_is_valid_cc(buffer + pos, length - pos, cc_only) == 1) {

            if (detmode == DETECT_MODE_DETECT)
                return 1;

            count++;
            /* if we got a valid match we should skip ahead
             * to gain a little performance
             */
            pos += (length > 15 ? 15 : (length - 1));
        }
        pos++;
    }

    return count;
}
/* Count the payment card numbers found in buffer (see contains_cc()). */
int dlp_get_cc_count(const unsigned char *buffer, size_t length, int cc_only)
{
    const int matches = contains_cc(buffer, length, DETECT_MODE_COUNT, cc_only);

    return matches;
}
/* Report whether buffer holds at least one payment card number (1) or not (0). */
int dlp_has_cc(const unsigned char *buffer, size_t length, int cc_only)
{
    const int found = contains_cc(buffer, length, DETECT_MODE_DETECT, cc_only);

    return found;
}
/*
 * Parse exactly `width` decimal digits starting at `field`.
 * Stores the value in *value and returns 1, or returns 0 (leaving *value
 * untouched) when any of the characters is not a digit.
 */
static int ssn_parse_field(const char *field, size_t width, int *value)
{
    int parsed = 0;
    size_t k;

    for (k = 0; k < width; k++) {
        if (!isdigit((unsigned char)field[k]))
            return 0;
        parsed = (parsed * 10) + (field[k] - '0');
    }

    *value = parsed;
    return 1;
}

/*
 * Decide whether a U.S. Social Security number starts at buffer[0].
 *
 * buffer  bytes to inspect; does not need to be NUL-terminated
 * length  number of readable bytes in buffer
 * format  SSN_FORMAT_HYPHENS ("AAA-GG-SSSS", 11 bytes) or
 *         SSN_FORMAT_STRIPPED ("AAAGGSSSS", 9 bytes)
 *
 * Returns 1 for a plausible SSN, 0 otherwise (NULL buffer, too short,
 * followed directly by another digit, malformed, unknown format, or out of
 * the accepted area/group/serial ranges).
 *
 * Every field is parsed digit by digit. sscanf("%d") is deliberately not
 * used: it skips leading whitespace and accepts a sign, so inputs such as
 * "123-+5-6789" would be taken for a valid number.
 */
int dlp_is_valid_ssn(const unsigned char *buffer, size_t length, int format)
{
    int area_number   = 0;
    int group_number  = 0;
    int serial_number = 0;
    size_t minlength;
    size_t group_offset;
    size_t serial_offset;
    int retval = 1;
    char numbuf[12];

    if (buffer == NULL)
        return 0;

    minlength = (format == SSN_FORMAT_HYPHENS ? 11 : 9);

    if (length < minlength)
        return 0;

    /* A longer run of digits is some other number, not an SSN. */
    if ((length > minlength) && isdigit(buffer[minlength]))
        return 0;

    /* minlength is at most 11, so the copy plus terminator fits numbuf. */
    memcpy(numbuf, buffer, minlength);
    numbuf[minlength] = '\0';

    switch (format) {
        case SSN_FORMAT_HYPHENS:
            if (numbuf[3] != '-' || numbuf[6] != '-')
                return 0;
            group_offset  = 4;
            serial_offset = 7;
            break;
        case SSN_FORMAT_STRIPPED:
            group_offset  = 3;
            serial_offset = 5;
            break;
        default:
            cli_dbgmsg("dlp_is_valid_ssn: unknown format type %d \n", format);
            return 0;
    }

    if (!ssn_parse_field(numbuf, 3, &area_number) ||
        !ssn_parse_field(numbuf + group_offset, 2, &group_number) ||
        !ssn_parse_field(numbuf + serial_offset, 4, &serial_number))
        return 0;

    /* start validating */
    /* validation data taken from
     * http://en.wikipedia.org/wiki/Social_Security_number_%28United_States%29
     */
    if (area_number > MAX_AREA ||
        area_number == 666 ||
        area_number <= 0 ||
        group_number <= 0 ||
        group_number > 99 ||
        serial_number <= 0 ||
        serial_number > 9999)
        retval = 0;

    /* Currently redundant (987 is above MAX_AREA), kept so the range stays
     * excluded if MAX_AREA is ever raised. */
    if (area_number == 987 && group_number == 65) {
        if (serial_number >= 4320 && serial_number <= 4329)
            retval = 0;
    }

    /*
    if(group_number > ssn_max_group[area_number])
        retval = 0;
    */

    if (retval)
        cli_dbgmsg("dlp_is_valid_ssn: SSN_%s: %s\n", format == SSN_FORMAT_HYPHENS ? "HYPHENS" : "STRIPPED", numbuf);

    return retval;
}
/*
 * Scan buffer for Social Security numbers in the given format.
 *
 * A candidate is tested with dlp_is_valid_ssn() at every digit that is not
 * preceded by another digit.
 *
 * format   SSN_FORMAT_HYPHENS or SSN_FORMAT_STRIPPED
 * detmode  DETECT_MODE_COUNT counts matches; any other value returns 1 at
 *          the first match
 *
 * Returns the number of matches in count mode, otherwise 1/0. A NULL buffer
 * or length < 9 yields 0.
 *
 * The scan is index based: when an SSN ends exactly at the end of the
 * buffer, the hop over it would otherwise form a pointer beyond
 * one-past-the-end.
 */
static int contains_ssn(const unsigned char *buffer, size_t length, int format, int detmode)
{
    const size_t matched_len = (format == SSN_FORMAT_HYPHENS) ? 11 : 9;
    size_t pos               = 0;
    int count                = 0;

    if (buffer == NULL || length < 9)
        return 0;

    while (pos < length) {
        if (isdigit(buffer[pos]) &&
            (pos == 0 || !isdigit(buffer[pos - 1])) &&
            dlp_is_valid_ssn(buffer + pos, length - pos, format) == 1) {

            if (detmode != DETECT_MODE_COUNT)
                return 1;

            count++;
            /* hop over the matched bytes if we found an SSN */
            pos += matched_len;
        }
        pos++;
    }

    return count;
}
/* Count the SSNs written without hyphens (SSN_FORMAT_STRIPPED) in buffer. */
int dlp_get_stripped_ssn_count(const unsigned char *buffer, size_t length)
{
    const int matches = contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_COUNT);

    return matches;
}
/* Count the SSNs written with hyphens (SSN_FORMAT_HYPHENS) in buffer. */
int dlp_get_normal_ssn_count(const unsigned char *buffer, size_t length)
{
    const int matches = contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_COUNT);

    return matches;
}
/*
 * Count SSNs in either format: the sum of the stripped and the hyphenated
 * counts. The buffer is scanned once per format.
 */
int dlp_get_ssn_count(const unsigned char *buffer, size_t length)
{
    const int stripped = dlp_get_stripped_ssn_count(buffer, length);
    const int hyphened = dlp_get_normal_ssn_count(buffer, length);

    return stripped + hyphened;
}
/*
 * Report whether buffer holds an SSN in either format.
 *
 * Returns 1 when a hyphenated or a stripped SSN is found, 0 otherwise.
 * The hyphenated scan runs first; the stripped scan is only performed when
 * it found nothing, which avoids a second pass over the buffer.
 */
int dlp_has_ssn(const unsigned char *buffer, size_t length)
{
    return contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_DETECT) ||
           contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_DETECT);
}
/* Report whether buffer holds an SSN written without hyphens (1) or not (0). */
int dlp_has_stripped_ssn(const unsigned char *buffer, size_t length)
{
    const int found = contains_ssn(buffer, length, SSN_FORMAT_STRIPPED, DETECT_MODE_DETECT);

    return found;
}
/* Report whether buffer holds an SSN written with hyphens (1) or not (0). */
int dlp_has_normal_ssn(const unsigned char *buffer, size_t length)
{
    const int found = contains_ssn(buffer, length, SSN_FORMAT_HYPHENS, DETECT_MODE_DETECT);

    return found;
}
/* The functions below check whether a Canadian bank */
/* routing (transit) number, a Canadian EFT number, or a */
/* U.S. MICR bank routing number is present. */
/* Author: Bill Parker */
/* Date: February 17, 2013 */
/* Last Modified: February 25, 2013 */
/* Purpose: To provide Snort and ClamAV the ability to */
/* detect canadian and U.S. bank routing transaction */
/* numbers via the DLP module in ClamAV or the SDF pre- */
/* processor in the Snort IDS. */
/* Are first three or last three digits a valid bank code */
/*
 * Tell whether bank_code is one of the institution numbers listed below.
 * Returns 1 for a listed code, 0 for anything else.
 */
int is_bank_code_valid(int bank_code)
{
    static const int known_codes[] = {
        1,   /* Bank of Montreal */
        2,   /* Bank of Nova Scotia */
        3,   /* Royal Bank of Canada */
        4,   /* Toronto-Dominion Bank */
        6,   /* National Bank of Canada */
        10,  /* Canadian Imperial Bank of Commerce */
        16,  /* HSBC Canada */
        30,  /* Canadian Western Bank */
        39,  /* Laurentian Bank of Canada */
        117, /* Government of Canada */
        127, /* Canada Post (Money Orders) */
        177, /* Bank of Canada */
        219, /* ATB Financial */
        260, /* Citibank Canada */
        290, /* UBS Bank (Canada) */
        308, /* Bank of China (Canada) */
        309, /* Citizens Bank of Canada */
        326, /* Presidents Choice Financial */
        338, /* Canadian Tire Bank */
        340, /* ICICI Bank Canada */
        509, /* Canada Trust */
        540, /* Manulife Bank */
        614, /* ING Direct Canada */
        809, /* Central 1 [Credit Union] BC Region */
        815, /* Caisses Desjardins du Québec */
        819, /* Caisses populaires Desjardins du Manitoba */
        828, /* Central 1 [Credit Union] ON Region */
        829, /* Caisses populaires Desjardins de l'Ontario */
        837, /* Meridian Credit Union */
        839, /* Credit Union Heritage (Nova Scotia) */
        865, /* Caisses populaires Desjardins acadiennes */
        879, /* Credit Union Central of Manitoba */
        889, /* Credit Union Central of Saskatchewan */
        899, /* Credit Union Central Alberta */
        900  /* Unknown??? */
    };
    size_t slot;

    for (slot = 0; slot < sizeof(known_codes) / sizeof(known_codes[0]); slot++) {
        if (known_codes[slot] == bank_code)
            return 1;
    }

    return 0; /* NO MATCH...FAIL */
} /* end function is_bank_code_valid() */
/* This function checks if the supplied string is a valid */
/* canadian transit number, the format is as follows: */
/* XXXXX-YYY where XXXXX is a branch number, and YYY is */
/* the institutional number. */
/* note: it does NOT appear that the canadian RTN or EFT */
/* number formats contain any type of checksum algorithm */
/* or a check digit. */
/*
 * Check the first nine characters of buffer against the Canadian transit
 * number layout "XXXXX-YYY": five digits, a hyphen, then a three-digit
 * institution number accepted by is_bank_code_valid().
 *
 * buffer  characters to inspect; does not need to be NUL-terminated
 * length  number of readable characters in buffer
 *
 * Returns 1 if valid, 0 if not (including a NULL buffer or length < 9).
 */
int cdn_ctn_is_valid(const char *buffer, size_t length)
{
    int i;
    int bank_code = 0; /* last three digits of Canada RTN/MICR is Bank I.D. */

    if (buffer == NULL || length < 9)
        return 0;

    if (buffer[5] != '-')
        return 0; /* if the 6th char isn't a '-', not a valid RTN */

    /* plain char may be negative; <ctype.h> functions need an
     * unsigned char value, hence the casts */
    for (i = 0; i < 5; i++) {
        if (isdigit((unsigned char)buffer[i]) == 0)
            return 0;
    }

    /* convert last three elements in buffer to a numeric value */
    for (i = 6; i < 9; i++) {
        if (isdigit((unsigned char)buffer[i]) == 0)
            return 0;
        bank_code = (bank_code * 10) + (buffer[i] - '0');
    }

    /* Only the institution numbers known to is_bank_code_valid() pass;
     * there may be more valid codes which could be added there. */
    return is_bank_code_valid(bank_code);
}
/* If the string is a Canadian EFT (Electronic Fund */
/* Transaction), the format is as follows: */
/* 0YYYXXXXX, where a leading zero is required, YYY is the */
/* institution number, and XXXXX is a branch number. */
/* note: it does NOT appear that the Canadian RTN or EFT */
/* number formats contain any type of checksum algorithm */
/* or a check digit. */
/*
 * Check the first nine characters of buffer against the Canadian EFT layout
 * "0YYYXXXXX": a literal '0', a three-digit institution number accepted by
 * is_bank_code_valid(), then five more digits.
 *
 * buffer  characters to inspect; does not need to be NUL-terminated
 * length  number of readable characters in buffer
 *
 * Returns 1 if valid, 0 if not (including a NULL buffer or length < 9).
 */
int cdn_eft_is_valid(const char *buffer, size_t length)
{
    int bank_code = 0;
    int i;

    if (buffer == NULL || length < 9)
        return 0;

    if (buffer[0] != '0')
        return 0; /* if the 1st char isn't a '0', not a valid EFT */

    /* plain char may be negative; <ctype.h> functions need an
     * unsigned char value, hence the casts */
    for (i = 1; i < 4; i++) {
        if (isdigit((unsigned char)buffer[i]) == 0)
            return 0;
        bank_code = (bank_code * 10) + (buffer[i] - '0');
    }

    /* Only the institution numbers known to is_bank_code_valid() pass;
     * there may be more valid codes which could be added there. */
    if (!is_bank_code_valid(bank_code))
        return 0;

    for (i = 4; i < 9; i++) {
        if (isdigit((unsigned char)buffer[i]) == 0)
            return 0;
    }

    return 1;
}
/*
 * Check whether the first nine characters of buffer form a U.S. MICR bank
 * routing number with a correct check digit.
 *
 * With d[0]..d[8] being the digit values, the number is accepted when
 *   (7 * (d[0] + d[3] + d[6]) +
 *    3 * (d[1] + d[4] + d[7]) +
 *    9 * (d[2] + d[5])) % 10 == d[8]
 *
 * buffer  characters to inspect; does not need to be NUL-terminated
 * length  number of readable characters in buffer
 *
 * Returns 1 if valid, 0 if not (including a NULL buffer or length < 9).
 */
int us_micr_is_valid(const char *buffer, size_t length)
{
    int d[9];
    int weighted_sum;
    int i;

    if (buffer == NULL || length < 9)
        return 0;

    /* make sure all the characters are actually digits; plain char may be
     * negative and <ctype.h> functions need an unsigned char value */
    for (i = 0; i < 9; i++) {
        if (isdigit((unsigned char)buffer[i]) == 0)
            return 0;
        d[i] = buffer[i] - '0';
    }

    weighted_sum = 7 * (d[0] + d[3] + d[6]) +
                   3 * (d[1] + d[4] + d[7]) +
                   9 * (d[2] + d[5]);

    /* last digit of MICR must match the computed check digit */
    return (weighted_sum % 10) == d[8];
}