commit 8abf0ce512f2ebd5e1aa0ef2ad0d4f1a6fdd9007 Author: aixiao Date: Fri Sep 20 15:27:11 2024 +0800 init diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..67773ef --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +CROSS_COMPILE ?= +CC := $(CROSS_COMPILE)gcc +STRIP := $(CROSS_COMPILE)strip +CFLAGS += -g -Os -Wall +LIBS = -static +OBJ := denyip + +all: main.o ip2region.o xdb_searcher.o qqwry.o + $(CC) $(CFLAGS) -o $(OBJ) $^ $(LIBS) +.c.o: + $(CC) $(CFLAGS) -c $< + +clean: + rm -rf *.o + rm -rf $(OBJ) + + diff --git a/denyip b/denyip new file mode 100644 index 0000000..d97dfe5 Binary files /dev/null and b/denyip differ diff --git a/ip2region.c b/ip2region.c new file mode 100644 index 0000000..5f894ab --- /dev/null +++ b/ip2region.c @@ -0,0 +1,46 @@ +#include +#include "xdb_searcher.h" +#include "ip2region.h" + +char *ip2region(char *xdb_file, char *ip) +{ + char *db_path = xdb_file; + xdb_vector_index_t *v_index; + xdb_searcher_t searcher; + char region_buffer[256]; + //long s_time; + + // 1、从 db_path 加载 VectorIndex 索引。 + // 得到 v_index 做成全局缓存,便于后续反复使用。 + // 注意:v_index 不需要每次都加载,建议在服务启动的时候加载一次,然后做成全局资源。 + v_index = xdb_load_vector_index_from_file(db_path); + if (v_index == NULL) { + printf("failed to load vector index from `%s`\n", db_path); + return NULL; + } + // 2、使用全局的 VectorIndex 变量创建带 VectorIndex 缓存的 xdb 查询对象 + int err = xdb_new_with_vector_index(&searcher, db_path, v_index); + if (err != 0) { + printf("failed to create vector index cached searcher with errcode=%d\n", err); + return NULL; + } + // 3、调用 search API 查询 + // 得到的 region 信息会存储到 region_buffer 里面,如果你自定义了数据,请确保给足 buffer 的空间。 + //s_time = xdb_now(); + err = xdb_search_by_string(&searcher, ip, region_buffer, sizeof(region_buffer)); + if (err != 0) { + printf("failed search(%s) with errno=%d\n", ip, err); + return NULL; + } else { + ; + //printf("{region: %s, took: %dμs}", region_buffer, (int)(xdb_now() - s_time)); + } + + // 备注:并发使用,没一个线程需要单独定义并且初始化一个 searcher 查询对象。 + + // 4、关闭 xdb 
查询器,如果是要关闭服务,也需要释放 v_index 的内存。 + xdb_close(&searcher); + xdb_close_vector_index(v_index); + + return strdup(region_buffer); +} diff --git a/ip2region.h b/ip2region.h new file mode 100644 index 0000000..1795eb9 --- /dev/null +++ b/ip2region.h @@ -0,0 +1,6 @@ +#ifndef IP2REGION_H +#define IP2REGION_H + +extern char *ip2region(char *xdb_file, char *ip); + +#endif diff --git a/ip2region.o b/ip2region.o new file mode 100644 index 0000000..e89cbb5 Binary files /dev/null and b/ip2region.o differ diff --git a/ip2region.xdb b/ip2region.xdb new file mode 100644 index 0000000..9f6502b Binary files /dev/null and b/ip2region.xdb differ diff --git a/ipquery b/ipquery new file mode 100644 index 0000000..8608c9d Binary files /dev/null and b/ipquery differ diff --git a/main.c b/main.c new file mode 100644 index 0000000..ed318c5 --- /dev/null +++ b/main.c @@ -0,0 +1,385 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ip2region.h" +#include "qqwry.h" + +#define RED "\033[31m" +#define RESET "\033[0m" +#define BUFFER 512 +#define WHITELIST_IP_NUM 1024 +#define MAXIPSET 65534 + +char *xdb_path = "ip2region.xdb"; +pid_t pid1, pid2; // 保存子进程的 PID + + +void split_string(char string[], char delims[], char (*whitelist_ip)[WHITELIST_IP_NUM]) +{ + int i = 0; + char *result = NULL; + + result = strtok(string, delims); + while (result != NULL) { + + strcpy(whitelist_ip[i], result); + result = strtok(NULL, delims); + i++; + } +} + +// 地域段白名单对比 +int isregion(char *str, char (*region_list)[WHITELIST_IP_NUM]) +{ + int i; + char *p; + + for (i = 1; i < WHITELIST_IP_NUM - 1; i++) { + if (strcmp(region_list[i], "\0") == 0) // 如果字符串为空就跳出循环 + { + break; + } + //printf("%s %s\n", str, region_list[i]); + // 在str中查找region_list[i] + p = strstr(str, region_list[i]); + if (p != NULL) { + return 1; + } + } + + return 0; +} + +int is_valid_ip(const char *ip) +{ + struct sockaddr_in sa; + // 尝试将字符串转换为IPv4地址 + int result = inet_pton(AF_INET, ip, 
&(sa.sin_addr)); + + return result != 0; +} + +int nice_(int increment) +{ + int oldprio = getpriority(PRIO_PROCESS, getpid()); + printf("%d\n", oldprio); + + return setpriority(PRIO_PROCESS, getpid(), oldprio + increment); +} + +// 判断命令是否存在 +int command_exists(const char *command) +{ + char buffer[BUFFER]; + snprintf(buffer, sizeof(buffer), "%s > /dev/null 2>&1", command); + int status = system(buffer); + + return (status == 0); +} + +void denyip_help() +{ + puts(" DenyIp"); + puts("Linux system firewall, reject non-Chinese IP"); + puts("Email: aixiao@aixiao.me"); + puts("Version: 0.1"); + puts("Usage: ./denyip [-i eth0|-h|-?] [start|stop] "); + puts("Options:"); + puts(" stop Enable firewall rules"); + puts(" start Disable firewall rules"); + + puts("Parameters:"); + puts(" -h|? Help info "); + puts(" -i interface name"); + puts(""); + puts(""); + + exit(0); +} + +/* 处理僵尸进程 */ +void sigchld_handler(int signal) +{ + while (waitpid(-1, NULL, WNOHANG) > 0) ; +} + +void kill_tcpdump_processes() +{ + int result = system("pkill tcpdump"); // 用 pkill 命令终止所有 tcpdump 进程 + if (result == -1) { + perror("Failed to kill tcpdump processes"); + } + + return ; +} + +// 进程重启功能 +void restart_process(pid_t pid1, pid_t pid2, char *argv[]) +{ + // 发送 SIGTERM 信号终止两个子进程 + kill(pid1, SIGTERM); + kill(pid2, SIGTERM); + + // 等待子进程完全退出 + waitpid(pid1, NULL, 0); + waitpid(pid2, NULL, 0); + + // 终止 tcpdump 进程 + kill_tcpdump_processes(); + + // 使用 execvp 重新启动程序自身 + printf("重启进程...\n"); + execvp(argv[0], argv); // 重新启动程序 + perror("execvp failed"); // 如果 execvp 出错 + exit(EXIT_FAILURE); + + return ; +} + +void cleanup_(int signum) +{ + printf("Received signal %d, cleaning up...\n", signum); + + // 终止子进程 + if (pid1 > 0) { + kill(pid1, SIGTERM); + } + if (pid2 > 0) { + kill(pid2, SIGTERM); + } + // 终止所有 tcpdump 进程 + system("pkill tcpdump"); + + // 退出主进程 + exit(0); + + return ; +} + +int main(int argc, char *argv[]) +{ + signal(SIGCHLD, sigchld_handler); // 防止子进程变成僵尸进程 + + // 主进程设置 + 
//prctl(PR_SET_PDEATHSIG, SIGTERM); + + // 注册 SIGTERM 信号处理函数 + signal(SIGTERM, cleanup_); + + char interface[BUFFER] = { 0 }; + strcpy(interface, "eth0"); + + int r; + + // 参数处理 + if (argc == 2) { + if (0 == strcmp(argv[1], "start")) { + if ((r = system("iptables -A INPUT -p tcp -m set --match-set root src -j DROP")) == -1) { + puts("\"iptables -A INPUT -p tcp -m set --match-set root src -j DROP\" Error!"); + } + exit(0); + } else if (0 == strcmp(argv[1], "stop")) { + if ((r = system("iptables -D INPUT -p tcp -m set --match-set root src -j DROP")) == -1) { + puts("\"iptables -D INPUT -p tcp -m set --match-set root src -j DROP\" Error!"); + } + exit(0); + } else if (0 == strcmp(argv[1], "-h")) { + denyip_help(); + } else if (0 == strcmp(argv[1], "-?")) { + denyip_help(); + } + } + if (argc == 3) { + if (0 == strcmp(argv[1], "-i")) { + strcpy(interface, argv[2]); + } + } + // 判断运行用户禁止非root用户运行 + if (geteuid() == 0) { + ; + } else { + printf("This process is not running as root.\n"); + printf("\n"); + exit(-1); + } + + // 判断网卡是否存在 + char command_ifconfig[BUFFER + 20] = { 0 }; + snprintf(command_ifconfig, BUFFER + 20, "ifconfig %s", interface); + // 判断必要命令是否存在 + if (command_exists(command_ifconfig)) { + ; + } else { + puts("The network card does not exist!"); + exit(-1); + } + + // 后台运行 + if (daemon(1, 1)) { + perror("daemon"); + return -1; + } + // 进程优先级 + if (-1 == (nice_(-20))) + perror("nice_"); + + // 哈希集合 + if ((r = system("ipset create root hash:ip > /dev/null 2>&1")) != -1) { + ; + } + // 判断必要命令是否存在 + if (command_exists("which tcpdump")) { + ; + } else { + r = system("yum -y install tcpdump > /dev/null 2>&1"); + r = system("apt -y install tcpdump > /dev/null 2>&1"); + } + + // 子进程 + pid1 = fork(); // 创建子进程 + if (pid1 == 0) { + while (1) { + FILE *fp = popen("ipset list root | grep \"Number of entries\" | cut -d : -f 2 | xargs", "r"); + char line[BUFFER] = { 0 }; + while (fgets(line, sizeof(line), fp) != NULL) { + line[strcspn(line, "\n")] = '\0'; + } + + if 
(atoi(line) >= MAXIPSET) { + r = system("ipset flush root"); + } + + printf("%s\n", line); + + pclose(fp); + sleep(3); + } + } + // 子进程 + pid2 = fork(); // 创建子进程 + if (pid2 == 0) { + // 缓冲区用于存储每行的输出 + char line[BUFFER]; + // 要执行的命令 + char command_tcpdump[BUFFER + 256] = { 0 }; + snprintf(command_tcpdump, BUFFER + 256, "tcpdump -i %s -n 'tcp' | awk '{print $3}' | cut -d '.' -f 1-4", interface); + + // 地域白名单 + char _region_list[WHITELIST_IP_NUM][WHITELIST_IP_NUM] = { { 0 }, { 0 } }; + char qqwry_region_list[WHITELIST_IP_NUM][WHITELIST_IP_NUM] = { { 0 }, { 0 } }; + char _REGION_LIST_COPY[BUFFER] = { 0 }; + char QQWRY_REGION_LIST_COPY[BUFFER] = { 0 }; + + if (access(xdb_path, F_OK) == -1) { // 判断 ip2region 地址定位库是否存在 + xdb_path = "ip2region/ip2region.xdb"; + if (access(xdb_path, F_OK) == -1) { + printf("ip2region.xdb DOESN'T EXIST!\n"); + return -1; + } + } + // 打开管道来执行命令 + FILE *fp = popen(command_tcpdump, "r"); + if (fp == NULL) { + perror("popen failed"); + return 1; + } + // 逐行读取命令输出 + while (fgets(line, sizeof(line), fp) != NULL) { + line[strcspn(line, "\n")] = '\0'; + + if (is_valid_ip(line)) { + + char *qqwry_region = qqwry_(line); + if (qqwry_region == NULL) { + printf("qqwry 解析地域错误\n"); + continue; + } + char *area = ip2region(xdb_path, line); + if (area == NULL) { + printf("ip2region 解析地域错误\n"); + continue; + } + // 取环境变量 + const char *REGION_ENV = getenv("REGION"); + if (REGION_ENV != NULL) { + printf("REGION: %s\n", REGION_ENV); + strcpy(_REGION_LIST_COPY, REGION_ENV); + strcpy(QQWRY_REGION_LIST_COPY, REGION_ENV); + } else { + strcpy(_REGION_LIST_COPY, "局域网 内网 中国 "); + strcpy(QQWRY_REGION_LIST_COPY, "局域网 内网 中国 "); + } + //printf("REGION_LIST : %s\n", _REGION_LIST_COPY); + + split_string(QQWRY_REGION_LIST_COPY, " ", qqwry_region_list); // 分割后存储在 qqwry_region_list + if (isregion(qqwry_region, qqwry_region_list) == 1) { // 返回1表示在白名单列表 + ; + } else { + split_string(_REGION_LIST_COPY, " ", _region_list); + if (isregion(area, _region_list) == 1) { // 返回1表示在白名单列表 + 
; + } else { + char ipquery_command[BUFFER + 100] = { 0 }; + snprintf(ipquery_command, BUFFER + 100, "./ipquery %s", line); + FILE *fp = popen(ipquery_command, "r"); + if (fp == NULL) { + perror("popen failed"); + return 1; + } + // 创建足够大的缓冲区来存储命令输出 + char buffer[1024 * 2]; // 2KB 缓冲区 + size_t bytesRead = fread(buffer, 1, sizeof(buffer) - 1, fp); + buffer[bytesRead] = '\0'; + + pclose(fp); + sleep(1); + + char *p = strstr(buffer, "中国"); + if (p == NULL) { + printf("%s %s", line, buffer); + + char command_ipset[BUFFER + 256] = { 0 }; + snprintf(command_ipset, sizeof(command_ipset), "ipset add root %s > /dev/null 2>&1", line); + int r = system(command_ipset); + if (r == -1) { + perror("system command failed"); + } + } + } + } + free(qqwry_region); + free(area); + + } else { // 是正确IP + printf("%s is not a valid IPv4 address.\n", line); + } + } // while + + // 关闭管道 + pclose(fp); + } + + + // 父进程 + int iteration = 0; + // 主进程循环,检查子进程运行情况 + while (1) { + iteration++; + + if (iteration >= 3600) { + printf("准备重启进程...\n"); + restart_process(pid1, pid2, argv); + } + + sleep(1); // 每次检查间隔1秒 + } + + return 0; +} diff --git a/main.o b/main.o new file mode 100644 index 0000000..6332df3 Binary files /dev/null and b/main.o differ diff --git a/qqwry.c b/qqwry.c new file mode 100644 index 0000000..4cd34f7 --- /dev/null +++ b/qqwry.c @@ -0,0 +1,373 @@ + +#include "qqwry.h" + +ip_data ip_defaults = {.parent_data = NULL,.child_data = NULL,.index_size = 7,.isp = 1 }; + +int qqwry_init(char *file) +{ + int buff; + + ip_defaults.fp = fopen(file, "r"); + if (ip_defaults.fp == NULL) { + fprintf(stderr, "failed to open %s\n", file); + return -1; + } + + readvalue(4, &buff); //first 4 bytes represents the offset of first index + ip_defaults.first_item = buff; + readvalue(4, &buff); + ip_defaults.last_item = buff; + ip_defaults.item_number = (ip_defaults.last_item - ip_defaults.first_item) / ip_defaults.index_size; + + return 0; +} + +int qqwry_match(char *pattern, char *subject) +{ + regex_t 
/**
 * Convert a NUL-terminated GBK string to UTF-8.
 *
 * The terminating NUL is converted too, so on success the output is a C
 * string.  On a conversion failure the output is terminated after whatever
 * was converted so far, when there is room left for the terminator.
 *
 * @param utf8_str output buffer; it must hold at least 3 * (strlen(gbk_str) + 1)
 *                 bytes, which is the size this function assumes
 * @param gbk_str  GBK input string
 * @return the number of unused bytes left of the assumed output size, or -1
 *         when an argument is NULL, the converter is unavailable, the
 *         conversion fails, or the converter cannot be closed
 */
int gbk2utf8(char *utf8_str, char *gbk_str)
{
    iconv_t cd;
    char *in = gbk_str;
    char *out = utf8_str;
    size_t in_left, out_left;

    if (gbk_str == NULL) {
        fprintf(stderr, "iconvISO2UTF8: input String is empty.");
        return -1;
    }
    if (utf8_str == NULL) {
        fprintf(stderr, "iconvISO2UTF8: Calloc failed.");
        return -1;
    }

    /* iconv_open() reports failure as (iconv_t)-1, never as NULL. */
    cd = iconv_open("UTF-8//TRANSLIT//IGNORE", "GBK");
    if (cd == (iconv_t)-1) {
        fprintf(stderr, "Error: Conversion from '%s' to '%s' is not available.\n",
                "GBK", "UTF-8//TRANSLIT//IGNORE");
        return -1;
    }

    in_left = strlen(gbk_str) + 1;      /* include the terminator */
    out_left = 3 * in_left;             /* one GBK char is at most 3 UTF-8 bytes */

    if (iconv(cd, &in, &in_left, &out, &out_left) == (size_t)-1) {
        int saved_errno = errno;

        /* Terminate the partial output so it can be printed and used. */
        if (out_left > 0) {
            *out = '\0';
        }
        switch (saved_errno) {
            /* See "man 3 iconv" for an explanation. */
        case EILSEQ:
            fprintf(stderr, "iconv failed: Invalid multibyte sequence, in string '%s', length %d, out string '%s', length %d\n", in, (int)in_left, out_left > 0 ? utf8_str : "", (int)out_left);
            break;
        case EINVAL:
            fprintf(stderr, "iconv failed: Incomplete multibyte sequence, in string '%s', length %d, out string '%s', length %d\n", in, (int)in_left, out_left > 0 ? utf8_str : "", (int)out_left);
            break;
        case E2BIG:
            fprintf(stderr, "iconv failed: No more room, in string '%s', length %d, out string '%s', length %d\n", in, (int)in_left, out_left > 0 ? utf8_str : "", (int)out_left);
            break;
        default:
            fprintf(stderr, "iconv failed, in string '%s', length %d, out string '%s', length %d\n", in, (int)in_left, out_left > 0 ? utf8_str : "", (int)out_left);
        }
        iconv_close(cd);                /* do not leak the descriptor */
        return -1;
    }

    if (iconv_close(cd) != 0) {
        fprintf(stderr, "libicon close failed: %s", strerror(errno));
        return -1;
    }

    return (int)out_left;
}
/**
 * Convert a dotted-decimal IPv4 string to its 32-bit value, first octet in
 * the most significant byte ("1.2.3.4" -> 0x01020304).
 *
 * The string is parsed in place: nothing is allocated, the input is not
 * modified and strtok()'s shared state is left alone.  At most four octets
 * are read; parsing stops at the first character that is neither a digit nor
 * a separating '.', and octets that are missing count as 0.
 *
 * @param ip dotted-decimal address; NULL yields 0
 * @return the address as an unsigned 32-bit number
 */
unsigned int ip2long(char *ip)
{
    const char *p = ip;
    unsigned int value = 0;
    int shift;

    if (ip == NULL) {
        return 0;
    }

    for (shift = 24; shift >= 0; shift -= 8) {
        unsigned int octet = 0;

        while (*p >= '0' && *p <= '9') {
            octet = octet * 10u + (unsigned int)(*p - '0');
            p++;
        }
        /* Unsigned arithmetic: shifting an octet >= 128 by 24 must not
         * overflow a signed int. */
        value |= (octet & 0xFFu) << shift;

        if (*p != '.') {
            break;
        }
        p++;
    }

    return value;
}
qqwry_seek(offset); + return get_string(); + } + // no redirection for child data + qqwry_back(1); + return get_string(); +} + +int convert_data(char *parent_data, char *child_data) +{ + ip_defaults.parent_data = malloc(strlen(parent_data) * 3); //in utf8,one chinese character could consume up to 3 bytes + gbk2utf8(ip_defaults.parent_data, parent_data); + ip_defaults.child_data = malloc(strlen(child_data) * 3); + gbk2utf8(ip_defaults.child_data, child_data); + + if (qqwry_match("移动", ip_defaults.child_data)) { + ip_defaults.isp = 0x03; + } else if (qqwry_match("联通", ip_defaults.child_data)) { + ip_defaults.isp = 0x02; + } else { + ip_defaults.isp = 0x01; + } + free(parent_data); + free(child_data); + + return 0; +} + +int qqwry_redirect(int bytes) +{ + int redirect_offset; + readvalue(bytes, &redirect_offset); + qqwry_seek(redirect_offset); + return redirect_offset; +} + +int get_data(int offset) +{ //get record data + int flag, redirect_offset; + char *parent_data, *child_data; + readbyte(1, offset + 4, &flag); //get the flag value to see if the data is stored elsewhere + + if (flag == 1) { //this means we should look elsewhere for both + redirect_offset = qqwry_redirect(3); //read 3 bytes to get a new offset and redirect there + readvalue(1, &flag); + if (flag == 2) { + // child data is elsewhere + qqwry_redirect(3); + parent_data = get_string(); + qqwry_seek(redirect_offset + 4); + child_data = get_child_data(); + } else { // no redirection for parent data + qqwry_back(1); + parent_data = get_string(); + child_data = get_child_data(); + } + } else if (flag == 2) { //redirection for only parent + qqwry_redirect(3); + parent_data = get_string(); + qqwry_seek(offset + 8); + child_data = get_child_data(); + } else { // no redirection for both parent and child + qqwry_back(1); + parent_data = get_string(); + child_data = get_string(); + } + + convert_data(parent_data, child_data); + + return 0; +} + +int get_location(char *ip) +{ + //offset is the address where the 
ip is found. first 4 bytes is the start ip address of the ip range and the following 3 bytes is the offset pointing to the actual record data; + unsigned int offset = search_record(ip); + unsigned int tmp_offset; + qqwry_seek(offset + 4); // skip 4 byte to get the offset value pointing to record data + readvalue(3, (int *)(&tmp_offset)); // the offset pointing to the data + get_data(tmp_offset); + + return 0; +} + +char *qqwry_(char *ip) +{ + qqwry_init("qqwry.dat"); + + + get_location(ip); + //printf("%s-%s %d\n", ip_defaults.parent_data, ip_defaults.child_data, ip_defaults.isp); + //printf("QQWRY %s %s-%s\n", ip, ip_defaults.parent_data, ip_defaults.child_data); + + // 计算拼接后的字符串所需的长度 + size_t len = strlen(ip_defaults.parent_data) + strlen(ip_defaults.child_data) + 2; // +2 for the hyphen and null terminator + char *result = malloc(len); + + if (result) { + // 拼接字符串,格式为 "parent_data-child_data" + snprintf(result, len, "%s-%s", ip_defaults.parent_data, ip_defaults.child_data); + } + + free(ip_defaults.parent_data); + free(ip_defaults.child_data); + fclose(ip_defaults.fp); + + return result; +} diff --git a/qqwry.dat b/qqwry.dat new file mode 100644 index 0000000..977f115 Binary files /dev/null and b/qqwry.dat differ diff --git a/qqwry.h b/qqwry.h new file mode 100644 index 0000000..52f495f --- /dev/null +++ b/qqwry.h @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct { + FILE *fp; + unsigned int index_size; + unsigned int first_item, last_item; + unsigned int item_number, startip, endip, curr_data_offset; + char *parent_data, *child_data; + int isp; +} ip_data; + +int search_record(char *ip); +int binary_search(int low, int high, int ip); +int readbyte(int size, int offset, int *buff); +int readvalue(unsigned int size, int *buff); +void qqwry_seek(int offset); +void qqwry_forward(unsigned int byte); +void qqwry_back(unsigned int byte); +int get_location(char *ip); +int gbk2utf8(char 
*utf8_str, char *gbk_str); + +extern char *qqwry_(char *ip); \ No newline at end of file diff --git a/qqwry.o b/qqwry.o new file mode 100644 index 0000000..7419ec1 Binary files /dev/null and b/qqwry.o differ diff --git a/xdb_searcher.c b/xdb_searcher.c new file mode 100644 index 0000000..61413f6 --- /dev/null +++ b/xdb_searcher.c @@ -0,0 +1,410 @@ +// Copyright 2022 The Ip2Region Authors. All rights reserved. +// Use of this source code is governed by a Apache2.0-style +// license that can be found in the LICENSE file. + +// --- +// @Author Lion +// @Date 2022/06/27 + +#include "sys/time.h" +#include "xdb_searcher.h" + +// internal function prototype define +XDB_PRIVATE(int) read(xdb_searcher_t *, long offset, char *, size_t length); + +XDB_PRIVATE(int) xdb_new_base(xdb_searcher_t *xdb, const char *db_path, const xdb_vector_index_t *v_index, const xdb_content_t *c_buffer) +{ + memset(xdb, 0x00, sizeof(xdb_searcher_t)); + + // check the content buffer first + if (c_buffer != NULL) { + xdb->v_index = NULL; + xdb->content = c_buffer; + return 0; + } + // open the xdb binary file + FILE *handle = fopen(db_path, "rb"); + if (handle == NULL) { + return 1; + } + + xdb->handle = handle; + xdb->v_index = v_index; + + return 0; +} + +// xdb searcher new api define +XDB_PUBLIC(int) xdb_new_with_file_only(xdb_searcher_t *xdb, const char *db_path) +{ + return xdb_new_base(xdb, db_path, NULL, NULL); +} + +XDB_PUBLIC(int) xdb_new_with_vector_index(xdb_searcher_t *xdb, const char *db_path, const xdb_vector_index_t *v_index) +{ + return xdb_new_base(xdb, db_path, v_index, NULL); +} + +XDB_PUBLIC(int) xdb_new_with_buffer(xdb_searcher_t *xdb, const xdb_content_t *c_buffer) +{ + return xdb_new_base(xdb, NULL, NULL, c_buffer); +} + +XDB_PUBLIC(void) xdb_close(void *ptr) +{ + xdb_searcher_t *xdb = (xdb_searcher_t *) ptr; + if (xdb->handle != NULL) { + fclose(xdb->handle); + xdb->handle = NULL; + } +} + +// --- xdb searcher search api define + +XDB_PUBLIC(int) 
// Find the region text for a numeric IPv4 address.
//
// xdb           searcher created by one of the xdb_new_with_* functions
// ip            address as an unsigned int, first octet in the top byte
// region_buffer receives the NUL-terminated region text
// length        size of region_buffer in bytes
//
// Returns 0 on success (region_buffer is the empty string when no segment
// covers the address), 1 when region_buffer is too small for the text plus
// its terminator, and 10/20/30 plus the error code of read() when reading the
// vector index entry, a segment index item or the region text fails.
XDB_PUBLIC(int) xdb_search(xdb_searcher_t *xdb, unsigned int ip, char *region_buffer, size_t length)
{
    int il0, il1, idx, err, l, h, m, data_len;
    unsigned int s_ptr, e_ptr, p, sip, eip, data_ptr;
    char vector_buffer[xdb_vector_index_size], segment_buffer[xdb_segment_index_size];

    // reset the io counter
    xdb->io_count = 0;

    // locate the segment index block based on the vector index:
    // the first two octets of the address select one vector index entry,
    // which holds the start and end pointers of the segment index block
    il0 = ((int)(ip >> 24)) & 0xFF;
    il1 = ((int)(ip >> 16)) & 0xFF;
    idx = il0 * xdb_vector_index_cols * xdb_vector_index_size + il1 * xdb_vector_index_size;
    if (xdb->v_index != NULL) {
        // preloaded vector index
        s_ptr = xdb_get_uint(xdb->v_index->buffer, idx);
        e_ptr = xdb_get_uint(xdb->v_index->buffer, idx + 4);
    } else if (xdb->content != NULL) {
        // whole file in memory: the vector index follows the header
        s_ptr = xdb_get_uint(xdb->content->buffer, xdb_header_info_length + idx);
        e_ptr = xdb_get_uint(xdb->content->buffer, xdb_header_info_length + idx + 4);
    } else {
        // no cache: read the single entry from the file
        err = read(xdb, xdb_header_info_length + idx, vector_buffer, sizeof(vector_buffer));
        if (err != 0) {
            return 10 + err;
        }

        s_ptr = xdb_get_uint(vector_buffer, 0);
        e_ptr = xdb_get_uint(vector_buffer, 4);
    }

    // printf("s_ptr=%u, e_ptr=%u\n", s_ptr, e_ptr);
    // binary search to get the final region info
    data_len = 0, data_ptr = 0;
    l = 0, h = ((int)(e_ptr - s_ptr)) / xdb_segment_index_size;
    while (l <= h) {
        m = (l + h) >> 1;
        p = s_ptr + m * xdb_segment_index_size;

        // read the segment index item
        err = read(xdb, p, segment_buffer, sizeof(segment_buffer));
        if (err != 0) {
            return 20 + err;
        }
        // decode the data fields as needed:
        // start ip at offset 0, end ip at 4, data length at 8, data pointer at 10
        sip = xdb_get_uint(segment_buffer, 0);
        if (ip < sip) {
            h = m - 1;
        } else {
            eip = xdb_get_uint(segment_buffer, 4);
            if (ip > eip) {
                l = m + 1;
            } else {
                data_len = xdb_get_ushort(segment_buffer, 8);
                data_ptr = xdb_get_uint(segment_buffer, 10);
                break;
            }
        }
    }

    // printf("data_len=%u, data_ptr=%u\n", data_len, data_ptr);
    // no segment matched (or the match carries no text): report an empty region
    if (data_len == 0) {
        region_buffer[0] = '\0';
        return 0;
    }
    // buffer length checking: one byte is needed for the terminator
    if (data_len >= length) {
        return 1;
    }

    err = read(xdb, data_ptr, region_buffer, data_len);
    if (err != 0) {
        return 30 + err;
    }
    // auto append a NULL-end
    region_buffer[data_len] = '\0';
    return 0;
}
*header; + FILE *handle = fopen(db_path, "rb"); + if (handle == NULL) { + return NULL; + } + + header = xdb_load_header(handle); + fclose(handle); + return header; +} + +XDB_PUBLIC(void) xdb_close_header(void *ptr) +{ + xdb_header_t *header = (xdb_header_t *) ptr; + if (header->length > 0) { + header->length = 0; + xdb_free(header); + } +} + +// --- vector index + +XDB_PUBLIC(xdb_vector_index_t *)xdb_load_vector_index(FILE *handle) +{ + xdb_vector_index_t *v_index; + unsigned int size = xdb_vector_index_length; + + // seek to the vector index offset + if (fseek(handle, xdb_header_info_length, SEEK_SET) == -1) { + return NULL; + } + // do the buffer read + v_index = (xdb_vector_index_t *) xdb_malloc(sizeof(xdb_vector_index_t)); + if (v_index == NULL) { + return NULL; + } + + v_index->length = size; + if (fread(v_index->buffer, 1, size, handle) != size) { + xdb_free(v_index); + return NULL; + } + + return v_index; +} + +XDB_PUBLIC(xdb_vector_index_t *)xdb_load_vector_index_from_file(const char *db_path) +{ + xdb_vector_index_t *v_index; + FILE *handle = fopen(db_path, "rb"); + if (handle == NULL) { + return NULL; + } + + v_index = xdb_load_vector_index(handle); + fclose(handle); + return v_index; +} + +XDB_PUBLIC(void) xdb_close_vector_index(void *ptr) +{ + xdb_vector_index_t *v_index = (xdb_vector_index_t *) ptr; + if (v_index->length > 0) { + v_index->length = 0; + xdb_free(v_index); + } +} + +// --- content buffer + +XDB_PUBLIC(xdb_content_t *)xdb_load_content(FILE *handle) +{ + unsigned int size; + xdb_content_t *content; + + // determine the file size + if (fseek(handle, 0, SEEK_END) == -1) { + return NULL; + } + + size = (unsigned int)ftell(handle); + if (fseek(handle, 0, SEEK_SET) == -1) { + return NULL; + } + // do the file read + content = (xdb_content_t *) xdb_malloc(sizeof(xdb_content_t)); + if (content == NULL) { + return NULL; + } + // do the buffer alloc + content->buffer = (char *)xdb_malloc(size); + if (content->buffer == NULL) { + xdb_free(content); 
+ return NULL; + } + // read the content into the buffer + content->length = size; + if (fread(content->buffer, 1, size, handle) != size) { + xdb_free(content); + return NULL; + } + + return content; +} + +XDB_PUBLIC(xdb_content_t *)xdb_load_content_from_file(const char *db_path) +{ + xdb_content_t *content; + FILE *handle = fopen(db_path, "rb"); + if (handle == NULL) { + return NULL; + } + + content = xdb_load_content(handle); + fclose(handle); + return content; +} + +XDB_PUBLIC(void) xdb_close_content(void *ptr) +{ + xdb_content_t *content = (xdb_content_t *) ptr; + if (content->length > 0) { + content->length = 0; + xdb_free(content->buffer); + content->buffer = NULL; + xdb_free(content); + } +} + +// --- End + +// get unsigned long (4bytes) from a specified buffer start from the specified offset +XDB_PUBLIC(unsigned int) xdb_get_uint(const char *buffer, int offset) +{ + return (((buffer[offset]) & 0x000000FF) | ((buffer[offset + 1] << 8) & 0x0000FF00) | ((buffer[offset + 2] << 16) & 0x00FF0000) | ((buffer[offset + 3] << 24) & 0xFF000000) + ); +} + +// get unsigned short (2bytes) from a specified buffer start from the specified offset +XDB_PUBLIC(int) xdb_get_ushort(const char *buffer, int offset) +{ + return (((buffer[offset]) & 0x000000FF) | ((buffer[offset + 1] << 8) & 0x0000FF00) + ); +} + +// string ip to unsigned int +static int shiftIndex[4] = { 24, 16, 8, 0 }; + +XDB_PUBLIC(int) xdb_check_ip(const char *src_ip, unsigned int *dst_ip) +{ + char c; + int i, n, ip = 0; + const char *ptr = src_ip; + for (i = 0; i < 4; i++) { + n = 0; + while (1) { + c = *ptr; + ptr++; + if (c >= '0' && c <= '9') { + n *= 10; + n += c - '0'; + } else if ((i < 3 && c == '.') || i == 3) { + // stopping at the '.' but ignore the tailing chars + // after the 3rd one (auto clean the tailing none-integer ?). 
+ break; + } else { + return 1; + } + } + + if (n > 0xFF) { + return 2; + } + + ip |= (n << shiftIndex[i]); + } + + *dst_ip = ip; + return 0; +} + +// unsigned int ip to string ip +XDB_PUBLIC(void) xdb_long2ip(unsigned int ip, char *buffer) +{ + sprintf(buffer, "%d.%d.%d.%d", (ip >> 24) & 0xFF, (ip >> 16) & 0xFF, (ip >> 8) & 0xFF, ip & 0xFF); +} + +// get the middle ip of a and b +XDB_PUBLIC(unsigned int) xdb_mip(unsigned long a, unsigned long b) +{ + return (unsigned int)((a + b) >> 1); +} + +XDB_PUBLIC(long) xdb_now() +{ + struct timeval c_time; + gettimeofday(&c_time, NULL); + return c_time.tv_sec * (int)1e6 + c_time.tv_usec; +} diff --git a/xdb_searcher.h b/xdb_searcher.h new file mode 100644 index 0000000..1055d45 --- /dev/null +++ b/xdb_searcher.h @@ -0,0 +1,144 @@ +// Copyright 2022 The Ip2Region Authors. All rights reserved. +// Use of this source code is governed by a Apache2.0-style +// license that can be found in the LICENSE file. + +// --- +// @Author Lion +// @Date 2022/06/27 + +#ifndef C_XDB_SEARCHER_H +#define C_XDB_SEARCHER_H + +#include +#include +#include + +#if ( defined(WIN32) || defined(_WIN32) || defined(__WINDOWS_) || defined(WINNT) ) +#define XDB_PUBLIC(type) extern __declspec(dllexport) type +#define XDB_PRIVATE(type) static type +#define XDB_WINDOWS +#elif ( defined(linux) || defined(_UNIX) ) +#define XDB_PUBLIC(type) extern type +#define XDB_PRIVATE(type) static inline type +#define XDB_LINUX +#endif + +#define xdb_calloc( _blocks, _bytes ) calloc( _blocks, _bytes ) +#define xdb_malloc( _bytes ) malloc( _bytes ) +#define xdb_free( _ptr ) free( _ptr ) + +// public constants define +#define xdb_header_info_length 256 +#define xdb_vector_index_rows 256 +#define xdb_vector_index_cols 256 +#define xdb_vector_index_size 8 +#define xdb_segment_index_size 14 + +// cache of vector_index_row × vector_index_rows × vector_index_size +#define xdb_vector_index_length 524288 + +// --- buffer load util functions + +// use the following buffer struct to 
wrap the binary buffer data
+// since the buffer data could not be operated with the string API.
+//
+// xdb header record: scalar fields followed by the original header bytes.
+// NOTE(review): the meaning of the individual scalar fields is not visible
+// in this header - confirm against xdb_load_header() in xdb_searcher.c.
+struct xdb_header {
+    unsigned short version;
+    unsigned short index_policy;
+    unsigned int created_at;
+    unsigned int start_index_ptr;
+    unsigned int end_index_ptr;
+
+    // the original buffer; its capacity is xdb_header_info_length bytes
+    unsigned int length;
+    char buffer[xdb_header_info_length];
+};
+typedef struct xdb_header xdb_header_t;
+
+// Load the header from a FILE stream.
+// NOTE(review): the failure return is not visible here - presumably NULL; confirm.
+XDB_PUBLIC(xdb_header_t *)xdb_load_header(FILE *);
+
+// Same as above, but takes a string argument - presumably the path of the
+// xdb file; confirm.
+XDB_PUBLIC(xdb_header_t *)xdb_load_header_from_file(const char *);
+
+// Counterpart of the two loaders above; takes the loaded header as void *.
+XDB_PUBLIC(void) xdb_close_header(void *);
+
+// --- vector index buffer
+// Fixed-size buffer of xdb_vector_index_length bytes holding the vector index.
+struct xdb_vector_index {
+    unsigned int length;
+    char buffer[xdb_vector_index_length];
+};
+typedef struct xdb_vector_index xdb_vector_index_t;
+
+// Load the vector index from a FILE stream.
+XDB_PUBLIC(xdb_vector_index_t *)xdb_load_vector_index(FILE *);
+
+// Load the vector index from the xdb file at the given path.
+// ip2region.c in this commit treats a NULL result as a load failure.
+XDB_PUBLIC(xdb_vector_index_t *)xdb_load_vector_index_from_file(const char *);
+
+// Close a vector index obtained from one of the loaders above
+// (ip2region.c calls it once the lookup is finished).
+XDB_PUBLIC(void) xdb_close_vector_index(void *);
+
+// --- content buffer
+// Content buffer referenced through a pointer, with its length stored
+// alongside it.
+struct xdb_content {
+    unsigned int length;
+    char *buffer;
+};
+typedef struct xdb_content xdb_content_t;
+
+// Load the xdb content from a FILE stream.
+XDB_PUBLIC(xdb_content_t *)xdb_load_content(FILE *);
+
+// Same as above, but takes a string argument - presumably the path of the
+// xdb file; confirm.
+XDB_PUBLIC(xdb_content_t *)xdb_load_content_from_file(const char *);
+
+// Counterpart of the two loaders above; takes the loaded content as void *.
+XDB_PUBLIC(void) xdb_close_content(void *);
+
+// --- End buffer load
+
+// xdb searcher structure
+// Passed by pointer to the xdb_new_with_*, xdb_search* and xdb_close
+// functions declared further down in this header.
+struct xdb_searcher_entry {
+    FILE *handle;
+
+    // header info
+    const char *header;
+    int io_count;
+
+    // vector index buffer cache.
+    // Preloading the vector index reduces the number of IO operations
+    // and thus speeds up the search process.
+    const xdb_vector_index_t *v_index;
+
+    // content buffer.
+    // cache the whole xdb content.
+    const xdb_content_t *content;
+};
+typedef struct xdb_searcher_entry xdb_searcher_t;
+
+// xdb searcher new api define
+// Each function initializes the searcher passed as the first argument and
+// returns an int status. ip2region.c in this commit treats a non-zero
+// result of xdb_new_with_vector_index() as an error code; the other two
+// presumably follow the same convention - confirm.
+
+// searcher + string argument (presumably the xdb file path; confirm)
+XDB_PUBLIC(int) xdb_new_with_file_only(xdb_searcher_t *, const char *);
+
+// searcher + xdb file path + preloaded vector index
+XDB_PUBLIC(int) xdb_new_with_vector_index(xdb_searcher_t *, const char *, const xdb_vector_index_t *);
+
+// searcher + preloaded xdb content buffer
+XDB_PUBLIC(int) xdb_new_with_buffer(xdb_searcher_t *, const xdb_content_t *);
+
+// Close a searcher created by one of the functions above; it takes the
+// searcher as void * (ip2region.c passes the address of its xdb_searcher_t).
+XDB_PUBLIC(void) xdb_close(void *);
+
+// xdb searcher search api define
+
+// Look up the textual IP given as the second argument and store the region
+// string in the buffer (third argument) whose size is the fourth argument.
+// ip2region.c treats a non-zero result as an error code.
+XDB_PUBLIC(int) xdb_search_by_string(xdb_searcher_t *, const char *, char *, size_t);
+
+// Same argument layout as above, but the IP is passed as an unsigned int.
+XDB_PUBLIC(int) xdb_search(xdb_searcher_t *, unsigned int, char *, size_t);
+
+// Takes a searcher and returns an int - presumably the io_count field of
+// the searcher; confirm.
+XDB_PUBLIC(int) xdb_get_io_count(xdb_searcher_t *);
+
+// get an unsigned int (4 bytes) from the specified buffer, starting at the specified offset, little-endian
+XDB_PUBLIC(unsigned int) xdb_get_uint(const char *, int);
+
+// get an unsigned short (2 bytes) from the specified buffer, starting at the specified offset, little-endian;
+// the value is returned as an int
+XDB_PUBLIC(int) xdb_get_ushort(const char *, int);
+
+// check the specified string ip and convert it to an unsigned int stored
+// through the second argument.
+// The definition returns 0 on success and 2 when a field exceeds 0xFF; it
+// also returns 1 from its character-scanning loop (presumably for an
+// unexpected character - confirm).
+XDB_PUBLIC(int) xdb_check_ip(const char *, unsigned int *);
+
+// unsigned int ip to string ip; the output buffer needs at least 16 bytes
+// ("255.255.255.255" plus the terminating NUL)
+XDB_PUBLIC(void) xdb_long2ip(unsigned int, char *);
+
+// get the middle ip of a and b
+XDB_PUBLIC(unsigned int) xdb_mip(unsigned long, unsigned long);
+
+// get the current time in microseconds
+// Declared with (void) so that calls are checked against a real prototype.
+XDB_PUBLIC(long) xdb_now(void);
+
+#endif //C_XDB_SEARCHER_H
diff --git a/xdb_searcher.o b/xdb_searcher.o
new file mode 100644
index 0000000..7caf3a6
Binary files /dev/null and b/xdb_searcher.o differ