--- /dev/null
+/* Copyright (C) 2025 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+// Author: Shivani Bhardwaj <shivani@oisf.net>
+
+//! This module exposes items from the datasets C code to Rust.
+
+use base64::{self, Engine};
+use std::ffi::{c_char, CStr};
+use std::fs::{File, OpenOptions};
+use std::io::{self, BufRead};
+use std::mem::transmute;
+use std::net::{Ipv4Addr, Ipv6Addr};
+use std::path::Path;
+use std::str::FromStr;
+
+/// Opaque handle for the `Dataset` type whose definition lives in the C code.
+#[derive(Copy, Clone)]
+pub enum Dataset {} // no variants: this file only ever passes references to it back to C
+
+// Rust mirror of the C `DataRepType`; `#[repr(C)]` lets a pointer to it be handed to C.
+#[derive(Debug, PartialEq)]
+#[repr(C)]
+pub struct DataRepType {
+ pub value: u16, // reputation parsed from the second comma-separated field of a line
+}
+
+#[derive(Debug)]
+#[repr(C)] // received by value from C as the `dstype` argument of `ParseDatasets`
+pub enum DatasetType { // selects which per-line helper `ParseDatasets` dispatches to
+ DSString = 0, // base64-encoded values, see `process_string_set`
+ DSMd5, // hex-encoded values added with a length of 16 bytes, see `process_md5_set`
+ DSSha256, // hex-encoded values added with a length of 32 bytes, see `process_sha256_set`
+ DSIpv4, // textual IPv4 addresses, see `process_ipv4_set`
+ DSIpv6, // textual IPv6 addresses, see `process_ipv6_set`
+}
+
+// Extern fns implemented in C that operate on the opaque Dataset type above
+/// cbindgen:ignore
+extern "C" {
+ pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32; // return value is currently ignored by every caller in this file
+ pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType)
+ -> i32; // NOTE(review): the C prototype takes non-const `Dataset *` / `DataRepType *` — confirm C never writes through `rep`
+}
+
+/// Reads the dataset/datarep file `fname` and feeds every usable line into `set`.
+///
+/// A line is either `<value>` (dataset) or `<value>,<reputation>` (datarep); the
+/// two forms must not be mixed within one file. Blank lines and lines with more
+/// than two comma-separated fields are ignored. How `<value>` is decoded is
+/// selected by `dstype`.
+///
+/// Returns `0` after the file has been walked, `-1` when the file cannot be
+/// opened and `-2` when an argument is not valid UTF-8 or when dataset and
+/// datarep lines are mixed.
+///
+/// # Safety
+///
+/// `name`, `fname` and `fmode` must be non-null pointers to NUL-terminated
+/// strings that remain valid for the duration of the call, and `set` must be a
+/// dataset the C `DatasetAdd`/`DatasetAddwRep` functions can operate on.
+#[no_mangle]
+pub unsafe extern "C" fn ParseDatasets(
+    set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char,
+    dstype: DatasetType,
+) -> i32 {
+    let file_string = unwrap_or_return!(CStr::from_ptr(fname).to_str(), -2);
+    let mode = unwrap_or_return!(CStr::from_ptr(fmode).to_str(), -2);
+    let set_name = unwrap_or_return!(CStr::from_ptr(name).to_str(), -2);
+    let filename = Path::new(file_string);
+    let lines = match read_or_create_file(filename, mode) {
+        Ok(lines) => lines,
+        Err(_) => return -1,
+    };
+    // `None` until the first usable line; afterwards remembers whether the file
+    // carries reputation values so that a mix of both forms can be rejected.
+    let mut file_has_rep: Option<bool> = None;
+    for line in lines.map_while(Result::ok) {
+        // `str::split` always yields at least one field, so a blank line has to be
+        // filtered out here; otherwise it would count as a dataset entry with an
+        // empty value.
+        if line.trim().is_empty() {
+            continue;
+        }
+        let v: Vec<&str> = line.split(',').collect();
+        // Ignore invalid lines in dataset/rep file
+        if v.len() > 2 {
+            continue;
+        }
+
+        let line_has_rep = v.len() == 2;
+        if file_has_rep == Some(!line_has_rep) {
+            SCLogError!(
+                "Cannot mix dataset and datarep values for set {} in {}",
+                set_name,
+                filename.display()
+            );
+            return -2;
+        }
+        file_has_rep = Some(line_has_rep);
+        let no_rep = !line_has_rep;
+
+        // A rejected line is reported by the helper itself and does not stop the
+        // load, so the per-line status is deliberately dropped.
+        let _ = match dstype {
+            DatasetType::DSString => process_string_set(set, v, set_name, filename, no_rep),
+            DatasetType::DSMd5 => process_md5_set(set, v, set_name, filename, no_rep),
+            DatasetType::DSSha256 => process_sha256_set(set, v, set_name, filename, no_rep),
+            DatasetType::DSIpv4 => process_ipv4_set(set, v, set_name, filename, no_rep),
+            DatasetType::DSIpv6 => process_ipv6_set(set, v, set_name, filename, no_rep),
+        };
+    }
+
+    0
+}
+
+/// Adds one string entry to `set`.
+///
+/// `v[0]` is base64-decoded; when `no_rep` is false, `v[1]` is parsed as the
+/// `u16` reputation and the entry is added together with it. Returns `0` when
+/// the entry was handed to C and `-1` when the line was rejected.
+///
+/// # Safety
+///
+/// `set` must be a dataset the C `DatasetAdd`/`DatasetAddwRep` functions can
+/// operate on, and `v` must hold two fields whenever `no_rep` is false.
+unsafe fn process_string_set(
+    set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool,
+) -> i32 {
+    let mut raw: Vec<u8> = Vec::new();
+    let outcome = base64::engine::general_purpose::STANDARD.decode_vec(v[0], &mut raw);
+    if outcome.is_err() {
+        SCFatalErrorOnInit!("bad base64 encoding {} in {}", set_name, filename.display());
+        return -1;
+    }
+    let raw_len = raw.len() as u32;
+
+    if no_rep {
+        DatasetAdd(set, raw.as_ptr(), raw_len);
+        return 0;
+    }
+
+    match v[1].parse::<u16>() {
+        Ok(value) => {
+            // `rep` only has to outlive the call below.
+            let rep = DataRepType { value };
+            DatasetAddwRep(set, raw.as_ptr(), raw_len, &rep);
+            0
+        }
+        Err(_) => {
+            SCFatalErrorOnInit!(
+                "invalid datarep value {} in {}",
+                set_name,
+                filename.display()
+            );
+            -1
+        }
+    }
+}
+
+/// Adds one MD5 entry to `set`.
+///
+/// `v[0]` is hex-decoded and must yield exactly 16 bytes; when `no_rep` is
+/// false, `v[1]` is parsed as the `u16` reputation and the entry is added
+/// together with it. Returns `0` when the entry was handed to C and `-1` when
+/// the line was rejected.
+///
+/// # Safety
+///
+/// `set` must be a dataset the C `DatasetAdd`/`DatasetAddwRep` functions can
+/// operate on, and `v` must hold two fields whenever `no_rep` is false.
+unsafe fn process_md5_set(
+    set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool,
+) -> i32 {
+    const MD5_LEN: usize = 16;
+
+    // The length handed to C is fixed at MD5_LEN, so a decode result of any other
+    // size (including the empty one produced by a blank field) must be rejected
+    // here; otherwise C would read past the end of the buffer.
+    let md5_string = match hex::decode(v[0]) {
+        Ok(rs) if rs.len() == MD5_LEN => rs,
+        _ => {
+            SCFatalErrorOnInit!("bad hash for dataset {} in {}", set_name, filename.display());
+            return -1;
+        }
+    };
+
+    if no_rep {
+        DatasetAdd(set, md5_string.as_ptr(), MD5_LEN as u32);
+    } else if let Ok(val) = v[1].parse::<u16>() {
+        let rep: DataRepType = DataRepType { value: val };
+        DatasetAddwRep(set, md5_string.as_ptr(), MD5_LEN as u32, &rep);
+    } else {
+        SCFatalErrorOnInit!(
+            "invalid datarep value {} in {}",
+            set_name,
+            filename.display()
+        );
+        return -1;
+    }
+    0
+}
+
+/// Adds one SHA-256 entry to `set`.
+///
+/// `v[0]` is hex-decoded and must yield exactly 32 bytes; when `no_rep` is
+/// false, `v[1]` is parsed as the `u16` reputation and the entry is added
+/// together with it. Returns `0` when the entry was handed to C and `-1` when
+/// the line was rejected.
+///
+/// # Safety
+///
+/// `set` must be a dataset the C `DatasetAdd`/`DatasetAddwRep` functions can
+/// operate on, and `v` must hold two fields whenever `no_rep` is false.
+unsafe fn process_sha256_set(
+    set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool,
+) -> i32 {
+    const SHA256_LEN: usize = 32;
+
+    // The length handed to C is fixed at SHA256_LEN, so a decode result of any
+    // other size (including the empty one produced by a blank field) must be
+    // rejected here; otherwise C would read past the end of the buffer.
+    let sha256_string = match hex::decode(v[0]) {
+        Ok(rs) if rs.len() == SHA256_LEN => rs,
+        _ => {
+            SCFatalErrorOnInit!("bad hash for dataset {} in {}", set_name, filename.display());
+            return -1;
+        }
+    };
+
+    if no_rep {
+        DatasetAdd(set, sha256_string.as_ptr(), SHA256_LEN as u32);
+    } else if let Ok(val) = v[1].parse::<u16>() {
+        let rep: DataRepType = DataRepType { value: val };
+        DatasetAddwRep(set, sha256_string.as_ptr(), SHA256_LEN as u32, &rep);
+    } else {
+        SCFatalErrorOnInit!(
+            "invalid datarep value {} in {}",
+            set_name,
+            filename.display()
+        );
+        return -1;
+    }
+    0
+}
+
+/// Adds one IPv4 entry to `set`.
+///
+/// `v[0]` is parsed as a textual IPv4 address and its four octets are added;
+/// when `no_rep` is false, `v[1]` is parsed as the `u16` reputation and the
+/// entry is added together with it. Returns `0` when the entry was handed to C
+/// and `-1` when the line was rejected.
+///
+/// # Safety
+///
+/// `set` must be a dataset the C `DatasetAdd`/`DatasetAddwRep` functions can
+/// operate on, and `v` must hold two fields whenever `no_rep` is false.
+unsafe fn process_ipv4_set(
+    set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool,
+) -> i32 {
+    let parsed = Ipv4Addr::from_str(v[0]);
+    if parsed.is_err() {
+        SCFatalErrorOnInit!("invalid Ipv4 value {} in {}", set_name, filename.display());
+        return -1;
+    }
+    // Keep the octets in a named local so the pointer given to C has an obvious owner.
+    let octets: [u8; 4] = parsed.unwrap_or(Ipv4Addr::UNSPECIFIED).octets();
+
+    if no_rep {
+        DatasetAdd(set, octets.as_ptr(), 4);
+        return 0;
+    }
+
+    match v[1].parse::<u16>() {
+        Ok(value) => {
+            let rep = DataRepType { value };
+            DatasetAddwRep(set, octets.as_ptr(), 4, &rep);
+            0
+        }
+        Err(_) => {
+            SCFatalErrorOnInit!(
+                "invalid datarep value {} in {}",
+                set_name,
+                filename.display()
+            );
+            -1
+        }
+    }
+}
+
+/// Adds one IPv6 entry to `set`.
+///
+/// `v[0]` is parsed as a textual IPv6 address. An IPv4-mapped address is
+/// rewritten so that its four IPv4 octets occupy bytes 0..4 and the remaining
+/// twelve bytes are zero; any other address is added unchanged. When `no_rep`
+/// is false, `v[1]` is parsed as the `u16` reputation and the entry is added
+/// together with it. Returns `0` when the entry was handed to C and `-1` when
+/// the line was rejected.
+///
+/// # Safety
+///
+/// `set` must be a dataset the C `DatasetAdd`/`DatasetAddwRep` functions can
+/// operate on, and `v` must hold two fields whenever `no_rep` is false.
+unsafe fn process_ipv6_set(
+    set: &Dataset, v: Vec<&str>, set_name: &str, filename: &Path, no_rep: bool,
+) -> i32 {
+    let parsed = Ipv6Addr::from_str(v[0]);
+    if parsed.is_err() {
+        SCFatalErrorOnInit!("invalid Ipv6 value {} in {}", set_name, filename.display());
+        return -1;
+    }
+    let ipv6 = parsed.unwrap_or(Ipv6Addr::UNSPECIFIED);
+
+    let fin_ipv6: Ipv6Addr = match ipv6.to_ipv4_mapped() {
+        Some(mapped) => {
+            // The mapped IPv4 address is exactly the last four octets of `ipv6`.
+            let mut internal_ipv6: [u8; 16] = [0; 16];
+            internal_ipv6[..4].copy_from_slice(&mapped.octets());
+
+            // SAFETY: [u8; 16] and [u16; 8] have the same size and every bit
+            // pattern is a valid u16.
+            let words = unsafe { transmute::<[u8; 16], [u16; 8]>(internal_ipv6) };
+            // NOTE(review): reading the words as big-endian and rebuilding the
+            // address from segments reproduces the bytes of `internal_ipv6`, so
+            // this looks equivalent to `Ipv6Addr::from(internal_ipv6)`.
+            Ipv6Addr::from(words.map(u16::from_be))
+        }
+        None => ipv6,
+    };
+    let octets: [u8; 16] = fin_ipv6.octets();
+
+    if no_rep {
+        DatasetAdd(set, octets.as_ptr(), 16);
+        return 0;
+    }
+
+    match v[1].parse::<u16>() {
+        Ok(value) => {
+            let rep = DataRepType { value };
+            DatasetAddwRep(set, octets.as_ptr(), 16, &rep);
+            0
+        }
+        Err(_) => {
+            SCFatalErrorOnInit!(
+                "invalid datarep value {} in {}",
+                set_name,
+                filename.display()
+            );
+            -1
+        }
+    }
+}
+
+/// Opens `filename` and returns an iterator over its lines.
+///
+/// With `fmode == "r"` the file is opened read-only and must already exist.
+/// Any other mode opens it for reading and appending, creating it first if it
+/// is missing. Open errors are returned to the caller.
+fn read_or_create_file<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
+where
+    P: AsRef<Path>,
+{
+    let handle: File = match fmode {
+        "r" => File::open(filename),
+        _ => OpenOptions::new()
+            .read(true)
+            .append(true)
+            .create(true)
+            .open(filename),
+    }?;
+    let reader = io::BufReader::new(handle);
+    Ok(reader.lines())
+}
static Dataset *sets = NULL;
static uint32_t set_ids = 0;
-static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
- DataRepType *rep);
+int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);
static inline void DatasetUnlockData(THashData *d)
{
return 0;
}
-static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
-{
- SCLogDebug("in '%s'", in);
- char raw[ins + 1];
- memcpy(raw, in, ins);
- raw[ins] = '\0';
- char *line = raw;
-
- char *ptrs[1] = {NULL};
- int idx = 0;
-
- size_t i = 0;
- while (i < ins + 1) {
- if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
- line[i] = '\0';
- SCLogDebug("line '%s'", line);
-
- ptrs[idx] = line;
- idx++;
-
- if (idx == 1)
- break;
- } else {
- i++;
- }
- }
-
- if (idx != 1) {
- SCLogDebug("idx %d", idx);
- return -1;
- }
-
- uint16_t v = 0;
- int r = StringParseU16RangeCheck(&v, 10, strlen(ptrs[0]), ptrs[0], 0, USHRT_MAX);
- if (r != (int)strlen(ptrs[0])) {
- SCLogError("'%s' is not a valid reputation value (0-65535)", ptrs[0]);
- return -1;
- }
- SCLogDebug("v %"PRIu16" raw %s", v, ptrs[0]);
-
- rep_out->value = v;
- return 0;
-}
-
static int DatasetLoadIPv4(Dataset *set)
{
if (strlen(set->load) == 0)
fopen_mode = "a+";
}
- FILE *fp = fopen(set->load, fopen_mode);
- if (fp == NULL) {
- SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+ int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv4);
+ if (retval == -2) {
+ FatalErrorOnInit("dataset %s could not be processed", set->name);
+ } else if (retval == -1) {
return -1;
}
- uint32_t cnt = 0;
- char line[1024];
- while (fgets(line, (int)sizeof(line), fp) != NULL) {
- char *r = strchr(line, ',');
- if (r == NULL) {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
-
- struct in_addr in;
- if (inet_pton(AF_INET, line, &in) != 1) {
- FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
- continue;
- }
-
- if (DatasetAdd(set, (const uint8_t *)&in.s_addr, 4) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
-
- /* list with rep data */
- } else {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("IPv4 with REP line: '%s'", line);
-
- *r = '\0';
-
- struct in_addr in;
- if (inet_pton(AF_INET, line, &in) != 1) {
- FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
- continue;
- }
-
- r++;
-
- DataRepType rep = { .value = 0 };
- if (ParseRepLine(r, strlen(r), &rep) < 0) {
- FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- SCLogDebug("rep v:%u", rep.value);
- if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
-
- cnt++;
- }
- }
THashConsolidateMemcap(set->hash);
- fclose(fp);
- SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}
fopen_mode = "a+";
}
- FILE *fp = fopen(set->load, fopen_mode);
- if (fp == NULL) {
- SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+ int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSIpv6);
+ if (retval == -2) {
+ FatalErrorOnInit("dataset %s could not be processed", set->name);
+ } else if (retval == -1) {
return -1;
}
- uint32_t cnt = 0;
- char line[1024];
- while (fgets(line, (int)sizeof(line), fp) != NULL) {
- char *r = strchr(line, ',');
- if (r == NULL) {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
-
- struct in6_addr in6;
- int ret = ParseIpv6String(set, line, &in6);
- if (ret < 0) {
- FatalErrorOnInit("unable to parse IP address");
- continue;
- }
-
- if (DatasetAdd(set, (const uint8_t *)&in6.s6_addr, 16) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
-
- /* list with rep data */
- } else {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("IPv6 with REP line: '%s'", line);
-
- *r = '\0';
-
- struct in6_addr in6;
- int ret = ParseIpv6String(set, line, &in6);
- if (ret < 0) {
- FatalErrorOnInit("unable to parse IP address");
- continue;
- }
-
- r++;
-
- DataRepType rep = { .value = 0 };
- if (ParseRepLine(r, strlen(r), &rep) < 0) {
- FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- SCLogDebug("rep v:%u", rep.value);
- if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
-
- cnt++;
- }
- }
THashConsolidateMemcap(set->hash);
- fclose(fp);
- SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}
fopen_mode = "a+";
}
- FILE *fp = fopen(set->load, fopen_mode);
- if (fp == NULL) {
- SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+ int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSMd5);
+ if (retval == -2) {
+ FatalErrorOnInit("dataset %s could not be processed", set->name);
+ } else if (retval == -1) {
return -1;
}
- uint32_t cnt = 0;
- char line[1024];
- while (fgets(line, (int)sizeof(line), fp) != NULL) {
- /* straight black/white list */
- if (strlen(line) == 33) {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
-
- uint8_t hash[16];
- if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
- FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
-
- /* list with rep data */
- } else if (strlen(line) > 33 && line[32] == ',') {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("MD5 with REP line: '%s'", line);
-
- uint8_t hash[16];
- if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
- FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- DataRepType rep = { .value = 0};
- if (ParseRepLine(line + 33, strlen(line) - 33, &rep) < 0) {
- FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- SCLogDebug("rep v:%u", rep.value);
- if (DatasetAddwRep(set, hash, 16, &rep) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
-
- cnt++;
- }
- else {
- FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)strlen(line), line);
- continue;
- }
- }
THashConsolidateMemcap(set->hash);
- fclose(fp);
- SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}
fopen_mode = "a+";
}
- FILE *fp = fopen(set->load, fopen_mode);
- if (fp == NULL) {
- SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+ int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSSha256);
+ if (retval == -2) {
+ FatalErrorOnInit("dataset %s could not be processed", set->name);
+ } else if (retval == -1) {
return -1;
}
- uint32_t cnt = 0;
- char line[1024];
- while (fgets(line, (int)sizeof(line), fp) != NULL) {
- /* straight black/white list */
- if (strlen(line) == 65) {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
-
- uint8_t hash[32];
- if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
- FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
-
- /* list with rep data */
- } else if (strlen(line) > 65 && line[64] == ',') {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("SHA-256 with REP line: '%s'", line);
-
- uint8_t hash[32];
- if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
- FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- DataRepType rep = { .value = 0 };
- if (ParseRepLine(line + 65, strlen(line) - 65, &rep) < 0) {
- FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
- continue;
- }
-
- SCLogDebug("rep %u", rep.value);
-
- if (DatasetAddwRep(set, hash, 32, &rep) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
- }
- }
THashConsolidateMemcap(set->hash);
- fclose(fp);
- SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}
return 0;
SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+
const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
}
- FILE *fp = fopen(set->load, fopen_mode);
- if (fp == NULL) {
- SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+ int retval = ParseDatasets(set, set->name, set->load, fopen_mode, DSString);
+ if (retval == -2) {
+ FatalErrorOnInit("dataset %s could not be processed", set->name);
+ } else if (retval == -1) {
return -1;
}
- uint32_t cnt = 0;
- char line[1024];
- while (fgets(line, (int)sizeof(line), fp) != NULL) {
- if (strlen(line) <= 1)
- continue;
-
- char *r = strchr(line, ',');
- if (r == NULL) {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
- uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(line));
- // coverity[alloc_strlen : FALSE]
- uint8_t decoded[decoded_size];
- uint32_t num_decoded = SCBase64Decode(
- (const uint8_t *)line, strlen(line), SCBase64ModeStrict, decoded);
- if (num_decoded == 0 && strlen(line) > 0) {
- FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
- continue;
- }
-
- if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
- } else {
- line[strlen(line) - 1] = '\0';
- SCLogDebug("line: '%s'", line);
-
- *r = '\0';
-
- uint32_t decoded_size = SCBase64DecodeBufferSize(strlen(line));
- uint8_t decoded[decoded_size];
- uint32_t num_decoded = SCBase64Decode(
- (const uint8_t *)line, strlen(line), SCBase64ModeStrict, decoded);
- if (num_decoded == 0) {
- FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
- continue;
- }
-
- r++;
- SCLogDebug("r '%s'", r);
-
- DataRepType rep = { .value = 0 };
- if (ParseRepLine(r, strlen(r), &rep) < 0) {
- FatalErrorOnInit("die: bad rep");
- continue;
- }
- SCLogDebug("rep %u", rep.value);
-
- if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
- FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
- continue;
- }
- cnt++;
-
- SCLogDebug("line with rep %s, %s", line, r);
- }
- }
THashConsolidateMemcap(set->hash);
- fclose(fp);
- SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}
return -1;
}
-static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
- DataRepType *rep)
+int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
{
if (set == NULL)
return -1;