for (n=0; n<nbFiles; n++) {
S64 const fileSize = DiB_getFileSize(fileNamesTable[n]);
- DISPLAYLEVEL(1, "[DEBUG] File '%s': size=%lld\n", fileNamesTable[n], (long long)fileSize);
-
/* TODO: is there a minimum sample size? What if the file is 1-byte? */
/* Skip empty or invalid files */
if (fileSize <= 0) {
/* TODO: is there a minimum sample size? Can we have a 1-byte sample? */
fs.nbSamples += (int)((fileSize + chunkSize-1) / chunkSize);
fs.totalSizeToLoad += fileSize;
- DISPLAYLEVEL(1, "[DEBUG] After chunked file: nbSamples=%d, totalSizeToLoad=%lld\n",
- fs.nbSamples, (long long)fs.totalSizeToLoad);
}
else {
/* the case where one file is one sample */
}
fs.nbSamples += 1;
fs.totalSizeToLoad += MIN(fileSize, SAMPLESIZE_MAX);
- DISPLAYLEVEL(1, "[DEBUG] After single file: nbSamples=%d, totalSizeToLoad=%lld\n",
- fs.nbSamples, (long long)fs.totalSizeToLoad);
}
}
DISPLAYLEVEL(4, "Found training data %d files, %d KB, %d samples\n", nbFiles, (int)(fs.totalSizeToLoad / (1 KB)), fs.nbSamples);
- DISPLAYLEVEL(1, "[DEBUG FINAL] fileStats: nbSamples=%d, totalSizeToLoad=%lld (%s)\n",
- fs.nbSamples, (long long)fs.totalSizeToLoad,
- fs.totalSizeToLoad < 0 ? "NEGATIVE!" : "ok");
return fs;
}
/* Limit the size of the training data to 2GB */
/* TODO: there is opportunity to stop DiB_fileStats() early when the data limit is reached */
loadedSize = (size_t)MIN( MIN((S64)maxMem, fs.totalSizeToLoad), MAX_SAMPLES_SIZE );
- DISPLAYLEVEL(1, "[DEBUG] Memory calc: totalSizeToLoad=%lld, maxMem=%zu, loadedSize=%zu (0x%zx)\n",
- (long long)fs.totalSizeToLoad, maxMem, loadedSize, loadedSize);
- if (fs.totalSizeToLoad < 0) {
- DISPLAYLEVEL(1, "[BUG] totalSizeToLoad is NEGATIVE! This will cause allocation issues!\n");
- }
if (memLimit != 0) {
DISPLAYLEVEL(2, "! Warning : setting manual memory limit for dictionary training data at %u MB \n",
(unsigned)(memLimit / (1 MB)));
loadedSize = (size_t)MIN(loadedSize, memLimit);
}
- DISPLAYLEVEL(1, "[DEBUG] About to malloc: srcBuffer size=%zu, sampleSizes array size=%zu\n",
- loadedSize+NOISELENGTH, (size_t)(fs.nbSamples * sizeof(size_t)));
srcBuffer = malloc(loadedSize+NOISELENGTH);
sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
}
+++ /dev/null
-#!/bin/bash
-echo "=== Test to show allocation bug with negative totalSizeToLoad ==="
-echo ""
-echo "We need at least 5 samples to pass the minimum check"
-echo ""
-
-# Clean up
-rm -rf alloc_test
-mkdir alloc_test
-
-# Create exactly 5 valid files (minimum to not exit early)
-echo "Creating 5 valid files (minimum required)..."
-for i in {1..5}; do
- echo "data$i" > alloc_test/good_$i.txt
-done
-
-echo "Valid files created (about 6 bytes each = 30 bytes total)"
-echo ""
-
-# We need enough bad files to make totalSizeToLoad negative
-# 30 bytes positive, so we need at least 31 bad files
-echo "Adding 1000 non-existent files to make totalSizeToLoad very negative..."
-echo "Expected: totalSizeToLoad = 30 + (1000 * -1) = -970 bytes"
-echo ""
-
-# Build command
-CMD="./zstd --train alloc_test/good_*.txt"
-for i in {1..1000}; do
- CMD="$CMD alloc_test/BAD_$i"
-done
-CMD="$CMD -o alloc_test/dict.zst --maxdict=65536 2>&1"
-
-echo "Running command..."
-echo "================="
-
-# Run and capture ALL debug output related to our issue
-eval $CMD | grep -E "\[DEBUG FINAL\]|\[DEBUG\] Memory calc|\[BUG\]|About to malloc|Error|not enough memory"
-
-echo ""
-echo "Output should show something like the following:"
-echo "1. [DEBUG FINAL] fileStats: totalSizeToLoad=-970 (NEGATIVE!)"
-echo "2. [BUG] totalSizeToLoad is NEGATIVE!"
-echo "3. [DEBUG] Memory calc: showing huge loadedSize value"
-echo "4. Error about memory allocation"