From: Stella Lau
Date: Wed, 5 Jul 2017 20:57:07 +0000 (-0700)
Subject: Initial long distance matcher commit
X-Git-Tag: v1.3.1^2~12^2~7^2~61
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=88f3d8641e55544fbf22f7226ee1793828c65983;p=thirdparty%2Fzstd.git

Initial long distance matcher commit
---

diff --git a/contrib/long_distance_matching/Makefile b/contrib/long_distance_matching/Makefile
new file mode 100644
index 000000000..bfe02ea2a
--- /dev/null
+++ b/contrib/long_distance_matching/Makefile
@@ -0,0 +1,27 @@
+# ################################################################
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# ################################################################
+
+# This Makefile presumes libzstd is installed, using `sudo make install`
+
+
+.PHONY: default all clean
+
+default: all
+
+all: main
+
+
+main : ldm.c main.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+clean:
+	@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
+	main
+	@echo Cleaning completed
+
diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c
new file mode 100644
index 000000000..34118c81f
--- /dev/null
+++ b/contrib/long_distance_matching/ldm.c
@@ -0,0 +1,59 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "ldm.h"
+
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+
+typedef uint64_t tag;
+
+/* Not referenced by any code in this file yet. */
+struct hash_entry {
+  U64 offset;
+  tag t;
+};
+
+/**
+ * Placeholder compressor: copies source to dest unchanged.
+ *
+ * Returns the number of bytes written to dest, or 0 (writing nothing) when
+ * source_size exceeds max_dest_size.
+ */
+size_t LDM_compress(const char *source, char *dest, size_t source_size, size_t max_dest_size) {
+  /**
+   * Outline (not implemented below):
+   * Loop:
+   *  Find match at position k (hash next n bytes, rolling hash)
+   *  Compute match length
+   *  Output literal length: k (sequences of 4 + (k-4) bytes)
+   *  Output match length
+   *  Output literals
+   *  Output offset
+   */
+
+  /* The copy needs max_dest_size >= source_size; never write past dest. */
+  if (source_size > max_dest_size) {
+    return 0;
+  }
+  memcpy(dest, source, source_size);
+  return source_size;
+}
+
+/**
+ * Placeholder decompressor: copies source to dest unchanged.
+ *
+ * Returns the number of bytes written to dest, or 0 (writing nothing) when
+ * compressed_size exceeds max_decompressed_size.
+ */
+size_t LDM_decompress(const char *source, char *dest, size_t compressed_size, size_t max_decompressed_size) {
+  if (compressed_size > max_decompressed_size) {
+    return 0;
+  }
+  memcpy(dest, source, compressed_size);
+  return compressed_size;
+}
diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h
new file mode 100644
index 000000000..d0151373c
--- /dev/null
+++ b/contrib/long_distance_matching/ldm.h
@@ -0,0 +1,20 @@
+#ifndef LDM_H
+#define LDM_H
+
+#include <stddef.h>   /* size_t */
+
+/**
+ * Writes the compressed form of source[0..source_size) to dest and returns
+ * the number of bytes written, or 0 if it does not fit in max_dest_size.
+ * The current implementation stores the input unchanged.
+ */
+size_t LDM_compress(const char *source, char *dest, size_t source_size, size_t max_dest_size);
+
+/**
+ * Writes the decompressed form of source[0..compressed_size) to dest and
+ * returns the number of bytes written, or 0 if it does not fit in
+ * max_decompressed_size.
+ */
+size_t LDM_decompress(const char *source, char *dest, size_t compressed_size, size_t max_decompressed_size);
+
+#endif /* LDM_H */
diff --git a/contrib/long_distance_matching/main.c b/contrib/long_distance_matching/main.c
new file mode 100644
index 000000000..ddf5145f7
--- /dev/null
+++ b/contrib/long_distance_matching/main.c
@@ -0,0 +1,278 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ldm.h"
+
+#define BUF_SIZE (16 * 1024)  // Block size
+#define LDM_HEADER_SIZE 8
+
+/**
+ * Compresses `in` to `out`, one block of up to BUF_SIZE bytes at a time.
+ *
+ * On success stores the total number of bytes read in *size_in and the total
+ * number of bytes written in *size_out, then returns 0. Returns 1 on
+ * allocation, read, compression or write failure; *size_in and *size_out are
+ * left untouched in that case.
+ */
+static size_t compress_file(FILE *in, FILE *out, size_t *size_in,
+                            size_t *size_out) {
+  char *src = NULL;
+  char *buf = NULL;
+  size_t r = 1;
+  size_t count_in = 0;
+  size_t count_out = 0;
+
+  src = malloc(BUF_SIZE);
+  if (!src) {
+    printf("Not enough memory\n");
+    goto cleanup;
+  }
+
+  buf = malloc(BUF_SIZE + LDM_HEADER_SIZE);
+  if (!buf) {
+    printf("Not enough memory\n");
+    goto cleanup;
+  }
+
+  for (;;) {
+    size_t n;
+    size_t written;
+    const size_t k = fread(src, 1, BUF_SIZE, in);
+    if (k == 0) {
+      break;
+    }
+    count_in += k;
+
+    /* Write what the compressor produced, not the input length. */
+    n = LDM_compress(src, buf, k, BUF_SIZE);
+    if (n == 0) {
+      printf("Compression failed\n");
+      goto cleanup;
+    }
+    count_out += n;
+
+    written = fwrite(buf, 1, n, out);
+    if (written < n) {
+      if (ferror(out)) {
+        printf("Write failed\n");
+      } else {
+        printf("Short write\n");
+      }
+      goto cleanup;
+    }
+  }
+
+  /* fread() returns 0 for both end-of-file and errors; tell them apart. */
+  if (ferror(in)) {
+    printf("Read failed\n");
+    goto cleanup;
+  }
+
+  *size_in = count_in;
+  *size_out = count_out;
+  r = 0;
+ cleanup:
+  free(src);
+  free(buf);
+  return r;
+}
+
+/**
+ * Decompresses `in` to `out`, treating each chunk of up to BUF_SIZE bytes
+ * read from `in` as one compressed block.
+ *
+ * Returns 0 once the whole input has been processed, and 1 on allocation,
+ * read, decompression or write failure.
+ */
+static size_t decompress_file(FILE *in, FILE *out) {
+  char *src = malloc(BUF_SIZE);
+  char *dst = NULL;
+  const size_t dst_capacity = BUF_SIZE;
+  size_t ret = 1;
+  size_t bytes_written = 0;
+
+  if (!src) {
+    perror("decompress_file(src)");
+    goto cleanup;
+  }
+
+  dst = malloc(dst_capacity);
+  if (!dst) {
+    perror("decompress_file(dst)");
+    goto cleanup;
+  }
+
+  for (;;) {
+    size_t dst_size;
+    size_t written;
+    const size_t src_size = fread(src, 1, BUF_SIZE, in);
+    if (src_size == 0) {
+      if (ferror(in)) {
+        printf("Decompress: error reading file\n");
+        goto cleanup;
+      }
+      break;  /* End of input. */
+    }
+
+    dst_size = LDM_decompress(src, dst, src_size, dst_capacity);
+    if (dst_size == 0) {
+      printf("Decompress: failed to decompress block\n");
+      goto cleanup;
+    }
+
+    written = fwrite(dst, 1, dst_size, out);
+    bytes_written += written;
+    if (written != dst_size) {
+      printf("Decompress: Failed to write to file\n");
+      goto cleanup;
+    }
+  }
+
+  printf("Wrote %zu bytes\n", bytes_written);
+  ret = 0;
+
+ cleanup:
+  free(src);
+  free(dst);
+
+  return ret;
+}
+
+/**
+ * Compares the remaining contents of two streams in 1024-byte chunks.
+ *
+ * Returns 0 when both streams yield the same bytes and run out together, and
+ * a nonzero value at the first chunk whose length or contents differ.
+ */
+static int compare(FILE *fp0, FILE *fp1) {
+  int result = 0;
+  while (result == 0) {
+    char b0[1024];
+    char b1[1024];
+    const size_t r0 = fread(b0, 1, sizeof(b0), fp0);
+    const size_t r1 = fread(b1, 1, sizeof(b1), fp1);
+
+    result = (int)r0 - (int)r1;
+
+    if (0 == r0 || 0 == r1) {
+      break;
+    }
+    if (0 == result) {
+      result = memcmp(b0, b1, r0);
+    }
+  }
+  return result;
+}
+
+/**
+ * Compresses argv[1] to "<argv[1]>.ldm", decompresses that to
+ * "<argv[1]>.ldm.dec" and checks that the round trip reproduces the input.
+ *
+ * Returns 0 on success (and when no filename is given), nonzero when a file
+ * cannot be opened, a stage fails or the round trip does not match.
+ */
+int main(int argc, char *argv[]) {
+  char inpFilename[256] = { 0 };
+  char ldmFilename[256] = { 0 };
+  char decFilename[256] = { 0 };
+  int len;
+  int status = 0;
+
+  if (argc < 2) {
+    printf("Please specify input filename\n");
+    return 0;
+  }
+  snprintf(inpFilename, sizeof(inpFilename), "%s", argv[1]);
+  snprintf(ldmFilename, sizeof(ldmFilename), "%s.ldm", argv[1]);
+  len = snprintf(decFilename, sizeof(decFilename), "%s.ldm.dec", argv[1]);
+  /* decFilename is the longest name, so if it fits the other two do as well.
+   * A truncated output name could otherwise alias the input file. */
+  if (len < 0 || (size_t)len >= sizeof(decFilename)) {
+    printf("Input filename is too long\n");
+    return 1;
+  }
+
+  printf("inp = [%s]\n", inpFilename);
+  printf("ldm = [%s]\n", ldmFilename);
+  printf("dec = [%s]\n", decFilename);
+
+  /* compress */
+  {
+    FILE *inpFp = fopen(inpFilename, "rb");
+    FILE *outFp = fopen(ldmFilename, "wb");
+    size_t sizeIn = 0;
+    size_t sizeOut = 0;
+    size_t ret = 1;
+
+    printf("compress : %s -> %s\n", inpFilename, ldmFilename);
+    if (inpFp && outFp) {
+      ret = compress_file(inpFp, outFp, &sizeIn, &sizeOut);
+    } else {
+      printf("compress : cannot open files\n");
+    }
+    /* Close on every path; fclose(NULL) is undefined, hence the guards. */
+    if (outFp) {
+      fclose(outFp);
+    }
+    if (inpFp) {
+      fclose(inpFp);
+    }
+    if (ret) {
+      printf("compress : failed with code %zu\n", ret);
+      return 1;
+    }
+    /* Avoid 0/0 for an empty input file. */
+    printf("%s: %zu → %zu bytes, %.1f%%\n",
+           inpFilename, sizeIn, sizeOut,
+           sizeIn ? (double)sizeOut / sizeIn * 100 : 0.0);
+    printf("compress : done\n");
+  }
+
+  /* decompress */
+  {
+    FILE *inpFp = fopen(ldmFilename, "rb");
+    FILE *outFp = fopen(decFilename, "wb");
+    size_t ret = 1;
+
+    printf("decompress : %s -> %s\n", ldmFilename, decFilename);
+    if (inpFp && outFp) {
+      ret = decompress_file(inpFp, outFp);
+    } else {
+      printf("decompress : cannot open files\n");
+    }
+    if (outFp) {
+      fclose(outFp);
+    }
+    if (inpFp) {
+      fclose(inpFp);
+    }
+    if (ret) {
+      printf("decompress : failed with code %zu\n", ret);
+      return 1;
+    }
+    printf("decompress : done\n");
+  }
+
+  /* verify */
+  {
+    FILE *inpFp = fopen(inpFilename, "rb");
+    FILE *decFp = fopen(decFilename, "rb");
+
+    printf("verify : %s <-> %s\n", inpFilename, decFilename);
+    if (inpFp && decFp && 0 == compare(inpFp, decFp)) {
+      printf("verify : OK\n");
+    } else {
+      printf("verify : NG\n");
+      status = 1;
+    }
+    if (decFp) {
+      fclose(decFp);
+    }
+    if (inpFp) {
+      fclose(inpFp);
+    }
+  }
+
+  return status;
+}
diff --git a/contrib/long_distance_matching/main.h b/contrib/long_distance_matching/main.h
new file mode 100644
index 000000000..a0b030121
--- /dev/null
+++ b/contrib/long_distance_matching/main.h
@@ -0,0 +1,15 @@
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stddef.h>  /* size_t */
+#include <stdio.h>   /* FILE */
+
+/*
+ * NOTE: main.c defines both functions as `static` and does not include this
+ * header; the prototypes below mirror the parameter and return types used
+ * there.
+ */
+size_t compress_file(FILE *in, FILE *out, size_t *size_in, size_t *size_out);
+size_t decompress_file(FILE *in, FILE *out);
+
+#endif /* MAIN_H */