From 61262f6c0dc137e078bbc4cd1131fc3b88657414 Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Fri, 27 Jul 2018 16:51:38 -0700 Subject: [PATCH] Save segmentFreqs in ctx instead of malloc and memset in SelectSegment --- .../benchmarkDictBuilder/README.md | 113 ++++++++++++++++++ .../benchmarkDictBuilder/test.sh | 10 +- .../fastCover/Makefile | 6 +- .../fastCover/fastCover.c | 28 +++-- 4 files changed, 141 insertions(+), 16 deletions(-) diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md index 654ca4095..a818e6eb4 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md @@ -17,6 +17,8 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" - For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. - Fourth column is chosen d and fifth column is chosen k +Version 1: + github: NODICT 0.000004 2.999642 RANDOM 0.146096 8.786957 @@ -124,3 +126,114 @@ FAST23 377.196211 2.601416 8 194 FAST23 13.497286 2.601416 8 194 FAST24 503.208577 2.602830 6 194 FAST24 29.538585 2.602830 6 194 + +--------------------------------------------------------------- +Version 2 (save segmentFreqs in ctx instead of malloc and memset in every call to SelectSegment): + +github: +NODICT 0.000005 2.999642 +RANDOM 0.141553 8.786957 +LEGACY 0.904340 8.989482 +COVER 53.621302 10.641263 8 1298 +COVER 4.085037 10.641263 8 1298 +FAST15 17.636211 10.586461 8 1778 +FAST15 0.221236 10.586461 8 1778 +FAST16 18.716259 10.492503 6 1778 +FAST16 0.251522 10.492503 6 1778 +FAST17 17.614391 10.611737 8 1778 +FAST17 0.241011 10.611737 8 1778 +FAST18 19.926270 10.621586 8 1778 +FAST18 0.287195 10.621586 8 1778 +FAST19 19.626808 10.629626 8 1778 +FAST19 0.340191 10.629626 8 1778 +FAST20 18.918657 10.610308 8 1778 +FAST20 0.463307 10.610308 8 1778 +FAST21 20.502362 10.625733 8 1778 +FAST21 0.638202 10.625733 8 1778 +FAST22 22.702695 10.625281 8 1778 +FAST22 1.353399 10.625281 8 1778 +FAST23 28.041990 10.602342 8 1778 +FAST23 3.029502 10.602342 8 1778 +FAST24 35.662961 10.603379 8 1778 +FAST24 6.524258 10.603379 8 1778 + +hg-commands: +NODICT 0.000005 2.425291 +RANDOM 0.080469 3.489515 +LEGACY 0.794417 3.911896 +COVER 54.198788 4.131136 8 386 +COVER 2.191729 4.131136 8 386 +FAST15 11.852793 3.903719 6 1106 +FAST15 0.175406 3.903719 6 1106 +FAST16 12.863315 4.005077 8 530 +FAST16 0.158410 4.005077 8 530 +FAST17 11.977917 4.097811 8 818 +FAST17 0.162381 4.097811 8 818 +FAST18 11.749304 4.136081 8 770 +FAST18 0.173242 4.136081 8 770 +FAST19 11.905785 4.166021 8 530 +FAST19 0.186403 4.166021 8 530 +FAST20 13.293999 4.163740 8 482 +FAST20 0.241508 4.163740 8 482 +FAST21 16.623177 4.157057 8 434 +FAST21 0.372647 4.157057 8 434 +FAST22 20.918409 4.158195 8 290 +FAST22 0.570431 4.158195 8 290 +FAST23 21.762805 4.161450 8 434 +FAST23 1.162206 4.161450 8 434 +FAST24 29.133745 4.159658 8 338 +FAST24 3.054376 4.159658 8 338 + +hg-changelog: +NODICT 0.000006 1.377613 +RANDOM 0.601346 2.096785 +LEGACY 2.544973 2.058273 +COVER 222.639708 2.188654 8 98 +COVER 6.072892 2.188654 8 98 +FAST15 70.394523 2.127194 8 866 +FAST15 0.899766 2.127194 8 866 +FAST16 69.845529 2.145401 8 338 +FAST16 0.881569 2.145401 8 338 +FAST17 69.382431 2.157544 8 194 +FAST17 0.943291 2.157544 8 194 +FAST18 71.348283 2.173127 8 98 +FAST18 1.034765 2.173127 8 98 +FAST19 71.380923 2.179527 8 98 +FAST19 1.254700 2.179527 8 98 +FAST20 72.802714 2.183233 6 98 +FAST20 1.368704 2.183233 6 98 +FAST21 82.042339 2.180920 8 98 +FAST21 2.213864 2.180920 8 98 +FAST22 90.666200 2.184297 8 98 +FAST22 3.590399 2.184297 8 98 +FAST23 108.926377 2.187666 6 98 +FAST23 8.723759 2.187666 6 98 +FAST24 134.296232 2.189889 6 98 +FAST24 19.396532 2.189889 6 98 + +hg-manifest: +NODICT 0.000005 1.866385 +RANDOM 0.982192 2.309485 +LEGACY 9.507729 2.506775 +COVER 922.742066 2.582597 8 434 +COVER 36.500276 2.582597 8 434 +FAST15 163.886717 2.377689 8 1682 +FAST15 2.107328 2.377689 8 1682 +FAST16 152.684592 2.464814 8 1538 +FAST16 2.157789 2.464814 8 1538 +FAST17 154.463459 2.539834 6 1826 +FAST17 2.282455 2.539834 6 1826 +FAST18 155.540044 2.576924 8 1922 +FAST18 2.101807 2.576924 8 1922 +FAST19 152.650343 2.592479 6 290 +FAST19 2.359461 2.592479 6 290 +FAST20 174.623634 2.594551 8 194 +FAST20 2.870022 2.594551 8 194 +FAST21 219.876653 2.597128 6 194 +FAST21 4.386269 2.597128 6 194 +FAST22 247.986803 2.596971 6 386 +FAST22 6.201144 2.596971 6 386 +FAST23 276.051806 2.601416 8 194 +FAST23 11.613477 2.601416 8 194 +FAST24 328.234024 2.602830 6 194 +FAST24 26.710364 2.602830 6 194 diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh b/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh index 5eaf5930a..e5508ded3 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh @@ -1,2 +1,8 @@ -echo "Benchmark with in=../../lib/common" -./benchmark in=../../../lib/common +echo "-----------------github--------------------" +./benchmark in=github +echo "-----------------hg-commands--------------------" +./benchmark in=hg-commands +echo "-----------------hg-changelog--------------------" +./benchmark in=hg-changelog +echo "------------------hg-manifest-------------------" +./benchmark in=hg-manifest diff --git a/contrib/experimental_dict_builders/fastCover/Makefile b/contrib/experimental_dict_builders/fastCover/Makefile index 9c56013d8..4a7cc17d0 100644 --- a/contrib/experimental_dict_builders/fastCover/Makefile +++ b/contrib/experimental_dict_builders/fastCover/Makefile @@ -1,7 +1,7 @@ ARG := CC ?= gcc -CFLAGS ?= -O3 +CFLAGS ?= -O3 -g INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder IO_FILE := ../randomDictBuilder/io.c @@ -9,7 +9,7 @@ IO_FILE := ../randomDictBuilder/io.c TEST_INPUT := ../../../lib TEST_OUTPUT := fastCoverDict -all: main run clean +all: main run .PHONY: test test: main testrun testshell clean @@ -32,7 +32,7 @@ io.o: $(IO_FILE) $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) libzstd.a: - $(MAKE) -C ../../../lib libzstd.a + $(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a mv ../../../lib/libzstd.a . .PHONY: testrun diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c index d6b3254ec..3c1aa951c 100644 --- a/contrib/experimental_dict_builders/fastCover/fastCover.c +++ b/contrib/experimental_dict_builders/fastCover/fastCover.c @@ -82,6 +82,7 @@ typedef struct { size_t nbTestSamples; size_t nbDmers; U32 *freqs; + U16 *segmentFreqs; unsigned d; } FASTCOVER_ctx_t; @@ -142,9 +143,6 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, activeSegment.end = begin; activeSegment.score = 0; { - /* Keep track of number of times an index has been seen in current segment */ - U16* currfreqs =(U16 *)malloc((1 << parameters.f) * sizeof(U16)); - memset(currfreqs, 0, (1 << parameters.f) * sizeof(*currfreqs)); /* Slide the activeSegment through the whole epoch. * Save the best segment in bestSegment. */ @@ -152,19 +150,19 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, /* Get hash value of current dmer */ const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d); /* Add frequency of this index to score if this is the first occurence of index in active segment */ - if (currfreqs[index] == 0) { + if (ctx->segmentFreqs[index] == 0) { activeSegment.score += freqs[index]; } - currfreqs[index] += 1; + ctx->segmentFreqs[index] += 1; /* Increment end of segment */ activeSegment.end += 1; /* If the window is now too large, drop the first position */ if (activeSegment.end - activeSegment.begin == dmersInK + 1) { /* Get hash value of the dmer to be eliminated from active segment */ const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d); - currfreqs[delIndex] -= 1; + ctx->segmentFreqs[delIndex] -= 1; /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ - if (currfreqs[delIndex] == 0) { + if (ctx->segmentFreqs[delIndex] == 0) { activeSegment.score -= freqs[delIndex]; } /* Increment start of segment */ @@ -175,7 +173,12 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, bestSegment = activeSegment; } } - free(currfreqs); + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d); + ctx->segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } } { /* Trim off the zero frequency head and tail from the segment. */ @@ -245,6 +248,10 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) { if (!ctx) { return; } + if (ctx->segmentFreqs) { + free(ctx->segmentFreqs); + ctx->segmentFreqs = NULL; + } if (ctx->freqs) { free(ctx->freqs); ctx->freqs = NULL; @@ -347,9 +354,8 @@ static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer, } /* Initialize frequency array of size 2^f */ - ctx->freqs =(U32 *)malloc((1 << f) * sizeof(U32)); - memset(ctx->freqs, 0, (1 << f) * sizeof(U32)); - + ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32)); + ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16)); DISPLAYLEVEL(2, "Computing frequencies\n"); FASTCOVER_computeFrequency(ctx->freqs, f, ctx); -- 2.47.2