From b6691092d707860019bbab80eaaf9173ada10586 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 14:33:31 -0700 Subject: [PATCH] Add streaming filter API This introduces an API to plug custom filters to an input stream. The caller gets get_stream_filter("path") to obtain an appropriate filter for the path, and then uses it when opening an input stream via open_istream(). After that, the caller can read from the stream with read_istream(), and close it with close_istream(), just like an unfiltered stream. This only adds a "null" filter that is a pass-thru filter, but later changes can add LF-to-CRLF and other filters, and the callers of the streaming API do not have to change. Signed-off-by: Junio C Hamano --- convert.c | 84 +++++++++++++++++++++++++++++++++++++++---- convert.h | 23 +++++++++++- entry.c | 16 +++++---- streaming.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++- streaming.h | 2 +- 5 files changed, 209 insertions(+), 16 deletions(-) diff --git a/convert.c b/convert.c index 264af1d5ba..1ec91a370e 100644 --- a/convert.c +++ b/convert.c @@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str return ret | convert_to_git(path, src, len, dst, 0); } +/***************************************************************** + * + * Streaming converison support + * + *****************************************************************/ + +typedef int (*filter_fn)(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); +typedef void (*free_fn)(struct stream_filter *); + +struct stream_filter_vtbl { + filter_fn filter; + free_fn free; +}; + +struct stream_filter { + struct stream_filter_vtbl *vtbl; +}; + +static int null_filter_fn(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + size_t count = *isize_p; + if (*osize_p < count) + count = *osize_p; + if (count) { + memmove(output, input, count); + *isize_p -= count; + *osize_p -= count; + } + return 0; +} + +static void null_free_fn(struct stream_filter *filter) +{ + ; /* nothing -- null instances are shared */ +} + +static struct stream_filter_vtbl null_vtbl = { + null_filter_fn, + null_free_fn, +}; + +static struct stream_filter null_filter_singleton = { + &null_vtbl, +}; + +int is_null_stream_filter(struct stream_filter *filter) +{ + return filter == &null_filter_singleton; +} + /* - * You would be crazy to set CRLF, smuge/clean or ident to - * a large binary blob you would want us not to slurp into - * the memory! + * Return an appropriately constructed filter for the path, or NULL if + * the contents cannot be filtered without reading the whole thing + * in-core. + * + * Note that you would be crazy to set CRLF, smuge/clean or ident to a + * large binary blob you would want us not to slurp into the memory! */ -int can_bypass_conversion(const char *path) +struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1) { struct conv_attrs ca; enum crlf_action crlf_action; @@ -828,11 +885,24 @@ int can_bypass_conversion(const char *path) if (ca.ident || (ca.drv && (ca.drv->smudge || ca.drv->clean))) - return 0; + return NULL; crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) || (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) - return 1; - return 0; + return &null_filter_singleton; + + return NULL; +} + +void free_stream_filter(struct stream_filter *filter) +{ + filter->vtbl->free(filter); +} + +int stream_filter(struct stream_filter *filter, + const char *input, size_t *isize_p, + char *output, size_t *osize_p) +{ + return filter->vtbl->filter(filter, input, isize_p, output, osize_p); } diff --git a/convert.h b/convert.h index b1b4a382df..17d7509832 100644 --- a/convert.h +++ b/convert.h @@ -40,5 +40,26 @@ extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); -extern int can_bypass_conversion(const char *path); + +/***************************************************************** + * + * Streaming converison support + * + *****************************************************************/ + +struct stream_filter; /* opaque */ + +extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *); +extern void free_stream_filter(struct stream_filter *); +extern int is_null_stream_filter(struct stream_filter *); + +/* + * Use as much input up to *isize_p and fill output up to *osize_p; + * update isize_p and osize_p to indicate how much buffer space was + * consumed and filled. Return 0 on success, non-zero on error. + */ +extern int stream_filter(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); + #endif /* CONVERT_H */ diff --git a/entry.c b/entry.c index e2dc16c131..852fea1395 100644 --- a/entry.c +++ b/entry.c @@ -116,6 +116,7 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st) } static int streaming_write_entry(struct cache_entry *ce, char *path, + struct stream_filter *filter, const struct checkout *state, int to_tempfile, int *fstat_done, struct stat *statbuf) { @@ -126,7 +127,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, ssize_t kept = 0; int fd = -1; - st = open_istream(ce->sha1, &type, &sz); + st = open_istream(ce->sha1, &type, &sz, filter); if (!st) return -1; if (type != OBJ_BLOB) @@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout size_t wrote, newsize = 0; struct stat st; - if ((ce_mode_s_ifmt == S_IFREG) && - can_bypass_conversion(path) && - !streaming_write_entry(ce, path, state, to_tempfile, - &fstat_done, &st)) - goto finish; + if (ce_mode_s_ifmt == S_IFREG) { + struct stream_filter *filter = get_stream_filter(path, ce->sha1); + if (filter && + !streaming_write_entry(ce, path, filter, + state, to_tempfile, + &fstat_done, &st)) + goto finish; + } switch (ce_mode_s_ifmt) { case S_IFREG: diff --git a/streaming.c b/streaming.c index 0602926644..565f000790 100644 --- a/streaming.c +++ b/streaming.c @@ -41,6 +41,9 @@ struct stream_vtbl { static open_method_decl(incore); static open_method_decl(loose); static open_method_decl(pack_non_delta); +static struct git_istream *attach_stream_filter(struct git_istream *st, + struct stream_filter *filter); + static open_istream_fn open_istream_tbl[] = { open_istream_incore, @@ -48,6 +51,17 @@ static open_istream_fn open_istream_tbl[] = { open_istream_pack_non_delta, }; +#define FILTER_BUFFER (1024*16) + +struct filtered_istream { + struct git_istream *upstream; + struct stream_filter *filter; + char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; +}; + struct git_istream { const struct stream_vtbl *vtbl; unsigned long size; /* inflated size of full object */ @@ -72,6 +86,8 @@ struct git_istream { struct packed_git *pack; off_t pos; } in_pack; + + struct filtered_istream filtered; } u; }; @@ -112,7 +128,8 @@ static enum input_source istream_source(const unsigned char *sha1, struct git_istream *open_istream(const unsigned char *sha1, enum object_type *type, - unsigned long *size) + unsigned long *size, + struct stream_filter *filter) { struct git_istream *st; struct object_info oi; @@ -129,6 +146,14 @@ struct git_istream *open_istream(const unsigned char *sha1, return NULL; } } + if (st && filter) { + /* Add "&& !is_null_stream_filter(filter)" for performance */ + struct git_istream *nst = attach_stream_filter(st, filter); + if (!nst) + close_istream(st); + st = nst; + } + *size = st->size; return st; } @@ -147,6 +172,79 @@ static void close_deflated_stream(struct git_istream *st) } +/***************************************************************** + * + * Filtered stream + * + *****************************************************************/ + +static close_method_decl(filtered) +{ + free_stream_filter(st->u.filtered.filter); + return close_istream(st->u.filtered.upstream); +} + +static read_method_decl(filtered) +{ + struct filtered_istream *fs = &(st->u.filtered); + size_t filled = 0; + + while (sz) { + /* do we already have filtered output? */ + if (fs->o_ptr < fs->o_end) { + size_t to_move = fs->o_end - fs->o_ptr; + if (sz < to_move) + to_move = sz; + memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move); + fs->o_ptr += to_move; + sz -= to_move; + filled += to_move; + continue; + } + fs->o_end = fs->o_ptr = 0; + + /* do we have anything to feed the filter with? */ + if (fs->i_ptr < fs->i_end) { + size_t to_feed = fs->i_end - fs->i_ptr; + size_t to_receive = FILTER_BUFFER; + if (stream_filter(fs->filter, + fs->ibuf + fs->i_ptr, &to_feed, + fs->obuf, &to_receive)) + return -1; + fs->i_ptr = fs->i_end - to_feed; + fs->o_end = FILTER_BUFFER - to_receive; + continue; + } + fs->i_end = fs->i_ptr = 0; + + /* refill the input from the upstream */ + fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + if (fs->i_end <= 0) + break; + } + return filled; +} + +static struct stream_vtbl filtered_vtbl = { + close_istream_filtered, + read_istream_filtered, +}; + +static struct git_istream *attach_stream_filter(struct git_istream *st, + struct stream_filter *filter) +{ + struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct filtered_istream *fs = &(ifs->u.filtered); + + ifs->vtbl = &filtered_vtbl; + fs->upstream = st; + fs->filter = filter; + fs->i_end = fs->i_ptr = 0; + fs->o_end = fs->o_ptr = 0; + ifs->size = -1; /* unknown */ + return ifs; +} + /***************************************************************** * * Loose object stream diff --git a/streaming.h b/streaming.h index 18cbe68ac1..589e857b8c 100644 --- a/streaming.h +++ b/streaming.h @@ -8,7 +8,7 @@ /* opaque */ struct git_istream; -extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *); +extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *); extern int close_istream(struct git_istream *); extern ssize_t read_istream(struct git_istream *, char *, size_t); -- 2.39.2