From db5dfa331480650c1f889db3cb32a0272dc72ec6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 21 Sep 2016 20:23:22 +0200 Subject: regex: -G feeds a non NUL-terminated string to regexec() and fails When our pickaxe code feeds file contents to regexec(), it implicitly assumes that the file contents are read into implicitly NUL-terminated buffers (i.e. that we overallocate by 1, appending a single '\0'). This is not so. In particular when the file contents are simply mmap()ed, we can be virtually certain that the buffer is preceding uninitialized bytes, or invalid pages. Note that the test we add here is known to be flakey: we simply cannot know whether the byte following the mmap()ed ones is a NUL or not. Typically, on Linux the test passes. On Windows, it fails virtually every time due to an access violation (that's a segmentation fault for you Unix-y people out there). And Windows would be correct: the regexec() call wants to operate on a regular, NUL-terminated string, there is no NUL in the mmap()ed memory range, and it is undefined whether the next byte is even legal to access. When run with --valgrind it demonstrates quite clearly the breakage, of course. Being marked with `test_expect_failure`, this test will sometimes be declare "TODO fixed", even if it only passes by mistake. This test case represents a Minimal, Complete and Verifiable Example of a breakage reported by Chris Sidi. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t4062-diff-pickaxe.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 t/t4062-diff-pickaxe.sh diff --git a/t/t4062-diff-pickaxe.sh b/t/t4062-diff-pickaxe.sh new file mode 100755 index 000000000..5929f2eab --- /dev/null +++ b/t/t4062-diff-pickaxe.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# +# Copyright (c) 2016 Johannes Schindelin +# + +test_description='Pickaxe options' + +. ./test-lib.sh + +test_expect_success setup ' + test_commit initial && + printf "%04096d" 0 >4096-zeroes.txt && + git add 4096-zeroes.txt && + test_tick && + git commit -m "A 4k file" +' +test_expect_failure '-G matches' ' + git diff --name-only -G "^0{4096}$" HEAD^ >out && + test 4096-zeroes.txt = "$(cat out)" +' + +test_done -- cgit v1.2.3 From 2f8952250a84313b74f96abb7b035874854cf202 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 21 Sep 2016 20:24:04 +0200 Subject: regex: add regexec_buf() that can work on a non NUL-terminated string We just introduced a test that demonstrates that our sloppy use of regexec() on a mmap()ed area can result in incorrect results or even hard crashes. So what we need to fix this is a function that calls regexec() on a length-delimited, rather than a NUL-terminated, string. Happily, there is an extension to regexec() introduced by the NetBSD project and present in all major regex implementation including Linux', MacOSX' and the one Git includes in compat/regex/: by using the (non-POSIX) REG_STARTEND flag, it is possible to tell the regexec() function that it should only look at the offsets between pmatch[0].rm_so and pmatch[0].rm_eo. That is exactly what we need. Since support for REG_STARTEND is so widespread by now, let's just introduce a helper function that always uses it, and tell people on a platform whose regex library does not support it to use the one from our compat/regex/ directory. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Makefile | 3 ++- git-compat-util.h | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e11e626d0..c6dc42d06 100644 --- a/Makefile +++ b/Makefile @@ -296,7 +296,8 @@ all:: # Define USE_NED_ALLOCATOR if you want to replace the platforms default # memory allocators with the nedmalloc allocator written by Niall Douglas. # -# Define NO_REGEX if you have no or inferior regex support in your C library. +# Define NO_REGEX if your C library lacks regex support with REG_STARTEND +# feature. # # Define HAVE_DEV_TTY if your system can open /dev/tty to interact with the # user. diff --git a/git-compat-util.h b/git-compat-util.h index 1f8b5f3b1..7047d281e 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -942,6 +942,19 @@ void git_qsort(void *base, size_t nmemb, size_t size, #define qsort git_qsort #endif +#ifndef REG_STARTEND +#error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" +#endif + +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + assert(nmatch > 0 && pmatch); + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = size; + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); +} + #ifndef DIR_HAS_BSD_GROUP_SEMANTICS # define FORCE_DIR_SET_GID S_ISGID #else -- cgit v1.2.3 From b7d36ffca02c23f545d6e098d78180e6e72dfd8d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 21 Sep 2016 20:24:14 +0200 Subject: regex: use regexec_buf() The new regexec_buf() function operates on buffers with an explicitly specified length, rather than NUL-terminated strings. We need to use this function whenever the buffer we want to pass to regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated). Note: the original motivation for this patch was to fix a bug where `git diff -G ` would crash. This patch converts more callers, though, some of which allocated to construct NUL-terminated strings, or worse, modified buffers to temporarily insert NULs while calling regexec(3). By converting them to use regexec_buf(), the code has become much cleaner. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- diff.c | 3 ++- diffcore-pickaxe.c | 18 ++++++++---------- grep.c | 14 ++------------ t/t4062-diff-pickaxe.sh | 2 +- xdiff-interface.c | 13 ++++--------- 5 files changed, 17 insertions(+), 33 deletions(-) diff --git a/diff.c b/diff.c index 059123c5d..f77324e9e 100644 --- a/diff.c +++ b/diff.c @@ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, { if (word_regex && *begin < buffer->size) { regmatch_t match[1]; - if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + if (!regexec_buf(word_regex, buffer->ptr + *begin, + buffer->size - *begin, 1, match, 0)) { char *p = memchr(buffer->ptr + *begin + match[0].rm_so, '\n', match[0].rm_eo - match[0].rm_so); *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7715c13ec..8413d7658 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) { struct diffgrep_cb *data = priv; regmatch_t regmatch; - int hold; if (line[0] != '+' && line[0] != '-') return; @@ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) * caller early. */ return; - /* Yuck -- line ought to be "const char *"! */ - hold = line[len]; - line[len] = '\0'; - data->hit = !regexec(data->regexp, line + 1, 1, ®match, 0); - line[len] = hold; + data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, + ®match, 0); } static int diff_grep(mmfile_t *one, mmfile_t *two, @@ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, xdemitconf_t xecfg; if (!one) - return !regexec(regexp, two->ptr, 1, ®match, 0); + return !regexec_buf(regexp, two->ptr, two->size, + 1, ®match, 0); if (!two) - return !regexec(regexp, one->ptr, 1, ®match, 0); + return !regexec_buf(regexp, one->ptr, one->size, + 1, ®match, 0); /* * We have both sides; need to run textual diff and see if @@ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) regmatch_t regmatch; int flags = 0; - assert(data[sz] == '\0'); - while (*data && !regexec(regexp, data, 1, ®match, flags)) { + while (*data && + !regexec_buf(regexp, data, sz, 1, ®match, flags)) { flags |= REG_NOTBOL; data += regmatch.rm_eo; if (*data && regmatch.rm_so == regmatch.rm_eo) diff --git a/grep.c b/grep.c index 528b652f7..8ed56236f 100644 --- a/grep.c +++ b/grep.c @@ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol, } } -static int regmatch(const regex_t *preg, char *line, char *eol, - regmatch_t *match, int eflags) -{ -#ifdef REG_STARTEND - match->rm_so = 0; - match->rm_eo = eol - line; - eflags |= REG_STARTEND; -#endif - return regexec(preg, line, 1, match, eflags); -} - static int patmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match, int eflags) { @@ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, else if (p->pcre_regexp) hit = !pcrematch(p, line, eol, match, eflags); else - hit = !regmatch(&p->regexp, line, eol, match, eflags); + hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, + eflags); return hit; } diff --git a/t/t4062-diff-pickaxe.sh b/t/t4062-diff-pickaxe.sh index 5929f2eab..f0bf50bda 100755 --- a/t/t4062-diff-pickaxe.sh +++ b/t/t4062-diff-pickaxe.sh @@ -14,7 +14,7 @@ test_expect_success setup ' test_tick && git commit -m "A 4k file" ' -test_expect_failure '-G matches' ' +test_expect_success '-G matches' ' git diff --name-only -G "^0{4096}$" HEAD^ >out && test 4096-zeroes.txt = "$(cat out)" ' diff --git a/xdiff-interface.c b/xdiff-interface.c index 54236f24b..08a7313e6 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -216,11 +216,10 @@ struct ff_regs { static long ff_regexp(const char *line, long len, char *buffer, long buffer_size, void *priv) { - char *line_buffer; struct ff_regs *regs = priv; regmatch_t pmatch[2]; int i; - int result = -1; + int result; /* Exclude terminating newline (and cr) from matching */ if (len > 0 && line[len-1] == '\n') { @@ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len, len--; } - line_buffer = xstrndup(line, len); /* make NUL terminated */ - for (i = 0; i < regs->nr; i++) { struct ff_reg *reg = regs->array + i; - if (!regexec(®->re, line_buffer, 2, pmatch, 0)) { + if (!regexec_buf(®->re, line, len, 2, pmatch, 0)) { if (reg->negate) - goto fail; + return -1; break; } } if (regs->nr <= i) - goto fail; + return -1; i = pmatch[1].rm_so >= 0 ? 1 : 0; line += pmatch[i].rm_so; result = pmatch[i].rm_eo - pmatch[i].rm_so; @@ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len, while (result > 0 && (isspace(line[result - 1]))) result--; memcpy(buffer, line, result); - fail: - free(line_buffer); return result; } -- cgit v1.2.3