From ef9c8d2ddfe28e3805a5002fd7608aa956d88083 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 02:04:40 -0500 Subject: [PATCH 01/20] Add be.fi test repo for extractor bug and include it in git-regress and hg-regress targets; those tests fail on this repo. --- test/Makefile | 4 ++-- test/be.fi | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 test/be.fi diff --git a/test/Makefile b/test/Makefile index 5f8bba04f..75eb6a06b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -132,7 +132,7 @@ coalesce: @rm -f /tmp/regress # Test the git extractor -GITLOADS = bs +GITLOADS = bs be git-regress: @echo "=== Testing git-extractor:" @if command -v git >/dev/null 2>&1 ; \ @@ -148,7 +148,7 @@ git-regress: @rm -f /tmp/regress # Test the hg extractor -HGLOADS = testrepo2 +HGLOADS = testrepo2 be hg-regress: @echo "=== Testing hg-extractor:" @if command -v hg >/dev/null 2>&1 && command -v git >/dev/null 2>&1 ; \ diff --git a/test/be.fi b/test/be.fi new file mode 100644 index 000000000..9d38ab405 --- /dev/null +++ b/test/be.fi @@ -0,0 +1,45 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/heads/master +commit refs/heads/master +mark :2 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 2. + +commit refs/heads/test +mark :4 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :3 testfile2 + +blob +mark :5 +data 13 +Test file 3. + +commit refs/heads/master +mark :6 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :5 testfile3 + +reset refs/heads/master +from :6 + -- GitLab From 718d8da1665a0d5e23b83f7e7005bb2f1535025a Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 11:33:48 -0500 Subject: [PATCH 02/20] Add test of hg extractor with multiple hg branches; test fails with current code. --- test/Makefile | 18 ++++++++++++- test/hg-be-test | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100755 test/hg-be-test diff --git a/test/Makefile b/test/Makefile index 75eb6a06b..97d20522a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -147,7 +147,7 @@ git-regress: fi @rm -f /tmp/regress -# Test the hg extractor +# Test the hg extractor (the "be" test covers the case with hg bookmarks) HGLOADS = testrepo2 be hg-regress: @echo "=== Testing hg-extractor:" @@ -163,6 +163,22 @@ hg-regress: fi @rm -f /tmp/regress +# Test the hg extractor with multiple hg branches +HGBRANCHES = be +hg-regress-branches: + @echo "=== Testing hg-extractor with multiple hg branches:" + @if command -v hg >/dev/null 2>&1 ; \ + then \ + for test in $(HGBRANCHES); do \ + if (echo " $${test}" >&2; \ + ./hg-$${test}-test) | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, hg missing."; exit 0; \ + fi + @rm -f /tmp/regress + # Test loading from Subversion SVNLOADS := $(shell ls *.svn | sed '/.svn/s///') svnload-buildregress: diff --git a/test/hg-be-test b/test/hg-be-test new file mode 100755 index 000000000..fcf1ac208 --- /dev/null +++ b/test/hg-be-test @@ -0,0 +1,67 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +BIN=${PWD}/.. + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit -m "Commit test file 1." >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit -m "Commit test file 3." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From a0e686b06d4268ed7c01fde1ea0a78b2df9ce243 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 11:43:13 -0500 Subject: [PATCH 03/20] Add --date option to hg commits in multiple branch test so commit timestamps will match the .fi file. --- test/hg-be-test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/hg-be-test b/test/hg-be-test index fcf1ac208..2fb559f84 100755 --- a/test/hg-be-test +++ b/test/hg-be-test @@ -41,15 +41,15 @@ then ( try echo "Test file 1." > testfile1 try hg add testfile1 >/dev/null - try hg commit -m "Commit test file 1." >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 1." >/dev/null try hg branch test >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null - try hg commit -m "Commit test file 2." >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 2." >/dev/null try hg update default >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null - try hg commit -m "Commit test file 3." >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 3." >/dev/null ) || exit 1 try cd - >/dev/null fi -- GitLab From aeb695caf6e3244264409891ded642e2d10d9bb0 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 11:45:54 -0500 Subject: [PATCH 04/20] Add comment in hg-be-test on the --date incantation. --- test/hg-be-test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/hg-be-test b/test/hg-be-test index 2fb559f84..de26545a3 100755 --- a/test/hg-be-test +++ b/test/hg-be-test @@ -38,6 +38,11 @@ then try rm -fr $testrepo try hg init $testrepo || exit 1 try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... ( try echo "Test file 1." > testfile1 try hg add testfile1 >/dev/null -- GitLab From e579c2799566a8985c3f3d2f26de8a645982bd50 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 18:56:41 -0500 Subject: [PATCH 05/20] Extract hg branch information using hg log. --- reposurgeon | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/reposurgeon b/reposurgeon index b236ce898..7d3a9c6f5 100755 --- a/reposurgeon +++ b/reposurgeon @@ -821,11 +821,8 @@ class Extractor(object): # fill in self.tags assert baton is not None # pacify pylint def color_branches(self, baton): - """Color branches in the order the tips occur. Emulate the - git-export order.""" - for refname, refobj in sorted(self.refs.items(), - key=lambda ref: self.revlist.index(ref[1])): - self.__branch_color(refobj, refname) + "Color all commits with their branch name." + self._color_branches() uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]] if uncolored: if verbose >= 1: @@ -833,6 +830,16 @@ class Extractor(object): else: raise Fatal("some branches do not have local ref names.") assert baton is not None # pacify pylint + def _color_branches(self): + """Color branches in the order the tips occur.""" + # Note: this algorithm by itself is not correct. It is included + # on the assumption that subclasses will override this method + # to first extract all branch information possible from the repo, + # and then use this algorithm only for coloring of commits that + # are, for whatever reason, not caught + for refname, refobj in sorted(self.refs.items(), + key=lambda ref: self.revlist.index(ref[1])): + self.__branch_color(refobj, refname) def __branch_color(self, rev, color): if rev.startswith("ref"): return @@ -1093,6 +1100,15 @@ class HgExtractor(Extractor): # Conceivably it might be better to treat the commit message that # creates the tag as an annotation, but that's a job for the surgeon # later, not the extractor now. + def _color_branches(self): + # Hg stores branch info in the metadata for each commit, + # so we just need to get it using hg log + with self.hg_or_die("log", "--template", "{node|short} {branch}\\n") as fp: + for line in fp: + h, branch = polystr(line).strip().split() + self.meta[h]['branch'] = "refs/heads/" + branch + # This should not be needed, but do it for completeness + Extractor._color_branches(self) def post_extract(self, repo): super(HgExtractor, self).post_extract(repo) self.hg_capture("update", "-C", "tip") -- GitLab From 6ef7f425bfb44084974b573d743011487232f387 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 19:04:31 -0500 Subject: [PATCH 06/20] Use sourcetype git in hg-be-test to ensure output consistent with git fast-export. --- test/hg-be-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/hg-be-test b/test/hg-be-test index de26545a3..d79de70fa 100755 --- a/test/hg-be-test +++ b/test/hg-be-test @@ -62,7 +62,7 @@ fi # Should we stream the repo? if [ $stream = True ] then - try ${BIN}/reposurgeon "read $testrepo" "write -" + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" fi # Should we clean up the test directory -- GitLab From c595913055c00be31638f54fd018a7df98f8879f Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 19:17:58 -0500 Subject: [PATCH 07/20] Add reset to be.fi. --- test/be.fi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/be.fi b/test/be.fi index 9d38ab405..539ae1eb1 100644 --- a/test/be.fi +++ b/test/be.fi @@ -40,6 +40,9 @@ Commit test file 3. from :2 M 100644 :5 testfile3 +reset refs/heads/test +from :4 + reset refs/heads/master from :6 -- GitLab From 5895ba43b9d85a4f7f4b8b3664fe4de100b715a6 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 19:18:31 -0500 Subject: [PATCH 08/20] Delete #reposurgeon sourcetype line from hg-regress test output. --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 97d20522a..486cb7e34 100644 --- a/test/Makefile +++ b/test/Makefile @@ -155,7 +155,7 @@ hg-regress: then \ for test in $(HGLOADS); do \ if (echo " $${test}.fi" >&2; \ - ./hg-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + ./hg-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' | sed -e 1d -e '/^#reposurgeon sourcetype/d' >/tmp/regress; \ then diff --text -u $${test}.fi /tmp/regress || exit 1; \ else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ done; \ -- GitLab From 77e7fcbfd58297247fe08410f4598f9586af4c0a Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Thu, 3 Mar 2016 19:24:02 -0500 Subject: [PATCH 09/20] Tweak sed filter in hg-regress. --- test/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/Makefile b/test/Makefile index 486cb7e34..2bad7b2ec 100644 --- a/test/Makefile +++ b/test/Makefile @@ -147,15 +147,15 @@ git-regress: fi @rm -f /tmp/regress -# Test the hg extractor (the "be" test covers the case with hg bookmarks) -HGLOADS = testrepo2 be +# Test the hg extractor +HGLOADS = testrepo2 hg-regress: @echo "=== Testing hg-extractor:" @if command -v hg >/dev/null 2>&1 && command -v git >/dev/null 2>&1 ; \ then \ for test in $(HGLOADS); do \ if (echo " $${test}.fi" >&2; \ - ./hg-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' | sed -e 1d -e '/^#reposurgeon sourcetype/d' >/tmp/regress; \ + ./hg-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' | sed -e '/^#reposurgeon sourcetype/d' >/tmp/regress; \ then diff --text -u $${test}.fi /tmp/regress || exit 1; \ else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ done; \ -- GitLab From 115698edaf1e9f8af3b51edc2d045ff388f4ecbf Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Fri, 4 Mar 2016 14:21:57 -0500 Subject: [PATCH 10/20] Use git name-rev to pull branch info in git extractor. --- reposurgeon | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/reposurgeon b/reposurgeon index 7d3a9c6f5..da5155099 100755 --- a/reposurgeon +++ b/reposurgeon @@ -981,6 +981,17 @@ class GitExtractor(Extractor): comment=comment, committish=objecthash)) self.refs["refs/tags/" + tag] = objecthash + def _color_branches(self): + # Git will give us branch info with this incantation + with popen_or_die("git name-rev --all") as fp: + for line in fp: + h, branch = polystr(line).strip().split() + branch = branch.split('~')[0].split('^')[0] + if not "/" in branch: + branch = "heads/" + branch + self.meta[h]['branch'] = "refs/" + branch + # This should not be needed, but do it for completeness + Extractor._color_branches(self) def __metadata(self, rev, fmt): with popen_or_die("git log -1 --format='%s' %s" % (fmt, rev)) as fp: return polystr(fp.read())[:-1] -- GitLab From 38fadde622dba82879cb1021ca9e5d5bc4738848 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Fri, 4 Mar 2016 14:22:31 -0500 Subject: [PATCH 11/20] Always generate a reset for a repo's root commit. --- reposurgeon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index da5155099..1fc15031a 100755 --- a/reposurgeon +++ b/reposurgeon @@ -3039,7 +3039,7 @@ class RepoStreamer: commit.append_operation(op) del self.filemap[revision][tbd] self.extractor.cleanup(revision, True) - if not parents and commit.branch != "refs/heads/master": + if not parents: #and commit.branch != "refs/heads/master": reset = Reset(repo) reset.ref = commit.branch repo.addEvent(reset) -- GitLab From 2af04f826e07c3312ad07f9746650ea666184ed5 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Fri, 4 Mar 2016 14:52:27 -0500 Subject: [PATCH 12/20] Add hg-regress-branches test target. --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 2bad7b2ec..45079312d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -26,7 +26,7 @@ default: @setpython python all: listcheck roundtrip roundtrip-compress mailboxing fi-regress coalesce \ - git-regress hg-regress svnload-regress legacy-regress \ + git-regress hg-regress hg-regress-branches svnload-regress legacy-regress \ svndump-regress repodiffer-regress repomapper-regress \ repotool-regress repocutter-regress @echo "=== No diff output is good news." -- GitLab From 18f188468fb82b680801783368aa6d7ff8918d3c Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Sat, 5 Mar 2016 13:17:08 -0500 Subject: [PATCH 13/20] Add 2nd hg extractor test with merge, add make target to rebuild .fi files for branch extractor tests. --- test/Makefile | 10 +++++-- test/be2.fi | 71 +++++++++++++++++++++++++++++++++++++++++++ test/hg-be2-test | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 test/be2.fi create mode 100755 test/hg-be2-test diff --git a/test/Makefile b/test/Makefile index 45079312d..bd68a6a36 100644 --- a/test/Makefile +++ b/test/Makefile @@ -132,7 +132,7 @@ coalesce: @rm -f /tmp/regress # Test the git extractor -GITLOADS = bs be +GITLOADS = bs be be2 git-regress: @echo "=== Testing git-extractor:" @if command -v git >/dev/null 2>&1 ; \ @@ -164,7 +164,13 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be +HGBRANCHES = be be2 +hg-buildregress-branches: + @for file in $(HGBRANCHES); do \ + echo "Remaking $${file}.fi"; \ + ./hg-$${file}-test | sed -e 1d -e '/^#legacy-id/d' | sed -e '/^#reposurgeon sourcetype/d' >$${file}.fi \ + 2>&1 || exit 1; \ + done hg-regress-branches: @echo "=== Testing hg-extractor with multiple hg branches:" @if command -v hg >/dev/null 2>&1 ; \ diff --git a/test/be2.fi b/test/be2.fi new file mode 100644 index 000000000..fd5ea7c8f --- /dev/null +++ b/test/be2.fi @@ -0,0 +1,71 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/heads/master +commit refs/heads/master +mark :2 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 2. + +commit refs/heads/test +mark :4 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :3 testfile2 + +commit refs/heads/master +mark :5 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 19 +Merge test branch. +from :2 +merge :4 + +blob +mark :6 +data 13 +Test file 3. + +commit refs/heads/master +mark :7 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 3. +from :5 +M 100644 :6 testfile3 + +blob +mark :8 +data 13 +Test file 4. + +commit refs/heads/test +mark :9 +author Peter Donis 1456976347 -0500 +committer Peter Donis 1456976347 -0500 +data 20 +Commit test file 4. +from :4 +M 100644 :8 testfile4 + +reset refs/heads/master +from :7 + +reset refs/heads/test +from :9 + diff --git a/test/hg-be2-test b/test/hg-be2-test new file mode 100755 index 000000000..92073569c --- /dev/null +++ b/test/hg-be2-test @@ -0,0 +1,78 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo including merge +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +BIN=${PWD}/.. + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + try hg merge test >/dev/null + try hg commit --date "1456976347 18000" -m "Merge test branch." >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 3." >/dev/null + try hg update test >/dev/null + try echo "Test file 4." > testfile4 + try hg add testfile4 >/dev/null + try hg commit --date "1456976347 18000" -m "Commit test file 4." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From 64c7ee4e3bf1695109ee600aa5320607900ad318 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Sat, 5 Mar 2016 14:52:01 -0500 Subject: [PATCH 14/20] Give hg commits in test repos different times to establish time ordering. --- test/be.fi | 8 ++++---- test/be2.fi | 22 +++++++++++----------- test/hg-be-test | 4 ++-- test/hg-be2-test | 8 ++++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/test/be.fi b/test/be.fi index 539ae1eb1..122f409a8 100644 --- a/test/be.fi +++ b/test/be.fi @@ -19,8 +19,8 @@ Test file 2. commit refs/heads/test mark :4 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976408 -0500 +committer Peter Donis 1456976408 -0500 data 20 Commit test file 2. from :2 @@ -33,8 +33,8 @@ Test file 3. commit refs/heads/master mark :6 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976475 -0500 +committer Peter Donis 1456976475 -0500 data 20 Commit test file 3. from :2 diff --git a/test/be2.fi b/test/be2.fi index fd5ea7c8f..361a11c67 100644 --- a/test/be2.fi +++ b/test/be2.fi @@ -19,8 +19,8 @@ Test file 2. commit refs/heads/test mark :4 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976408 -0500 +committer Peter Donis 1456976408 -0500 data 20 Commit test file 2. from :2 @@ -28,8 +28,8 @@ M 100644 :3 testfile2 commit refs/heads/master mark :5 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976475 -0500 +committer Peter Donis 1456976475 -0500 data 19 Merge test branch. from :2 @@ -42,8 +42,8 @@ Test file 3. commit refs/heads/master mark :7 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976606 -0500 +committer Peter Donis 1456976606 -0500 data 20 Commit test file 3. from :5 @@ -56,16 +56,16 @@ Test file 4. commit refs/heads/test mark :9 -author Peter Donis 1456976347 -0500 -committer Peter Donis 1456976347 -0500 +author Peter Donis 1456976715 -0500 +committer Peter Donis 1456976715 -0500 data 20 Commit test file 4. from :4 M 100644 :8 testfile4 -reset refs/heads/master -from :7 - reset refs/heads/test from :9 +reset refs/heads/master +from :7 + diff --git a/test/hg-be-test b/test/hg-be-test index d79de70fa..8b6463332 100755 --- a/test/hg-be-test +++ b/test/hg-be-test @@ -50,11 +50,11 @@ then try hg branch test >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null - try hg commit --date "1456976347 18000" -m "Commit test file 2." >/dev/null + try hg commit --date "1456976408 18000" -m "Commit test file 2." >/dev/null try hg update default >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null - try hg commit --date "1456976347 18000" -m "Commit test file 3." >/dev/null + try hg commit --date "1456976475 18000" -m "Commit test file 3." >/dev/null ) || exit 1 try cd - >/dev/null fi diff --git a/test/hg-be2-test b/test/hg-be2-test index 92073569c..f2badc752 100755 --- a/test/hg-be2-test +++ b/test/hg-be2-test @@ -50,17 +50,17 @@ then try hg branch test >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null - try hg commit --date "1456976347 18000" -m "Commit test file 2." >/dev/null + try hg commit --date "1456976408 18000" -m "Commit test file 2." >/dev/null try hg update default >/dev/null try hg merge test >/dev/null - try hg commit --date "1456976347 18000" -m "Merge test branch." >/dev/null + try hg commit --date "1456976475 18000" -m "Merge test branch." >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null - try hg commit --date "1456976347 18000" -m "Commit test file 3." >/dev/null + try hg commit --date "1456976606 18000" -m "Commit test file 3." >/dev/null try hg update test >/dev/null try echo "Test file 4." > testfile4 try hg add testfile4 >/dev/null - try hg commit --date "1456976347 18000" -m "Commit test file 4." >/dev/null + try hg commit --date "1456976715 18000" -m "Commit test file 4." >/dev/null ) || exit 1 try cd - >/dev/null fi -- GitLab From 53ceb7b2e4e4776e436090a8ca981332223c772e Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Sat, 5 Mar 2016 15:00:37 -0500 Subject: [PATCH 15/20] Add commit on default branch before merge in be2 test repo. --- test/be2.fi | 42 ++++++++++++++++++++++++++++-------------- test/hg-be2-test | 10 +++++++--- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/test/be2.fi b/test/be2.fi index 361a11c67..a388b178a 100644 --- a/test/be2.fi +++ b/test/be2.fi @@ -26,28 +26,28 @@ Commit test file 2. from :2 M 100644 :3 testfile2 -commit refs/heads/master +blob mark :5 +data 13 +Test file 3. + +commit refs/heads/master +mark :6 author Peter Donis 1456976475 -0500 committer Peter Donis 1456976475 -0500 -data 19 -Merge test branch. +data 20 +Commit test file 3. from :2 -merge :4 - -blob -mark :6 -data 13 -Test file 3. +M 100644 :5 testfile3 commit refs/heads/master mark :7 author Peter Donis 1456976606 -0500 committer Peter Donis 1456976606 -0500 -data 20 -Commit test file 3. -from :5 -M 100644 :6 testfile3 +data 19 +Merge test branch. +from :6 +merge :4 blob mark :8 @@ -63,9 +63,23 @@ Commit test file 4. from :4 M 100644 :8 testfile4 +blob +mark :10 +data 13 +Test file 5. + +commit refs/heads/master +mark :11 +author Peter Donis 1456976798 -0500 +committer Peter Donis 1456976798 -0500 +data 20 +Commit test file 5. +from :7 +M 100644 :10 testfile5 + reset refs/heads/test from :9 reset refs/heads/master -from :7 +from :11 diff --git a/test/hg-be2-test b/test/hg-be2-test index f2badc752..3c3c1b96e 100755 --- a/test/hg-be2-test +++ b/test/hg-be2-test @@ -52,15 +52,19 @@ then try hg add testfile2 >/dev/null try hg commit --date "1456976408 18000" -m "Commit test file 2." >/dev/null try hg update default >/dev/null - try hg merge test >/dev/null - try hg commit --date "1456976475 18000" -m "Merge test branch." >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null - try hg commit --date "1456976606 18000" -m "Commit test file 3." >/dev/null + try hg commit --date "1456976475 18000" -m "Commit test file 3." >/dev/null + try hg merge test >/dev/null + try hg commit --date "1456976606 18000" -m "Merge test branch." >/dev/null try hg update test >/dev/null try echo "Test file 4." > testfile4 try hg add testfile4 >/dev/null try hg commit --date "1456976715 18000" -m "Commit test file 4." >/dev/null + try hg update default >/dev/null + try echo "Test file 5." > testfile5 + try hg add testfile5 >/dev/null + try hg commit --date "1456976798 18000" -m "Commit test file 5." >/dev/null ) || exit 1 try cd - >/dev/null fi -- GitLab From 58ff1d1df3531754eb0cd87d07babf32f41cf3b0 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Mon, 7 Mar 2016 11:58:20 -0500 Subject: [PATCH 16/20] Use fast-export with --export-marks to get mapping of commits to branches in git extractor. --- reposurgeon | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/reposurgeon b/reposurgeon index 1fc15031a..0c926923f 100755 --- a/reposurgeon +++ b/reposurgeon @@ -982,14 +982,43 @@ class GitExtractor(Extractor): committish=objecthash)) self.refs["refs/tags/" + tag] = objecthash def _color_branches(self): - # Git will give us branch info with this incantation - with popen_or_die("git name-rev --all") as fp: - for line in fp: - h, branch = polystr(line).strip().split() - branch = branch.split('~')[0].split('^')[0] - if not "/" in branch: - branch = "heads/" + branch - self.meta[h]['branch'] = "refs/" + branch + # This is really cheating since fast-export could give us the + # whole repo, but it's the only way I've found to get the correct + # mapping of commits to branches, and we still want to test the + # rest of the extractor logic independently, so here goes + data = marks = None + _, fname = tempfile.mkstemp() + try: + with popen_or_die("git fast-export --all --export-marks=%s" % fname) as fp: + # We can't iterate line by line here because we need to be sure that + # the entire fast-export process is complete so the marks file is + # written and closed + data = fp.read() + with open(fname, "rb") as fp: + marks = dict(polystr(line).split() for line in fp) + finally: + os.remove(fname) + if not (marks and data): + raise Fatal("could not get branch information") + branch = None + for line in data.splitlines(): + fields = polystr(line).strip().split() + if len(fields) != 2: + # The lines we're interested in will always have exactly 2 fields: + # commit or mark ; so all other lines can be ignored + continue + elif fields[0] == "commit": + assert branch is None + branch = fields[1] + elif (fields[0] == "mark") and (branch is not None): + h = marks[fields[1]] + self.meta[h]['branch'] = branch + branch = None + elif branch is not None: + # The mark line for a commit should always be the next line after + # the commit line, so this should never happen, but we put it in + # just in case + raise Fatal("could not parse branch information") # This should not be needed, but do it for completeness Extractor._color_branches(self) def __metadata(self, rev, fmt): -- GitLab From bcb25fe8fd2dad4e4337dbd218657c716524ba39 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Mon, 7 Mar 2016 12:02:21 -0500 Subject: [PATCH 17/20] Use --date-order in git extractor to ensure all commits are in timestamp order. --- reposurgeon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index 0c926923f..d7faa87ea 100755 --- a/reposurgeon +++ b/reposurgeon @@ -932,7 +932,7 @@ class GitExtractor(Extractor): ignorename = ".gitignore" def find_revision_ids(self, baton): assert baton is not None # pacify pylint - with popen_or_die("git log --all --topo-order --reverse --format='%H %P'") as fp: + with popen_or_die("git log --all --date-order --reverse --format='%H %P'") as fp: for line in fp: fields = polystr(line).strip().split() self.revlist.append(fields[0]) -- GitLab From 592346dd02f709ca26be5d057509024220c7671a Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Mon, 7 Mar 2016 12:04:17 -0500 Subject: [PATCH 18/20] Add strip of each line when reading marks file in git extractor. --- reposurgeon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index d7faa87ea..8c190a00b 100755 --- a/reposurgeon +++ b/reposurgeon @@ -995,7 +995,7 @@ class GitExtractor(Extractor): # written and closed data = fp.read() with open(fname, "rb") as fp: - marks = dict(polystr(line).split() for line in fp) + marks = dict(polystr(line).strip().split() for line in fp) finally: os.remove(fname) if not (marks and data): -- GitLab From 7c872e95f2a03537adc93f9a6e219da718aeba17 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Mon, 7 Mar 2016 20:34:05 -0500 Subject: [PATCH 19/20] Extractor adds explicit fileops for files from all parents except first of a merge commit; fix be2.fi to correspond. --- reposurgeon | 5 ++++- test/be2.fi | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index 8c190a00b..091cd4d5c 100755 --- a/reposurgeon +++ b/reposurgeon @@ -2994,7 +2994,10 @@ class RepoStreamer: msg = "" announce("r%s: comment '%s'" % (revision, msg.strip())) self.filemap[revision] = {} - for rev in parents: + # Git fast-import constructs the tree from the first parent only + # for a merge commit; fileops from all other parents have to be + # added explicitly + for rev in parents[:1]: self.filemap[revision].update(self.filemap[rev]) if present: removed = set(self.filemap[revision]) - set(present) diff --git a/test/be2.fi b/test/be2.fi index a388b178a..457f1287f 100644 --- a/test/be2.fi +++ b/test/be2.fi @@ -48,6 +48,7 @@ data 19 Merge test branch. from :6 merge :4 +M 100644 :3 testfile2 blob mark :8 -- GitLab From e95f3debd01692df03351a3012a9884a21be5dc3 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Mon, 7 Mar 2016 23:19:35 -0500 Subject: [PATCH 20/20] Close temp file handle after git extractor gets branch data. --- reposurgeon | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index 091cd4d5c..b2e161859 100755 --- a/reposurgeon +++ b/reposurgeon @@ -987,7 +987,7 @@ class GitExtractor(Extractor): # mapping of commits to branches, and we still want to test the # rest of the extractor logic independently, so here goes data = marks = None - _, fname = tempfile.mkstemp() + tfp, fname = tempfile.mkstemp() try: with popen_or_die("git fast-export --all --export-marks=%s" % fname) as fp: # We can't iterate line by line here because we need to be sure that @@ -997,6 +997,7 @@ class GitExtractor(Extractor): with open(fname, "rb") as fp: marks = dict(polystr(line).strip().split() for line in fp) finally: + os.close(tfp) os.remove(fname) if not (marks and data): raise Fatal("could not get branch information") -- GitLab