From 61ccf8b6ed166fe2f03b5790499173915036ae42 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 00:51:17 -0400 Subject: [PATCH 01/32] Add hg be4 extractor test, commit coloring is incorrect. --- test/Makefile | 2 +- test/hg-be4-test | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100755 test/hg-be4-test diff --git a/test/Makefile b/test/Makefile index bbfc76f59..713361971 100644 --- a/test/Makefile +++ b/test/Makefile @@ -165,7 +165,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be be2 be3 +HGBRANCHES = be be2 be3 be4 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ diff --git a/test/hg-be4-test b/test/hg-be4-test new file mode 100755 index 000000000..eec2919be --- /dev/null +++ b/test/hg-be4-test @@ -0,0 +1,92 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo including merge and tags +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 3." >/dev/null + try hg merge test >/dev/null + try hg commit --user "$USER" --date "1456976606 18000" -m "Merge test branch." >/dev/null + try hg update test >/dev/null + try echo "Test file 4." > testfile4 + try hg add testfile4 >/dev/null + try hg commit --user "$USER" --date "1456976715 18000" -m "Commit test file 4." >/dev/null + try hg tag --local 1.0a >/dev/null + try hg update default >/dev/null + try echo "Test file 5." > testfile5 + try hg add testfile5 >/dev/null + try hg commit --user "$USER" --date "1456976798 18000" -m "Commit test file 5." >/dev/null + try hg tag --local 1.0 >/dev/null + try echo "Second line." >> testfile5 + try hg commit --user "$USER" --date "1457895329 14400" -m "Add line to test file 5." >/dev/null + try hg update test >/dev/null + try echo "Second line." >> testfile4 + try hg commit --user "$USER" --date "1457895350 14400" -m "Add line to test file 4." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From 8d3856ac88e80829dcce681c0d100418203549cf Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 01:18:32 -0400 Subject: [PATCH 02/32] Add bt hg extractor coloring test; hg extractor passes. --- test/Makefile | 2 +- test/hg-bt-test | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100755 test/hg-bt-test diff --git a/test/Makefile b/test/Makefile index 713361971..752d069f2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -165,7 +165,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be be2 be3 be4 +HGBRANCHES = be be2 be3 bt be4 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ diff --git a/test/hg-bt-test b/test/hg-bt-test new file mode 100755 index 000000000..2078976f9 --- /dev/null +++ b/test/hg-bt-test @@ -0,0 +1,79 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg tag --local before >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 2." >/dev/null + try hg tag --local after >/dev/null + try hg update before >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 3." >/dev/null + try hg tag --local --force before >/dev/null + try echo "Second line." >> testfile3 + try hg commit --user "$USER" --date "1456976542 18000" -m "Add line to test file 3." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From a5d896ed53bee663fbce702feb95d4ea6023850e Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 01:21:16 -0400 Subject: [PATCH 03/32] Add bt2 git extractor test, small change from bt; git extractor fails. --- test/Makefile | 2 +- test/bt2.fi | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 test/bt2.fi diff --git a/test/Makefile b/test/Makefile index 752d069f2..fa0ba8841 100644 --- a/test/Makefile +++ b/test/Makefile @@ -133,7 +133,7 @@ coalesce: @rm -f /tmp/regress # Test the git extractor -GITLOADS = bs be be2 be3 be4 bt +GITLOADS = bs be be2 be3 be4 bt bt2 git-regress: @echo "=== Testing git-extractor:" @if command -v git >/dev/null 2>&1 ; \ diff --git a/test/bt2.fi b/test/bt2.fi new file mode 100644 index 000000000..dad959d4d --- /dev/null +++ b/test/bt2.fi @@ -0,0 +1,60 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/tags/before +commit refs/tags/before +mark :2 +author "J. Random Hacker" 1456976347 -0500 +committer "J. Random Hacker" 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 3. + +commit refs/tags/before +mark :4 +author "J. Random Hacker" 1456976408 -0500 +committer "J. Random Hacker" 1456976408 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :3 testfile3 + +blob +mark :5 +data 13 +Test file 2. + +commit refs/tags/after +mark :6 +author "J. Random Hacker" 1456976475 -0500 +committer "J. Random Hacker" 1456976475 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :5 testfile2 + +blob +mark :7 +data 26 +Test file 3. +Second line. + +commit refs/heads/master +mark :8 +author "J. Random Hacker" 1456976542 -0500 +committer "J. Random Hacker" 1456976542 -0500 +data 25 +Add line to test file 3. +from :4 +M 100644 :7 testfile3 + +reset refs/heads/master +from :8 + -- GitLab From 9941971371650bf8e200103ce7cd1e2ec46ff8de Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 01:27:01 -0400 Subject: [PATCH 04/32] Add bt2 hg extractor test; hg extractor fails. --- test/Makefile | 2 +- test/hg-bt2-test | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100755 test/hg-bt2-test diff --git a/test/Makefile b/test/Makefile index fa0ba8841..acc491337 100644 --- a/test/Makefile +++ b/test/Makefile @@ -165,7 +165,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be be2 be3 bt be4 +HGBRANCHES = be be2 be3 bt bt2 be4 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ diff --git a/test/hg-bt2-test b/test/hg-bt2-test new file mode 100755 index 000000000..c41655c6d --- /dev/null +++ b/test/hg-bt2-test @@ -0,0 +1,80 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg tag --local after >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null + try hg tag --local before >/dev/null + try hg update after >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 2." >/dev/null + try hg tag --local --force after >/dev/null + try hg update before >/dev/null + try echo "Second line." >> testfile3 + try hg commit --user "$USER" --date "1456976542 18000" -m "Add line to test file 3." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From 39f632cd8c8a55b8e2d4770374dddfab394ad7d0 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 11:22:07 -0400 Subject: [PATCH 05/32] Add bb git extractor test, small change from be with no tags; git extractor fails. --- test/Makefile | 2 +- test/bb.fi | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/bb.fi diff --git a/test/Makefile b/test/Makefile index acc491337..8c09fe130 100644 --- a/test/Makefile +++ b/test/Makefile @@ -133,7 +133,7 @@ coalesce: @rm -f /tmp/regress # Test the git extractor -GITLOADS = bs be be2 be3 be4 bt bt2 +GITLOADS = bs be be2 be3 be4 bt bb bt2 git-regress: @echo "=== Testing git-extractor:" @if command -v git >/dev/null 2>&1 ; \ diff --git a/test/bb.fi b/test/bb.fi new file mode 100644 index 000000000..d4d86af00 --- /dev/null +++ b/test/bb.fi @@ -0,0 +1,48 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/heads/master +commit refs/heads/master +mark :2 +author "J. Random Hacker" 1456976347 -0500 +committer "J. Random Hacker" 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 3. + +commit refs/heads/master +mark :4 +author "J. Random Hacker" 1456976408 -0500 +committer "J. Random Hacker" 1456976408 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :3 testfile3 + +blob +mark :5 +data 13 +Test file 2. + +commit refs/heads/test +mark :6 +author "J. Random Hacker" 1456976475 -0500 +committer "J. Random Hacker" 1456976475 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :5 testfile2 + +reset refs/heads/master +from :4 + +reset refs/heads/test +from :6 + -- GitLab From 799c0f5cebdd9c73a9243fd13414e8929ba625d0 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 11:30:34 -0400 Subject: [PATCH 06/32] Fix opening comments in hg bt and bt2 test scripts. --- test/hg-bt-test | 8 +++----- test/hg-bt2-test | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/test/hg-bt-test b/test/hg-bt-test index 2078976f9..07804f0e7 100755 --- a/test/hg-bt-test +++ b/test/hg-bt-test @@ -2,11 +2,9 @@ # # Test reposurgeon branch naming issue with hg repo # -# This test cannot use the usual hg-to-fi script because it -# needs an hg repo with actual hg branches, not hg bookmarks; -# the hg convert utility converts git branches in a fast-import -# stream to hg bookmarks, so the hg-regress test target only -# tests correct handling of hg bookmarks, not hg branches +# This test cannot use the usual hg-to-fi script because we +# want to test the behavior of the actual hg tag command and +# how its results appear to the hg extractor # Required because $PWD seems to be undefined in Gitlab's CI environment BIN=`realpath ..` diff --git a/test/hg-bt2-test b/test/hg-bt2-test index c41655c6d..2bce3a9c1 100755 --- a/test/hg-bt2-test +++ b/test/hg-bt2-test @@ -2,11 +2,9 @@ # # Test reposurgeon branch naming issue with hg repo # -# This test cannot use the usual hg-to-fi script because it -# needs an hg repo with actual hg branches, not hg bookmarks; -# the hg convert utility converts git branches in a fast-import -# stream to hg bookmarks, so the hg-regress test target only -# tests correct handling of hg bookmarks, not hg branches +# This test cannot use the usual hg-to-fi script because we +# want to test the behavior of the actual hg tag command and +# how its results appear to the hg extractor # Required because $PWD seems to be undefined in Gitlab's CI environment BIN=`realpath ..` -- GitLab From 2db5f0e2bd7c73e42d0b5e2ce12738fc54e1ce4f Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 11:57:13 -0400 Subject: [PATCH 07/32] Fix to hg bb test; hg extractor passes. --- test/Makefile | 2 +- test/hg-bb-test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Makefile b/test/Makefile index 8c09fe130..4c96e54e8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -165,7 +165,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be be2 be3 bt bt2 be4 +HGBRANCHES = be be2 be3 bt bb bt2 be4 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ diff --git a/test/hg-bb-test b/test/hg-bb-test index ee5be641c..5c30596a7 100755 --- a/test/hg-bb-test +++ b/test/hg-bb-test @@ -53,7 +53,7 @@ then try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null - try hg update -r 1 >/dev/null + try hg update -r 0 >/dev/null try hg branch test >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null -- GitLab From a2920bc6b1ee3d1452921bbf9a9ed9c614d91e1b Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 12:09:14 -0400 Subject: [PATCH 08/32] Fix hg bt and bt2 scripts to update by rev number. --- test/hg-bt-test | 5 ++--- test/hg-bt2-test | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/hg-bt-test b/test/hg-bt-test index 07804f0e7..a76771e0b 100755 --- a/test/hg-bt-test +++ b/test/hg-bt-test @@ -48,16 +48,15 @@ then try echo "Test file 1." > testfile1 try hg add testfile1 >/dev/null try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null - try hg tag --local before >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 2." >/dev/null try hg tag --local after >/dev/null - try hg update before >/dev/null + try hg update -r 0 >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 3." >/dev/null - try hg tag --local --force before >/dev/null + try hg tag --local before >/dev/null try echo "Second line." >> testfile3 try hg commit --user "$USER" --date "1456976542 18000" -m "Add line to test file 3." >/dev/null ) || exit 1 diff --git a/test/hg-bt2-test b/test/hg-bt2-test index 2bce3a9c1..b780e644f 100755 --- a/test/hg-bt2-test +++ b/test/hg-bt2-test @@ -48,16 +48,15 @@ then try echo "Test file 1." > testfile1 try hg add testfile1 >/dev/null try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null - try hg tag --local after >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null try hg tag --local before >/dev/null - try hg update after >/dev/null + try hg update -r 0 >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 2." >/dev/null - try hg tag --local --force after >/dev/null + try hg tag --local after >/dev/null try hg update before >/dev/null try echo "Second line." >> testfile3 try hg commit --user "$USER" --date "1456976542 18000" -m "Add line to test file 3." >/dev/null -- GitLab From 9400e34fa9baf19f7fc035d880186186beb52161 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 15 Mar 2016 23:23:43 -0400 Subject: [PATCH 09/32] Reorganize test Makefile to separate tag tests and branch tests and place failed tests at end of their target lists. --- test/Makefile | 65 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/test/Makefile b/test/Makefile index 4c96e54e8..a5f8a4c57 100644 --- a/test/Makefile +++ b/test/Makefile @@ -26,7 +26,8 @@ default: @setpython python all: listcheck roundtrip roundtrip-compress mailboxing fi-regress coalesce \ - git-regress hg-regress hg-regress-branches \ + git-regress git-regress-branches git-regress-tags \ + hg-regress hg-regress-branches hg-regress-tags \ svnload-regress legacy-regress svndump-regress \ repodiffer-regress repomapper-regress \ repotool-regress repocutter-regress @@ -133,7 +134,7 @@ coalesce: @rm -f /tmp/regress # Test the git extractor -GITLOADS = bs be be2 be3 be4 bt bb bt2 +GITLOADS = bs git-regress: @echo "=== Testing git-extractor:" @if command -v git >/dev/null 2>&1 ; \ @@ -148,6 +149,40 @@ git-regress: fi @rm -f /tmp/regress +# Test the git extractor with multiple git branches +# FIXME: bb fails (note that it passes with the hg extractor) +GITBRANCHES = be be2 be3 be4 bb +git-regress-branches: + @echo "=== Testing git-extractor with multiple git branches:" + @if command -v git >/dev/null 2>&1 ; \ + then \ + for test in $(GITBRANCHES); do \ + if (echo " $${test}.fi" >&2; \ + ./fi-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, git missing."; exit 0; \ + fi + @rm -f /tmp/regress + +# Test the git extractor with tags +# FIXME: bt2 fails +GITTAGS = bt bt2 +git-regress-tags: + @echo "=== Testing git-extractor with tags:" + @if command -v git >/dev/null 2>&1 ; \ + then \ + for test in $(GITTAGS); do \ + if (echo " $${test}.fi" >&2; \ + ./fi-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, git missing."; exit 0; \ + fi + @rm -f /tmp/regress + # Test the hg extractor HGLOADS = testrepo2 hg-regress: @@ -165,7 +200,8 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be be2 be3 bt bb bt2 be4 +# FIXME: be4 fails (note that it passes with the git extractor) +HGBRANCHES = be be2 be3 bb be4 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ @@ -186,6 +222,29 @@ hg-regress-branches: fi @rm -f /tmp/regress +# Test the hg extractor with tags +# FIXME: bt2 fails +HGTAGS = bt bt2 +hg-buildregress-tags: + @for file in $(HGTAGS); do \ + echo "Remaking $${file}.fi"; \ + ./hg-$${file}-test | sed -e 1d -e '/^#legacy-id/d' | sed -e '/^#reposurgeon sourcetype/d' >$${file}.fi \ + 2>&1 || exit 1; \ + done +hg-regress-tags: + @echo "=== Testing hg-extractor with tags:" + @if command -v hg >/dev/null 2>&1 ; \ + then \ + for test in $(HGTAGS); do \ + if (echo " $${test}" >&2; \ + ./hg-$${test}-test) | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, hg missing."; exit 0; \ + fi + @rm -f /tmp/regress + # Test loading from Subversion SVNLOADS := $(shell ls *.svn | sed '/.svn/s///') svnload-buildregress: -- GitLab From cf438f01a3edff7e30133bbca17e10c12e97fee9 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 16 Mar 2016 23:17:59 -0400 Subject: [PATCH 10/32] Add be5 test case for git and hg extractor, both fail; reorganize test Makefile to separate merge tests. --- test/Makefile | 50 ++++++++++++++++++-- test/be5.fi | 116 +++++++++++++++++++++++++++++++++++++++++++++++ test/hg-be5-test | 93 +++++++++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 5 deletions(-) create mode 100644 test/be5.fi create mode 100755 test/hg-be5-test diff --git a/test/Makefile b/test/Makefile index a5f8a4c57..e2708b06e 100644 --- a/test/Makefile +++ b/test/Makefile @@ -26,8 +26,8 @@ default: @setpython python all: listcheck roundtrip roundtrip-compress mailboxing fi-regress coalesce \ - git-regress git-regress-branches git-regress-tags \ - hg-regress hg-regress-branches hg-regress-tags \ + git-regress git-regress-branches git-regress-merges git-regress-tags \ + hg-regress hg-regress-branches hg-regress-merges hg-regress-tags \ svnload-regress legacy-regress svndump-regress \ repodiffer-regress repomapper-regress \ repotool-regress repocutter-regress @@ -151,7 +151,7 @@ git-regress: # Test the git extractor with multiple git branches # FIXME: bb fails (note that it passes with the hg extractor) -GITBRANCHES = be be2 be3 be4 bb +GITBRANCHES = be bb git-regress-branches: @echo "=== Testing git-extractor with multiple git branches:" @if command -v git >/dev/null 2>&1 ; \ @@ -166,6 +166,23 @@ git-regress-branches: fi @rm -f /tmp/regress +# Test the git extractor with merges +# FIXME: be5 fails +GITMERGES = be2 be3 be4 be5 +git-regress-merges: + @echo "=== Testing git-extractor with merges:" + @if command -v git >/dev/null 2>&1 ; \ + then \ + for test in $(GITMERGES); do \ + if (echo " $${test}.fi" >&2; \ + ./fi-to-fi) <$${test}.fi | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, git missing."; exit 0; \ + fi + @rm -f /tmp/regress + # Test the git extractor with tags # FIXME: bt2 fails GITTAGS = bt bt2 @@ -200,8 +217,8 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -# FIXME: be4 fails (note that it passes with the git extractor) -HGBRANCHES = be be2 be3 bb be4 +# FIXME: be5 fails (NOTE: be5 really belongs in hg-regress-merges below) +HGBRANCHES = be bb be5 hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ @@ -222,6 +239,29 @@ hg-regress-branches: fi @rm -f /tmp/regress +# Test the hg extractor with merges +# FIXME: be4 fails (note that it passes with the git extractor) +HGMERGES = be2 be3 be4 +hg-buildregress-merges: + @for file in $(HGMERGES); do \ + echo "Remaking $${file}.fi"; \ + ./hg-$${file}-test | sed -e 1d -e '/^#legacy-id/d' | sed -e '/^#reposurgeon sourcetype/d' >$${file}.fi \ + 2>&1 || exit 1; \ + done +hg-regress-merges: + @echo "=== Testing hg-extractor with merges:" + @if command -v hg >/dev/null 2>&1 ; \ + then \ + for test in $(HGMERGES); do \ + if (echo " $${test}" >&2; \ + ./hg-$${test}-test) | sed -e 1d -e '/^#legacy-id/d' >/tmp/regress; \ + then diff --text -u $${test}.fi /tmp/regress || exit 1; \ + else echo "*** Nonzero return status on $${test}!"; exit 1; fi \ + done; \ + else echo " Skipped, hg missing."; exit 0; \ + fi + @rm -f /tmp/regress + # Test the hg extractor with tags # FIXME: bt2 fails HGTAGS = bt bt2 diff --git a/test/be5.fi b/test/be5.fi new file mode 100644 index 000000000..e47d77acf --- /dev/null +++ b/test/be5.fi @@ -0,0 +1,116 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/tags/1.0 +commit refs/tags/1.0 +mark :2 +author "J. Random Hacker" 1456976347 -0500 +committer "J. Random Hacker" 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 3. + +commit refs/tags/1.0 +mark :4 +author "J. Random Hacker" 1456976408 -0500 +committer "J. Random Hacker" 1456976408 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :3 testfile3 + +blob +mark :5 +data 13 +Test file 2. + +commit refs/tags/1.0a +mark :6 +author "J. Random Hacker" 1456976475 -0500 +committer "J. Random Hacker" 1456976475 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :5 testfile2 + +commit refs/tags/1.0 +mark :7 +author "J. Random Hacker" 1456976606 -0500 +committer "J. Random Hacker" 1456976606 -0500 +data 19 +Merge test branch. +from :4 +merge :6 +M 100644 :5 testfile2 + +blob +mark :8 +data 13 +Test file 4. + +commit refs/tags/1.0a +mark :9 +author "J. Random Hacker" 1456976715 -0500 +committer "J. Random Hacker" 1456976715 -0500 +data 20 +Commit test file 4. +from :6 +M 100644 :8 testfile4 + +blob +mark :10 +data 13 +Test file 5. + +commit refs/tags/1.0 +mark :11 +author "J. Random Hacker" 1456976798 -0500 +committer "J. Random Hacker" 1456976798 -0500 +data 20 +Commit test file 5. +from :7 +M 100644 :10 testfile5 + +blob +mark :12 +data 26 +Test file 5. +Second line. + +commit refs/heads/master +mark :13 +author "J. Random Hacker" 1457895329 -0400 +committer "J. Random Hacker" 1457895329 -0400 +data 25 +Add line to test file 5. +from :11 +M 100644 :12 testfile5 + +blob +mark :14 +data 26 +Test file 4. +Second line. + +commit refs/heads/test +mark :15 +author "J. Random Hacker" 1457895350 -0400 +committer "J. Random Hacker" 1457895350 -0400 +data 25 +Add line to test file 4. +from :9 +M 100644 :14 testfile4 + +reset refs/heads/master +from :13 + +reset refs/heads/test +from :15 + diff --git a/test/hg-be5-test b/test/hg-be5-test new file mode 100755 index 000000000..b51ded3e2 --- /dev/null +++ b/test/hg-be5-test @@ -0,0 +1,93 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo including merge and tags +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null + try hg update -r 0 >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + try hg merge test >/dev/null + try hg commit --user "$USER" --date "1456976606 18000" -m "Merge test branch." >/dev/null + try hg update test >/dev/null + try echo "Test file 4." > testfile4 + try hg add testfile4 >/dev/null + try hg commit --user "$USER" --date "1456976715 18000" -m "Commit test file 4." >/dev/null + try hg tag --local 1.0a >/dev/null + try hg update default >/dev/null + try echo "Test file 5." > testfile5 + try hg add testfile5 >/dev/null + try hg commit --user "$USER" --date "1456976798 18000" -m "Commit test file 5." >/dev/null + try hg tag --local 1.0 >/dev/null + try echo "Second line." >> testfile5 + try hg commit --user "$USER" --date "1457895329 14400" -m "Add line to test file 5." >/dev/null + try hg update test >/dev/null + try echo "Second line." >> testfile4 + try hg commit --user "$USER" --date "1457895350 14400" -m "Add line to test file 4." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From aad54e57bb26cdae86460b22a441fecfc6013749 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Sun, 20 Mar 2016 23:42:58 -0400 Subject: [PATCH 11/32] Add be6 test case for git extractor to test coloring through merge; git extractor fails. --- test/Makefile | 4 +- test/be6.fi | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 test/be6.fi diff --git a/test/Makefile b/test/Makefile index e2708b06e..703bff32a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -167,8 +167,8 @@ git-regress-branches: @rm -f /tmp/regress # Test the git extractor with merges -# FIXME: be5 fails -GITMERGES = be2 be3 be4 be5 +# FIXME: be6 and be5 fail +GITMERGES = be2 be3 be4 be6 be5 git-regress-merges: @echo "=== Testing git-extractor with merges:" @if command -v git >/dev/null 2>&1 ; \ diff --git a/test/be6.fi b/test/be6.fi new file mode 100644 index 000000000..48987c094 --- /dev/null +++ b/test/be6.fi @@ -0,0 +1,116 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/tags/1.0 +commit refs/tags/1.0 +mark :2 +author "J. Random Hacker" 1456976347 -0500 +committer "J. Random Hacker" 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 2. + +commit refs/tags/1.0a +mark :4 +author "J. Random Hacker" 1456976408 -0500 +committer "J. Random Hacker" 1456976408 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :3 testfile2 + +blob +mark :5 +data 13 +Test file 3. + +commit refs/tags/1.0 +mark :6 +author "J. Random Hacker" 1456976475 -0500 +committer "J. Random Hacker" 1456976475 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :5 testfile3 + +blob +mark :7 +data 13 +Test file 4. + +commit refs/tags/1.0a +mark :8 +author "J. Random Hacker" 1456976606 -0500 +committer "J. Random Hacker" 1456976606 -0500 +data 20 +Commit test file 4. +from :4 +M 100644 :7 testfile4 + +commit refs/tags/1.0 +mark :9 +author "J. Random Hacker" 1456976715 -0500 +committer "J. Random Hacker" 1456976715 -0500 +data 19 +Merge test branch. +from :6 +merge :4 +M 100644 :3 testfile2 + +blob +mark :10 +data 13 +Test file 5. + +commit refs/tags/1.0 +mark :11 +author "J. Random Hacker" 1456976798 -0500 +committer "J. Random Hacker" 1456976798 -0500 +data 20 +Commit test file 5. +from :9 +M 100644 :10 testfile5 + +blob +mark :12 +data 26 +Test file 5. +Second line. + +commit refs/heads/master +mark :13 +author "J. Random Hacker" 1457895329 -0400 +committer "J. Random Hacker" 1457895329 -0400 +data 25 +Add line to test file 5. +from :11 +M 100644 :12 testfile5 + +blob +mark :14 +data 26 +Test file 4. +Second line. + +commit refs/heads/test +mark :15 +author "J. Random Hacker" 1457895350 -0400 +committer "J. Random Hacker" 1457895350 -0400 +data 25 +Add line to test file 4. +from :8 +M 100644 :14 testfile4 + +reset refs/heads/master +from :13 + +reset refs/heads/test +from :15 + -- GitLab From af8af0b01ac38d6592cf4c0b2f7aeda7345182b8 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 16:31:25 -0400 Subject: [PATCH 12/32] Make hg bb test fail the same way git bb test does. --- test/hg-bb-test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/hg-bb-test b/test/hg-bb-test index 5c30596a7..397deb502 100755 --- a/test/hg-bb-test +++ b/test/hg-bb-test @@ -47,14 +47,16 @@ then # time zones in the world but I didn't think any of them got down to one-second # granularity in offsets... ( + try hg branch test >/dev/null try echo "Test file 1." > testfile1 try hg add testfile1 >/dev/null try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg update -r 0 >/dev/null + try hg branch default >/dev/null try echo "Test file 3." > testfile3 try hg add testfile3 >/dev/null try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null - try hg update -r 0 >/dev/null - try hg branch test >/dev/null + try hg update test >/dev/null try echo "Test file 2." > testfile2 try hg add testfile2 >/dev/null try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 2." >/dev/null -- GitLab From 4cadc71a7585ffb2d372ea24b7bdecb7b453fa22 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 16:37:04 -0400 Subject: [PATCH 13/32] Update fixme comments in test Makefile. --- test/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 703bff32a..3eb4cbfd1 100644 --- a/test/Makefile +++ b/test/Makefile @@ -150,7 +150,7 @@ git-regress: @rm -f /tmp/regress # Test the git extractor with multiple git branches -# FIXME: bb fails (note that it passes with the hg extractor) +# FIXME: bb fails GITBRANCHES = be bb git-regress-branches: @echo "=== Testing git-extractor with multiple git branches:" @@ -217,6 +217,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches +# FIXME: bb fails (same as git extractor) # FIXME: be5 fails (NOTE: be5 really belongs in hg-regress-merges below) HGBRANCHES = be bb be5 hg-buildregress-branches: -- GitLab From b52db3401ccb286ba2ffddff282508efb8f3cc55 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 16:53:05 -0400 Subject: [PATCH 14/32] Refactor extractor code to make checking for retrieval of (commit hash, branch name) pairs from repo generic. --- reposurgeon | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/reposurgeon b/reposurgeon index 48e9b762a..2a4860299 100755 --- a/reposurgeon +++ b/reposurgeon @@ -822,7 +822,12 @@ class Extractor(object): assert baton is not None # pacify pylint def color_branches(self, baton): "Color all commits with their branch name." - self._color_branches() + color_items = self._branch_color_items() + if color_items: + for h, color in color_items: + self.meta[h]['branch'] = color + else: + self._color_branches() uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]] if uncolored: if verbose >= 1: @@ -830,6 +835,12 @@ class Extractor(object): else: raise Fatal("some branches do not have local ref names.") assert baton is not None # pacify pylint + def _branch_color_items(self): + """Return iterable of (commit hash, branch name) pairs""" + # The default is that this information is not retrievable directly + # from the repo; if it is (e.g., for git), this method should be + # overridden to retrieve it + return None def _color_branches(self): """Color branches in the order the tips occur.""" # Note: this algorithm by itself is not correct. It is included @@ -981,7 +992,7 @@ class GitExtractor(Extractor): comment=comment, committish=objecthash)) self.refs["refs/tags/" + tag] = objecthash - def _color_branches(self): + def _branch_color_items(self): # This is really cheating since fast-export could give us the # whole repo, but it's the only way I've found to get the correct # mapping of commits to branches, and we still want to test the @@ -1013,15 +1024,14 @@ class GitExtractor(Extractor): branch = fields[1] elif (fields[0] == "mark") and (branch is not None): h = marks[fields[1]] - self.meta[h]['branch'] = branch + # This is a valid (commit hash, branch name) pair + yield (h, branch) branch = None elif branch is not None: # The mark line for a commit should always be the next line after # the commit line, so this should never happen, but we put it in # just in case raise Fatal("could not parse branch information") - # This should not be needed, but do it for completeness - Extractor._color_branches(self) def __metadata(self, rev, fmt): with popen_or_die("git log -1 --format='%s' %s" % (fmt, rev)) as fp: return polystr(fp.read())[:-1] -- GitLab From 1db5eb29fbbeea12dd813bee48441e26c888470e Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 18:40:40 -0400 Subject: [PATCH 15/32] Fix extractor coloring algorithm to emulate git by coloring a parent commit with multiple children from the latest child. --- reposurgeon | 101 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 38 deletions(-) diff --git a/reposurgeon b/reposurgeon index 2a4860299..db5183f34 100755 --- a/reposurgeon +++ b/reposurgeon @@ -795,6 +795,8 @@ class Extractor(object): self.meta = {} # commit -> {'ci':committer, 'ai':author, 'branch':color} self.refs = collections.OrderedDict() # 'refs/class/name' -> commit self.tags = [] # Tag objects (annotated tags only) + self.timestamps = {} # commit -> Unix timestamp + self.child_timestamps = {} # commit -> timestamp of latest child (used for coloring) def analyze(self, baton): "Analyze a repository for streaming." self.find_revision_ids(baton) @@ -824,9 +826,12 @@ class Extractor(object): "Color all commits with their branch name." color_items = self._branch_color_items() if color_items: + # If the repo will give us a complete list of (commit hash, branch name) pairs, + # use that to do the coloring (this will be the case for git) for h, color in color_items: self.meta[h]['branch'] = color else: + # Otherwise we have to emulate the git coloring algorithm self._color_branches() uncolored = [revision for revision in self.revlist if 'branch' not in self.meta[revision]] if uncolored: @@ -843,28 +848,58 @@ class Extractor(object): return None def _color_branches(self): """Color branches in the order the tips occur.""" - # Note: this algorithm by itself is not correct. It is included - # on the assumption that subclasses will override this method - # to first extract all branch information possible from the repo, - # and then use this algorithm only for coloring of commits that - # are, for whatever reason, not caught for refname, refobj in sorted(self.refs.items(), key=lambda ref: self.revlist.index(ref[1])): self._branch_color(refobj, refname) + def get_commit_timestamps(self): + """Return mapping of commit hash -> Unix timestamp""" + return None def _branch_color(self, rev, color): if rev.startswith("ref"): return - while 'branch' not in self.meta[rev]: + # Get the commit timestamps if not already there + if not self.timestamps: + self.timestamps = self.get_commit_timestamps() + if not self.timestamps: + raise Fatal("Could not retrieve commit timestamps.") + # This ensures that a branch tip rev never gets colored over + self.child_timestamps[rev] = sys.maxsize + # This is used below to ensure that a branch color is never colored + # back to a tag + is_branch = color.startswith('refs/heads/') + # No need for a condition here because we will only be starting + # this while loop from an initial call with a branch tip or from + # a recursive call with a parent we know we want to color; the + # loop exit is controlled by filtering out the parents that are + # already colored properly + while True: + timestamp = self.timestamps[rev] self.meta[rev]['branch'] = color - parents = self.get_parents(rev) + # We only want to color back to parents that don't have a branch + # assigned or whose assigned branch was from an earlier commit + # than the one we're coloring from now; this emulates the git + # algorithm that assigns the color of the latest child commit to + # a parent that has multiple children; note also that tags take + # precedence over branches, so we never color back to a tag with + # a branch color + parents = [p for p in self.get_parents(rev) + if ('branch' not in self.meta[p]) + or ((not (is_branch and self.meta[p]['branch'].startswith('refs/tags/'))) + and (self.child_timestamps.get(p, 0) < timestamp))] if not parents: break elif len(parents) == 1: # This case avoids blowing Python's stack by recursing # too deep on large repos. rev = parents[0] + # Mark the parent with the timestamp of the child it is + # being colored from + self.child_timestamps[rev] = timestamp else: for parent in parents: + # Mark each parent with the timestamp of the child it is + # being colored from + self.child_timestamps[parent] = timestamp self._branch_color(parent, color) break def pre_extract(self, repo): @@ -1065,6 +1100,7 @@ class HgExtractor(Extractor): def __init__(self): super(HgExtractor, self).__init__() self.hgclient = None + self.tags_found = False class _hg_or_die: def __init__(self, client, *cmdline): self.client = client @@ -1147,43 +1183,32 @@ class HgExtractor(Extractor): if n == 'tip': # pseudo-tag for most recent commit continue # We don't want it self.refs['refs/tags/%s'%n] = h + self.tags_found = True # We have no annotated tags, so self.tags = [] # Conceivably it might be better to treat the commit message that # creates the tag as an annotation, but that's a job for the surgeon # later, not the extractor now. - def _color_branches(self): - # Hg stores branch and tag info in the metadata for each commit, - # so we just need to get it using hg log; to match the behavior of - # git fast-export, tags take precedence over branches, and we don't - # color branches here because they should only appear for the branch - # tip commits and their ancestors back to the first tag, which will - # be taken care of by - tags_found = [] - with self.hg_or_die("log", "--template", "{node|short} {tags}\\n") as fp: + def _hg_branch_items(self): + with self.hg_or_die("log", "--template", "{node|short} {branch}\\n") as fp: + for line in fp: + h, branch = polystr(line).strip().split() + yield (h, "refs/heads/" + branch) + def _branch_color_items(self): + if not self.tags_found: + # If we didn't find any tags, we can safely color commits using + # hg branch names, since hg stores them with commit metadata + return self._hg_branch_items() + # Otherwise we have to use the emulated git algorithm since git + # prioritizes tags over branches when coloring + return None + def get_commit_timestamps(self): + """Return mapping of commit hash -> Unix timestamp""" + timestamps = {} + with self.hg_or_die("log", "--template", "{node|short} {date|hgdate}\\n") as fp: for line in fp: fields = polystr(line).strip().split() - if (len(fields) > 1) and (fields[1] != "tip"): - h = fields[0] - tag = fields[1] - self.meta[h]['branch'] = "refs/tags/" + tag - tags_found.append((h, tag)) - if tags_found: - # If we found tags, we need to color the commits in between them, which - # won't be labeled with a tag in the hg log output above - for h, tag in tags_found: - for parent in self.get_parents(h): - self._branch_color(parent, "refs/tags/" + tag) - else: - # If we didn't find any tags, we need to go back and color the commits - # using branch names; we can't depend on the superclass algorithm to - # do this correctly - with self.hg_or_die("log", "--template", "{node|short} {branch}\\n") as fp: - for line in fp: - h, branch = polystr(line).strip().split() - self.meta[h]['branch'] = "refs/heads/" + branch - # This will take care of branch tip commits and their ancestors back - # to the first tag, if we found tags above; otherwise it should be a no-op - Extractor._color_branches(self) + timestamps[fields[0]] = int(fields[1]) + return timestamps def post_extract(self, repo): super(HgExtractor, self).post_extract(repo) self.hg_capture("update", "-C", "tip") -- GitLab From dae0b63093344bb2483f31b70d3df0e6f3c9e6a2 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 18:45:53 -0400 Subject: [PATCH 16/32] Rebuild bb, be5, and bt2 test .fi files to match git coloring algorithm. --- test/bb.fi | 4 ++-- test/be5.fi | 4 ++-- test/bt2.fi | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/bb.fi b/test/bb.fi index d4d86af00..b7f714fdd 100644 --- a/test/bb.fi +++ b/test/bb.fi @@ -3,8 +3,8 @@ mark :1 data 13 Test file 1. -reset refs/heads/master -commit refs/heads/master +reset refs/heads/test +commit refs/heads/test mark :2 author "J. Random Hacker" 1456976347 -0500 committer "J. Random Hacker" 1456976347 -0500 diff --git a/test/be5.fi b/test/be5.fi index e47d77acf..9fda81317 100644 --- a/test/be5.fi +++ b/test/be5.fi @@ -3,8 +3,8 @@ mark :1 data 13 Test file 1. -reset refs/tags/1.0 -commit refs/tags/1.0 +reset refs/tags/1.0a +commit refs/tags/1.0a mark :2 author "J. Random Hacker" 1456976347 -0500 committer "J. Random Hacker" 1456976347 -0500 diff --git a/test/bt2.fi b/test/bt2.fi index dad959d4d..d35844f38 100644 --- a/test/bt2.fi +++ b/test/bt2.fi @@ -3,8 +3,8 @@ mark :1 data 13 Test file 1. -reset refs/tags/before -commit refs/tags/before +reset refs/tags/after +commit refs/tags/after mark :2 author "J. Random Hacker" 1456976347 -0500 committer "J. Random Hacker" 1456976347 -0500 -- GitLab From fa0cd6d7067621fe4740b1ff5e09543f956d6606 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 18:55:30 -0400 Subject: [PATCH 17/32] Add hg be6 test. --- test/hg-be6-test | 93 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100755 test/hg-be6-test diff --git a/test/hg-be6-test b/test/hg-be6-test new file mode 100755 index 000000000..cdc467471 --- /dev/null +++ b/test/hg-be6-test @@ -0,0 +1,93 @@ +#!/bin/sh +# +# Test reposurgeon branch naming issue with hg repo including merge and tags +# +# This test cannot use the usual hg-to-fi script because it +# needs an hg repo with actual hg branches, not hg bookmarks; +# the hg convert utility converts git branches in a fast-import +# stream to hg bookmarks, so the hg-regress test target only +# tests correct handling of hg bookmarks, not hg branches + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 3." >/dev/null + try hg update test >/dev/null + try echo "Test file 4." > testfile4 + try hg add testfile4 >/dev/null + try hg commit --user "$USER" --date "1456976606 18000" -m "Commit test file 4." >/dev/null + try hg tag --local 1.0a >/dev/null + try hg update default >/dev/null + try hg merge -r 1 >/dev/null + try hg commit --user "$USER" --date "1456976715 18000" -m "Merge test branch." >/dev/null + try hg update default >/dev/null + try echo "Test file 5." > testfile5 + try hg add testfile5 >/dev/null + try hg commit --user "$USER" --date "1456976798 18000" -m "Commit test file 5." >/dev/null + try hg tag --local 1.0 >/dev/null + try echo "Second line." >> testfile5 + try hg commit --user "$USER" --date "1457895329 14400" -m "Add line to test file 5." >/dev/null + try hg update test >/dev/null + try echo "Second line." >> testfile4 + try hg commit --user "$USER" --date "1457895350 14400" -m "Add line to test file 4." >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From f8a72c60894c325d9139eefa61b42a5b86506343 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 18:56:03 -0400 Subject: [PATCH 18/32] Rebuild be6 test .fi file to match git coloring algorithm. --- test/be6.fi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/be6.fi b/test/be6.fi index 48987c094..061dc185d 100644 --- a/test/be6.fi +++ b/test/be6.fi @@ -17,7 +17,7 @@ mark :3 data 13 Test file 2. -commit refs/tags/1.0a +commit refs/tags/1.0 mark :4 author "J. Random Hacker" 1456976408 -0500 committer "J. Random Hacker" 1456976408 -0500 -- GitLab From 5ab9b36e68782862df0ad1b7a9cacce361301de2 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 19:08:45 -0400 Subject: [PATCH 19/32] Remove FIXME comments from test Makefile and organize extractor tests logically. All tests pass. --- test/Makefile | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/test/Makefile b/test/Makefile index 3eb4cbfd1..89189dc20 100644 --- a/test/Makefile +++ b/test/Makefile @@ -150,7 +150,6 @@ git-regress: @rm -f /tmp/regress # Test the git extractor with multiple git branches -# FIXME: bb fails GITBRANCHES = be bb git-regress-branches: @echo "=== Testing git-extractor with multiple git branches:" @@ -167,8 +166,7 @@ git-regress-branches: @rm -f /tmp/regress # Test the git extractor with merges -# FIXME: be6 and be5 fail -GITMERGES = be2 be3 be4 be6 be5 +GITMERGES = be2 be3 be4 be5 be6 git-regress-merges: @echo "=== Testing git-extractor with merges:" @if command -v git >/dev/null 2>&1 ; \ @@ -184,7 +182,6 @@ git-regress-merges: @rm -f /tmp/regress # Test the git extractor with tags -# FIXME: bt2 fails GITTAGS = bt bt2 git-regress-tags: @echo "=== Testing git-extractor with tags:" @@ -217,9 +214,7 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -# FIXME: bb fails (same as git extractor) -# FIXME: be5 fails (NOTE: be5 really belongs in hg-regress-merges below) -HGBRANCHES = be bb be5 +HGBRANCHES = be bb hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ @@ -241,8 +236,7 @@ hg-regress-branches: @rm -f /tmp/regress # Test the hg extractor with merges -# FIXME: be4 fails (note that it passes with the git extractor) -HGMERGES = be2 be3 be4 +HGMERGES = be2 be3 be4 be5 be6 hg-buildregress-merges: @for file in $(HGMERGES); do \ echo "Remaking $${file}.fi"; \ @@ -264,7 +258,6 @@ hg-regress-merges: @rm -f /tmp/regress # Test the hg extractor with tags -# FIXME: bt2 fails HGTAGS = bt bt2 hg-buildregress-tags: @for file in $(HGTAGS); do \ -- GitLab From b9be6cf7ce66b3c9ce3211386e08067a7a8baf97 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 20:05:23 -0400 Subject: [PATCH 20/32] Make sure that only branch tips have a child timestamp set to sys.maxsize. --- reposurgeon | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index db5183f34..c4fd4ca99 100755 --- a/reposurgeon +++ b/reposurgeon @@ -863,7 +863,8 @@ class Extractor(object): if not self.timestamps: raise Fatal("Could not retrieve commit timestamps.") # This ensures that a branch tip rev never gets colored over - self.child_timestamps[rev] = sys.maxsize + if rev not in self.child_timestamps: + self.child_timestamps[rev] = sys.maxsize # This is used below to ensure that a branch color is never colored # back to a tag is_branch = color.startswith('refs/heads/') -- GitLab From 01339982745fc2f064f984fbc72d61b8bf4bd541 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 22:10:27 -0400 Subject: [PATCH 21/32] Add comment on branch coloring algorithm. --- reposurgeon | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/reposurgeon b/reposurgeon index c4fd4ca99..26de6def1 100755 --- a/reposurgeon +++ b/reposurgeon @@ -848,6 +848,11 @@ class Extractor(object): return None def _color_branches(self): """Color branches in the order the tips occur.""" + # This algorithm is intended to emulate git's coloring algorithm; + # note that this includes emulating the fact that git's algorithm + # is not lossless--that is, it is possible to construct a git + # fast-import stream that git cannot reproduce on output with + # git fast-export for refname, refobj in sorted(self.refs.items(), key=lambda ref: self.revlist.index(ref[1])): self._branch_color(refobj, refname) -- GitLab From 88504d932506eedf79b0afbf0ca763745e67ab9a Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 22:16:02 -0400 Subject: [PATCH 22/32] Remove unneeded check in branch coloring code. --- reposurgeon | 2 -- 1 file changed, 2 deletions(-) diff --git a/reposurgeon b/reposurgeon index 26de6def1..49b40cb9e 100755 --- a/reposurgeon +++ b/reposurgeon @@ -860,8 +860,6 @@ class Extractor(object): """Return mapping of commit hash -> Unix timestamp""" return None def _branch_color(self, rev, color): - if rev.startswith("ref"): - return # Get the commit timestamps if not already there if not self.timestamps: self.timestamps = self.get_commit_timestamps() -- GitLab From 61e5262c61b05544ed27cdea80ebfb476f4b1028 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 22:21:28 -0400 Subject: [PATCH 23/32] Move commit timestamp retrieval to before ref iteration in branch coloring code. --- reposurgeon | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/reposurgeon b/reposurgeon index 49b40cb9e..17311e0d2 100755 --- a/reposurgeon +++ b/reposurgeon @@ -853,6 +853,10 @@ class Extractor(object): # is not lossless--that is, it is possible to construct a git # fast-import stream that git cannot reproduce on output with # git fast-export + # First retrieve the commit timestamps + self.timestamps = self.get_commit_timestamps() + if not self.timestamps: + raise Fatal("Could not retrieve commit timestamps.") for refname, refobj in sorted(self.refs.items(), key=lambda ref: self.revlist.index(ref[1])): self._branch_color(refobj, refname) @@ -860,11 +864,6 @@ class Extractor(object): """Return mapping of commit hash -> Unix timestamp""" return None def _branch_color(self, rev, color): - # Get the commit timestamps if not already there - if not self.timestamps: - self.timestamps = self.get_commit_timestamps() - if not self.timestamps: - raise Fatal("Could not retrieve commit timestamps.") # This ensures that a branch tip rev never gets colored over if rev not in self.child_timestamps: self.child_timestamps[rev] = sys.maxsize -- GitLab From a1902027ee414b3fefb770ebdbc9277e6baff7f0 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 22:29:39 -0400 Subject: [PATCH 24/32] Only construct timestamp dicts if needed for branch coloring. --- reposurgeon | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/reposurgeon b/reposurgeon index 17311e0d2..8d22d04bf 100755 --- a/reposurgeon +++ b/reposurgeon @@ -795,8 +795,8 @@ class Extractor(object): self.meta = {} # commit -> {'ci':committer, 'ai':author, 'branch':color} self.refs = collections.OrderedDict() # 'refs/class/name' -> commit self.tags = [] # Tag objects (annotated tags only) - self.timestamps = {} # commit -> Unix timestamp - self.child_timestamps = {} # commit -> timestamp of latest child (used for coloring) + self.timestamps = None # if used, commit -> Unix timestamp + self.child_timestamps = None # if used, commit -> timestamp of latest child (used for coloring) def analyze(self, baton): "Analyze a repository for streaming." self.find_revision_ids(baton) @@ -853,10 +853,12 @@ class Extractor(object): # is not lossless--that is, it is possible to construct a git # fast-import stream that git cannot reproduce on output with # git fast-export - # First retrieve the commit timestamps + # First retrieve the commit timestamps, they are used in _branch_color below self.timestamps = self.get_commit_timestamps() if not self.timestamps: raise Fatal("Could not retrieve commit timestamps.") + # This will be used in _branch_color below + self.child_timestamps = {} for refname, refobj in sorted(self.refs.items(), key=lambda ref: self.revlist.index(ref[1])): self._branch_color(refobj, refname) -- GitLab From 31482f967516534817d1b9c4e139c60d80af03cf Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 22:30:23 -0400 Subject: [PATCH 25/32] Add comment on which extractor methods must be overridden. --- reposurgeon | 2 ++ 1 file changed, 2 insertions(+) diff --git a/reposurgeon b/reposurgeon index 8d22d04bf..9860bdf83 100755 --- a/reposurgeon +++ b/reposurgeon @@ -864,6 +864,8 @@ class Extractor(object): self._branch_color(refobj, refname) def get_commit_timestamps(self): """Return mapping of commit hash -> Unix timestamp""" + # If _branch_color_items above is not overridden, this method + # must be (e.g., for hg) return None def _branch_color(self, rev, color): # This ensures that a branch tip rev never gets colored over -- GitLab From 9e43f4d91ff0be0a7537b2bf8371d2546af4a968 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Tue, 22 Mar 2016 23:46:15 -0400 Subject: [PATCH 26/32] Add note on _branch_color_items extractor method. --- reposurgeon | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index 9860bdf83..fb0031f6c 100755 --- a/reposurgeon +++ b/reposurgeon @@ -844,7 +844,8 @@ class Extractor(object): """Return iterable of (commit hash, branch name) pairs""" # The default is that this information is not retrievable directly # from the repo; if it is (e.g., for git), this method should be - # overridden to retrieve it + # overridden to retrieve it--note that every commit in the repo must + # be included return None def _color_branches(self): """Color branches in the order the tips occur.""" -- GitLab From c00e8bbf32ad79570bfc0b466cfa43635077c28c Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 00:09:54 -0400 Subject: [PATCH 27/32] Add hg-bb-alt script to demonstrate loss of information in hg to git transition. --- test/hg-bb-alt | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 test/hg-bb-alt diff --git a/test/hg-bb-alt b/test/hg-bb-alt new file mode 100755 index 000000000..ac57c65cc --- /dev/null +++ b/test/hg-bb-alt @@ -0,0 +1,79 @@ +#!/bin/sh +# Demonstration of information loss in hg to git transition + +# This script constructs an hg repo that, when written to a git +# fast-import stream by reposurgeon and used to initialize a git +# repository, cannot be reproduced by git fast-export. This is +# because git does not store explicit branch information for +# every commit the way hg does; instead, it computes refs for +# each commit using a coloring algorithm that cannot properly +# reconstruct refs for a parent commit that has more than one +# child and does not have the same ref as its latest child. That +# is the case for the first commit in this repo. + +# Required because $PWD seems to be undefined in Gitlab's CI environment +BIN=`realpath ..` + +build=True +stream=True +cleanup=True + +pecho() { printf %s\\n "$*"; } +log() { pecho "$@"; } +error() { log "ERROR: $@" >&2; } +fatal() { error "$@"; exit 1; } +try() { "$@" || fatal "'$@' failed"; } + +while getopts nr opt +do + case $opt in + n) build=True; stream=False ; cleanup=False ;; + r) build=False; stream=True ; cleanup=False ;; + esac +done +shift $(($OPTIND - 1)) + +testrepo=${1:-/tmp/test-repo} + +USER='"J. Random Hacker" ' + +# Should we build the repo? +if [ $build = True ] +then + # Build hg test repo with multiple hg branches + try rm -fr $testrepo + try hg init $testrepo || exit 1 + try cd $testrepo >/dev/null + # The weird --date incantation in the hg commits is to ensure that the commit + # timestamps match those in the .fi file; the 18000 is because hg wants the time zone + # offset in seconds west of UTC, for what reason I know not--I know there are weird + # time zones in the world but I didn't think any of them got down to one-second + # granularity in offsets... + ( + try echo "Test file 1." > testfile1 + try hg add testfile1 >/dev/null + try hg commit --user "$USER" --date "1456976347 18000" -m "Commit test file 1." >/dev/null + try echo "Test file 3." > testfile3 + try hg add testfile3 >/dev/null + try hg commit --user "$USER" --date "1456976408 18000" -m "Commit test file 3." >/dev/null + try hg update -r 0 >/dev/null + try hg branch test >/dev/null + try echo "Test file 2." > testfile2 + try hg add testfile2 >/dev/null + try hg commit --user "$USER" --date "1456976475 18000" -m "Commit test file 2." >/dev/null + try hg update default >/dev/null + ) || exit 1 + try cd - >/dev/null +fi + +# Should we stream the repo? +if [ $stream = True ] +then + try ${BIN}/reposurgeon "read $testrepo" "sourcetype git" "write -" +fi + +# Should we clean up the test directory +if [ $cleanup = True ] +then + try rm -fr $testrepo +fi -- GitLab From d19bab17f8dbb19295b3500fdd0d0319ded12154 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 00:36:52 -0400 Subject: [PATCH 28/32] Add note on hg branch coloring. --- reposurgeon | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index fb0031f6c..3a6dc6ba6 100755 --- a/reposurgeon +++ b/reposurgeon @@ -1204,7 +1204,11 @@ class HgExtractor(Extractor): def _branch_color_items(self): if not self.tags_found: # If we didn't find any tags, we can safely color commits using - # hg branch names, since hg stores them with commit metadata + # hg branch names, since hg stores them with commit metadata; + # note, however, that the coloring this will produce might not + # be reproducible if the repo is written to a fast-import stream + # and used to construct a git repo, because hg branches can store + # colorings that do not match the git coloring algorithm return self._hg_branch_items() # Otherwise we have to use the emulated git algorithm since git # prioritizes tags over branches when coloring -- GitLab From 3f3455bc001711d97faf8918689da8ab8e2d4698 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 01:14:34 -0400 Subject: [PATCH 29/32] Use actual hg branches for coloring commits that are not colored by tags. --- reposurgeon | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/reposurgeon b/reposurgeon index 3a6dc6ba6..c47904081 100755 --- a/reposurgeon +++ b/reposurgeon @@ -859,7 +859,7 @@ class Extractor(object): if not self.timestamps: raise Fatal("Could not retrieve commit timestamps.") # This will be used in _branch_color below - self.child_timestamps = {} + self.child_timestamps = self._init_child_timestamps() for refname, refobj in sorted(self.refs.items(), key=lambda ref: self.revlist.index(ref[1])): self._branch_color(refobj, refname) @@ -868,6 +868,12 @@ class Extractor(object): # If _branch_color_items above is not overridden, this method # must be (e.g., for hg) return None + def _init_child_timestamps(self): + """Return initial mapping of commit hash -> timestamp of child it is colored from""" + # This method should only be overridden if it is needed to enforce a + # commit coloring that might not match the git algorithm (e.g., in hg + # when actual hg branches are present) + return {} def _branch_color(self, rev, color): # This ensures that a branch tip rev never gets colored over if rev not in self.child_timestamps: @@ -1221,6 +1227,23 @@ class HgExtractor(Extractor): fields = polystr(line).strip().split() timestamps[fields[0]] = int(fields[1]) return timestamps + def _init_child_timestamps(self): + """Return initial mapping of commit hash -> timestamp of child it is colored from""" + results = {} + for h, branch in self._hg_branch_items(): + # Fill in the branch as a default + self.meta[h]['branch'] = branch + # Branch tips can't be colored, but ancestors can be if tags are present, + # so we only fill in the branch tips here + if self.refs[branch] == h: + results[h] = sys.maxsize + return results + def _branch_color(self, rev, color): + # Branches are not colored here (they already were in init_child_timestamps above) + if color.startswith('refs/heads/'): + return + # This takes care of coloring tags and their ancestors + Extractor._branch_color(self, rev, color) def post_extract(self, repo): super(HgExtractor, self).post_extract(repo) self.hg_capture("update", "-C", "tip") -- GitLab From 615fe92887a3974571870dbdc0807b54ba67ac11 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 01:18:48 -0400 Subject: [PATCH 30/32] Add bb-alt test demonstrating hg repo with coloring that git cannot reproduce. --- test/Makefile | 3 +- test/bb-alt.fi | 48 ++++++++++++++++++++++++++++++ test/{hg-bb-alt => hg-bb-alt-test} | 0 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 test/bb-alt.fi rename test/{hg-bb-alt => hg-bb-alt-test} (100%) diff --git a/test/Makefile b/test/Makefile index 89189dc20..430e3a22d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -214,7 +214,8 @@ hg-regress: @rm -f /tmp/regress # Test the hg extractor with multiple hg branches -HGBRANCHES = be bb +# NOTE: the bb-alt test demonstrates an hg repo with coloring that git cannot reproduce +HGBRANCHES = be bb bb-alt hg-buildregress-branches: @for file in $(HGBRANCHES); do \ echo "Remaking $${file}.fi"; \ diff --git a/test/bb-alt.fi b/test/bb-alt.fi new file mode 100644 index 000000000..d4d86af00 --- /dev/null +++ b/test/bb-alt.fi @@ -0,0 +1,48 @@ +blob +mark :1 +data 13 +Test file 1. + +reset refs/heads/master +commit refs/heads/master +mark :2 +author "J. Random Hacker" 1456976347 -0500 +committer "J. Random Hacker" 1456976347 -0500 +data 20 +Commit test file 1. +M 100644 :1 testfile1 + +blob +mark :3 +data 13 +Test file 3. + +commit refs/heads/master +mark :4 +author "J. Random Hacker" 1456976408 -0500 +committer "J. Random Hacker" 1456976408 -0500 +data 20 +Commit test file 3. +from :2 +M 100644 :3 testfile3 + +blob +mark :5 +data 13 +Test file 2. + +commit refs/heads/test +mark :6 +author "J. Random Hacker" 1456976475 -0500 +committer "J. Random Hacker" 1456976475 -0500 +data 20 +Commit test file 2. +from :2 +M 100644 :5 testfile2 + +reset refs/heads/master +from :4 + +reset refs/heads/test +from :6 + diff --git a/test/hg-bb-alt b/test/hg-bb-alt-test similarity index 100% rename from test/hg-bb-alt rename to test/hg-bb-alt-test -- GitLab From 7cfa6aa66c278a8207a83246ce53d994ebc3843b Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 01:24:10 -0400 Subject: [PATCH 31/32] Fix up comments on hg coloring. --- reposurgeon | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/reposurgeon b/reposurgeon index c47904081..508a1a26e 100755 --- a/reposurgeon +++ b/reposurgeon @@ -1209,15 +1209,18 @@ class HgExtractor(Extractor): yield (h, "refs/heads/" + branch) def _branch_color_items(self): if not self.tags_found: - # If we didn't find any tags, we can safely color commits using + # If we didn't find any tags, we can safely color all commits using # hg branch names, since hg stores them with commit metadata; # note, however, that the coloring this will produce might not # be reproducible if the repo is written to a fast-import stream # and used to construct a git repo, because hg branches can store # colorings that do not match the git coloring algorithm return self._hg_branch_items() - # Otherwise we have to use the emulated git algorithm since git - # prioritizes tags over branches when coloring + # Otherwise we have to use the emulated git algorithm to color + # any commits that are tags or the ancestors of tags, since git + # prioritizes tags over branches when coloring; we will color + # commits that are not in the ancestor tree of any tag in + # _init_child_timestamps below, using the hg branch names return None def get_commit_timestamps(self): """Return mapping of commit hash -> Unix timestamp""" @@ -1234,7 +1237,7 @@ class HgExtractor(Extractor): # Fill in the branch as a default self.meta[h]['branch'] = branch # Branch tips can't be colored, but ancestors can be if tags are present, - # so we only fill in the branch tips here + # so we only fill in the branch tips with child timestamps here if self.refs[branch] == h: results[h] = sys.maxsize return results -- GitLab From 7d73eef5b3ac7ade74c977a8d7cf0475dc3ab475 Mon Sep 17 00:00:00 2001 From: Peter Donis Date: Wed, 23 Mar 2016 01:28:53 -0400 Subject: [PATCH 32/32] More comment fixups re hg coloring. --- reposurgeon | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/reposurgeon b/reposurgeon index 508a1a26e..64e409837 100755 --- a/reposurgeon +++ b/reposurgeon @@ -1234,10 +1234,15 @@ class HgExtractor(Extractor): """Return initial mapping of commit hash -> timestamp of child it is colored from""" results = {} for h, branch in self._hg_branch_items(): - # Fill in the branch as a default + # Fill in the branch as a default; this will ensure that any commit that + # is not in the ancestor tree of a tag will get the correct hg branch name, + # even if the hg branch coloring is not compatible with the git coloring + # algorithm self.meta[h]['branch'] = branch - # Branch tips can't be colored, but ancestors can be if tags are present, - # so we only fill in the branch tips with child timestamps here + # Fill in the branch tips with child timestamps to ensure that they can't + # be over-colored (other commits in the ancestor tree of a branch can be + # over-colored if they are in a tag's ancestor tree, so we don't fill in + # any child timestamp for them here) if self.refs[branch] == h: results[h] = sys.maxsize return results -- GitLab