%PDF- %PDF-
| Direktori : /www/old2/_music/ |
| Current File : //www/old2/_music/svn2hg |
#! /usr/bin/env python
# yasvn2hg -- yet another SVN to HG converter
#
# Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA
import locale
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import mercurial.ui
import mercurial.hg
import mercurial.cmdutil
import mercurial.commands
import mercurial.node
def import_etree():
    """Return the first ``ElementTree`` class that can be imported.

    The candidate modules are tried in order: the C and pure-Python
    implementations under ``xml.etree`` first, then the stand-alone
    ``cElementTree`` and ``elementtree`` packages.

    Raises ImportError if none of the candidates can be imported.
    """
    candidates = [
        "xml.etree.cElementTree", "xml.etree.ElementTree", "cElementTree",
        "elementtree.ElementTree"]
    for module_name in candidates:
        try:
            # A non-empty fromlist makes __import__ return the leaf module
            # instead of the top-level package.
            module = __import__(module_name, None, None, ["ElementTree"])
        except ImportError:
            continue
        return module.ElementTree
    raise ImportError("No ElementTree module found")
# Module-level alias for whichever ElementTree class import_etree() found;
# used below to parse the XML output of "svn log" and "svn info".
ElementTree = import_etree()
class Error(Exception):
    """Base class for the errors raised by this script.

    main() catches this type, prints the message and exits with status 1.
    """
class ClassificationError(Error):
    """Raised when a changeset cannot be classified and --sloppy is off."""
class BranchConfusionError(Error):
    """Raised when a changeset refers to a branch the converter can't place."""
class Svn2HgConverter(object):
    """Replay the history of a Subversion URL into a new Mercurial repository.

    Each svn revision is classified (ordinary commit, branch, tag, ...) from
    the paths it touches and then handled by the matching
    ``_handle_<kind>_changeset`` method.  Progress and debug information is
    written to ``<hg_dest>.log``.
    """

    # All of these patterns are applied with match() to paths from which the
    # svn base (the part of the URL below the repository root) was stripped.
    _branch_re_str = "(/(?:trunk|branches/[^/]+))"
    _branch_re = re.compile(_branch_re_str)
    _tag_path_re = re.compile("/tags/([^/]+)$")
    _branch_path_re = re.compile("%s$" % _branch_re_str)
    _ordinary_path_re = re.compile("%s/(.+)" % _branch_re_str)

    def __init__(self, svn_url, hg_dest, debug, use_named_branches):
        """Set up a conversion of *svn_url* into the new directory *hg_dest*.

        debug -- also echo debug messages to standard output.
        use_named_branches -- label hg commits with the svn branch name.

        Raises Error if *hg_dest* already exists.
        """
        if os.path.exists(hg_dest):
            raise Error("hg dest dir %r already exists" % hg_dest)
        self._debugflag = debug
        self._use_named_branches = use_named_branches
        self._hg_dest = hg_dest
        self._hg_ui = mercurial.ui.ui(quiet=not debug)
        self._hg_repo = None  # created by the first ordinary changeset
        self._log_path = "%s.log" % self._hg_dest
        self._log_fp = open(self._log_path, "w")
        self._svn_url = svn_url
        self._svn_root = self._get_svn_root()
        self._svn_base = self._svn_url[len(self._svn_root):]
        self._user_map = {}  # svn user --> hg user
        self._branch_map = {}  # svn branch --> [(svn_rev, hg_rev)]
        self._first = True  # no svn checkout has been made yet
        self._warnings = 0
        for x in ["svn_url", "svn_root", "svn_base", "hg_dest"]:
            self._debug("%s: %s" % (x, getattr(self, "_%s" % x)))

    def convert(
            self, user_file, revrange, merges_to_add, sloppy, no_structure):
        """Convert the svn revisions in *revrange* to hg changesets.

        user_file -- path of the "svn user = hg user" mapping file.
        revrange -- svn revision range, e.g. "1:HEAD".
        merges_to_add -- dict mapping an hg revision number to the extra
            (second) parent it should get.
        sloppy -- skip unclassifiable changesets instead of raising
            ClassificationError.
        no_structure -- treat every path as an ordinary commit on /trunk
            instead of looking for trunk/branches/tags.
        """
        self._no_structure = no_structure
        self._read_user_file(user_file)
        log_entries = self._get_log_entries(revrange)
        self._check_logentries(log_entries, sloppy)
        self._check_users(log_entries)
        total = len(log_entries)
        for (i, log_entry) in enumerate(log_entries):
            rev = self._get_rev(log_entry)
            self._info("Revision %d (%d of %d)" % (rev, i + 1, total))
            kinds = self._classify_changeset(log_entry)
            if len(kinds) != 1 or kinds == set(["unknown"]):
                self._warning("Ignoring unclassifiable changeset")
            else:
                kind = list(kinds)[0]
                handler = getattr(self, "_handle_%s_changeset" % kind)
                handler(log_entry, rev, revrange, merges_to_add)
            sys.stdout.flush()
        # The working directory is only changed to the hg destination by the
        # first checkout.  Without one we are still in the caller's directory
        # and must not delete any .svn directories found there.
        if not self._first:
            self._remove_dot_svn_dirs()
        self._info("\nBranches in hg repository after conversion:")
        for branch in sorted(self._branch_map):
            (svn_rev, hg_rev) = self._branch_map[branch][-1]
            self._info(" - %s (head: %s)" % (branch, hg_rev))
        if self._warnings > 0:
            self._info(
                "\nThere were %d warnings during conversion. Check the log"
                " file\n%r." % (self._warnings, self._log_path))

    def generate_user_file_template(self, user_file_template):
        """Write an "author = author" line for every svn author to a file."""
        log_entries = self._get_log_entries("1:HEAD")
        authors = self._get_authors(log_entries)
        fp = open(user_file_template, "w")
        try:
            for author in sorted(authors):
                fp.write("%s = %s\n" % (author, author))
        finally:
            fp.close()

    def _check_logentries(self, log_entries, sloppy):
        """Raise ClassificationError for an unclassifiable changeset.

        Nothing is raised when *sloppy* is true.
        """
        for log_entry in log_entries:
            kinds = self._classify_changeset(log_entry)
            if len(kinds) != 1 or kinds == set(["unknown"]):
                rev = self._get_rev(log_entry)
                msg = "Could not classify changeset %d (%s)" % (
                    rev, ", ".join([str(x) for x in kinds]))
                if not sloppy:
                    raise ClassificationError(msg)

    def _check_users(self, log_entries):
        """Exit with status 1 if an author is missing from the user map."""
        unknown = self._get_authors(log_entries) - set(self._user_map)
        if unknown:
            self._info("Unknown authors:\n")
            for author in sorted(unknown):
                self._info(author)
            sys.exit(1)

    def _classify_changeset(self, log_entry):
        """Return the set of kinds that the paths of *log_entry* suggest.

        A cleanly classified changeset yields a one-element set containing
        "ordinary", "branch", "tag", "cvs2svn_branch", "cvs2svn_tag",
        "initial" or "empty".  Several elements, or "unknown", mean that the
        changeset could not be classified.
        """
        # ordinary commit:
        #   A {base}/{branch}/a
        #   D {base}/{branch}/b
        #   M {base}/{branch}/c
        #   A {base}/{branch}/d (copy from {base}/{branch}/e:4711)
        #
        # new/renamed/removed branches:
        #   A {base}/{branch} (copy from {base}/{branch}:4711)
        #   D {base}/{branch}
        #
        # new/renamed/removed tags:
        #   A {base}/tags/x (copy from {base}/{branch}:4711)
        #   D {base}/tags/x
        #
        # new branch manufactured by cvs2svn:
        #   D {base}/branches/x/foo/fie
        #   D {base}/branches/x/foo/fum
        #   A {base}/branches/x (copy from {base}/{branch}:4711)
        #
        # new tag manufactured by cvs2svn:
        #   D {base}/tags/x/foo/fie
        #   D {base}/tags/x/foo/fum
        #   A {base}/tags/x (copy from {base}/{branch}:4711)
        path_nodes = log_entry.findall("paths/path")
        paths = [self._path_without_base(x.text) for x in path_nodes]
        kinds = set()
        for (path_node, path) in zip(path_nodes, paths):
            if self._tag_path_re.match(path):
                kinds.add("tag")
            elif self._branch_path_re.match(path):
                kinds.add("branch")
            elif self._ordinary_path_re.match(path):
                kinds.add("ordinary")
            elif path.startswith("/tags/") and \
                    path_node.attrib["action"] == "M":
                self._debug(" - modified tag detected (ignoring)")
            elif self._no_structure:
                kinds.add("ordinary")
            else:
                kinds.add("unknown")

        # Handle strange changesets manufactured by cvs2svn.
        logmsg = self._get_msg(log_entry)
        prefix = "This commit was manufactured by cvs2svn to create "
        if logmsg.startswith(prefix + "tag"):
            kinds.clear()
            kinds.add("cvs2svn_tag")
        elif logmsg.startswith(prefix + "branch"):
            kinds.clear()
            kinds.add("cvs2svn_branch")

        if len(kinds) == 0:
            kinds.add("empty")
        if kinds == set(["branch", "ordinary"]) and \
                self._branch_path_re.match(os.path.commonprefix(paths)):
            # Ordinary commit touching the top directory of the branch.
            kinds.remove("branch")
        if "unknown" in kinds:
            if "/trunk" in paths:
                kinds = set(["initial"])
        self._debug(
            " - changeset %s kind: %s" % (
                log_entry.attrib["revision"], ", ".join(sorted(kinds))))
        return kinds

    def _commit(self, log_entry, rev, branch, merges_to_add):
        """Commit the current working directory to hg as svn revision *rev*.

        The parent is the latest hg revision recorded for *branch*; a second
        parent is added when *merges_to_add* has an entry for the hg revision
        about to be created.

        Raises BranchConfusionError if *branch* is not in the branch map.
        """
        if branch not in self._branch_map:
            raise BranchConfusionError(
                "Commit in revision %d is to unknown branch %s" % (
                    rev, branch))
        hg_parent = self._find_hg_rev_on_branch(branch, rev)
        next_rev = len(self._hg_repo.changelog)
        other_hg_parent = merges_to_add.get(next_rev)
        self._debug(
            " - hg rev: %s (parents: %s, %s)" % (
                next_rev, hg_parent, other_hg_parent))
        mercurial.commands.debugrebuildstate(
            self._hg_ui,
            self._hg_repo,
            self._hg_repo.changectx(hg_parent).node())
        mercurial.commands.debugsetparents(
            self._hg_ui, self._hg_repo, hg_parent, other_hg_parent)
        if self._use_named_branches:
            if branch == "/trunk":
                label = "trunk"
            else:
                # Remove /branches/ to keep branch name.
                label = branch[len("/branches/"):]
            mercurial.commands.branch(self._hg_ui, self._hg_repo, label)
        if self._no_structure:
            copy_prefix = self._svn_base
        else:
            copy_prefix = "%s%s" % (self._svn_base, branch)
        for path in log_entry.findall("paths/path"):
            if path.attrib["action"] == "A" and \
                    "copyfrom-path" in path.attrib:
                copy_from = path.attrib["copyfrom-path"]
                copy_to = path.text
                if copy_to.startswith(copy_prefix) and \
                        copy_from.startswith(copy_prefix):
                    # "+ 1" also drops the "/" that follows the prefix.
                    cf = copy_from[len(copy_prefix) + 1:]
                    ct = copy_to[len(copy_prefix) + 1:]
                    mercurial.commands.copy(
                        self._hg_ui, self._hg_repo, cf, ct, after=True)
                else:
                    self._warning(
                        "Don't know how to handle copy from %s"
                        " to %s (ignoring)" % (copy_from, copy_to))
        mercurial.cmdutil.addremove(
            self._hg_repo,
            opts={"quiet": True, "exclude": [".svn/**", "**/.svn/**"]})
        self._hg_repo.commit(
            text=self._get_msg(log_entry).encode("utf-8"),
            user=self._get_user(log_entry).encode("utf-8"),
            date=self._get_date(log_entry),
            force=False)  # Ignore empty commits.
        hg_rev = len(self._hg_repo.changelog) - 1
        if hg_rev == next_rev:
            self._branch_map[branch].append((rev, hg_rev))
        # Otherwise the commit was empty and nothing was recorded.

    def _debug(self, msg):
        """Write *msg* to the log file; echo it to stdout in debug mode."""
        self._log_fp.write("%s\n" % msg)
        if self._debugflag:
            # Single-argument parenthesised form: valid as both the Python 2
            # print statement and the Python 3 print function.
            print(msg)

    def _find_hg_rev_on_branch(self, svn_branch, svn_rev):
        """Return the newest hg rev on *svn_branch* made at or before *svn_rev*.

        Raises BranchConfusionError if the branch has no such revision.
        """
        for (srev, hrev) in reversed(self._branch_map[svn_branch]):
            if srev <= svn_rev:
                return hrev
        # An assert would be stripped under "python -O" and let None through.
        raise BranchConfusionError(
            "%s not found on %s" % (svn_rev, svn_branch))

    def _get_author(self, log_entry):
        """Return the svn author of *log_entry*, or "unknown" if it has none."""
        author = log_entry.findtext("author")
        if author is None:
            return "unknown"
        return author

    def _get_authors(self, log_entries):
        """Return the set of svn author names found in *log_entries*."""
        return set([self._get_author(x) for x in log_entries])

    def _get_branch_of_commit(self, log_entry):
        """Return the branch root ("/trunk" or "/branches/x") of a commit.

        Only the first path of the changeset is examined.
        """
        if self._no_structure:
            return '/trunk'
        first_path = self._path_without_base(log_entry.findtext("paths/path"))
        m = self._branch_re.match(first_path)
        return m.group(1)

    def _get_date(self, log_entry):
        """Return the svn timestamp as "<date> <time> UTC", without fraction."""
        m = re.match(r"(.*)T(.*)\..*Z", log_entry.findtext("date"))
        return "%s %s UTC" % (m.group(1), m.group(2))

    def _svn_xml(self, *args):
        """Run svn with *args* and return its stdout parsed as an ElementTree."""
        (fd, xml_file) = tempfile.mkstemp()
        # mkstemp() hands back an open descriptor; svn's output is written
        # through the file name, so the descriptor is closed right away.
        os.close(fd)
        try:
            self._svn_to_file(xml_file, *args)
            et = ElementTree()
            et.parse(xml_file)
        finally:
            os.remove(xml_file)
        return et

    def _get_log_entries(self, revrange):
        """Return the svn <logentry> nodes for *revrange*, sorted by revision."""
        et = self._svn_xml(
            "log", "-r", revrange, "-v", "--xml", self._svn_url)
        return sorted(et.findall("logentry"), key=self._get_rev)

    def _get_msg(self, log_entry):
        """Return the stripped log message, or a placeholder if it is empty."""
        msg = (log_entry.findtext("msg") or "").strip()
        if msg:
            return msg
        return "[empty log message]"

    def _get_rev(self, log_entry):
        """Return the svn revision number of *log_entry* as an int."""
        return int(log_entry.attrib["revision"])

    def _get_svn_root(self):
        """Return the repository root URL that self._svn_url belongs to.

        For file:// URLs the root is taken to be the longest leading part of
        the path that exists on disk; otherwise "svn info" is asked.
        """
        m = re.match("file://(.*)", self._svn_url)
        if not m:
            et = self._svn_xml("--xml", "info", self._svn_url)
            return et.findtext("entry/repository/root")
        path = m.group(1)
        while not os.path.exists(path):
            parent = os.path.split(path)[0]
            if parent == path:
                # Nothing left to strip; without this check the loop would
                # never terminate for a path with no existing ancestor.
                raise Error(
                    "could not find svn repository root of %r"
                    % self._svn_url)
            path = parent
        return "file://%s" % path

    def _get_user(self, log_entry):
        """Return the hg user for the author of *log_entry*.

        A missing author is looked up as "unknown", the same name that
        _get_authors() reports for it; unmapped authors become "unknown".
        """
        return self._user_map.get(self._get_author(log_entry), "unknown")

    def _handle_branch_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Record branches created by copying an existing branch.

        No hg commit is made; the new branch just starts out at the hg
        revision it was copied from.  Deletions are ignored.
        """
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            if action == "A" and "copyfrom-path" in path_node.attrib:
                new_branch = self._path_without_base(path_node.text)
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                self._info(
                    " - new branch %s (from %s)" % (new_branch, from_branch))
                if from_branch not in self._branch_map:
                    raise BranchConfusionError(
                        "Branch unknown: %s" % from_branch)
                from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                from_hg_rev = self._find_hg_rev_on_branch(
                    from_branch, from_svn_rev)
                if new_branch in self._branch_map:
                    raise BranchConfusionError(
                        "Branch already exists: %s" % new_branch)
                self._branch_map[new_branch] = [(from_svn_rev, from_hg_rev)]
            elif action == "D":
                pass
            else:
                self._warning(
                    "Don't know how to handle action %s on %r in changeset"
                    " %d" % (action, path_node.text, rev))

    def _clean_up_cvs2svn_log_entry(self, log_entry):
        """Keep only the path nodes that add a branch or tag root.

        *log_entry* is modified in place.
        """
        paths_node = log_entry.find("paths")
        for node in paths_node.findall("path"):
            # The patterns are anchored at the start of the path, so the svn
            # base has to be stripped first, as everywhere else in the class.
            path = self._path_without_base(node.text)
            adds_root = node.attrib["action"] == "A" and (
                self._branch_path_re.match(path) or
                self._tag_path_re.match(path))
            if not adds_root:
                paths_node.remove(node)

    def _handle_cvs2svn_branch_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Handle a branch manufactured by cvs2svn."""
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_branch_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_cvs2svn_tag_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Handle a tag manufactured by cvs2svn."""
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_tag_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_initial_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Ignore the changeset that creates the /trunk directory."""
        self._info(" - initial changeset (ignoring)")

    def _handle_empty_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Ignore a changeset for which no path was classified."""
        self._info(" - empty changeset (ignoring)")

    def _handle_ordinary_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Bring the svn working copy to *rev* and commit the result to hg.

        The first call checks out from svn, changes the working directory to
        the hg destination and creates the hg repository there; later calls
        "svn switch" the existing working copy.
        """
        branch = self._get_branch_of_commit(log_entry)
        self._info(" - branch: %s" % branch)
        self._print_log(log_entry)
        if self._first:
            self._first = False
            self._branch_map["/trunk"] = [(rev, -1)]
            self._info(" - checking out from svn")
            # The last component is the end of the range; this also copes
            # with a range that names a single revision.
            stoprev = revrange.split(":")[-1]
            if self._no_structure:
                url = "%s@%s" % (self._svn_url, stoprev)
            else:
                url = "%s/trunk@%s" % (self._svn_url, stoprev)
            self._svn("checkout", "-r", str(rev), url, self._hg_dest)
            os.chdir(self._hg_dest)
            self._info(" - committing to hg")
            mercurial.commands.init(self._hg_ui)
            self._hg_repo = mercurial.hg.repository(self._hg_ui, ".")
        else:
            self._info(" - updating from svn")
            if self._no_structure:
                url = self._svn_url
            else:
                url = "%s%s" % (self._svn_url, branch)
            self._svn("switch", "-r", str(rev), url)
            self._info(" - committing to hg")
        self._commit(log_entry, rev, branch, merges_to_add)

    def _handle_tag_changeset(self, log_entry, rev, revrange, merges_to_add):
        """Create or remove hg tags for the tag paths of *log_entry*.

        Every tag operation results in an hg changeset that is recorded on
        /trunk.  An action other than "A" or "D" produces a warning and stops
        the processing of this changeset.
        """
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            tagpath = self._path_without_base(path_node.text)
            tagname = self._tag_path_re.match(tagpath).group(1)
            if action == "A":
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                m = self._tag_path_re.match(from_branch)
                if m:
                    # Renamed tag.
                    old_tagname = m.group(1)
                    tagnode = self._hg_repo.tags()[old_tagname]
                else:
                    from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                    from_hg_rev = self._find_hg_rev_on_branch(
                        from_branch, from_svn_rev)
                    tagnode = self._hg_repo.changectx(from_hg_rev).node()
                latest_trunk_hg_rev = \
                    self._find_hg_rev_on_branch("/trunk", rev)
                mercurial.commands.debugsetparents(
                    self._hg_ui, self._hg_repo, latest_trunk_hg_rev)
                self._info(" - new tag: %s" % tagname)
            elif action == "D":
                self._info(" - removed tag: %s" % tagname)
                tagnode = mercurial.node.nullid
            else:
                self._warning(
                    "Don't know how to handle action %s for path"
                    " %r in changeset %d" % (action, path_node.text, rev))
                return
            self._hg_repo.tag(
                tagname,
                tagnode,
                self._get_msg(log_entry).encode("utf-8"),
                False,
                self._get_user(log_entry).encode("utf-8"),
                self._get_date(log_entry))
            hg_rev = len(self._hg_repo.changelog) - 1
            self._branch_map["/trunk"].append((rev, hg_rev))

    def _info(self, msg):
        """Write *msg* to the log file and to standard output."""
        self._log_fp.write("%s\n" % msg)
        # See _debug() for why the parenthesised form is used.
        print(msg)

    def _path_without_base(self, path):
        """Return *path* with the leading svn base removed."""
        return path[len(self._svn_base):]

    def _print_log(self, log_entry):
        """Print the author and a shortened first line of the log message."""
        log_lines = self._get_msg(log_entry).split("\n", 1)
        if len(log_lines) > 1 or len(log_lines[0]) > 70:
            m = log_lines[0][:70] + "[...]"
        else:
            m = log_lines[0]
        self._info(" - author: %s" % self._get_user(log_entry))
        encoded_log = m.encode(locale.getpreferredencoding(), "replace")
        self._info(" - log: %s" % encoded_log)

    def _read_user_file(self, user_file):
        """Load "svn user = hg user" lines from *user_file* into the user map.

        Lines without a "=" are skipped.
        """
        fp = open(user_file)
        try:
            for line in fp:
                if "=" not in line:
                    continue
                (svn_user, hg_user) = line.split("=", 1)
                self._user_map[svn_user.strip()] = hg_user.strip()
        finally:
            fp.close()

    def _remove_dot_svn_dirs(self):
        """Delete every .svn directory below the current working directory."""
        for (dirpath, dirnames, filenames) in os.walk("."):
            if ".svn" in dirnames:
                shutil.rmtree(os.path.join(dirpath, ".svn"))
                # Keep os.walk from descending into the removed directory.
                dirnames.remove(".svn")

    def _run(self, cmdline, stdout_file=None):
        """Run *cmdline*, optionally redirecting its stdout to *stdout_file*.

        Raises Error if the command exits with a non-zero status.
        """
        self._debug("Run: %r" % cmdline)
        stdout_fp = None
        if stdout_file is not None:
            stdout_fp = open(stdout_file, "w")
        try:
            returncode = subprocess.Popen(cmdline, stdout=stdout_fp).wait()
        finally:
            if stdout_fp is not None:
                stdout_fp.close()
        if returncode != 0:
            raise Error("failed running %r" % cmdline)

    def _svn(self, *args):
        """Run svn with *args*, adding -q unless in debug mode."""
        cmdline = ["svn"]
        if not self._debugflag:
            cmdline.append("-q")
        cmdline.extend(args)
        return self._run(cmdline)

    def _svn_to_file(self, stdout_file, *args):
        """Run svn with *args*, writing its stdout to *stdout_file*."""
        return self._run(["svn"] + list(args), stdout_file)

    def _warning(self, msg):
        """Report *msg* as a warning and count it."""
        self._info(" - *** Warning: %s" % msg)
        self._warnings += 1
def _read_merges_file(merges_file):
    """Return {child hg rev: extra parent hg rev} read from *merges_file*.

    Each non-blank line holds two whitespace-separated integers, the child
    revision followed by the parent revision.
    """
    merges_to_add = {}
    fp = open(merges_file)
    try:
        for line in fp:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            (child, parent) = fields
            merges_to_add[int(child)] = int(parent)
    finally:
        fp.close()
    return merges_to_add


def main():
    """Parse the command line and run the conversion.

    Exits with status 2 on bad usage and with status 1 when the converter
    raises Error (including ClassificationError).
    """
    option_parser = optparse.OptionParser(
        version="0.2+",
        usage="yasvn2hg [options] <svn-repo-url> <hg-dest-dir>",
        description="yasvn2hg is a simple hack that converts a Subversion"
        " repository to a Mercurial repository. Some debug log information is"
        " written to <hg-dest-dir>.log.")
    option_parser.add_option(
        "-g", "--generate-user-file-template",
        metavar="FILE",
        dest="user_file_template",
        help="generate a mapping of SVN user names to HG user names and write"
        " it to FILE")
    option_parser.add_option(
        "-m", "--merges-file",
        metavar="FILE",
        help="read extra merge arrows from FILE")
    option_parser.add_option(
        "-r", "--revision-range",
        metavar="REVRANGE",
        help="SVN revision range to convert (default: 1:HEAD)")
    option_parser.add_option(
        "-u", "--user-file",
        metavar="FILE",
        help="file mapping SVN user names to HG user names")
    option_parser.add_option(
        "--debug",
        help="also print debug output to standard output",
        action="store_true")
    option_parser.add_option(
        "--no-structure",
        help="don't look for tags/branches/trunk",
        action="store_true")
    option_parser.add_option(
        "--sloppy",
        help="sloppy conversion; ignore unclassifiable changesets",
        action="store_true")
    option_parser.add_option(
        "--use-named-branches",
        help="use named branches",
        action="store_true")
    option_parser.set_defaults(
        revision_range="1:HEAD",
        debug=False)
    (options, args) = option_parser.parse_args()

    if not options.user_file and not options.user_file_template:
        option_parser.error(
            "must give -u/--user-file or -g/--generate-user-file-template")
    if len(args) != 2:
        option_parser.error("bad arguments; try the -h flag")
    (svn_url, hg_dest) = args

    # sys.exc_info() is used instead of "except X, e" / "except X as e" so
    # that the handlers parse on every Python version.
    try:
        converter = Svn2HgConverter(
            svn_url, hg_dest, options.debug, options.use_named_branches)
        if options.user_file_template:
            converter.generate_user_file_template(options.user_file_template)
        else:
            merges_to_add = {}
            if options.merges_file:
                merges_to_add = _read_merges_file(options.merges_file)
            converter.convert(
                options.user_file, options.revision_range, merges_to_add,
                options.sloppy, options.no_structure)
    except ClassificationError:
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        sys.stderr.write(
            "\nYou might try to rerun with the --sloppy option to ignore"
            " unclassifiable\nchangesets.\n")
        sys.exit(1)
    except Error:
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        sys.exit(1)
# Script entry point: runs unconditionally when this file is executed.
main()