%PDF- %PDF-
Direktori : /www/old2/_music/ |
Current File : /www/old2/_music/svn2hg |
#! /usr/bin/env python

# yasvn2hg -- yet another SVN to HG converter
#
# Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import locale
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile

import mercurial.ui
import mercurial.hg
import mercurial.cmdutil
import mercurial.commands
import mercurial.node


def import_etree():
    """Return the ``ElementTree`` class of the first importable module.

    The candidate module names are tried in the order listed below.

    Raises:
        ImportError: if none of the candidates can be imported.
    """
    for module in [
            "xml.etree.cElementTree",
            "xml.etree.ElementTree",
            "cElementTree",
            "elementtree.ElementTree"]:
        try:
            return __import__(module, None, None, ["ElementTree"]).ElementTree
        except ImportError:
            pass
    raise ImportError("No ElementTree module found")

ElementTree = import_etree()


class Error(Exception):
    """Base class for all errors reported by the converter."""


class ClassificationError(Error):
    """A changeset could not be classified (and --sloppy was not given)."""


class BranchConfusionError(Error):
    """A changeset refers to a branch/revision the converter cannot place."""


class Svn2HgConverter(object):
    """Replay the history of a Subversion URL into a new Mercurial repo.

    The converter reads ``svn log --xml -v`` output, classifies every log
    entry (ordinary commit, branch, tag, ...) and dispatches it to the
    matching ``_handle_<kind>_changeset`` method.  Progress is echoed to
    standard output and to ``<hg_dest>.log``.
    """

    # All path regexes are applied to paths with the svn base stripped
    # (see _path_without_base).
    _branch_re_str = "(/(?:trunk|branches/[^/]+))"
    _branch_re = re.compile(_branch_re_str)
    _tag_path_re = re.compile("/tags/([^/]+)$")
    _branch_path_re = re.compile("%s$" % _branch_re_str)
    _ordinary_path_re = re.compile("%s/(.+)" % _branch_re_str)

    def __init__(self, svn_url, hg_dest, debug, use_named_branches):
        """Set up logging and locate the repository root of svn_url.

        Args:
            svn_url: URL of the Subversion tree to convert.
            hg_dest: directory to create the Mercurial repository in; must
                not exist yet.
            debug: if true, debug messages are also printed to stdout.
            use_named_branches: if true, commits get a Mercurial branch
                label derived from the svn branch.

        Raises:
            Error: if hg_dest already exists or svn cannot be run.
        """
        if os.path.exists(hg_dest):
            raise Error("hg dest dir %r already exists" % hg_dest)
        self._debugflag = debug
        self._use_named_branches = use_named_branches
        # convert() overwrites this; give it a value so helpers that read
        # it never hit an AttributeError.
        self._no_structure = False
        self._hg_dest = hg_dest
        self._hg_ui = mercurial.ui.ui(quiet=not debug)
        self._hg_repo = None
        self._log_path = "%s.log" % self._hg_dest
        # The log file must be open before _get_svn_root(), which logs the
        # svn command it runs.
        self._log_fp = open(self._log_path, "w")
        self._svn_url = svn_url
        self._svn_root = self._get_svn_root()
        self._svn_base = self._svn_url[len(self._svn_root):]
        self._user_map = {}  # svn user --> hg user
        self._branch_map = {}  # svn branch --> [(svn_rev, hg_rev)]
        self._first = True
        self._warnings = 0
        for x in ["svn_url", "svn_root", "svn_base", "hg_dest"]:
            self._debug("%s: %s" % (x, getattr(self, "_%s" % x)))

    def convert(self, user_file, revrange, merges_to_add, sloppy,
                no_structure):
        """Convert the revisions in revrange to Mercurial changesets.

        Args:
            user_file: path of the "svn user = hg user" mapping file.
            revrange: svn revision range, e.g. "1:HEAD".
            merges_to_add: dict mapping an hg revision number to the hg
                revision to record as its second parent.
            sloppy: if true, unclassifiable changesets are skipped with a
                warning instead of aborting.
            no_structure: if true, do not look for trunk/branches/tags;
                treat everything as one line of development.

        Raises:
            ClassificationError: for an unclassifiable changeset when
                sloppy is false.
            Error: on svn failures or branch confusion.
        """
        self._no_structure = no_structure
        self._read_user_file(user_file)
        log_entries = self._get_log_entries(revrange)
        self._check_logentries(log_entries, sloppy)
        self._check_users(log_entries)
        total = len(log_entries)
        for (i, log_entry) in enumerate(log_entries):
            rev = self._get_rev(log_entry)
            self._info("Revision %d (%d of %d)" % (rev, i + 1, total))
            kinds = self._classify_changeset(log_entry)
            if self._is_unclassifiable(kinds):
                self._warning("Ignoring unclassifiable changeset")
            else:
                kind = list(kinds)[0]
                handler = getattr(self, "_handle_%s_changeset" % kind)
                handler(log_entry, rev, revrange, merges_to_add)
            sys.stdout.flush()
        self._remove_dot_svn_dirs()
        self._info("\nBranches in hg repository after conversion:")
        for branch in sorted(self._branch_map):
            hg_rev = self._branch_map[branch][-1][1]
            self._info(" - %s (head: %s)" % (branch, hg_rev))
        if self._warnings > 0:
            self._info(
                "\nThere were %d warnings during conversion. Check the log"
                " file\n%r."
                % (self._warnings, self._log_path))

    def generate_user_file_template(self, user_file_template):
        """Write an identity "author = author" mapping for every svn author.

        Args:
            user_file_template: path of the file to (over)write.
        """
        log_entries = self._get_log_entries("1:HEAD")
        authors = self._get_authors(log_entries)
        with open(user_file_template, "w") as fp:
            for author in sorted(authors):
                fp.write("%s = %s\n" % (author, author))

    @staticmethod
    def _is_unclassifiable(kinds):
        """Return true unless kinds holds exactly one known kind."""
        return len(kinds) != 1 or kinds == set(["unknown"])

    def _check_logentries(self, log_entries, sloppy):
        """Raise ClassificationError for the first unclassifiable entry.

        Nothing is raised when sloppy is true.
        """
        for log_entry in log_entries:
            kinds = self._classify_changeset(log_entry)
            if self._is_unclassifiable(kinds) and not sloppy:
                raise ClassificationError(
                    "Could not classify changeset %d (%s)" % (
                        self._get_rev(log_entry),
                        ", ".join([str(x) for x in kinds])))

    def _check_users(self, log_entries):
        """Exit with status 1 if any author lacks a user-map entry."""
        authors = self._get_authors(log_entries)
        unknown = authors - set(self._user_map)
        if unknown:
            self._info("Unknown authors:\n")
            for author in sorted(unknown):
                self._info(author)
            sys.exit(1)

    def _classify_changeset(self, log_entry):
        """Return the set of kinds the changed paths of log_entry fall into.

        A cleanly classified changeset yields a one-element set holding one
        of "ordinary", "branch", "tag", "cvs2svn_branch", "cvs2svn_tag",
        "initial" or "empty"; that name selects the
        ``_handle_<kind>_changeset`` method.  Anything else (several kinds,
        or "unknown") is treated as unclassifiable by the callers.
        """
        # ordinary commit:
        #   A {base}/{branch}/a
        #   D {base}/{branch}/b
        #   M {base}/{branch}/c
        #   A {base}/{branch}/d (copy from {base}/{branch}/e:4711)
        #
        # new/renamed/removed branches:
        #   A {base}/{branch} (copy from {base}/{branch}:4711)
        #   D {base}/{branch}
        #
        # new/renamed/removed tags:
        #   A {base}/tags/x (copy from {base}/{branch}:4711)
        #   D {base}/tags/x
        #
        # new branch manufactured by cvs2svn:
        #   D {base}/branches/x/foo/fie
        #   D {base}/branches/x/foo/fum
        #   A {base}/branches/x (copy from {base}/{branch}:4711)
        #
        # new tag manufactured by cvs2svn:
        #   D {base}/tags/x/foo/fie
        #   D {base}/tags/x/foo/fum
        #   A {base}/tags/x (copy from {base}/{branch}:4711)

        path_nodes = log_entry.findall("paths/path")
        paths = [self._path_without_base(x.text) for x in path_nodes]
        kinds = set()
        for (path_node, path) in zip(path_nodes, paths):
            if self._tag_path_re.match(path):
                kinds.add("tag")
            elif self._branch_path_re.match(path):
                kinds.add("branch")
            elif self._ordinary_path_re.match(path):
                kinds.add("ordinary")
            elif path.startswith("/tags/") and \
                    path_node.attrib["action"] == "M":
                self._debug(" - modified tag detected (ignoring)")
            elif self._no_structure:
                kinds.add("ordinary")
            else:
                kinds.add("unknown")

        # Handle strange changesets manufactured by cvs2svn.
        logmsg = self._get_msg(log_entry)
        prefix = "This commit was manufactured by cvs2svn to create "
        if logmsg.startswith(prefix + "tag"):
            kinds.clear()
            kinds.add("cvs2svn_tag")
        elif logmsg.startswith(prefix + "branch"):
            kinds.clear()
            kinds.add("cvs2svn_branch")

        if len(kinds) == 0:
            kinds.add("empty")

        if kinds == set(["branch", "ordinary"]) and \
                self._branch_path_re.match(os.path.commonprefix(paths)):
            # Ordinary commit touching the top directory of the branch.
            kinds.remove("branch")

        if "unknown" in kinds:
            if "/trunk" in paths:
                kinds = set(["initial"])

        # sorted() only makes the log line deterministic.
        self._debug(
            " - changeset %s kind: %s" % (
                log_entry.attrib["revision"], ", ".join(sorted(kinds))))
        return kinds

    def _commit(self, log_entry, rev, branch, merges_to_add):
        """Commit the current working copy to hg as svn revision rev.

        Args:
            log_entry: the svn ``logentry`` element being converted.
            rev: svn revision number of log_entry.
            branch: svn branch path ("/trunk" or "/branches/x") committed to.
            merges_to_add: dict of hg revision -> extra (second) parent.

        Raises:
            BranchConfusionError: if branch has not been seen before.
        """
        if branch not in self._branch_map:
            raise BranchConfusionError(
                "Commit in revision %d is to unknown branch %s" % (
                    rev, branch))
        hg_parent = self._find_hg_rev_on_branch(branch, rev)
        next_rev = len(self._hg_repo.changelog)
        other_hg_parent = merges_to_add.get(next_rev)
        self._debug(
            " - hg rev: %s (parents: %s, %s)" % (
                next_rev, hg_parent, other_hg_parent))
        mercurial.commands.debugrebuildstate(
            self._hg_ui,
            self._hg_repo,
            self._hg_repo.changectx(hg_parent).node())
        mercurial.commands.debugsetparents(
            self._hg_ui, self._hg_repo, hg_parent, other_hg_parent)
        if self._use_named_branches:
            if branch == "/trunk":
                label = "trunk"
            else:
                # Remove /branches/ to keep branch name.
                label = branch[len("/branches/"):]
            mercurial.commands.branch(self._hg_ui, self._hg_repo, label)
        if self._no_structure:
            copy_prefix = self._svn_base
        else:
            copy_prefix = "%s%s" % (self._svn_base, branch)
        # Record svn copies that stay inside the branch as hg copies.
        for path in log_entry.findall("paths/path"):
            if path.attrib["action"] != "A" or \
                    "copyfrom-path" not in path.attrib:
                continue
            copy_from = path.attrib["copyfrom-path"]
            copy_to = path.text
            if copy_to.startswith(copy_prefix) and \
                    copy_from.startswith(copy_prefix):
                # +1 also drops the "/" that follows the prefix.
                cf = copy_from[len(copy_prefix) + 1:]
                ct = copy_to[len(copy_prefix) + 1:]
                mercurial.commands.copy(
                    self._hg_ui, self._hg_repo, cf, ct, after=True)
            else:
                self._warning(
                    "Don't know how to handle copy from %s"
                    " to %s (ignoring)" % (copy_from, copy_to))
        mercurial.cmdutil.addremove(
            self._hg_repo,
            opts={"quiet": True, "exclude": [".svn/**", "**/.svn/**"]})
        self._hg_repo.commit(
            text=self._get_msg(log_entry).encode("utf-8"),
            user=self._get_user(log_entry).encode("utf-8"),
            date=self._get_date(log_entry),
            force=False)  # Ignore empty commits.
        hg_rev = len(self._hg_repo.changelog) - 1
        # The changelog only grew if the commit was non-empty; empty
        # commits are ignored and leave the branch map untouched.
        if hg_rev == next_rev:
            self._branch_map[branch].append((rev, hg_rev))

    def _debug(self, msg):
        """Write msg to the log file, and to stdout in debug mode."""
        self._log_fp.write("%s\n" % msg)
        if self._debugflag:
            print(msg)

    def _find_hg_rev_on_branch(self, svn_branch, svn_rev):
        """Return the hg revision matching svn_rev on svn_branch.

        That is the hg revision recorded for the newest svn revision on
        the branch that is not newer than svn_rev.

        Raises:
            BranchConfusionError: if the branch is unknown or has no
                revision at or before svn_rev.
        """
        try:
            revs = self._branch_map[svn_branch]
        except KeyError:
            raise BranchConfusionError("Branch unknown: %s" % svn_branch)
        for (srev, hrev) in reversed(revs):
            if srev <= svn_rev:
                return hrev
        # An exception rather than an assert, so the check survives -O and
        # is reported through main()'s Error handler.
        raise BranchConfusionError(
            "%s not found on %s" % (svn_rev, svn_branch))

    def _get_authors(self, log_entries):
        """Return the set of author names; a missing author is "unknown"."""
        authors = set()
        for log_entry in log_entries:
            author = log_entry.findtext("author")
            if author is None:
                author = "unknown"
            authors.add(author)
        return authors

    def _get_branch_of_commit(self, log_entry):
        """Return the svn branch ("/trunk" or "/branches/x") of a commit.

        The branch is taken from the first changed path that lies on a
        branch; with no_structure everything is on "/trunk".

        Raises:
            BranchConfusionError: if no changed path lies on a branch.
        """
        if self._no_structure:
            return '/trunk'
        for path_node in log_entry.findall("paths/path"):
            # Paths outside trunk/branches (e.g. ignored modified-tag
            # paths) do not match and are skipped.
            m = self._branch_re.match(
                self._path_without_base(path_node.text))
            if m:
                return m.group(1)
        raise BranchConfusionError(
            "Could not determine branch of revision %d"
            % self._get_rev(log_entry))

    def _get_date(self, log_entry):
        """Return the entry's date as "<date> <time> UTC".

        The fractional seconds of the svn timestamp are dropped.
        """
        m = re.match(r"(.*)T(.*)\..*Z", log_entry.findtext("date"))
        return "%s %s UTC" % (m.group(1), m.group(2))

    def _svn_xml(self, *args):
        """Run svn with args and return its stdout parsed as ElementTree.

        The output goes through a temporary file that is removed again
        whether or not svn succeeds.
        """
        (fd, xml_file) = tempfile.mkstemp()
        # mkstemp() hands back an open descriptor; svn's output is written
        # through a separate open() of the path in _run(), so release it.
        os.close(fd)
        try:
            self._svn_to_file(xml_file, *args)
            et = ElementTree()
            et.parse(xml_file)
        finally:
            os.remove(xml_file)
        return et

    def _get_log_entries(self, revrange):
        """Return the svn log entries of revrange, sorted by revision."""
        et = self._svn_xml(
            "log", "-r", revrange, "-v", "--xml", self._svn_url)
        log_entries = et.findall("logentry")
        log_entries.sort(key=self._get_rev)
        return log_entries

    def _get_msg(self, log_entry):
        """Return the stripped log message, or a placeholder if empty."""
        msg = log_entry.findtext("msg", "").strip()
        if msg:
            return msg
        else:
            return "[empty log message]"

    def _get_rev(self, log_entry):
        """Return the svn revision number of log_entry as an int."""
        return int(log_entry.attrib["revision"])

    def _get_svn_root(self):
        """Return the repository root URL that self._svn_url lives under.

        For file:// URLs the root is found by stripping trailing path
        components until the remaining path exists on disk; for other
        URLs ``svn info --xml`` is asked.

        Raises:
            Error: if no existing path is found for a file:// URL, or svn
                fails.
        """
        m = re.match("file://(.*)", self._svn_url)
        if m:
            path = m.group(1)
            while not os.path.exists(path):
                parent = os.path.dirname(path)
                if parent == path:
                    # Nothing left to strip (e.g. a relative path that
                    # does not exist): give up instead of looping forever.
                    raise Error(
                        "could not find repository root of %r"
                        % self._svn_url)
                path = parent
            return "file://%s" % path
        else:
            et = self._svn_xml("--xml", "info", self._svn_url)
            return et.findtext("entry/repository/root")

    def _get_user(self, log_entry):
        """Return the hg user mapped to the entry's author, or "unknown"."""
        author = log_entry.findtext("author")
        if author in self._user_map:
            return self._user_map[author]
        else:
            return "unknown"

    def _handle_branch_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Register branches created by copying another branch.

        Only the branch map is updated; nothing is committed to hg.
        Deletions are ignored.

        Raises:
            BranchConfusionError: if the source branch is unknown or the
                new branch already exists in the branch map.
        """
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            if action == "A" and "copyfrom-path" in path_node.attrib:
                new_branch = self._path_without_base(path_node.text)
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                self._info(
                    " - new branch %s (from %s)" % (new_branch, from_branch))
                if from_branch not in self._branch_map:
                    raise BranchConfusionError(
                        "Branch unknown: %s" % from_branch)
                from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                from_hg_rev = self._find_hg_rev_on_branch(
                    from_branch, from_svn_rev)
                if new_branch in self._branch_map:
                    raise BranchConfusionError(
                        "Branch already exists: %s" % new_branch)
                self._branch_map[new_branch] = [(from_svn_rev, from_hg_rev)]
            elif action == "D":
                pass
            else:
                self._warning(
                    "Don't know how to handle action %s on %r in changeset"
                    " %d" % (action, path_node.text, rev))

    def _clean_up_cvs2svn_log_entry(self, log_entry):
        """Drop every path except the branch/tag creation itself.

        Changesets manufactured by cvs2svn mix deletions below the new
        branch/tag with the "A" that creates it; only the latter is kept.
        """
        paths_node = log_entry.find("paths")
        for node in paths_node.findall("path"):
            # The regexes expect paths relative to the svn base.
            path = self._path_without_base(node.text)
            creates_branch_or_tag = (
                node.attrib["action"] == "A"
                and (self._branch_path_re.match(path)
                     or self._tag_path_re.match(path)))
            if not creates_branch_or_tag:
                paths_node.remove(node)

    def _handle_cvs2svn_branch_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Handle a cvs2svn branch changeset like an ordinary branch one."""
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_branch_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_cvs2svn_tag_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Handle a cvs2svn tag changeset like an ordinary tag one."""
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_tag_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_initial_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Log and ignore the changeset that creates /trunk."""
        self._info(" - initial changeset (ignoring)")

    def _handle_empty_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Log and ignore a changeset with no classifiable paths."""
        self._info(" - empty changeset (ignoring)")

    def _handle_ordinary_changeset(
        self, log_entry, rev, revrange, merges_to_add):
        """Bring the working copy to rev with svn and commit it to hg.

        The first ordinary changeset checks out a working copy into the
        hg destination, changes the current directory to it and creates
        the hg repository there; later ones ``svn switch`` that copy.
        """
        branch = self._get_branch_of_commit(log_entry)
        self._info(" - branch: %s" % branch)
        self._print_log(log_entry)
        if self._first:
            self._first = False
            self._branch_map["/trunk"] = [(rev, -1)]
            self._info(" - checking out from svn")
            # Peg the URL at the end of the range; a single revision
            # ("42") acts as its own end.
            stoprev = revrange.split(":")[-1]
            if self._no_structure:
                url = "%s@%s" % (self._svn_url, stoprev)
            else:
                url = "%s/trunk@%s" % (self._svn_url, stoprev)
            self._svn("checkout", "-r", str(rev), url, self._hg_dest)
            os.chdir(self._hg_dest)
            self._info(" - committing to hg")
            mercurial.commands.init(self._hg_ui)
            self._hg_repo = mercurial.hg.repository(self._hg_ui, ".")
        else:
            self._info(" - updating from svn")
            if self._no_structure:
                url = self._svn_url
            else:
                url = "%s%s" % (self._svn_url, branch)
            self._svn("switch", "-r", str(rev), url)
            self._info(" - committing to hg")
        self._commit(log_entry, rev, branch, merges_to_add)

    def _handle_tag_changeset(self, log_entry, rev, revrange, merges_to_add):
        """Create or remove hg tags for the tag paths in log_entry.

        Each tag operation is committed with the svn message, user and
        date, and the resulting hg revision is recorded on "/trunk".
        """
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            tagpath = self._path_without_base(path_node.text)
            tag_match = self._tag_path_re.match(tagpath)
            if tag_match is None:
                # E.g. a modification below an existing tag.
                self._warning(
                    "Ignoring non-tag path %r in changeset %d"
                    % (path_node.text, rev))
                continue
            tagname = tag_match.group(1)
            if action == "A":
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                m = self._tag_path_re.match(from_branch)
                if m:
                    # Renamed tag.
                    old_tagname = m.group(1)
                    tagnode = self._hg_repo.tags()[old_tagname]
                else:
                    from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                    from_hg_rev = self._find_hg_rev_on_branch(
                        from_branch, from_svn_rev)
                    tagnode = self._hg_repo.changectx(from_hg_rev).node()
                latest_trunk_hg_rev = \
                    self._find_hg_rev_on_branch("/trunk", rev)
                mercurial.commands.debugsetparents(
                    self._hg_ui, self._hg_repo, latest_trunk_hg_rev)
                self._info(" - new tag: %s" % tagname)
            elif action == "D":
                self._info(" - removed tag: %s" % tagname)
                tagnode = mercurial.node.nullid
            else:
                self._warning(
                    "Don't know how to handle action %s for path"
                    " %r in changeset %d" % (action, path_node.text, rev))
                return
            self._hg_repo.tag(
                tagname,
                tagnode,
                self._get_msg(log_entry).encode("utf-8"),
                False,
                self._get_user(log_entry).encode("utf-8"),
                self._get_date(log_entry))
            hg_rev = len(self._hg_repo.changelog) - 1
            self._branch_map["/trunk"].append((rev, hg_rev))

    def _info(self, msg):
        """Write msg to the log file and to stdout."""
        self._log_fp.write("%s\n" % msg)
        print(msg)

    def _path_without_base(self, path):
        """Return path with the leading svn base stripped."""
        return path[len(self._svn_base):]

    def _print_log(self, log_entry):
        """Log the author and the (possibly truncated) first message line."""
        log_lines = self._get_msg(log_entry).split("\n", 1)
        if len(log_lines) > 1 or len(log_lines[0]) > 70:
            m = log_lines[0][:70] + "[...]"
        else:
            m = log_lines[0]
        self._info(" - author: %s" % self._get_user(log_entry))
        encoded_log = m.encode(locale.getpreferredencoding(), "replace")
        self._info(" - log: %s" % encoded_log)

    def _read_user_file(self, user_file):
        """Load "svn user = hg user" lines from user_file into the map.

        Lines without "=" are skipped.
        """
        with open(user_file) as fp:
            for line in fp:
                if "=" not in line:
                    continue
                (svn_user, hg_user) = line.split("=", 1)
                self._user_map[svn_user.strip()] = hg_user.strip()

    def _remove_dot_svn_dirs(self):
        """Delete all .svn directories from the converted working copy."""
        if self._hg_repo is None:
            # No checkout happened, so the current directory is still the
            # caller's and not the hg destination; leave it alone.
            return
        for (dirpath, dirnames, filenames) in os.walk("."):
            if ".svn" in dirnames:
                shutil.rmtree(os.path.join(dirpath, ".svn"))
                # Keep os.walk from descending into the removed directory.
                dirnames.remove(".svn")

    def _run(self, cmdline, stdout_file=None):
        """Run cmdline, optionally redirecting stdout to stdout_file.

        Raises:
            Error: if the command cannot be started or exits non-zero.
        """
        self._debug("Run: %r" % cmdline)
        if stdout_file is not None:
            stdout_fp = open(stdout_file, "w")
        else:
            stdout_fp = None
        try:
            try:
                p = subprocess.Popen(cmdline, stdout=stdout_fp)
            except OSError as e:
                # E.g. the executable is not installed.
                raise Error("failed running %r: %s" % (cmdline, e))
            p.wait()
        finally:
            if stdout_fp is not None:
                stdout_fp.close()
        if p.returncode != 0:
            raise Error("failed running %r" % cmdline)

    def _svn(self, *args):
        """Run svn with args (quietly unless in debug mode)."""
        cmdline = ["svn"]
        if not self._debugflag:
            cmdline.append("-q")
        cmdline.extend(args)
        return self._run(cmdline)

    def _svn_to_file(self, stdout_file, *args):
        """Run svn with args, writing its stdout to stdout_file."""
        return self._run(["svn"] + list(args), stdout_file)

    def _warning(self, msg):
        """Log msg as a warning and count it."""
        self._info(" - *** Warning: %s" % msg)
        self._warnings += 1


def _read_merges_file(path):
    """Return {child hg rev: extra parent hg rev} read from path.

    Each non-blank line holds two whitespace-separated integers.
    """
    merges = {}
    with open(path) as fp:
        for line in fp:
            if not line.strip():
                continue
            (child, parent) = line.split()
            merges[int(child)] = int(parent)
    return merges


def main():
    """Parse the command line and run the requested conversion step."""
    option_parser = optparse.OptionParser(
        version="0.2+",
        usage="yasvn2hg [options] <svn-repo-url> <hg-dest-dir>",
        description="yasvn2hg is a simple hack that converts a Subversion"
        " repository to a Mercurial repository. Some debug log information is"
        " written to <hg-dest-dir>.log.")
    option_parser.add_option(
        "-g", "--generate-user-file-template",
        metavar="FILE",
        dest="user_file_template",
        help="generate a mapping of SVN user names to HG user names and write"
        " it to FILE")
    option_parser.add_option(
        "-m", "--merges-file",
        metavar="FILE",
        help="read extra merge arrows from FILE")
    option_parser.add_option(
        "-r", "--revision-range",
        metavar="REVRANGE",
        help="SVN revision range to convert (default: 1:HEAD)")
    option_parser.add_option(
        "-u", "--user-file",
        metavar="FILE",
        help="file mapping SVN user names to HG user names")
    option_parser.add_option(
        "--debug",
        help="also print debug output to standard output",
        action="store_true")
    option_parser.add_option(
        "--no-structure",
        help="don't look for tags/branches/trunk",
        action="store_true")
    option_parser.add_option(
        "--sloppy",
        help="sloppy conversion; ignore unclassifiable changesets",
        action="store_true")
    option_parser.add_option(
        "--use-named-branches",
        help="use named branches",
        action="store_true")
    option_parser.set_defaults(
        revision_range="1:HEAD",
        debug=False)
    (options, args) = option_parser.parse_args()

    if not options.user_file and not options.user_file_template:
        option_parser.error(
            "must give -u/--user-file or -g/--generate-user-file-template")
    if len(args) != 2:
        option_parser.error("bad arguments; try the -h flag")
    svn_url = args[0]
    hg_dest = args[1]

    try:
        converter = Svn2HgConverter(
            svn_url, hg_dest, options.debug, options.use_named_branches)
        if options.user_file_template:
            converter.generate_user_file_template(options.user_file_template)
        else:
            merges_to_add = {}
            if options.merges_file:
                merges_to_add = _read_merges_file(options.merges_file)
            converter.convert(
                options.user_file,
                options.revision_range,
                merges_to_add,
                options.sloppy,
                options.no_structure)
    except ClassificationError as e:
        sys.stderr.write("Error: %s\n" % e)
        sys.stderr.write(
            "\nYou might try to rerun with the --sloppy option to ignore"
            " unclassifiable\nchangesets.\n")
        sys.exit(1)
    except Error as e:
        sys.stderr.write("Error: %s\n" % e)
        sys.exit(1)


if __name__ == "__main__":
    main()