%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/old2/_music/
Upload File :
Create Path :
Current File : /www/old2/_music/svn2hg

#! /usr/bin/env python

# yasvn2hg -- yet another SVN to HG converter
#
# Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import locale
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile

import mercurial.ui
import mercurial.hg
import mercurial.cmdutil
import mercurial.commands
import mercurial.node

def import_etree():
    """Return the ``ElementTree`` class of the first importable provider.

    The candidates are tried in this order: the two modules below
    ``xml.etree``, then the stand-alone ``cElementTree`` module and the
    ``elementtree`` package.

    Raises ImportError if none of the candidates can be imported.
    """
    candidates = (
        "xml.etree.cElementTree",
        "xml.etree.ElementTree",
        "cElementTree",
        "elementtree.ElementTree",
    )
    for name in candidates:
        try:
            # The non-empty fromlist makes __import__ return the leaf
            # module instead of the top-level package.
            provider = __import__(name, None, None, ["ElementTree"])
        except ImportError:
            continue
        return provider.ElementTree
    raise ImportError("No ElementTree module found")

# Resolve the ElementTree class once at import time; the converter
# instantiates it to parse the XML written by "svn log" and "svn info".
ElementTree = import_etree()

class Error(Exception):
    """Base class of the errors that main() reports without a traceback."""

class ClassificationError(Error):
    """Raised when a changeset cannot be classified."""

class BranchConfusionError(Error):
    """Raised when a changeset refers to a branch the converter does not know."""

class Svn2HgConverter(object):
    """Converts the history of a Subversion URL into a new hg repository.

    All regular expressions below are applied with match(), i.e. anchored
    at the start, to paths from which the svn base has been stripped.
    """
    # A branch root: "/trunk" or "/branches/<name>" (captured as group 1).
    _branch_re_str = "(/(?:trunk|branches/[^/]+))"
    _branch_re = re.compile(_branch_re_str)
    # A path that is exactly "/tags/<name>"; group 1 is the tag name.
    _tag_path_re = re.compile("/tags/([^/]+)$")
    # A path that is exactly a branch root.
    _branch_path_re = re.compile("%s$" % _branch_re_str)
    # A path below a branch root; group 2 is the part after the root.
    _ordinary_path_re = re.compile("%s/(.+)" % _branch_re_str)

    def __init__(self, svn_url, hg_dest, debug, use_named_branches):
        """Prepare a conversion of *svn_url* into the new directory *hg_dest*.

        svn_url -- URL of the svn directory to convert
        hg_dest -- destination directory; must not exist yet
        debug -- if true, debug messages are also printed to stdout
        use_named_branches -- if true, commits get an hg branch label

        Raises Error if hg_dest already exists.  Opens "<hg_dest>.log" for
        writing and looks up the repository root of svn_url.
        """
        if os.path.exists(hg_dest):
            raise Error("hg dest dir %r already exists" % hg_dest)

        # Plain settings.
        self._debugflag = debug
        self._use_named_branches = use_named_branches
        self._hg_dest = hg_dest
        self._svn_url = svn_url

        # Conversion state.
        self._user_map = {} # svn user --> hg user
        self._branch_map = {} # svn branch --> [(svn_rev, hg_rev)]
        self._first = True
        self._warnings = 0
        self._hg_repo = None

        self._hg_ui = mercurial.ui.ui(quiet=not debug)
        self._log_path = "%s.log" % self._hg_dest
        self._log_fp = open(self._log_path, "w")

        # The base is the part of the URL below the repository root.
        self._svn_root = self._get_svn_root()
        self._svn_base = self._svn_url[len(self._svn_root):]

        for name in ("svn_url", "svn_root", "svn_base", "hg_dest"):
            value = getattr(self, "_" + name)
            self._debug("%s: %s" % (name, value))

    def convert(self, user_file, revrange, merges_to_add, sloppy, no_structure):
        """Convert the svn revisions in *revrange* and print a summary.

        user_file -- path of the file mapping svn user names to hg users
        revrange -- svn revision range, passed on to "svn log -r"
        merges_to_add -- dict mapping an hg revision number to the extra
            parent that revision should get
        sloppy -- if true, unclassifiable changesets are skipped with a
            warning; otherwise ClassificationError is raised up front
        no_structure -- if true, do not look for trunk/branches/tags

        Exits the process (via _check_users) if an author has no mapping.
        """
        self._no_structure = no_structure
        self._read_user_file(user_file)
        log_entries = self._get_log_entries(revrange)
        self._check_logentries(log_entries, sloppy)
        self._check_users(log_entries)
        total = len(log_entries)
        for (i, log_entry) in enumerate(log_entries):
            rev = self._get_rev(log_entry)
            self._info("Revision %d (%d of %d)" % (rev, i + 1, total))
            kinds = self._classify_changeset(log_entry)
            if len(kinds) != 1 or kinds == set(["unknown"]):
                self._warning("Ignoring unclassifiable changeset")
            else:
                kind = list(kinds)[0]
                # Dispatch on the kind name, e.g. _handle_tag_changeset.
                handler = getattr(self, "_handle_%s_changeset" % kind)
                handler(log_entry, rev, revrange, merges_to_add)
            sys.stdout.flush()
        # _remove_dot_svn_dirs() walks the current directory.  The process
        # only changes into the destination when the first ordinary
        # changeset is checked out, which is also when the hg repository is
        # created; without that, the walk would strip .svn directories from
        # whatever directory the user started the script in.
        if self._hg_repo is not None:
            self._remove_dot_svn_dirs()
        self._info("\nBranches in hg repository after conversion:")
        for branch in sorted(self._branch_map):
            (svn_rev, hg_rev) = self._branch_map[branch][-1]
            self._info("  - %s (head: %s)" % (branch, hg_rev))
        if self._warnings > 0:
            self._info(
                "\nThere were %d warnings during conversion. Check the log"
                " file\n%r." % (self._warnings, self._log_path))

    def generate_user_file_template(self, user_file_template):
        """Write an "author = author" line for every svn author.

        The authors are collected from the svn log of revisions 1:HEAD and
        written in sorted order to the file *user_file_template*, which is
        overwritten if it exists.
        """
        log_entries = self._get_log_entries("1:HEAD")
        authors = self._get_authors(log_entries)
        fp = open(user_file_template, "w")
        try:
            for author in sorted(authors):
                fp.write("%s = %s\n" % (author, author))
        finally:
            # Close explicitly so the template is flushed to disk even if
            # a write fails part way through.
            fp.close()

    def _check_logentries(self, log_entries, sloppy):
        for log_entry in log_entries:
            kinds = self._classify_changeset(log_entry)
            if len(kinds) != 1 or kinds == set(["unknown"]):
                rev = self._get_rev(log_entry)
                msg = "Could not classify changeset %d (%s)" % (
                    rev, ", ".join([str(x) for x in kinds]))
                if not sloppy:
                    raise ClassificationError(msg)

    def _check_users(self, log_entries):
        authors = self._get_authors(log_entries)
        unknown = authors - set(self._user_map.iterkeys())
        if unknown:
            self._info("Unknown authors:\n")
            ul = list(unknown)
            ul.sort()
            for author in ul:
                self._info(author)
            sys.exit(1)

    def _classify_changeset(self, log_entry):
        # ordinary commit:
        # A {base}/{branch}/a
        # D {base}/{branch}/b
        # M {base}/{branch}/c
        # A {base}/{branch}/d (copy from {base}/{branch}/e:4711)
        #
        # new/renamed/removed branches:
        # A {base}/{branch} (copy from {base}/{branch}:4711)
        # D {base}/{branch}
        #
        # new/renamed/removed tags:
        # A {base}/tags/x (copy from {base}/{branch}:4711)
        # D {base}/tags/x
        #
        # new branch manufactured by cvs2svn:
        # D {base}/branches/x/foo/fie
        # D {base}/branches/x/foo/fum
        # A {base}/branches/x (copy from {base}/{branch}:4711)
        #
        # new tag manufactured by cvs2svn:
        # D {base}/tags/x/foo/fie
        # D {base}/tags/x/foo/fum
        # A {base}/tags/x (copy from {base}/{branch}:4711)

        path_nodes = log_entry.findall("paths/path")
        paths = [self._path_without_base(x.text) for x in path_nodes]
        kinds = set()
        for (path_node, path) in zip(path_nodes, paths):
            if self._tag_path_re.match(path):
                kinds.add("tag")
            elif self._branch_path_re.match(path):
                kinds.add("branch")
            elif self._ordinary_path_re.match(path):
                kinds.add("ordinary")
            elif path.startswith("/tags/") and \
                     path_node.attrib["action"] == "M":
                self._debug(" - modified tag detected (ignoring)")
            elif self._no_structure:
                kinds.add("ordinary")
            else:
                kinds.add("unknown")

        # Handle strange changesets manufactured by cvs2svn.
        logmsg = self._get_msg(log_entry)
        prefix = "This commit was manufactured by cvs2svn to create "
        if logmsg.startswith(prefix + "tag"):
            kinds.clear()
            kinds.add("cvs2svn_tag")
        elif logmsg.startswith(prefix + "branch"):
            kinds.clear()
            kinds.add("cvs2svn_branch")

        if len(kinds) == 0:
            kinds.add("empty")
        if kinds == set(["branch", "ordinary"]) and \
               self._branch_path_re.match(os.path.commonprefix(paths)):
            # Ordinary commit touching the top directory of the branch.
            kinds.remove("branch")
        if "unknown" in kinds:
            if "/trunk" in paths:
                kinds = set(["initial"])
        self._debug(
            " - changeset %s kind: %s" % (
                log_entry.attrib["revision"], ", ".join(kinds)))
        return kinds

    def _commit(self, log_entry, rev, branch, merges_to_add):
        if not branch in self._branch_map:
            raise BranchConfusionError(
                "Commit in revision %d is to unknown branch %s" % (
                    rev, branch))
        hg_parent = self._find_hg_rev_on_branch(branch, rev)
        next_rev = len(self._hg_repo.changelog)
        if next_rev in merges_to_add:
            other_hg_parent = merges_to_add[next_rev]
        else:
            other_hg_parent = None
        self._debug(
            " - hg rev: %s (parents: %s, %s)" % (
                next_rev, hg_parent, other_hg_parent))
        mercurial.commands.debugrebuildstate(
            self._hg_ui,
            self._hg_repo,
            self._hg_repo.changectx(hg_parent).node())
        mercurial.commands.debugsetparents(
            self._hg_ui, self._hg_repo, hg_parent, other_hg_parent)
        if self._use_named_branches:
            if branch == "/trunk":
                label = "trunk"
            else:
                label = branch[10:] # Remove /branches/ to keep branch name.
            mercurial.commands.branch(self._hg_ui, self._hg_repo, label)
        if self._no_structure:
            copy_prefix = self._svn_base
        else:
            copy_prefix = "%s%s" % (self._svn_base, branch)
        for path in log_entry.findall("paths/path"):
            if path.attrib["action"] == "A" and "copyfrom-path" in path.attrib:
                copy_from = path.attrib["copyfrom-path"]
                copy_to = path.text
                if copy_to.startswith(copy_prefix) and \
                       copy_from.startswith(copy_prefix):
                    cf = copy_from[len(copy_prefix) + 1:]
                    ct = copy_to[len(copy_prefix) + 1:]
                    mercurial.commands.copy(
                        self._hg_ui, self._hg_repo, cf, ct, after=True)
                else:
                    self._warning(
                        "Don't know how to handle copy from %s"
                        " to %s (ignoring)" % (copy_from, copy_to))
        mercurial.cmdutil.addremove(
            self._hg_repo,
            opts={"quiet": True, "exclude": [".svn/**", "**/.svn/**"]})
        self._hg_repo.commit(
            text=self._get_msg(log_entry).encode("utf-8"),
            user=self._get_user(log_entry).encode("utf-8"),
            date=self._get_date(log_entry),
            force=False) # Ignore empty commits.
        hg_rev = len(self._hg_repo.changelog) - 1
        if hg_rev == next_rev:
            self._branch_map[branch].append((rev, hg_rev))
        else:
            # Empty commit. Ignored.
            pass

    def _debug(self, msg):
        self._log_fp.write("%s\n" % msg)
        if self._debugflag:
            print msg

    def _find_hg_rev_on_branch(self, svn_branch, svn_rev):
        for (srev, hrev) in reversed(self._branch_map[svn_branch]):
            if srev <= svn_rev:
                return hrev
        assert False, "%s not found on %s" % (svn_rev, svn_branch)

    def _get_authors(self, log_entries):
        authors = set()
        for log_entry in log_entries:
            author = log_entry.findtext("author")
            if author is None:
                author = "unknown"
            if not author in authors:
                authors.add(author)
        return authors

    def _get_branch_of_commit(self, log_entry):
        if self._no_structure:
            return '/trunk'
        first_path = self._path_without_base(log_entry.findtext("paths/path"))
        m = self._branch_re.match(first_path)
        return m.group(1)

    def _get_date(self, log_entry):
        m = re.match("(.*)T(.*)\..*Z", log_entry.findtext("date"))
        return "%s %s UTC" % (m.group(1), m.group(2))

    def _get_log_entries(self, revrange):
        """Return the svn log entries of *revrange*, sorted by revision.

        Runs "svn log -r <revrange> -v --xml" on the svn URL with its
        output sent to a temporary file, and parses that file.
        """
        (fd, log_file) = tempfile.mkstemp()
        # mkstemp() hands back an open descriptor.  The svn output is
        # written through a separate handle on the same path, so ours is
        # closed right away instead of being leaked.
        os.close(fd)
        try:
            self._svn_to_file(
                log_file,
                "log", "-r", revrange, "-v", "--xml", self._svn_url)
            et = ElementTree()
            et.parse(log_file)
        finally:
            # Do not leave the temporary file behind if svn or the parser
            # fails.
            os.remove(log_file)
        log_entries = et.findall("logentry")
        log_entries.sort(key=self._get_rev)
        return log_entries

    def _get_msg(self, log_entry):
        msg = log_entry.findtext("msg").strip()
        if msg:
            return msg
        else:
            return "[empty log message]"

    def _get_rev(self, log_entry):
        return int(log_entry.attrib["revision"])

    def _get_svn_root(self):
        m = re.match("file://(.*)", self._svn_url)
        if m:
            path = m.group(1)
            while not os.path.exists(path):
                (path, _) = os.path.split(path)
            return "file://%s" % path
        else:
            (_, log_file) = tempfile.mkstemp()
            self._svn_to_file(log_file, "--xml", "info", self._svn_url)
            et = ElementTree()
            et.parse(log_file)
            os.remove(log_file)
            return et.findtext("entry/repository/root")

    def _get_user(self, log_entry):
        author = log_entry.findtext("author")
        if author in self._user_map:
            return self._user_map[author]
        else:
            return "unknown"

    def _handle_branch_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            if action == "A" and "copyfrom-path" in path_node.attrib:
                new_branch = self._path_without_base(path_node.text)
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                self._info(
                    " - new branch %s (from %s)" % (new_branch, from_branch))
                if from_branch not in self._branch_map:
                    raise BranchConfusionError(
                        "Branch unknown: %s" % from_branch)
                from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                from_hg_rev = self._find_hg_rev_on_branch(
                    from_branch, from_svn_rev)
                assert new_branch not in self._branch_map
                self._branch_map[new_branch] = [(from_svn_rev, from_hg_rev)]
            elif action == "D":
                pass
            else:
                self._warning(
                    "Don't know how to handle action %s on %r in changeset"
                    " %d" % (action, path_node.text, rev))

    def _clean_up_cvs2svn_log_entry(self, log_entry):
        paths_node = log_entry.find("paths")
        nodes_to_remove = set()
        for node in paths_node.findall("path"):
            if not (node.attrib["action"] == "A" and
                    (self._branch_path_re.match(node.text) or
                     self._tag_path_re.match(node.text))):
                paths_node.remove(node)

    def _handle_cvs2svn_branch_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Handle a branch creation that was manufactured by cvs2svn.

        The paths of log_entry are first cleaned up (this modifies
        log_entry); the result is then handled like any other branch
        changeset.
        """
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_branch_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_cvs2svn_tag_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Handle a tag creation that was manufactured by cvs2svn.

        The paths of log_entry are first cleaned up (this modifies
        log_entry); the result is then handled like any other tag
        changeset.
        """
        self._clean_up_cvs2svn_log_entry(log_entry)
        self._handle_tag_changeset(log_entry, rev, revrange, merges_to_add)

    def _handle_initial_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Report a changeset of kind "initial"; nothing is converted."""
        self._info(" - initial changeset (ignoring)")

    def _handle_empty_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Report a changeset of kind "empty"; nothing is converted."""
        self._info(" - empty changeset (ignoring)")

    def _handle_ordinary_changeset(
            self, log_entry, rev, revrange, merges_to_add):
        """Bring the working copy to svn revision *rev* and commit it to hg.

        For the first ordinary changeset, the working copy is checked out
        from svn into the destination directory, the process changes into
        that directory and the hg repository is created there.  For later
        changesets, the working copy is switched to the branch of the
        commit at revision rev.
        """
        branch = self._get_branch_of_commit(log_entry)
        self._info(" - branch: %s" % branch)
        self._print_log(log_entry)
        if self._first:
            self._first = False
            self._branch_map["/trunk"] = [(rev, -1)]
            self._info(" - checking out from svn")
            # Only the end of the range is needed; it is used as the peg
            # revision of the URL.  Taking the last component also copes
            # with a range that names a single revision without a ":".
            stoprev = revrange.split(":")[-1]
            if self._no_structure:
                url = "%s@%s" % (self._svn_url, stoprev)
            else:
                url = "%s/trunk@%s" % (self._svn_url, stoprev)
            self._svn("checkout", "-r", str(rev), url, self._hg_dest)
            # Everything after this point works relative to the
            # destination directory.
            os.chdir(self._hg_dest)
            self._info(" - committing to hg")
            mercurial.commands.init(self._hg_ui)
            self._hg_repo = mercurial.hg.repository(self._hg_ui, ".")
        else:
            self._info(" - updating from svn")
            if self._no_structure:
                url = self._svn_url
            else:
                url = "%s%s" % (self._svn_url, branch)
            self._svn("switch", "-r", str(rev), url)
            self._info(" - committing to hg")
        self._commit(log_entry, rev, branch, merges_to_add)

    def _handle_tag_changeset(self, log_entry, rev, revrange, merges_to_add):
        for path_node in log_entry.findall("paths/path"):
            action = path_node.attrib["action"]
            tagpath = self._path_without_base(path_node.text)
            tagname = self._tag_path_re.match(tagpath).group(1)
            if action == "A":
                from_branch = self._path_without_base(
                    path_node.attrib["copyfrom-path"])
                m = self._tag_path_re.match(from_branch)
                if m:
                    # Renamed tag.
                    old_tagname = m.group(1)
                    tagnode = self._hg_repo.tags()[old_tagname]
                else:
                    from_svn_rev = int(path_node.attrib["copyfrom-rev"])
                    from_hg_rev = self._find_hg_rev_on_branch(
                        from_branch, from_svn_rev)
                    from_hg_node = self._hg_repo.changectx(from_hg_rev).node()
                    tagnode = from_hg_node
                latest_trunk_hg_rev = \
                    self._find_hg_rev_on_branch("/trunk", rev)
                mercurial.commands.debugsetparents(
                    self._hg_ui, self._hg_repo, latest_trunk_hg_rev)
                self._info(" - new tag: %s" % tagname)
            elif action == "D":
                self._info(" - removed tag: %s" % tagname)
                tagnode = mercurial.node.nullid
            else:
                self._warning(
                    "Don't know how to handle action %s for path"
                    " %r in changeset %d" % (action, path, rev))
                return
            self._hg_repo.tag(
                tagname,
                tagnode,
                self._get_msg(log_entry).encode("utf-8"),
                False,
                self._get_user(log_entry).encode("utf-8"),
                self._get_date(log_entry))
            hg_rev = len(self._hg_repo.changelog) - 1
            self._branch_map["/trunk"].append((rev, hg_rev))

    def _info(self, msg):
        self._log_fp.write("%s\n" % msg)
        print msg

    def _path_without_base(self, path):
        return path[len(self._svn_base):]

    def _print_log(self, log_entry):
        log_lines = self._get_msg(log_entry).split("\n", 1)
        if len(log_lines) > 1 or len(log_lines[0]) > 70:
            m = log_lines[0][:70] + "[...]"
        else:
            m = log_lines[0]
        self._info(" - author: %s" % self._get_user(log_entry))
        encoded_log = m.encode(locale.getpreferredencoding(), "replace")
        self._info(" - log: %s" % encoded_log)

    def _read_user_file(self, user_file):
        fp = open(user_file)
        for line in fp:
            try:
                (svn_user, hg_user) = line.split("=", 1)
            except ValueError:
                pass
            else:
                self._user_map[svn_user.strip()] = hg_user.strip()

    def _remove_dot_svn_dirs(self):
        for (dirpath, dirnames, filenames) in os.walk("."):
            if ".svn" in dirnames:
                shutil.rmtree(os.path.join(dirpath, ".svn"))

    def _run(self, cmdline, stdout_file=None):
        self._debug("Run: %r" % cmdline)
        if stdout_file is not None:
            stdout_fp = open(stdout_file, "w")
        else:
            stdout_fp = None
        p = subprocess.Popen(cmdline, stdout=stdout_fp)
        p.wait()
        if p.returncode != 0:
            raise Error("failed running %r" % cmdline)

    def _svn(self, *args):
        """Run "svn" with *args*, passing "-q" unless in debug mode."""
        if self._debugflag:
            options = []
        else:
            options = ["-q"]
        return self._run(["svn"] + options + list(args))

    def _svn_to_file(self, stdout_file, *args):
        """Run "svn" with *args*, writing its output to *stdout_file*.

        Unlike _svn(), "-q" is never added.  Raises Error (from _run) if
        svn exits with a non-zero status.
        """
        return self._run(["svn"] + list(args), stdout_file)

    def _warning(self, msg):
        self._info(" - *** Warning: %s" % msg)
        self._warnings += 1

def _read_merges_file(path):
    """Return the extra merge parents listed in the file *path*.

    Every non-blank line holds two whitespace separated integers: an hg
    revision number and the extra parent that revision should get.  The
    result maps the former to the latter.  Raises Error for a line of any
    other form.
    """
    merges = {}
    fp = open(path)
    try:
        for (index, line) in enumerate(fp):
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. at the end of the file.
                continue
            try:
                (child, parent) = [int(field) for field in fields]
            except ValueError:
                raise Error(
                    "bad line %d in merges file %r: %r"
                    % (index + 1, path, line.strip()))
            merges[child] = parent
    finally:
        fp.close()
    return merges

def main():
    """Parse the command line and run the requested operation.

    Either generates a user file template (-g) or converts the svn
    repository (-u).  Errors derived from Error are reported on standard
    error and end the process with exit status 1.
    """
    option_parser = optparse.OptionParser(
        version="0.2+",
        usage="yasvn2hg [options] <svn-repo-url> <hg-dest-dir>",
        description="yasvn2hg is a simple hack that converts a Subversion"
        " repository to a Mercurial repository. Some debug log information is"
        " written to <hg-dest-dir>.log.")
    option_parser.add_option(
        "-g", "--generate-user-file-template",
        metavar="FILE",
        dest="user_file_template",
        help="generate a mapping of SVN user names to HG user names and write"
        " it to FILE")
    option_parser.add_option(
        "-m", "--merges-file",
        metavar="FILE",
        help="read extra merge arrows from FILE")
    option_parser.add_option(
        "-r", "--revision-range",
        metavar="REVRANGE",
        help="SVN revision range to convert (default: 1:HEAD)")
    option_parser.add_option(
        "-u", "--user-file",
        metavar="FILE",
        help="file mapping SVN user names to HG user names")
    option_parser.add_option(
        "--debug",
        help="also print debug output to standard output",
        action="store_true")
    option_parser.add_option(
        "--no-structure",
        help="don't look for tags/branches/trunk",
        action="store_true")
    option_parser.add_option(
        "--sloppy",
        help="sloppy conversion; ignore unclassifiable changesets",
        action="store_true")
    option_parser.add_option(
        "--use-named-branches",
        help="use named branches",
        action="store_true")
    option_parser.set_defaults(
        revision_range="1:HEAD",
        debug=False)
    (options, args) = option_parser.parse_args()

    if not options.user_file and not options.user_file_template:
        option_parser.error(
            "must give -u/--user-file or -g/--generate-user-file-template")

    if len(args) != 2:
        option_parser.error("bad arguments; try the -h flag")

    svn_url = args[0]
    hg_dest = args[1]

    # The exception object is fetched with sys.exc_info() because that
    # spelling is accepted by every Python version.
    try:
        converter = \
          Svn2HgConverter(
            svn_url, hg_dest, options.debug, options.use_named_branches)
        if options.user_file_template:
            converter.generate_user_file_template(options.user_file_template)
        else:
            merges_to_add = {}
            if options.merges_file:
                merges_to_add = _read_merges_file(options.merges_file)
            converter.convert(
                options.user_file, options.revision_range, merges_to_add,
                options.sloppy, options.no_structure)
    except ClassificationError:
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        sys.stderr.write(
            "\nYou might try to rerun with the --sloppy option to ignore"
            " unclassifiable\nchangesets.\n")
        sys.exit(1)
    except Error:
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        sys.exit(1)

# Only run the converter when executed as a script, so that importing the
# module (e.g. to reuse Svn2HgConverter) has no side effects.
if __name__ == "__main__":
    main()


Zerion Mini Shell 1.0