]> git.ipfire.org Git - thirdparty/git.git/blob - contrib/remote-helpers/git-remote-bzr
6a7f836ea6a41a59a66f223c3f2aaaf603943e76
[thirdparty/git.git] / contrib / remote-helpers / git-remote-bzr
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2012 Felipe Contreras
4 #
5
6 #
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
10 #
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
15 #
16
17 import sys
18
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
22
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
25
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
30
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
37
# Leading part of a user string up to the first angle bracket (the name).
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <email>" where the name is optional; groups: (name, email).
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Ident line of a fast-export stream, e.g.
# "author Name <email> 1350000000 +0200";
# groups: (keyword, name, email, timestamp, timezone).
# Raw strings keep the regex escapes (\w, \d) out of Python's own escaping.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
41
def die(msg, *args):
    """Write an error message to stderr and exit with status 1.

    msg is %-formatted with args only when args are supplied, so a message
    that was already formatted by the caller (and may contain a literal
    '%', e.g. from a file name) is reported verbatim instead of raising.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
45
def warn(msg, *args):
    """Write a warning message to stderr.

    msg is %-formatted with args only when args are supplied, so an
    already-formatted message containing a literal '%' is printed as-is.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
48
def gittz(tz):
    """Format a signed UTC offset in seconds as git's '+HHMM' / '-HHMM'.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative offset that is not a whole number of hours would
    otherwise yield the wrong zone (e.g. -12600 must become '-0330').
    A missing offset (None) is treated as zero.
    """
    tz = int(tz or 0)
    sign = '-' if tz < 0 else '+'
    hours, rest = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, rest // 60)
51
class Marks:
    """Two-way table between revision ids and fast-import marks.

    The table is kept in a JSON file at `path` holding three keys:
    'tips' (branch name -> last exported revision id), 'marks'
    (revision id -> mark) and 'last-mark' (highest mark handed out).
    The reverse table (mark -> revision id) is rebuilt on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the GC.
        with open(self.path) as f:
            tmp = json.load(f)
        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the persistent state as a JSON-serializable dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of revision `rev`; KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id of `mark`; KeyError if it is unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark for revision `rev` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse table in step so to_rev() also works for marks
        # handed out during this run, as new_mark() already does.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether revision `rev` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for revision `rev`."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of `branch`, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Remember `tip` as the tip of `branch`."""
        self.tips[branch] = tip
111
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    `line` always holds the current (whitespace-stripped) input line;
    `repo` is stored untouched for the command handlers to use.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin, stripped of surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block of commands ends at the first empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a literal 'done' is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' command.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into (user, timestamp, utc_offset_seconds).

        Returns None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        if name:
            committer = '%s <%s>' % (name, email)
        else:
            # The name group is optional; do not render it as "None".
            committer = '<%s>' % email
        # Convert "+HHMM"/"-HHMM" to seconds.  The sign is applied to the
        # whole offset: splitting a negative integer with floor division
        # and modulo would turn "-0330" into -2h50m.
        sign = -1 if tz.startswith('-') else 1
        hhmm = int(tz[1:])
        tz = sign * (((hhmm // 100) * 3600) + ((hhmm % 100) * 60))
        return (committer, int(date), tz)
160
def rev_to_mark(rev):
    """Look up the mark recorded for revision id `rev` in the global marks table."""
    mark = marks.from_rev(rev)
    return mark
164
def mark_to_rev(mark):
    """Look up the revision id recorded for `mark` in the global marks table."""
    rev = marks.to_rev(mark)
    return rev
168
def fixup_user(user):
    """Normalize a free-form user string to 'Name <email>'.

    Double quotes are dropped.  A string of the form 'Name <email>' (name
    optional) is split by AUTHOR_RE; otherwise everything before the first
    angle bracket is taken as the name.  A part that is missing or empty
    is replaced by a placeholder instead of being rendered as "None".
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    if not mail:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
182
def get_filechanges(cur, prev):
    """Summarize the differences between two trees.

    Returns (modified, removed): `modified` maps UTF-8 encoded paths to
    file ids for everything added, modified or renamed-to; `removed` maps
    UTF-8 encoded paths of deleted or renamed-away entries to None.
    """
    delta = cur.changes_from(prev)
    modified, removed = {}, {}

    def utf8(text):
        return text.encode('utf-8')

    for added_path, file_id, _kind in delta.added:
        modified[utf8(added_path)] = file_id

    for gone_path, _file_id, _kind in delta.removed:
        removed[utf8(gone_path)] = None

    for changed_path, file_id, _kind, _text_mod, _meta_mod in delta.modified:
        modified[utf8(changed_path)] = file_id

    for old_path, new_path, file_id, kind, _text_mod, _meta_mod in delta.renamed:
        removed[utf8(old_path)] = None
        if kind != 'directory':
            modified[utf8(new_path)] = file_id
            continue
        # A renamed directory: every non-directory entry below the new
        # location is reported under its new path.
        listing = cur.list_files(from_dir=new_path, recursive=True)
        for sub_path, _file_class, sub_kind, sub_id, _entry in listing:
            if sub_kind != 'directory':
                modified[utf8(new_path + '/' + sub_path)] = sub_id

    return modified, removed
209
210 def export_files(tree, files):
211 global marks, filenodes
212
213 final = []
214 for path, fid in files.iteritems():
215 kind = tree.kind(fid)
216
217 h = tree.get_file_sha1(fid)
218
219 if kind == 'symlink':
220 d = tree.get_symlink_target(fid)
221 mode = '120000'
222 elif kind == 'file':
223
224 if tree.is_executable(fid):
225 mode = '100755'
226 else:
227 mode = '100644'
228
229 # is the blob already exported?
230 if h in filenodes:
231 mark = filenodes[h]
232 final.append((mode, mark, path))
233 continue
234
235 d = tree.get_file_text(fid)
236 elif kind == 'directory':
237 continue
238 else:
239 die("Unhandled kind '%s' for path '%s'" % (kind, path))
240
241 mark = marks.next_mark()
242 filenodes[h] = mark
243
244 print "blob"
245 print "mark :%u" % mark
246 print "data %d" % len(d)
247 print d
248
249 final.append((mode, mark, path))
250
251 return final
252
253 def export_branch(branch, name):
254 global prefix
255
256 ref = '%s/heads/%s' % (prefix, name)
257 tip = marks.get_tip(name)
258
259 repo = branch.repository
260 repo.lock_read()
261 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
262 count = 0
263
264 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
265
266 for revid in revs:
267
268 rev = repo.get_revision(revid)
269
270 parents = rev.parent_ids
271 time = rev.timestamp
272 tz = rev.timezone
273 committer = rev.committer.encode('utf-8')
274 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
275 authors = rev.get_apparent_authors()
276 if authors:
277 author = authors[0].encode('utf-8')
278 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
279 else:
280 author = committer
281 msg = rev.message.encode('utf-8')
282
283 msg += '\n'
284
285 if len(parents) == 0:
286 parent = bzrlib.revision.NULL_REVISION
287 else:
288 parent = parents[0]
289
290 cur_tree = repo.revision_tree(revid)
291 prev = repo.revision_tree(parent)
292 modified, removed = get_filechanges(cur_tree, prev)
293
294 modified_final = export_files(cur_tree, modified)
295
296 if len(parents) == 0:
297 print 'reset %s' % ref
298
299 print "commit %s" % ref
300 print "mark :%d" % (marks.get_mark(revid))
301 print "author %s" % (author)
302 print "committer %s" % (committer)
303 print "data %d" % (len(msg))
304 print msg
305
306 for i, p in enumerate(parents):
307 try:
308 m = rev_to_mark(p)
309 except KeyError:
310 # ghost?
311 continue
312 if i == 0:
313 print "from :%s" % m
314 else:
315 print "merge :%s" % m
316
317 for f in removed:
318 print "D %s" % (f,)
319 for f in modified_final:
320 print "M %s :%u %s" % f
321 print
322
323 count += 1
324 if (count % 100 == 0):
325 print "progress revision %s (%d/%d)" % (revid, count, len(revs))
326 print "#############################################################"
327
328 repo.unlock()
329
330 revid = branch.last_revision()
331
332 # make sure the ref is updated
333 print "reset %s" % ref
334 print "from :%u" % rev_to_mark(revid)
335 print
336
337 marks.set_tip(name, revid)
338
339 def export_tag(repo, name):
340 global tags, prefix
341
342 ref = '%s/tags/%s' % (prefix, name)
343 print "reset %s" % ref
344 print "from :%u" % rev_to_mark(tags[name])
345 print
346
347 def do_import(parser):
348 global dirname
349
350 branch = parser.repo
351 path = os.path.join(dirname, 'marks-git')
352
353 print "feature done"
354 if os.path.exists(path):
355 print "feature import-marks=%s" % path
356 print "feature export-marks=%s" % path
357 sys.stdout.flush()
358
359 while parser.check('import'):
360 ref = parser[1]
361 if ref.startswith('refs/heads/'):
362 name = ref[len('refs/heads/'):]
363 export_branch(branch, name)
364 if ref.startswith('refs/tags/'):
365 name = ref[len('refs/tags/'):]
366 export_tag(branch, name)
367 parser.next()
368
369 print 'done'
370
371 sys.stdout.flush()
372
def parse_blob(parser):
    """Consume a 'blob' command and keep its payload in the global
    `blob_marks` table, keyed by the blob's mark.
    """
    parser.next()               # step from 'blob' to the 'mark' line
    blob_mark = parser.get_mark()
    parser.next()               # step to the 'data' line
    blob_marks[blob_mark] = parser.get_data()
    parser.next()               # step past the payload
382
class CustomTree():
    """In-memory tree describing one incoming commit on top of its parent.

    Built by parse_commit from the file commands of a fast-export commit
    and handed to the commit builder.  Per-revision file tables are cached
    in the global `files_cache`; each maps path -> [file_id, mark].
    """

    def __init__(self, repo, revid, parents, files):
        """
        repo    -- object providing lock_read()/unlock() and `.repository`
        revid   -- id of the revision being created
        parents -- parent revision ids; the first one is the base
        files   -- path -> dict with either 'deleted', or 'mode' and 'mark'
        """
        global files_cache

        # file_id -> file dict (with 'path' added) for entries this commit touches
        self.updates = {}

        def copy_tree(revid):
            # Build the file table of an existing revision from its tree.
            files = files_cache[revid] = {}
            repo.lock_read()
            try:
                # Inside the try so the lock is released if the lookup fails.
                tree = repo.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()
        # file_id -> [path, mark]
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                # Path unknown in the base: allocate a new file id.
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the base (first parent) revision."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for the touched entries.

        Each tuple has eight fields, starting with the file id and an
        (old_path, new_path) pair; NOTE(review): the layout is presumably
        the one bzrlib's record_iter_changes expects -- confirm.
        Also updates self.files / self.rev_files to the new state.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory `dirname`, creating an
            # 'add' change for it when it exists in neither table.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # NOTE: `path` is the loop variable of the enclosing method.
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # Remember which blob holds the new content.
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object with the blob content, None)."""
        path, mark = self.rev_files[file_id]
        return (StringIO.StringIO(blob_marks[mark]), None)

    def get_symlink_target(self, file_id):
        """Return the blob content stored for `file_id`."""
        path, mark = self.rev_files[file_id]
        return blob_marks[mark]

    def id2path(self, file_id):
        """Return the path recorded for `file_id`."""
        path, mark = self.rev_files[file_id]
        return path
528
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string wrapped in double quotes has its backslash escapes decoded
    and the quotes removed; anything else -- including the empty string
    and a lone quote character -- is returned unchanged.
    """
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
533
def parse_commit(parser):
    """Turn one fast-export 'commit' command into a new revision.

    Reads the mark, ident lines, message, parents and file commands from
    `parser`, builds the revision through the repository's commit builder,
    then records it in the global `parsed_refs` and `marks` tables.
    Only refs/heads/master is accepted.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is skipped; only the committer is recorded.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith also copes with an empty message)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # Split once only: a quoted path may itself contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Roll back the half-built commit, then let the error surface.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
606
def parse_reset(parser):
    """Handle a fast-export 'reset' command for refs/heads/master.

    A reset directly followed by a commit is delegated to parse_commit;
    a reset with a 'from' line records the referenced revision in the
    global `parsed_refs`; a bare reset is ignored.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(source_mark)
626
627 def do_export(parser):
628 global parsed_refs, dirname, peer
629
630 parser.next()
631
632 for line in parser.each_block('done'):
633 if parser.check('blob'):
634 parse_blob(parser)
635 elif parser.check('commit'):
636 parse_commit(parser)
637 elif parser.check('reset'):
638 parse_reset(parser)
639 elif parser.check('tag'):
640 pass
641 elif parser.check('feature'):
642 pass
643 else:
644 die('unhandled export command: %s' % line)
645
646 repo = parser.repo
647
648 for ref, revid in parsed_refs.iteritems():
649 if ref == 'refs/heads/master':
650 repo.generate_revision_history(revid, marks.get_tip('master'))
651 if peer:
652 try:
653 repo.push(peer, stop_revision=revid)
654 except bzrlib.errors.DivergedBranches:
655 print "error %s non-fast forward" % ref
656 continue
657
658 try:
659 wt = repo.bzrdir.open_workingtree()
660 wt.update()
661 except bzrlib.errors.NoWorkingTree:
662 pass
663
664 print "ok %s" % ref
665
666 print
667
668 def do_capabilities(parser):
669 global dirname
670
671 print "import"
672 print "export"
673 print "refspec refs/heads/*:%s/heads/*" % prefix
674 print "refspec refs/tags/*:%s/tags/*" % prefix
675
676 path = os.path.join(dirname, 'marks-git')
677
678 if os.path.exists(path):
679 print "*import-marks %s" % path
680 print "*export-marks %s" % path
681
682 print
683
def ref_is_valid(name):
    """Tell whether `name` is free of the characters '~', '^', ':', space
    and backslash.
    """
    return not any(ch in name for ch in '~^: \\')
686
687 def do_list(parser):
688 global tags
689 print "? refs/heads/%s" % 'master'
690
691 branch = parser.repo
692 branch.lock_read()
693 for tag, revid in branch.tags.get_tag_dict().items():
694 try:
695 branch.revision_id_to_dotted_revno(revid)
696 except bzrlib.errors.NoSuchRevision:
697 continue
698 if not ref_is_valid(tag):
699 continue
700 print "? refs/tags/%s" % tag
701 tags[tag] = revid
702 branch.unlock()
703 print "@refs/heads/%s HEAD" % 'master'
704 print
705
def get_repo(url, alias):
    """Open the branch at `url` and return the branch to operate on.

    For a local transport the branch itself is returned and the global
    `peer` is set to None.  Otherwise a clone under <dirname>/clone is
    pulled (if it exists) or created and bound to the remote branch; the
    clone's branch is returned and `peer` is set to the remote branch.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return branch

    remote_branch = branch
    clone_path = os.path.join(dirname, 'clone')

    if os.path.exists(clone_path):
        # pull
        local_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = local_dir.open_branch()
        branch.pull(remote_branch, [], None, False)
    else:
        # clone
        local_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = local_dir.open_branch()
        branch.bind(remote_branch)

    peer = remote_branch
    return branch
733
def fix_path(alias, orig_url):
    """Rewrite a relative local remote URL in the git config as absolute.

    Nothing happens for URLs with a scheme other than 'file' or with an
    absolute path; otherwise remote.<alias>.url is set through
    'git config' to the URL joined onto the current working directory.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    relative_local = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if relative_local:
        base = "%s/" % os.getcwd()
        abs_url = urlparse.urljoin(base, orig_url)
        subprocess.call([
            'git', 'config',
            'remote.%s.url' % alias,
            "bzr::%s" % abs_url,
        ])
741
def main(args):
    """Entry point of the remote helper.

    args[1] is the remote alias and args[2] its URL.  Sets up the global
    state, opens the branch, loads the marks and then serves the commands
    read from stdin until an empty line.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias, url = args[1], args[2]

    tags, filenodes, blob_marks = {}, {}, {}
    parsed_refs, files_cache = {}, {}
    marks = None

    # An alias that is just the URL behind a 5-character prefix means no
    # named remote is in use: work under a hashed name.
    is_tmp = (alias[5:] == url)
    if is_tmp:
        alias = hashlib.sha1(alias).hexdigest()

    prefix = 'refs/bzr/%s' % alias
    dirname = os.path.join(os.environ['GIT_DIR'], 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
796
def bye():
    """Exit hook: save the marks, or delete the working directory when it
    belongs to a temporary remote.  Does nothing if no marks were loaded.
    """
    if not marks:
        return
    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
804
# Run only when executed as a script, so that importing this module does
# not register the exit hook or start reading commands from stdin.
if __name__ == '__main__':
    atexit.register(bye)
    sys.exit(main(sys.argv))