]> git.ipfire.org Git - thirdparty/git.git/blob - contrib/remote-helpers/git-remote-bzr
fdead31a6323916b640ccda5555560415f830e67
[thirdparty/git.git] / contrib / remote-helpers / git-remote-bzr
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2012 Felipe Contreras
4 #
5
6 #
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
10 #
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
15 #
16
17 import sys
18
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
22
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
25
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
30
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
37
# Patterns used to normalise bzr user strings and to parse the
# author/committer lines of a git fast-export stream.  Raw strings keep
# the backslash escapes (\w, \d) from being interpreted by Python itself.

# Leading run of characters up to the first '<' or '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by a mandatory "<mail>" that ends the string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# "<keyword> <name> <mail> <timestamp> <+-offset>"
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
41
def die(msg, *args):
    """Write an ERROR line to stderr and exit with status 1.

    `msg` is %-formatted with `args` only when arguments are supplied, so
    an already formatted message that happens to contain a '%' (for
    example a file path) is reported verbatim instead of raising a
    formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
45
def warn(msg, *args):
    """Write a WARNING line to stderr.

    `msg` is %-formatted with `args` only when arguments are supplied, so
    a literal '%' in an already formatted message does not raise.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
48
def gittz(tz):
    """Format a UTC offset given in seconds as a git '+HHMM'/'-HHMM' string.

    The sign is handled separately from the magnitude: flooring division
    and modulo on a negative offset would otherwise turn -03:30 into
    '-0430'.
    """
    sign = '-' if tz < 0 else '+'
    hours, remainder = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
51
class Marks:
    """Mapping between bzr revision ids and fast-import marks.

    The state (branch tips, revision -> mark table and the last mark
    handed out) is kept in a JSON file at `path`; it is read on
    construction when the file exists and written back by store().
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the stored state from `self.path`; do nothing if it is absent."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on GC.
        with open(self.path) as f:
            tmp = json.load(f)
        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # The reverse table is not stored; rebuild it from the marks.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state that gets serialised by store()."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the current state to `self.path` as JSON."""
        # The with-block guarantees the data is flushed and the file closed.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark recorded for `rev`; raises KeyError if unknown."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision recorded for `mark`; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, record it for `rev` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse table in step so to_rev() gives the same answer
        # in this session as it would after a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Return True when `rev` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev` and make it the last mark."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip for `branch`, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record `tip` as the tip of `branch`."""
        self.tips[branch] = tip
111
class Parser:
    """Line-oriented reader for the commands arriving on stdin.

    `self.line` always holds the current (stripped) line; the helper
    methods inspect or advance it.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped of whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True when the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals `separator`.

        A new line is read after every yielded one, so the consumer may
        advance the parser itself while handling a line.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block of commands is terminated by an empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload of a 'data <size>' line from stdin.

        Returns None when the current line is not a data command.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an author/committer line.

        Returns a ('name <mail>', timestamp, utc_offset_seconds) tuple, or
        None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # The offset is written as [+-]HHMM.  Convert the magnitude and
        # re-apply the sign afterwards: flooring division and modulo on a
        # negative number would turn -0330 into -02:50.
        tz = int(tz)
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (committer, int(date), tz)
160
def rev_to_mark(rev):
    """Look up the mark stored for `rev` in the module-level marks table."""
    mark = marks.from_rev(rev)
    return mark
164
def mark_to_rev(mark):
    """Look up the revision stored for `mark` in the module-level marks table."""
    rev = marks.to_rev(mark)
    return rev
168
def fixup_user(user):
    """Normalise a bzr user string into git's 'name <mail>' form.

    Double quotes are dropped.  'Name <mail>' is split with AUTHOR_RE;
    otherwise the leading text matched by NAME_RE is used as the name.
    A missing name or mail is replaced by a placeholder so the literal
    text 'None' is never emitted.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    # Placeholders for whatever could not be extracted.
    if not name:
        name = 'unknown'
    if not mail:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
182
def get_filechanges(cur, prev):
    """Summarise the differences between tree `cur` and tree `prev`.

    Returns a (modified, removed) pair of dicts keyed by UTF-8 encoded
    path.  `modified` maps each added, modified or renamed-to path to its
    file id; `removed` maps each removed or renamed-from path to None.
    A renamed directory contributes every non-directory entry listed
    below its new location.
    """
    delta = cur.changes_from(prev)
    modified = {}
    removed = {}

    def to_utf8(text):
        return text.encode('utf-8')

    for added_path, added_id, _kind in delta.added:
        modified[to_utf8(added_path)] = added_id

    for removed_path, _fid, _kind in delta.removed:
        removed[to_utf8(removed_path)] = None

    for changed_path, changed_id, _kind, _text_mod, _meta_mod in delta.modified:
        modified[to_utf8(changed_path)] = changed_id

    for old_path, new_path, moved_id, moved_kind, _text_mod, _meta_mod in delta.renamed:
        removed[to_utf8(old_path)] = None
        if moved_kind != 'directory':
            modified[to_utf8(new_path)] = moved_id
            continue
        # A moved directory: re-add everything that now lives below it.
        children = cur.list_files(from_dir=new_path, recursive=True)
        for child_path, _file_class, child_kind, child_id, _entry in children:
            if child_kind != 'directory':
                modified[to_utf8(new_path + '/' + child_path)] = child_id

    return modified, removed
209
def export_files(tree, files):
    """Emit fast-import blobs for `files` and return their file commands.

    `files` maps path -> file id within `tree`.  A blob is printed for
    every symlink and for every regular file whose sha1 has not been
    exported yet (tracked in the module-level `filenodes`); directories
    are skipped.  Returns a list of (mode, mark, path) tuples.
    """
    global marks, filenodes

    final = []
    for path, fid in files.iteritems():
        kind = tree.kind(fid)

        h = tree.get_file_sha1(fid)

        if kind == 'symlink':
            d = tree.get_symlink_target(fid)
            mode = '120000'
        elif kind == 'file':

            if tree.is_executable(fid):
                mode = '100755'
            else:
                mode = '100644'

            # is the blob already exported?
            if h in filenodes:
                mark = filenodes[h]
                final.append((mode, mark, path))
                continue

            d = tree.get_file_text(fid)
        elif kind == 'directory':
            continue
        else:
            # Let die() do the formatting so a '%' in the path cannot
            # break the error report.
            die("Unhandled kind '%s' for path '%s'", kind, path)

        mark = marks.next_mark()
        filenodes[h] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(d))
        print(d)

        final.append((mode, mark, path))

    return final
252
def export_branch(branch, name):
    """Print a fast-import stream for the not yet marked revisions of `branch`.

    Revisions are walked forward from the tip recorded for `name` in the
    module-level marks; each one becomes a commit on
    '<prefix>/heads/<name>'.  Afterwards the ref is reset to the branch's
    last revision and that revision is recorded as the new tip.
    """
    global prefix

    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    # Release the read lock even when exporting a revision fails.
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        count = 0

        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            time = rev.timestamp
            tz = rev.timezone
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
            authors = rev.get_apparent_authors()
            if authors:
                author = authors[0].encode('utf-8')
                author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
            else:
                author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            # File changes are computed against the first parent only.
            if len(parents) == 0:
                parent = bzrlib.revision.NULL_REVISION
            else:
                parent = parents[0]

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            modified_final = export_files(cur_tree, modified)

            if len(parents) == 0:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            for f in removed:
                print("D %s" % (f,))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print("")

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
338
def export_tag(repo, name):
    """Print a reset that points '<prefix>/tags/<name>' at the tagged revision.

    The revision is taken from the module-level `tags` table; `repo` is
    not used.
    """
    global tags, prefix

    tag_ref = '%s/tags/%s' % (prefix, name)
    print("reset %s" % tag_ref)
    tagged_rev = tags[name]
    print("from :%u" % rev_to_mark(tagged_rev))
    print("")
346
def do_import(parser):
    """Handle a run of 'import <ref>' commands.

    Prints the fast-import feature header, exports every requested
    branch or tag, then terminates the stream with 'done'.
    """
    global dirname

    heads = 'refs/heads/'
    tag_refs = 'refs/tags/'

    branch = parser.repo
    git_marks = os.path.join(dirname, 'marks-git')

    print("feature done")
    # Marks can only be imported once a previous run has exported them.
    if os.path.exists(git_marks):
        print("feature import-marks=%s" % git_marks)
    print("feature export-marks=%s" % git_marks)
    print("feature force")
    sys.stdout.flush()

    while parser.check('import'):
        requested = parser[1]
        if requested.startswith(heads):
            export_branch(branch, requested[len(heads):])
        if requested.startswith(tag_refs):
            export_tag(branch, requested[len(tag_refs):])
        parser.next()

    print('done')

    sys.stdout.flush()
373
def parse_blob(parser):
    """Consume a 'blob' command and remember its payload by mark.

    The payload is stored in the module-level `blob_marks` table.
    """
    global blob_marks

    parser.next()                       # move from 'blob' to the mark line
    blob_mark = parser.get_mark()
    parser.next()                       # move on to the data line
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
383
class CustomTree():
    """In-memory tree describing one incoming git commit.

    `self.files` maps path -> [file_id, mark] and `self.rev_files` maps
    file_id -> [path, mark]; a mark of None means the content comes
    unchanged from the base revision.  The object exposes the methods
    this script's commit code hands to the bzr commit builder.
    """

    def __init__(self, repo, revid, parents, files):
        global files_cache

        self.updates = {}
        self.branch = repo

        def copy_tree(revid):
            # Snapshot path -> [file_id, None] for an existing revision.
            files = files_cache[revid] = {}
            repo.lock_read()
            # Everything done under the lock sits inside the try block so
            # a failing revision_tree() cannot leak the lock.
            try:
                tree = repo.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            # Only the first parent serves as the base of the change set.
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for the updates of this commit.

        Missing parent directories are added on the fly.  `self.files`
        and `self.rev_files` are updated as a side effect.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory `dirname`, creating an entry
            # for it when neither the base nor the current tree has one.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # Name the new id after the directory itself rather than after
            # whichever file the enclosing loop happens to be processing.
            fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # Remember which blob holds the new content of this file.
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_content(self, file_id):
        """Return the content of `file_id`.

        The blob received in this session is used when the file has a
        mark; otherwise the text is read from the base revision.
        """
        path, mark = self.rev_files[file_id]
        if mark:
            return blob_marks[mark]

        # last resort
        tree = self.branch.repository.revision_tree(self.base_id)
        return tree.get_file_text(file_id)

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object with the content, None)."""
        content = self.get_content(file_id)
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the link target, which is stored as the blob content."""
        return self.get_content(file_id)

    def id2path(self, file_id):
        """Return the path recorded for `file_id`."""
        path, mark = self.rev_files[file_id]
        return path
538
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string wrapped in double quotes has its escape sequences decoded
    and the quotes removed; anything else, including the empty string,
    is returned unchanged.
    """
    # The length check keeps '' from raising IndexError.
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
543
def parse_commit(parser):
    """Consume a 'commit' command and create the matching bzr revision.

    Only 'refs/heads/master' is accepted.  The new revision id is
    recorded in `parsed_refs` for the ref and in `marks` under the
    commit's mark.
    """
    global marks, blob_marks, parsed_refs
    global mode

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is stepped over; only the committer is recorded.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith also copes with an empty commit message)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
616
def parse_reset(parser):
    """Consume a 'reset' command.

    Only 'refs/heads/master' is accepted.  A reset immediately followed
    by a commit is handed to parse_commit(); a reset with a 'from' line
    records the referenced revision for the ref in `parsed_refs`.
    """
    global parsed_refs

    target_ref = parser[1]
    parser.next()

    if target_ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        source_mark = parser.get_mark()
        parser.next()
        parsed_refs[target_ref] = mark_to_rev(source_mark)
636
def do_export(parser):
    """Handle an 'export' command.

    Reads the fast-export stream up to 'done', then, for
    'refs/heads/master', moves the branch to the new revision, pushes it
    to the peer branch when there is one and updates the working tree if
    present.  One 'ok'/'error' status line is printed per parsed ref.
    """
    global parsed_refs, dirname, peer

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            pass
        elif parser.check('feature'):
            pass
        else:
            # Let die() do the formatting so a '%' in the line cannot
            # break the error report.
            die('unhandled export command: %s', line)

    repo = parser.repo

    for ref, revid in parsed_refs.iteritems():
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            if peer:
                try:
                    repo.push(peer, stop_revision=revid)
                except bzrlib.errors.DivergedBranches:
                    print("error %s non-fast forward" % ref)
                    continue

            try:
                wt = repo.bzrdir.open_workingtree()
                wt.update()
            except bzrlib.errors.NoWorkingTree:
                pass

        print("ok %s" % ref)

    print("")
677
def do_capabilities(parser):
    """Print the capability list of this helper, ended by a blank line."""
    global dirname

    git_marks = os.path.join(dirname, 'marks-git')

    lines = [
        "import",
        "export",
        "refspec refs/heads/*:%s/heads/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    # import-marks is only offered once the marks file exists.
    if os.path.exists(git_marks):
        lines.append("*import-marks %s" % git_marks)
    lines.append("*export-marks %s" % git_marks)
    lines.append("")

    for capability in lines:
        print(capability)
693
def ref_is_valid(name):
    """Return True unless `name` contains '~', '^', ':', a space or a backslash."""
    return not any(c in name for c in '~^: \\')
696
def do_list(parser):
    """Print the refs offered to git: 'master' plus every usable tag.

    A tag is skipped when its revision cannot be resolved in the branch
    or when its name fails ref_is_valid().  Listed tags are remembered
    in the module-level `tags` table.
    """
    global tags
    print("? refs/heads/%s" % 'master')

    branch = parser.repo
    branch.lock_read()
    # Release the read lock even when listing the tags fails.
    try:
        for tag, revid in branch.tags.get_tag_dict().items():
            try:
                branch.revision_id_to_dotted_revno(revid)
            except bzrlib.errors.NoSuchRevision:
                continue
            if not ref_is_valid(tag):
                continue
            print("? refs/tags/%s" % tag)
            tags[tag] = revid
    finally:
        branch.unlock()
    print("@refs/heads/%s HEAD" % 'master')
    print("")
715
def get_repo(url, alias):
    """Open the branch at `url` and return the branch to operate on.

    A branch on a local transport is returned directly and `peer` is set
    to None.  Otherwise a clone under '<dirname>/clone' is created, or
    updated by pulling, and returned, with `peer` set to the remote
    branch.  If the existing clone has diverged, the remote branch itself
    is returned and `peer` is None.  `alias` is not used.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    remote_branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return remote_branch

    clone_path = os.path.join(dirname, 'clone')
    if not os.path.exists(clone_path):
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        local_branch = clone_dir.open_branch()
        local_branch.bind(remote_branch)
    else:
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        local_branch = clone_dir.open_branch()
        try:
            local_branch.pull(remote_branch, [], None, False)
        except bzrlib.errors.DivergedBranches:
            # use remote branch for now
            peer = None
            return remote_branch

    peer = remote_branch
    return local_branch
748
def fix_path(alias, orig_url):
    """Rewrite a relative file URL of remote `alias` as an absolute one.

    Nothing happens for non-file URLs or absolute paths.  Otherwise the
    URL is joined with the current working directory and stored in
    'remote.<alias>.url' through `git config`.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return

    base = "%s/" % os.getcwd()
    absolute = urlparse.urljoin(base, orig_url)
    config_key = 'remote.%s.url' % alias
    subprocess.call(['git', 'config', config_key, "bzr::%s" % absolute])
756
def main(args):
    """Entry point of the remote helper.

    `args[1]` is the remote alias and `args[2]` its URL.  Sets up the
    module-level state, opens the repository and then serves the
    commands read from stdin until an empty line arrives.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias = args[1]
    url = args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    # When the alias, minus its first five characters, equals the URL the
    # remote is treated as temporary: a hash is used as the directory
    # name and the state is removed again on exit.
    if alias[5:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Let die() do the formatting so a '%' in the line cannot
            # break the error report.
            die('unhandled command: %s', line)
        sys.stdout.flush()
811
def bye():
    """Exit hook: store the marks, or remove the state of a temporary remote.

    Does nothing when no marks object exists, which includes the case
    where main() failed before it initialised the module-level state.
    """
    # Look the global up defensively: this runs at interpreter exit even
    # if main() never got as far as assigning `marks`.
    current_marks = globals().get('marks')
    if not current_marks:
        return
    if not is_tmp:
        current_marks.store()
    else:
        shutil.rmtree(dirname)
819
# Register the exit hook first so it also runs when main() exits early.
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)