[thirdparty/git.git] / perl / Git.pm

=head1 NAME

Git - Perl interface to the Git version control system

=cut


package Git;

use strict;


BEGIN {

our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);

# Totally unstable API.
$VERSION = '0.01';


=head1 SYNOPSIS

  use Git;

  my $version = Git::command_oneline('version');

  Git::command_noisy('update-server-info');

  my $repo = Git->repository (Directory => '/srv/git/cogito.git');


  my @revs = $repo->command('rev-list', '--since=last monday', '--all');

  my $fh = $repo->command_pipe('rev-list', '--since=last monday', '--all');
  my $lastrev = <$fh>; chomp $lastrev;
  close $fh; # You may want to test rev-list exit status here

  my $lastrev = $repo->command_oneline('rev-list', '--all');

=cut


require Exporter;

@ISA = qw(Exporter);

@EXPORT = qw();

# Methods which can be called as standalone functions as well:
@EXPORT_OK = qw(command command_oneline command_pipe command_noisy
                hash_object);


=head1 DESCRIPTION

This module provides Perl scripts easy way to interface the Git version control
system. The modules have an easy and well-tested way to call arbitrary Git
commands; in the future, the interface will also provide specialized methods
for doing easily operations which are not totally trivial to do over
the generic command interface.

While some commands can be executed outside of any context (e.g. 'version'
or 'init-db'), most operations require a repository context, which in practice
means getting an instance of the Git object using the repository() constructor.
(In the future, we will also get a new_repository() constructor.) All commands
called as methods of the object are then executed in the context of the
repository.

TODO: In the future, we might also do

	my $subdir = $repo->subdir('Documentation');
	# Gets called in the subdirectory context:
	$subdir->command('status');

	my $remoterepo = $repo->remote_repository (Name => 'cogito', Branch => 'master');
	$remoterepo ||= Git->remote_repository ('http://git.or.cz/cogito.git/');
	my @refs = $remoterepo->refs();

So far, all functions just die if anything goes wrong. If you don't want that,
make appropriate provisions to catch the possible deaths. Better error recovery
mechanisms will be provided in the future.

Currently, the module merely wraps calls to external Git tools. In the future,
it will provide a much faster way to interact with Git by linking directly
to libgit. This should be completely opaque to the user, though (performance
increate nonwithstanding).

=cut


use Carp qw(carp croak);

require XSLoader;
XSLoader::load('Git', $VERSION);

}


=head1 CONSTRUCTORS

=over 4

=item repository ( OPTIONS )

=item repository ( DIRECTORY )

=item repository ()

Construct a new repository object.
C<OPTIONS> are passed in a hash like fashion, using key and value pairs.
Possible options are:

B<Repository> - Path to the Git repository.

B<WorkingCopy> - Path to the associated working copy; not strictly required
as many commands will happily crunch on a bare repository.

B<Directory> - Path to the Git working directory in its usual setup. This
is just for convenient setting of both C<Repository> and C<WorkingCopy>
at once: If the directory as a C<.git> subdirectory, C<Repository> is pointed
to the subdirectory and the directory is assumed to be the working copy.
If the directory does not have the subdirectory, C<WorkingCopy> is left
undefined and C<Repository> is pointed to the directory itself.

B<GitPath> - Path to the C<git> binary executable. By default the C<$PATH>
is searched for it.

You should not use both C<Directory> and either of C<Repository> and
C<WorkingCopy> - the results of that are undefined.

Alternatively, a directory path may be passed as a single scalar argument
to the constructor; it is equivalent to setting only the C<Directory> option
field.

Calling the constructor with no options whatsoever is equivalent to
calling it with C<< Directory => '.' >>.

=cut

sub repository {
	my $class = shift;
	my @args = @_;
	my %opts = ();
	my $self;

	if (defined $args[0]) {
		if ($#args % 2 != 1) {
			# Not a hash.
			$#args == 0 or croak "bad usage";
			%opts = (Directory => $args[0]);
		} else {
			%opts = @args;
		}

		if ($opts{Directory}) {
			-d $opts{Directory} or croak "Directory not found: $!";
			if (-d $opts{Directory}."/.git") {
				# TODO: Might make this more clever
				$opts{WorkingCopy} = $opts{Directory};
				$opts{Repository} = $opts{Directory}."/.git";
			} else {
				$opts{Repository} = $opts{Directory};
			}
			delete $opts{Directory};
		}
	}

	$self = { opts => \%opts };
	bless $self, $class;
}


=back

=head1 METHODS

=over 4

=item command ( COMMAND [, ARGUMENTS... ] )

Execute the given Git C<COMMAND> (specify it without the 'git-'
prefix), optionally with the specified extra C<ARGUMENTS>.

The method can be called without any instance or on a specified Git repository
(in that case the command will be run in the repository context).

In scalar context, it returns all the command output in a single string
(verbatim).

In array context, it returns an array containing lines printed to the
command's stdout (without trailing newlines).

In both cases, the command's stdin and stderr are the same as the caller's.

=cut

sub command {
	my $fh = command_pipe(@_);

	if (not defined wantarray) {
		_cmd_close($fh);

	} elsif (not wantarray) {
		local $/;
		my $text = <$fh>;
		_cmd_close($fh);
		return $text;

	} else {
		my @lines = <$fh>;
		_cmd_close($fh);
		chomp @lines;
		return @lines;
	}
}


=item command_oneline ( COMMAND [, ARGUMENTS... ] )

Execute the given C<COMMAND> in the same way as command()
does but always return a scalar string containing the first line
of the command's standard output.

=cut

sub command_oneline {
	my $fh = command_pipe(@_);

	my $line = <$fh>;
	_cmd_close($fh);

	chomp $line;
	return $line;
}


=item command_pipe ( COMMAND [, ARGUMENTS... ] )

Execute the given C<COMMAND> in the same way as command()
does but return a pipe filehandle from which the command output can be
read.

=cut

sub command_pipe {
	my ($self, $cmd, @args) = _maybe_self(@_);

	$cmd =~ /^[a-z0-9A-Z_-]+$/ or croak "bad command: $cmd";

	my $pid = open(my $fh, "-|");
	if (not defined $pid) {
		croak "open failed: $!";
	} elsif ($pid == 0) {
		_cmd_exec($self, $cmd, @args);
	}
	return $fh;
}


=item command_noisy ( COMMAND [, ARGUMENTS... ] )

Execute the given C<COMMAND> in the same way as command() does but do not
capture the command output - the standard output is not redirected and goes
to the standard output of the caller application.

While the method is called command_noisy(), you might want to as well use
it for the most silent Git commands which you know will never pollute your
stdout but you want to avoid the overhead of the pipe setup when calling them.

The function returns only after the command has finished running.

=cut

sub command_noisy {
	my ($self, $cmd, @args) = _maybe_self(@_);

	$cmd =~ /^[a-z0-9A-Z_-]+$/ or croak "bad command: $cmd";

	my $pid = fork;
	if (not defined $pid) {
		croak "fork failed: $!";
	} elsif ($pid == 0) {
		_cmd_exec($self, $cmd, @args);
	}
	if (waitpid($pid, 0) > 0 and $? != 0) {
		croak "exit status: $?";
	}
}


=item hash_object ( FILENAME [, TYPE ] )

=item hash_object ( FILEHANDLE [, TYPE ] )

Compute the SHA1 object id of the given C<FILENAME> (or data waiting in
C<FILEHANDLE>) considering it is of the C<TYPE> object type (C<blob>
(default), C<commit>, C<tree>).

In case of C<FILEHANDLE> passed instead of file name, all the data
available are read and hashed, and the filehandle is automatically
closed. The file handle should be freshly opened - if you have already
read anything from the file handle, the results are undefined (since
this function works directly with the file descriptor and internal
PerlIO buffering might have messed things up).

The method can be called without any instance or on a specified Git repository,
it makes zero difference.

The function returns the SHA1 hash.

Implementation of this function is very fast; no external command calls
are involved.

=cut

# Implemented in Git.xs.


=back

=head1 TODO

This is still fairly crude.
We need some good way to report errors back except just dying.

=head1 COPYRIGHT

Copyright 2006 by Petr Baudis E<lt>pasky@suse.czE<gt>.

This module is free software; it may be used, copied, modified
and distributed under the terms of the GNU General Public Licence,
either version 2, or (at your option) any later version.

=cut


# Take raw method argument list and return ($obj, @args) in case
# the method was called upon an instance and (undef, @args) if
# it was called directly.
sub _maybe_self {
	# This breaks inheritance. Oh well.
	ref $_[0] eq 'Git' ? @_ : (undef, @_);
}

# When already in the subprocess, set up the appropriate state
# for the given repository and execute the git command.
sub _cmd_exec {
	my ($self, @args) = @_;
	if ($self) {
		$self->{opts}->{Repository} and $ENV{'GIT_DIR'} = $self->{opts}->{Repository};
		$self->{opts}->{WorkingCopy} and chdir($self->{opts}->{WorkingCopy});
	}
	my $git = $self->{opts}->{GitPath};
	$git ||= 'git';
	exec ($git, @args) or croak "exec failed: $!";
}

# Close pipe to a subprocess.
sub _cmd_close {
	my ($fh) = @_;
	if (not close $fh) {
		if ($!) {
			# It's just close, no point in fatalities
			carp "error closing pipe: $!";
		} elsif ($? >> 8) {
			croak "exit status: ".($? >> 8);
		}
		# else we might e.g. closed a live stream; the command
		# dying of SIGPIPE would drive us here.
	}
}


# Trickery for .xs routines: In order to avoid having some horrid
# C code trying to do stuff with undefs and hashes, we gate all
# xs calls through the following and in case we are being ran upon
# an instance call a C part of the gate which will set up the
# environment properly.
sub _call_gate {
	my $xsfunc = shift;
	my ($self, @args) = _maybe_self(@_);

	if (defined $self) {
		# XXX: We ignore the WorkingCopy! To properly support
		# that will require heavy changes in libgit.

		# XXX: And we ignore everything else as well. libgit
		# at least needs to be extended to let us specify
		# the $GIT_DIR instead of looking it up in environment.
		#xs_call_gate($self->{opts}->{Repository});
	}

	&$xsfunc(@args);
}

sub AUTOLOAD {
	my $xsname;
	our $AUTOLOAD;
	($xsname = $AUTOLOAD) =~ s/.*:://;
	croak "&Git::$xsname not defined" if $xsname =~ /^xs_/;
	$xsname = 'xs_'.$xsname;
	_call_gate(\&$xsname, @_);
}

sub DESTROY { }


1; # Famous last words
Commit	Line	Data
b1edc53d PB	1	=head1 NAME
	2
	3	Git - Perl interface to the Git version control system
	4
	5	=cut
	6
	7
	8	package Git;
	9
	10	use strict;
	11
	12
	13	BEGIN {
	14
	15	our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);
	16
	17	# Totally unstable API.
	18	$VERSION = '0.01';
	19
	20
	21	=head1 SYNOPSIS
	22
	23	use Git;
	24
	25	my $version = Git::command_oneline('version');
	26
	27	Git::command_noisy('update-server-info');
	28
	29	my $repo = Git->repository (Directory => '/srv/git/cogito.git');
	30
	31
	32	my @revs = $repo->command('rev-list', '--since=last monday', '--all');
	33
	34	my $fh = $repo->command_pipe('rev-list', '--since=last monday', '--all');
	35	my $lastrev = <$fh>; chomp $lastrev;
	36	close $fh; # You may want to test rev-list exit status here
	37
	38	my $lastrev = $repo->command_oneline('rev-list', '--all');
	39
	40	=cut
	41
	42
	43	require Exporter;
	44
	45	@ISA = qw(Exporter);
	46
	47	@EXPORT = qw();
	48
	49	# Methods which can be called as standalone functions as well:
	50	@EXPORT_OK = qw(command command_oneline command_pipe command_noisy
	51	hash_object);
	52
	53
	54	=head1 DESCRIPTION
	55
	56	This module provides Perl scripts easy way to interface the Git version control
	57	system. The modules have an easy and well-tested way to call arbitrary Git
	58	commands; in the future, the interface will also provide specialized methods
	59	for doing easily operations which are not totally trivial to do over
	60	the generic command interface.
	61
	62	While some commands can be executed outside of any context (e.g. 'version'
	63	or 'init-db'), most operations require a repository context, which in practice
	64	means getting an instance of the Git object using the repository() constructor.
65	(In the future, we will also get a new_repository() constructor.) All commands
66	called as methods of the object are then executed in the context of the
67	repository.
68
69	TODO: In the future, we might also do
70
71	my $subdir = $repo->subdir('Documentation');
72	# Gets called in the subdirectory context:
73	$subdir->command('status');
74
75	my $remoterepo = $repo->remote_repository (Name => 'cogito', Branch => 'master');
76	$remoterepo \|\|= Git->remote_repository ('http://git.or.cz/cogito.git/');
77	my @refs = $remoterepo->refs();
78
79	So far, all functions just die if anything goes wrong. If you don't want that,
80	make appropriate provisions to catch the possible deaths. Better error recovery
81	mechanisms will be provided in the future.
82
83	Currently, the module merely wraps calls to external Git tools. In the future,
84	it will provide a much faster way to interact with Git by linking directly
85	to libgit. This should be completely opaque to the user, though (performance
86	increate nonwithstanding).
87
88	=cut
89
90
91	use Carp qw(carp croak);
92
93	require XSLoader;
94	XSLoader::load('Git', $VERSION);
95
96	}
97
98
99	=head1 CONSTRUCTORS
100
101	=over 4
102
103	=item repository ( OPTIONS )
104
105	=item repository ( DIRECTORY )
106
107	=item repository ()
108
109	Construct a new repository object.
110	C<OPTIONS> are passed in a hash like fashion, using key and value pairs.
111	Possible options are:
112
113	B<Repository> - Path to the Git repository.
114
115	B<WorkingCopy> - Path to the associated working copy; not strictly required
116	as many commands will happily crunch on a bare repository.
117
118	B<Directory> - Path to the Git working directory in its usual setup. This
119	is just for convenient setting of both C<Repository> and C<WorkingCopy>
120	at once: If the directory as a C<.git> subdirectory, C<Repository> is pointed
121	to the subdirectory and the directory is assumed to be the working copy.
122	If the directory does not have the subdirectory, C<WorkingCopy> is left
123	undefined and C<Repository> is pointed to the directory itself.
124
125	B<GitPath> - Path to the C<git> binary executable. By default the C<$PATH>
126	is searched for it.
127
128	You should not use both C<Directory> and either of C<Repository> and
129	C<WorkingCopy> - the results of that are undefined.
130
131	Alternatively, a directory path may be passed as a single scalar argument
132	to the constructor; it is equivalent to setting only the C<Directory> option
133	field.
134
135	Calling the constructor with no options whatsoever is equivalent to
136	calling it with C<< Directory => '.' >>.
137
138	=cut
139
140	sub repository {
141	my $class = shift;
142	my @args = @_;
143	my %opts = ();
144	my $self;
145
146	if (defined $args[0]) {
147	if ($#args % 2 != 1) {
148	# Not a hash.
149	$#args == 0 or croak "bad usage";
150	%opts = (Directory => $args[0]);
151	} else {
152	%opts = @args;
153	}
154
155	if ($opts{Directory}) {
156	-d $opts{Directory} or croak "Directory not found: $!";
157	if (-d $opts{Directory}."/.git") {
158	# TODO: Might make this more clever
159	$opts{WorkingCopy} = $opts{Directory};
160	$opts{Repository} = $opts{Directory}."/.git";
161	} else {
162	$opts{Repository} = $opts{Directory};
163	}
164	delete $opts{Directory};
165	}
166	}
167
168	$self = { opts => \%opts };
169	bless $self, $class;
170	}
171
172
173	=back
174
175	=head1 METHODS
176
177	=over 4
178
179	=item command ( COMMAND [, ARGUMENTS... ] )
180
181	Execute the given Git C<COMMAND> (specify it without the 'git-'
182	prefix), optionally with the specified extra C<ARGUMENTS>.
183
184	The method can be called without any instance or on a specified Git repository
185	(in that case the command will be run in the repository context).
186
187	In scalar context, it returns all the command output in a single string
188	(verbatim).
189
190	In array context, it returns an array containing lines printed to the
191	command's stdout (without trailing newlines).
192
193	In both cases, the command's stdin and stderr are the same as the caller's.
194
195	=cut
196
197	sub command {
198	my $fh = command_pipe(@_);
199
200	if (not defined wantarray) {
201	_cmd_close($fh);
202
203	} elsif (not wantarray) {
204	local $/;
205	my $text = <$fh>;
206	_cmd_close($fh);
207	return $text;
208
209	} else {
210	my @lines = <$fh>;
211	_cmd_close($fh);
212	chomp @lines;
213	return @lines;
214	}
215	}
216
217
218	=item command_oneline ( COMMAND [, ARGUMENTS... ] )
219
220	Execute the given C<COMMAND> in the same way as command()
221	does but always return a scalar string containing the first line
222	of the command's standard output.
223
224	=cut
225
226	sub command_oneline {
227	my $fh = command_pipe(@_);
228
229	my $line = <$fh>;
230	_cmd_close($fh);
231
232	chomp $line;
233	return $line;
234	}
235
236
237	=item command_pipe ( COMMAND [, ARGUMENTS... ] )
238
239	Execute the given C<COMMAND> in the same way as command()
240	does but return a pipe filehandle from which the command output can be
241	read.
242
243	=cut
244
245	sub command_pipe {
246	my ($self, $cmd, @args) = _maybe_self(@_);
247
248	$cmd =~ /^[a-z0-9A-Z_-]+$/ or croak "bad command: $cmd";
249
250	my $pid = open(my $fh, "-\|");
251	if (not defined $pid) {
252	croak "open failed: $!";
253	} elsif ($pid == 0) {
254	_cmd_exec($self, $cmd, @args);
255	}
256	return $fh;
257	}
258
259
260	=item command_noisy ( COMMAND [, ARGUMENTS... ] )
261
262	Execute the given C<COMMAND> in the same way as command() does but do not
263	capture the command output - the standard output is not redirected and goes
264	to the standard output of the caller application.
265
266	While the method is called command_noisy(), you might want to as well use
267	it for the most silent Git commands which you know will never pollute your
268	stdout but you want to avoid the overhead of the pipe setup when calling them.
269
270	The function returns only after the command has finished running.
271
272	=cut
273
274	sub command_noisy {
275	my ($self, $cmd, @args) = _maybe_self(@_);
276
277	$cmd =~ /^[a-z0-9A-Z_-]+$/ or croak "bad command: $cmd";
278
279	my $pid = fork;
280	if (not defined $pid) {
281	croak "fork failed: $!";
282	} elsif ($pid == 0) {
283	_cmd_exec($self, $cmd, @args);
284	}
285	if (waitpid($pid, 0) > 0 and $? != 0) {
286	croak "exit status: $?";
287	}
288	}
289
290
291	=item hash_object ( FILENAME [, TYPE ] )
292
293	=item hash_object ( FILEHANDLE [, TYPE ] )
294
295	Compute the SHA1 object id of the given C<FILENAME> (or data waiting in
296	C<FILEHANDLE>) considering it is of the C<TYPE> object type (C<blob>
297	(default), C<commit>, C<tree>).
298
299	In case of C<FILEHANDLE> passed instead of file name, all the data
300	available are read and hashed, and the filehandle is automatically
301	closed. The file handle should be freshly opened - if you have already
302	read anything from the file handle, the results are undefined (since
303	this function works directly with the file descriptor and internal
304	PerlIO buffering might have messed things up).
305
306	The method can be called without any instance or on a specified Git repository,
307	it makes zero difference.
308
309	The function returns the SHA1 hash.
310
311	Implementation of this function is very fast; no external command calls
312	are involved.
313
314	=cut
315
316	# Implemented in Git.xs.
317
318
319	=back
320
321	=head1 TODO
322
323	This is still fairly crude.
324	We need some good way to report errors back except just dying.
325
326	=head1 COPYRIGHT
327
328	Copyright 2006 by Petr Baudis E<lt>pasky@suse.czE<gt>.
329
330	This module is free software; it may be used, copied, modified
331	and distributed under the terms of the GNU General Public Licence,
332	either version 2, or (at your option) any later version.
333
334	=cut
335
336
337	# Take raw method argument list and return ($obj, @args) in case
338	# the method was called upon an instance and (undef, @args) if
339	# it was called directly.
340	sub _maybe_self {
341	# This breaks inheritance. Oh well.
342	ref $_[0] eq 'Git' ? @_ : (undef, @_);
343	}
344
345	# When already in the subprocess, set up the appropriate state
346	# for the given repository and execute the git command.
347	sub _cmd_exec {
348	my ($self, @args) = @_;
349	if ($self) {
350	$self->{opts}->{Repository} and $ENV{'GIT_DIR'} = $self->{opts}->{Repository};
351	$self->{opts}->{WorkingCopy} and chdir($self->{opts}->{WorkingCopy});
352	}
353	my $git = $self->{opts}->{GitPath};
354	$git \|\|= 'git';
355	exec ($git, @args) or croak "exec failed: $!";
356	}
357
358	# Close pipe to a subprocess.
359	sub _cmd_close {
360	my ($fh) = @_;
361	if (not close $fh) {
362	if ($!) {
363	# It's just close, no point in fatalities
364	carp "error closing pipe: $!";
365	} elsif ($? >> 8) {
366	croak "exit status: ".($? >> 8);
367	}
368	# else we might e.g. closed a live stream; the command
369	# dying of SIGPIPE would drive us here.
370	}
371	}
372
373
374	# Trickery for .xs routines: In order to avoid having some horrid
375	# C code trying to do stuff with undefs and hashes, we gate all
376	# xs calls through the following and in case we are being ran upon
377	# an instance call a C part of the gate which will set up the
378	# environment properly.
379	sub _call_gate {
380	my $xsfunc = shift;
381	my ($self, @args) = _maybe_self(@_);
382
383	if (defined $self) {
384	# XXX: We ignore the WorkingCopy! To properly support
385	# that will require heavy changes in libgit.
386
387	# XXX: And we ignore everything else as well. libgit
388	# at least needs to be extended to let us specify
389	# the $GIT_DIR instead of looking it up in environment.
390	#xs_call_gate($self->{opts}->{Repository});
391	}
392
393	&$xsfunc(@args);
394	}
395
396	sub AUTOLOAD {
397	my $xsname;
398	our $AUTOLOAD;
399	($xsname = $AUTOLOAD) =~ s/.*:://;
400	croak "&Git::$xsname not defined" if $xsname =~ /^xs_/;
401	$xsname = 'xs_'.$xsname;
402	_call_gate(\&$xsname, @_);
403	}
404
405	sub DESTROY { }
406
407
408	1; # Famous last words