From: Bruno Haible <bruno@clisp.org>
Date: Mon, 19 Aug 2002 11:02:03 +0000 (+0000)
Subject: Support for PHP.
X-Git-Tag: v0.12~1288
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=012803fbf11d258e87f66b17b390c9bd7ee60ccf;p=thirdparty%2Fgettext.git

Support for PHP.
---

diff --git a/doc/ChangeLog b/doc/ChangeLog
index 3786a4664..4a62043a0 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,13 @@
+2002-08-18  Bruno Haible  <bruno@clisp.org>
+
+	* gettext.texi (php-format): New subsection.
+	(PHP): Update.
+	* xgettext.texi: Mention language PHP.
+
+2002-08-06  Bruno Haible  <bruno@clisp.org>
+
+	* gettext-0.11.5 released.
+
 2002-07-16  Bruno Haible  <bruno@clisp.org>
 
 	* matrix.texi: Update.
diff --git a/doc/gettext.texi b/doc/gettext.texi
index 1ac67b263..85d835ca9 100644
--- a/doc/gettext.texi
+++ b/doc/gettext.texi
@@ -318,6 +318,7 @@ The Translator's View
 * object-pascal-format::        Object Pascal Format Strings
 * ycp-format::                  YCP Format Strings
 * tcl-format::                  Tcl Format Strings
+* php-format::                  PHP Format Strings
 
 Individual Programming Languages
 
@@ -6722,6 +6723,7 @@ strings.
 * object-pascal-format::        Object Pascal Format Strings
 * ycp-format::                  YCP Format Strings
 * tcl-format::                  Tcl Format Strings
+* php-format::                  PHP Format Strings
 @end menu
 
 @node c-format, python-format, Translators for other Languages, Translators for other Languages
@@ -6808,12 +6810,18 @@ YCP sformat strings are described in the libycp documentation
 In summary, a directive starts with @samp{%} and is followed by @samp{%}
 or a nonzero digit (@samp{1} to @samp{9}).
 
-@node tcl-format,  , ycp-format, Translators for other Languages
+@node tcl-format, php-format, ycp-format, Translators for other Languages
 @subsection Tcl Format Strings
 
 Tcl format strings are described in the @file{format.n} manual page,
 @uref{http://www.scriptics.com/man/tcl8.3/TclCmd/format.htm}.
 
+@node php-format,  , tcl-format, Translators for other Languages
+@subsection PHP Format Strings
+
+PHP format strings are described in the documentation of the PHP function
+@code{sprintf}, in @file{phpdoc/manual/function.sprintf.html}.
+
 @node Maintainers for other Languages, List of Programming Languages, Translators for other Languages, Programming Languages
 @section The Maintainer's View
 
@@ -6849,7 +6857,7 @@ that language, and to combine the resulting files using @code{msgcat}.
 @c   Perl                    1911
 @c   C++                     1379     *
 @c   Java                    1200     *
-@c   PHP                     1051
+@c   PHP                     1051     *
 @c   Python                   613     *
 @c   Unix Shell               357
 @c   Tcl                      266     *
@@ -7792,13 +7800,13 @@ use
 
 @table @asis
 @item RPMs
-mod_php4, phplib, phpdoc
+mod_php4, mod_php4-core, phplib, phpdoc
 
 @item File extension
 @code{php}, @code{php3}, @code{php4}
 
 @item String syntax
-@code{"abc"}
+@code{"abc"}, @code{'abc'}
 
 @item gettext shorthand
 @code{_("abc")}
@@ -7813,7 +7821,7 @@ mod_php4, phplib, phpdoc
 @code{bindtextdomain} function
 
 @item setlocale
-@code{setlocale} function
+Programmer must call @code{setlocale (LC_ALL, "")}
 
 @item Prerequisite
 ---
@@ -7822,10 +7830,10 @@ mod_php4, phplib, phpdoc
 use
 
 @item Extractor
----
+@code{xgettext}
 
 @item Formatting with positions
----
+@code{printf "%2\$d %1\$d"}
 
 @item Portability
 On platforms without gettext, the functions are not available.
diff --git a/doc/xgettext.texi b/doc/xgettext.texi
index 9143679f8..49e3c2083 100644
--- a/doc/xgettext.texi
+++ b/doc/xgettext.texi
@@ -71,7 +71,7 @@ is written to standard output.
 Specifies the language of the input files.  The supported languages
 are @code{C}, @code{C++}, @code{ObjectiveC}, @code{PO}, @code{Python},
 @code{Lisp}, @code{EmacsLisp}, @code{librep}, @code{Java}, @code{awk},
-@code{YCP}, @code{Tcl}, @code{RST}, @code{Glade}.
+@code{YCP}, @code{Tcl}, @code{PHP}, @code{RST}, @code{Glade}.
 
 @item -C
 @itemx --c++
diff --git a/src/ChangeLog b/src/ChangeLog
index 325e22160..326e23ab1 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,22 @@
+2002-08-18  Bruno Haible  <bruno@clisp.org>
+
+	* message.h (enum format_type): New enum value 'format_php'.
+	(NFORMATS): Increment.
+	* message.c (format_language, format_language_pretty): Add entry
+	for php.
+	* format.h (formatstring_php): New declaration.
+	* format-php.c: New file.
+	* format.c (formatstring_parsers): Add entry for php.
+	* x-php.h: New file.
+	* x-php.c: New file.
+	* xgettext.c: Include x-php.c.
+	(main): Call x_php_extract_all, x_php_keyword.
+	(language_to_scanner): Add PHP rule.
+	(extension_to_language): Add PHP rule.
+	* Makefile.am (noinst_HEADERS): Add x-php.h.
+	(FORMAT_SOURCE): Add format-php.c.
+	(xgettext_SOURCES): Add x-php.c.
+
 2002-08-17  Bruno Haible  <bruno@clisp.org>
 
 	* urlget.c (fetch): Also try invoking the 'curl' program.
diff --git a/src/Makefile.am b/src/Makefile.am
index 613b5865f..1ad2ad059 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,7 +35,7 @@ po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-equal.h msgl-iconv.h \
 msgl-ascii.h msgl-cat.h msgl-english.h msgfmt.h msgunfmt.h read-mo.h \
 write-mo.h read-java.h write-java.h read-tcl.h write-tcl.h po-time.h \
 plural-table.h format.h xgettext.h x-c.h x-po.h x-python.h x-lisp.h \
-x-elisp.h x-librep.h x-java.h x-awk.h x-ycp.h x-tcl.h x-rst.h x-glade.h
+x-elisp.h x-librep.h x-java.h x-awk.h x-ycp.h x-tcl.h x-php.h x-rst.h x-glade.h
 
 EXTRA_DIST = FILES project-id msgunfmt.tcl \
 gnu/gettext/DumpResource.java gnu/gettext/GetURL.java
@@ -74,7 +74,8 @@ open-po.c dir-list.c str-list.c
 # xgettext and msgfmt deal with format strings.
 FORMAT_SOURCE = format.c \
 format-c.c format-python.c format-lisp.c format-elisp.c format-librep.c \
-format-java.c format-awk.c format-pascal.c format-ycp.c format-tcl.c
+format-java.c format-awk.c format-pascal.c format-ycp.c format-tcl.c \
+format-php.c
 
 # libgettextsrc contains all code that is needed by at least two programs.
 libgettextsrc_la_SOURCES = \
@@ -94,7 +95,7 @@ msgmerge_SOURCES = msgmerge.c
 msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c read-tcl.c
 xgettext_SOURCES = xgettext.c \
   x-c.c x-po.c x-python.c x-lisp.c x-elisp.c x-librep.c x-java.l x-awk.c \
-  x-ycp.c x-tcl.c x-rst.c x-glade.c
+  x-ycp.c x-tcl.c x-php.c x-rst.c x-glade.c
 msgattrib_SOURCES = msgattrib.c
 msgcat_SOURCES = msgcat.c
 msgcomm_SOURCES = msgcomm.c
diff --git a/src/format-php.c b/src/format-php.c
new file mode 100644
index 000000000..9ca6ef0c8
--- /dev/null
+++ b/src/format-php.c
@@ -0,0 +1,497 @@
+/* PHP format strings.
+   Copyright (C) 2001-2002 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "xmalloc.h"
+#include "error.h"
+#include "progname.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+/* PHP format strings are described in phpdoc-4.0.6, file
+   phpdoc/manual/function.sprintf.html, and are implemented in
+   php-4.1.0/ext/standard/formatted_print.c.
+   A directive
+   - starts with '%' or '%m$' where m is a positive integer,
+   - is optionally followed by any of the characters '0', '-', ' ', or
+     "'<anychar>", each of which acts as a flag,
+   - is optionally followed by a width specification: a nonempty digit
+     sequence,
+   - is optionally followed by '.' and a precision specification: a nonempty
+     digit sequence,
+   - is optionally followed by a size specifier 'l', which is ignored,
+   - is finished by a specifier
+       - 's', that needs a string argument,
+       - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
+       - 'e', 'f', that need a floating-point argument,
+       - 'c', that needs a character argument.
+   Additionally there is the directive '%%', which takes no argument.
+   Numbered and unnumbered argument specifications can be used in the same
+   string.  Numbered argument specifications have no influence on the
+   "current argument index", that is incremented each time an argument is read.
+ */
+
+enum format_arg_type
+{
+  FAT_INTEGER,		/* set for 'b', 'd', 'u', 'o', 'x', 'X' */
+  FAT_FLOAT,		/* set for 'e', 'f' */
+  FAT_CHARACTER,	/* set for 'c' */
+  FAT_STRING		/* set for 's' */
+};
+
+struct numbered_arg
+{
+  unsigned int number;		/* argument position, starting at 1 */
+  enum format_arg_type type;	/* type required by the directive(s) using it */
+};
+
+struct spec
+{
+  unsigned int directives;		/* number of directives, "%%" included */
+  unsigned int numbered_arg_count;	/* entries in use in 'numbered' */
+  unsigned int allocated;		/* entries allocated for 'numbered' */
+  struct numbered_arg *numbered;	/* sorted by 'number'; NULL if never allocated */
+};
+
+/* Locale independent test for a decimal digit.
+   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
+   <ctype.h> isdigit must be an 'unsigned char'.)  */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite the K&R C function definition syntax.  */
+static int numbered_arg_compare PARAMS ((const void *p1, const void *p2));
+static void *format_parse PARAMS ((const char *format));
+static void format_free PARAMS ((void *descr));
+static int format_get_number_of_directives PARAMS ((void *descr));
+static bool format_check PARAMS ((const lex_pos_ty *pos,
+				  void *msgid_descr, void *msgstr_descr,
+				  bool equality,
+				  bool noisy, const char *pretty_msgstr));
+
+
+static int
+numbered_arg_compare (p1, p2)	/* qsort callback: ascending order of 'number' */
+     const void *p1;		/* points to a struct numbered_arg */
+     const void *p2;		/* points to a struct numbered_arg */
+{
+  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
+  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
+
+  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);	/* yields 1, -1 or 0 */
+}
+
+static void *
+format_parse (format)	/* returns a new struct spec, or NULL if FORMAT is invalid */
+     const char *format;	/* NUL-terminated format string to analyze */
+{
+  unsigned int directives;
+  unsigned int numbered_arg_count;
+  unsigned int allocated;
+  struct numbered_arg *numbered;
+  unsigned int unnumbered_arg_count;	/* the "current argument index" */
+  struct spec *result;
+
+  directives = 0;
+  numbered_arg_count = 0;
+  allocated = 0;
+  numbered = NULL;
+  unnumbered_arg_count = 0;
+
+  for (; *format != '\0';)
+    if (*format++ == '%')
+      {
+	/* A directive.  "%%" is counted here too.  */
+	directives++;
+
+	if (*format != '%')
+	  {
+	    /* A complex directive: one that consumes an argument.  */
+	    unsigned int number;
+	    enum format_arg_type type;
+
+	    number = ++unnumbered_arg_count;	/* provisional; undone below for '%m$' */
+	    if (isdigit (*format))
+	      {
+		const char *f = format;	/* lookahead; 'format' moves only if '$' follows */
+		unsigned int m = 0;
+
+		do
+		  {
+		    m = 10 * m + (*f - '0');
+		    f++;
+		  }
+		while (isdigit (*f));
+
+		if (*f == '$')
+		  {
+		    if (m == 0)	/* '%0$' is rejected: positions start at 1 */
+		      goto bad_format;
+		    number = m;
+		    format = ++f;
+		    --unnumbered_arg_count;	/* numbered directives leave the index alone */
+		  }
+	      }
+
+	    /* Parse flags: '0', '-', ' ', or a quote followed by any character.  */
+	    for (;;)
+	      {
+		if (*format == '0' || *format == '-' || *format == ' ')
+		  format++;
+		else if (*format == '\'')
+		  {
+		    format++;
+		    if (*format == '\0')	/* quote flag lacks its character */
+		      goto bad_format;
+		    format++;
+		  }
+		else
+		  break;
+	      }
+
+	    /* Parse width.  Its value is not recorded.  */
+	    if (isdigit (*format))
+	      {
+		do
+		  format++;
+		while (isdigit (*format));
+	      }
+
+	    /* Parse precision.  Its value is not recorded.  */
+	    if (*format == '.')
+	      {
+		format++;
+
+		if (isdigit (*format))
+		  {
+		    do
+		      format++;
+		    while (isdigit (*format));
+		  }
+		else
+		  --format;	/* back onto the '.', so the switch below jumps to bad_format */
+	      }
+
+	    /* Parse size.  A single 'l' is skipped.  */
+	    if (*format == 'l')
+	      format++;
+
+	    switch (*format)	/* the conversion character decides the argument type */
+	      {
+	      case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
+		type = FAT_INTEGER;
+		break;
+	      case 'e': case 'f':
+		type = FAT_FLOAT;
+		break;
+	      case 'c':
+		type = FAT_CHARACTER;
+		break;
+	      case 's':
+		type = FAT_STRING;
+		break;
+	      default:	/* includes '\0', i.e. a directive cut off by the end of string */
+		goto bad_format;
+	      }
+
+	    if (allocated == numbered_arg_count)
+	      {
+		allocated = 2 * allocated + 1;	/* capacity grows 0, 1, 3, 7, ... */
+		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
+	      }
+	    numbered[numbered_arg_count].number = number;
+	    numbered[numbered_arg_count].type = type;
+	    numbered_arg_count++;
+	  }
+
+	format++;	/* skip the conversion character, or the second '%' of "%%" */
+      }
+
+  /* Sort the numbered argument array, and eliminate duplicates.  */
+  if (numbered_arg_count > 1)
+    {
+      unsigned int i, j;
+      bool err;
+
+      qsort (numbered, numbered_arg_count,
+	     sizeof (struct numbered_arg), numbered_arg_compare);
+
+      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
+      err = false;
+      for (i = j = 0; i < numbered_arg_count; i++)
+	if (j > 0 && numbered[i].number == numbered[j-1].number)
+	  {
+	    enum format_arg_type type1 = numbered[i].type;
+	    enum format_arg_type type2 = numbered[j-1].type;
+	    enum format_arg_type type_both;
+
+	    if (type1 == type2)
+	      type_both = type1;
+	    else
+	      /* Incompatible types: the same argument is used with two types.  */
+	      type_both = type1, err = true;
+
+	    numbered[j-1].type = type_both;
+	  }
+	else
+	  {
+	    if (j < i)
+	      {
+		numbered[j].number = numbered[i].number;
+		numbered[j].type = numbered[i].type;
+	      }
+	    j++;
+	  }
+      numbered_arg_count = j;
+      if (err)
+	goto bad_format;
+    }
+
+  result = (struct spec *) xmalloc (sizeof (struct spec));
+  result->directives = directives;
+  result->numbered_arg_count = numbered_arg_count;
+  result->allocated = allocated;
+  result->numbered = numbered;	/* ownership passes to the result; see format_free */
+  return result;
+
+ bad_format:	/* common failure exit: release the array and report invalidity */
+  if (numbered != NULL)
+    free (numbered);
+  return NULL;
+}
+
+static void
+format_free (descr)	/* releases a descriptor returned by format_parse */
+     void *descr;	/* a struct spec pointer; dereferenced without a NULL check */
+{
+  struct spec *spec = (struct spec *) descr;
+
+  if (spec->numbered != NULL)
+    free (spec->numbered);
+  free (spec);
+}
+
+static int
+format_get_number_of_directives (descr)	/* accessor for the 'directives' count */
+     void *descr;	/* a struct spec pointer, as returned by format_parse */
+{
+  struct spec *spec = (struct spec *) descr;
+
+  return spec->directives;	/* unsigned count, returned as int */
+}
+
+static bool
+format_check (pos, msgid_descr, msgstr_descr, equality, noisy, pretty_msgstr)
+     const lex_pos_ty *pos;	/* file name and line number used in the diagnostics */
+     void *msgid_descr;		/* struct spec parsed from the msgid */
+     void *msgstr_descr;	/* struct spec parsed from the msgstr */
+     bool equality;		/* if true, the msgstr must use every msgid argument */
+     bool noisy;		/* if true, mismatches are reported via error_at_line */
+     const char *pretty_msgstr;	/* name of the msgstr, inserted into the diagnostics */
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+    {
+      unsigned int i, j;
+      unsigned int n1 = spec1->numbered_arg_count;
+      unsigned int n2 = spec2->numbered_arg_count;
+
+      /* Check the argument names are the same.
+	 Both arrays are sorted.  We search for the first difference.  */
+      for (i = 0, j = 0; i < n1 || j < n2; )
+	{
+	  int cmp = (i >= n1 ? 1 :	/* > 0: msgstr has an argument msgid lacks */
+		     j >= n2 ? -1 :	/* < 0: msgid has an argument msgstr lacks */
+		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+		     0);
+
+	  if (cmp > 0)	/* always an error, whatever 'equality' says */
+	    {
+	      if (noisy)
+		{
+		  error_with_progname = false;
+		  error_at_line (0, 0, pos->file_name, pos->line_number,
+				 _("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
+				 spec2->numbered[j].number, pretty_msgstr);
+		  error_with_progname = true;
+		}
+	      err = true;
+	      break;
+	    }
+	  else if (cmp < 0)
+	    {
+	      if (equality)
+		{
+		  if (noisy)
+		    {
+		      error_with_progname = false;
+		      error_at_line (0, 0, pos->file_name, pos->line_number,
+				     _("a format specification for argument %u doesn't exist in '%s'"),
+				     spec1->numbered[i].number, pretty_msgstr);
+		      error_with_progname = true;
+		    }
+		  err = true;
+		  break;
+		}
+	      else
+		i++;	/* tolerated: the msgstr may leave a msgid argument unused */
+	    }
+	  else
+	    j++, i++;
+	}
+      /* Check the argument types are the same.  */
+      if (!err)
+	for (i = 0, j = 0; j < n2; )	/* the pass above ensured each msgstr number is in msgid */
+	  {
+	    if (spec1->numbered[i].number == spec2->numbered[j].number)
+	      {
+		if (spec1->numbered[i].type != spec2->numbered[j].type)
+		  {
+		    if (noisy)
+		      {
+			error_with_progname = false;
+			error_at_line (0, 0, pos->file_name, pos->line_number,
+				       _("format specifications in 'msgid' and '%s' for argument %u are not the same"),
+				       pretty_msgstr,
+				       spec2->numbered[j].number);
+			error_with_progname = true;
+		      }
+		    err = true;
+		    break;
+		  }
+		j++, i++;
+	      }
+	    else
+	      i++;	/* msgid argument that the msgstr does not use */
+	  }
+    }
+
+  return err;	/* true if a mismatch was found, false if the two agree */
+}
+
+
+struct formatstring_parser formatstring_php =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+#include "getline.h"
+
+static void
+format_print (descr)	/* prints the argument list of DESCR to standard output */
+     void *descr;	/* a struct spec pointer, or NULL for an invalid format */
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int last;	/* next argument number expected in the output */
+  unsigned int i;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  last = 1;
+  for (i = 0; i < spec->numbered_arg_count; i++)
+    {
+      unsigned int number = spec->numbered[i].number;
+
+      if (i > 0)
+	printf (" ");
+      if (number < last)	/* would mean the array is unsorted or has duplicates */
+	abort ();
+      for (; last < number; last++)
+	printf ("_ ");	/* placeholder for an argument number that is not used */
+      switch (spec->numbered[i].type)
+	{
+	case FAT_INTEGER:
+	  printf ("i");
+	  break;
+	case FAT_FLOAT:
+	  printf ("f");
+	  break;
+	case FAT_CHARACTER:
+	  printf ("c");
+	  break;
+	case FAT_STRING:
+	  printf ("s");
+	  break;
+	default:
+	  abort ();
+	}
+      last = number + 1;
+    }
+  printf (")");
+}
+
+int
+main ()	/* test driver: parses each line of stdin and prints the result */
+{
+  for (;;)
+    {
+      char *line = NULL;	/* getline allocates the buffer */
+      size_t line_len = 0;
+      void *descr;
+
+      if (getline (&line, &line_len, stdin) < 0)
+	break;
+
+      descr = format_parse (line);	/* NULL if the line is not a valid format string */
+
+      format_print (descr);	/* NOTE(review): descr is not passed to format_free here */
+      printf ("\n");
+
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/src/format.c b/src/format.c
index a981eb787..00660a780 100644
--- a/src/format.c
+++ b/src/format.c
@@ -36,5 +36,6 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
   /* format_awk */		&formatstring_awk,
   /* format_pascal */		&formatstring_pascal,
   /* format_ycp */		&formatstring_ycp,
-  /* format_tcl */		&formatstring_tcl
+  /* format_tcl */		&formatstring_tcl,
+  /* format_php */		&formatstring_php
 };
diff --git a/src/format.h b/src/format.h
index f19634f5c..682744a53 100644
--- a/src/format.h
+++ b/src/format.h
@@ -65,6 +65,7 @@ extern struct formatstring_parser formatstring_awk;
 extern struct formatstring_parser formatstring_pascal;
 extern struct formatstring_parser formatstring_ycp;
 extern struct formatstring_parser formatstring_tcl;
+extern struct formatstring_parser formatstring_php;
 
 /* Table of all format string parsers.  */
 extern struct formatstring_parser *formatstring_parsers[NFORMATS];
diff --git a/src/message.c b/src/message.c
index 9ffcf3031..bb0f84bea 100644
--- a/src/message.c
+++ b/src/message.c
@@ -50,7 +50,8 @@ const char *const format_language[NFORMATS] =
   /* format_awk */		"awk",
   /* format_pascal */		"object-pascal",
   /* format_ycp */		"ycp",
-  /* format_tcl */		"tcl"
+  /* format_tcl */		"tcl",
+  /* format_php */		"php"
 };
 
 const char *const format_language_pretty[NFORMATS] =
@@ -65,7 +66,8 @@ const char *const format_language_pretty[NFORMATS] =
   /* format_awk */		"awk",
   /* format_pascal */		"Object Pascal",
   /* format_ycp */		"YCP",
-  /* format_tcl */		"Tcl"
+  /* format_tcl */		"Tcl",
+  /* format_php */		"PHP"
 };
 
 
diff --git a/src/message.h b/src/message.h
index abe361f7d..ffec3ee08 100644
--- a/src/message.h
+++ b/src/message.h
@@ -44,9 +44,10 @@ enum format_type
   format_awk,
   format_pascal,
   format_ycp,
-  format_tcl
+  format_tcl,
+  format_php
 };
-#define NFORMATS 11	/* Number of format_type enum values.  */
+#define NFORMATS 12	/* Number of format_type enum values.  */
 extern const char *const format_language[NFORMATS];
 extern const char *const format_language_pretty[NFORMATS];
 
diff --git a/src/x-php.c b/src/x-php.c
new file mode 100644
index 000000000..63a61078d
--- /dev/null
+++ b/src/x-php.c
@@ -0,0 +1,1376 @@
+/* xgettext PHP backend.
+   Copyright (C) 2001-2002 Free Software Foundation, Inc.
+
+   This file was written by Bruno Haible <bruno@clisp.org>, 2002.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "message.h"
+#include "x-php.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xmalloc.h"
+#include "exit.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+
+/* The PHP syntax is defined in phpdoc/manual/langref.html.
+   See also php-4.1.0/Zend/zend_language_scanner.l.  */
+
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,		/* ( */
+  token_type_rparen,		/* ) */
+  token_type_comma,		/* , */
+  token_type_string_literal,	/* "abc" */
+  token_type_symbol,		/* symbol, number */
+  token_type_other		/* misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
+
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;		/* for token_type_string_literal, token_type_symbol */
+  int line_number;
+};
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite the K&R C function definition syntax.  */
+static void init_keywords PARAMS ((void));
+static int phase1_getc PARAMS ((void));
+static void phase1_ungetc PARAMS ((int c));
+static void skip_html PARAMS ((void));
+#if 0
+static int phase2_getc PARAMS ((void));
+static void phase2_ungetc PARAMS ((int c));
+#endif
+static inline void comment_start PARAMS ((void));
+static inline void comment_add PARAMS ((int c));
+static inline void comment_line_end PARAMS ((size_t chars_to_remove));
+static int phase3_getc PARAMS ((void));
+static void phase3_ungetc PARAMS ((int c));
+static inline void free_token PARAMS ((token_ty *tp));
+static void x_php_lex PARAMS ((token_ty *tp));
+static bool extract_parenthesized PARAMS ((message_list_ty *mlp,
+					   int commas_to_skip,
+					   int plural_commas));
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_php_extract_all ()	/* turns on the extract_all flag; nothing in view clears it again */
+{
+  extract_all = true;
+}
+
+
+void
+x_php_keyword (name)	/* registers one keyword specification */
+     const char *name;	/* keyword spec such as "dgettext:2", or NULL */
+{
+  if (name == NULL)
+    default_keywords = false;	/* NULL means: do not add the default keywords */
+  else
+    {
+      const char *end;	/* set by split_keywordspec; name..end is the identifier */
+      int argnum1;
+      int argnum2;
+      const char *colon;
+
+      if (keywords.table == NULL)	/* table is created on first use */
+	init_hash (&keywords, 100);
+
+      split_keywordspec (name, &end, &argnum1, &argnum2);
+
+      /* The characters between name and end should form a valid C identifier.
+	 A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');	/* NOTE(review): <string.h> is not among the includes above; confirm strchr is declared */
+      if (colon == NULL || colon >= end)
+	{
+	  if (argnum1 == 0)	/* no argument number given: use 1 */
+	    argnum1 = 1;
+	  insert_entry (&keywords, name, end - name,
+			(void *) (long) (argnum1 + (argnum2 << 10)));	/* both numbers packed into one value */
+	}
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()	/* adds the default keywords unless they were disabled or already added */
+{
+  if (default_keywords)
+    {
+      x_php_keyword ("_");
+      x_php_keyword ("gettext");
+      x_php_keyword ("dgettext:2");	/* presumably: the message is the 2nd argument -- confirm in split_keywordspec */
+      x_php_keyword ("dcgettext:2");
+      default_keywords = false;	/* so that a later call does not add them again */
+    }
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* 1. line_number handling.  */
+
+/* Pushback buffer.  The number of slots in use is guaranteed to be < 4.  */
+static unsigned char phase1_pushback[4];
+static int phase1_pushback_length;
+
+static int
+phase1_getc ()	/* returns the next input character or EOF, keeping line_number current */
+{
+  int c;
+
+  if (phase1_pushback_length)
+    c = phase1_pushback[--phase1_pushback_length];	/* pushed-back characters first, last in first out */
+  else
+    {
+      c = getc (fp);
+
+      if (c == EOF)
+	{
+	  if (ferror (fp))	/* tell a read error apart from a plain end of file */
+	    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+		   real_file_name);
+	  return EOF;
+	}
+    }
+
+  if (c == '\n')	/* also for pushed-back newlines; phase1_ungetc decremented for them */
+    line_number++;
+
+  return c;
+}
+
+static void
+phase1_ungetc (c)	/* pushes C back so that the next phase1_getc returns it */
+     int c;	/* a character previously read, or EOF (which is ignored) */
+{
+  if (c != EOF)
+    {
+      if (c == '\n')	/* undo the increment done by phase1_getc */
+	--line_number;
+
+      phase1_pushback[phase1_pushback_length++] = c;	/* no bounds check is made here */
+    }
+}
+
+
+/* 2. Ignore HTML sections.  They are equivalent to PHP echo commands and
+   therefore don't contain translatable strings.  */
+
+static void
+skip_html ()	/* skips HTML text until a PHP opening tag has been read, or EOF */
+{
+  for (;;)
+    {
+      int c = phase1_getc ();
+
+      if (c == EOF)
+	return;
+
+      if (c == '<')
+	{
+	  int c2 = phase1_getc ();
+
+	  if (c2 == EOF)
+	    break;	/* leaves the loop; the function then returns */
+
+	  if (c2 == '?')
+	    {
+	      /* <?php is the normal way to enter PHP mode. <? and <?= are
+		 recognized by PHP depending on a configuration setting.  */
+	      int c3 = phase1_getc ();
+
+	      if (c3 != '=')	/* a '=' is swallowed; anything else is pushed back */
+		phase1_ungetc (c3);
+
+	      return;
+	    }
+
+	  if (c2 == '%')
+	    {
+	      /* <% and <%= are recognized by PHP depending on a configuration
+		 setting.  */
+	      int c3 = phase1_getc ();
+
+	      if (c3 != '=')
+		phase1_ungetc (c3);
+
+	      return;
+	    }
+
+	  if (c2 == '<')
+	    {
+	      phase1_ungetc (c2);	/* the second '<' may itself start a tag */
+	      continue;
+	    }
+
+	  /* < script language = php >
+	     < script language = "php" >
+	     < script language = 'php' >
+	     are always recognized.  */
+	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	    c2 = phase1_getc ();
+	  if (c2 != 's' && c2 != 'S')
+	    {
+	      phase1_ungetc (c2);	/* not this tag; only the mismatching character is re-read */
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'c' && c2 != 'C')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'r' && c2 != 'R')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'i' && c2 != 'I')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'p' && c2 != 'P')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 't' && c2 != 'T')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (!(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r'))	/* whitespace must follow "script" */
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  do
+	    c2 = phase1_getc ();
+	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
+	  if (c2 != 'l' && c2 != 'L')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'a' && c2 != 'A')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'n' && c2 != 'N')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'g' && c2 != 'G')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'u' && c2 != 'U')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'a' && c2 != 'A')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'g' && c2 != 'G')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  if (c2 != 'e' && c2 != 'E')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	    c2 = phase1_getc ();
+	  if (c2 != '=')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  c2 = phase1_getc ();
+	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	    c2 = phase1_getc ();
+	  if (c2 == '"')	/* value in double quotes */
+	    {
+	      c2 = phase1_getc ();
+	      if (c2 != 'p')	/* "php" is matched in lowercase only, unlike the names above */
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'h')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'p')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != '"')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	    }
+	  else if (c2 == '\'')	/* value in single quotes */
+	    {
+	      c2 = phase1_getc ();
+	      if (c2 != 'p')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'h')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'p')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != '\'')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	    }
+	  else	/* unquoted value; c2 already holds its first character */
+	    {
+	      if (c2 != 'p')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'h')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	      c2 = phase1_getc ();
+	      if (c2 != 'p')
+		{
+		  phase1_ungetc (c2);
+		  continue;
+		}
+	    }
+	  c2 = phase1_getc ();
+	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	    c2 = phase1_getc ();
+	  if (c2 != '>')
+	    {
+	      phase1_ungetc (c2);
+	      continue;
+	    }
+	  return;	/* the closing '>' of the script tag has been read */
+	}
+    }
+}
+
+#if 0
+
+static unsigned char phase2_pushback[1];
+static int phase2_pushback_length;
+
+static int
+phase2_getc ()	/* like phase1_getc, but skips HTML sections after a PHP closing tag */
+{
+  int c;
+
+  if (phase2_pushback_length)
+    return phase2_pushback[--phase2_pushback_length];
+
+  c = phase1_getc ();
+  switch (c)
+    {
+    case '?':
+    case '%':
+      {
+	int c2 = phase1_getc ();
+	if (c2 == '>')
+	  {
+	    /* ?> and %> terminate PHP mode and switch back to HTML mode.  */
+	    skip_html ();
+	    return ' ';	/* the closing tag and the HTML after it become one space */
+	  }
+	phase1_ungetc (c2);
+      }
+      break;
+
+    case '<':
+      {
+	int c2 = phase1_getc ();
+
+	/* < / script > terminates PHP mode and switches back to HTML mode.  */
+	while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	  c2 = phase1_getc ();
+	if (c2 == '/')
+	  {
+	    do
+	      c2 = phase1_getc ();
+	    while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
+	    if (c2 == 's' || c2 == 'S')
+	      {
+		c2 = phase1_getc ();
+		if (c2 == 'c' || c2 == 'C')
+		  {
+		    c2 = phase1_getc ();
+		    if (c2 == 'r' || c2 == 'R')
+		      {
+			c2 = phase1_getc ();
+			if (c2 == 'i' || c2 == 'I')
+			  {
+			    c2 = phase1_getc ();
+			    if (c2 == 'p' || c2 == 'P')
+			      {
+				c2 = phase1_getc ();
+				if (c2 == 't' || c2 == 'T')
+				  {
+				    do
+				      c2 = phase1_getc ();
+				    while (c2 == ' ' || c2 == '\t'
+					   || c2 == '\n' || c2 == '\r');
+				    if (c2 == '>')
+				      {
+					skip_html ();
+					return ' ';
+				      }
+				  }
+			      }
+			  }
+		      }
+		  }
+	      }
+	  }
+	phase1_ungetc (c2);	/* NOTE(review): only the last character read is pushed back; earlier ones are dropped */
+      }
+      break;
+    }
+
+  return c;
+}
+
+/* Push C back so that the next phase2_getc call returns it.  EOF is
+   never stored.  */
+static void
+phase2_ungetc (c)
+     int c;
+{
+  if (c == EOF)
+    return;
+
+  phase2_pushback[phase2_pushback_length] = c;
+  phase2_pushback_length++;
+}
+
+#endif
+
+
+/* Accumulating comments.  */
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+/* Begin accumulating a new comment line: forget whatever the buffer
+   held.  The allocated storage is kept for reuse.  */
+static inline void
+comment_start ()
+{
+  buflen = 0;
+}
+
+/* Append the character C to the comment buffer, enlarging the buffer by
+   100 bytes whenever it is full.  */
+static inline void
+comment_add (c)
+     int c;
+{
+  if (bufmax <= buflen)
+    {
+      bufmax = bufmax + 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = c;
+  buflen++;
+}
+
+/* Finish the current comment line.  The last CHARS_TO_REMOVE characters
+   (the comment terminator that was already stored) are dropped, trailing
+   blanks and tabs are stripped, the text is NUL-terminated and handed to
+   xgettext_comment_add.  */
+static inline void
+comment_line_end (chars_to_remove)
+     size_t chars_to_remove;
+{
+  buflen -= chars_to_remove;
+
+  /* Strip trailing horizontal whitespace.  */
+  for (; buflen >= 1; --buflen)
+    {
+      char last = buffer[buflen - 1];
+
+      if (!(last == ' ' || last == '\t'))
+	break;
+    }
+
+  /* When nothing was removed the buffer may be exactly full, so make room
+     for the terminating NUL.  */
+  if (chars_to_remove == 0 && bufmax <= buflen)
+    {
+      bufmax = bufmax + 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+
+  buffer[buflen] = '\0';
+  xgettext_comment_add (buffer);
+}
+
+
+/* 3. Replace each comment that is not inside a string literal with a
+   space character.  We need to remember the comment for later, because
+   it may be attached to a keyword string.  */
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+static unsigned char phase3_pushback[1];
+static int phase3_pushback_length;
+
+/* Read the remainder of a one-line comment, i.e. everything that follows
+   a '#' or "//" introducer.  The comment text, without leading blanks, is
+   handed to xgettext_comment_add through comment_line_end.  The comment
+   ends at a newline, at EOF, or at "?>" / "%>", which additionally leaves
+   PHP mode through skip_html.  Records the line on which the comment
+   started in last_comment_line.  */
+static void
+phase3_line_comment ()
+{
+  /* Must start out false: it is tested before the first assignment in
+     the loop, and a stray true value followed by '>' would make
+     comment_line_end (1) run on an empty buffer.  */
+  bool last_was_qmark = false;
+  int lineno;
+  int c;
+
+  comment_start ();
+  lineno = line_number;
+  for (;;)
+    {
+      c = phase1_getc ();
+      if (c == '\n' || c == EOF)
+	{
+	  comment_line_end (0);
+	  break;
+	}
+      if (last_was_qmark && c == '>')
+	{
+	  /* Drop the '?' or '%' that was already stored.  */
+	  comment_line_end (1);
+	  skip_html ();
+	  break;
+	}
+      /* We skip all leading white space, but not EOLs.  */
+      if (!(buflen == 0 && (c == ' ' || c == '\t')))
+	comment_add (c);
+      last_was_qmark = (c == '?' || c == '%');
+    }
+  last_comment_line = lineno;
+}
+
+/* Return the next character of phase 3.  A pushed-back character, if
+   any, is returned first.  Otherwise a character is read with
+   phase1_getc.  A '#' or "//" comment is replaced by a newline, a C style
+   comment by a space, and the comment text is remembered through
+   xgettext_comment_add.  Every other character is returned unchanged.  */
+static int
+phase3_getc ()
+{
+  int lineno;
+  int c;
+
+  if (phase3_pushback_length)
+    return phase3_pushback[--phase3_pushback_length];
+
+  c = phase1_getc ();
+
+  if (c == '#')
+    {
+      /* sh comment.  */
+      phase3_line_comment ();
+      return '\n';
+    }
+  else if (c == '/')
+    {
+      c = phase1_getc ();
+
+      switch (c)
+	{
+	default:
+	  /* A lone slash, not a comment introducer.  */
+	  phase1_ungetc (c);
+	  return '/';
+
+	case '*':
+	  {
+	    /* C comment.  */
+	    bool last_was_star;
+
+	    comment_start ();
+	    lineno = line_number;
+	    last_was_star = false;
+	    for (;;)
+	      {
+		c = phase1_getc ();
+		if (c == EOF)
+		  break;
+		/* We skip all leading white space, but not EOLs.  */
+		if (buflen == 0 && (c == ' ' || c == '\t'))
+		  continue;
+		comment_add (c);
+		switch (c)
+		  {
+		  case '\n':
+		    /* Each line of the comment is recorded separately; the
+		       stored newline is removed again.  */
+		    comment_line_end (1);
+		    comment_start ();
+		    lineno = line_number;
+		    last_was_star = false;
+		    continue;
+
+		  case '*':
+		    last_was_star = true;
+		    continue;
+
+		  case '/':
+		    if (last_was_star)
+		      {
+			/* Remove the stored terminator.  */
+			comment_line_end (2);
+			break;
+		      }
+		    /* FALLTHROUGH */
+
+		  default:
+		    last_was_star = false;
+		    continue;
+		  }
+		break;
+	      }
+	    last_comment_line = lineno;
+	    return ' ';
+	  }
+
+	case '/':
+	  /* C++ comment.  */
+	  phase3_line_comment ();
+	  return '\n';
+	}
+    }
+  else
+    return c;
+}
+
+/* Push C back so that the next phase3_getc call returns it.  EOF is
+   never stored.  */
+static void
+phase3_ungetc (c)
+     int c;
+{
+  if (c == EOF)
+    return;
+
+  phase3_pushback[phase3_pushback_length] = c;
+  phase3_pushback_length++;
+}
+
+
+/* Free the memory pointed to by a 'struct token_ty'.  */
+/* Release the string owned by *TP.  Only string literal and symbol
+   tokens are treated as owning one.  */
+static inline void
+free_token (tp)
+     token_ty *tp;
+{
+  switch (tp->type)
+    {
+    case token_type_string_literal:
+    case token_type_symbol:
+      free (tp->string);
+      break;
+
+    default:
+      break;
+    }
+}
+
+
+/* 4. Combine characters into tokens.  Discard whitespace.  */
+
+/* Read the next token from the input and store it in *TP.
+   tp->line_number is the line on which the token started.  tp->string is
+   a freshly allocated copy of the text for symbol tokens and for string
+   literals without variable substitution; otherwise it is NULL.
+   Whitespace and comments are skipped.  Sequences that leave PHP mode
+   (?>, %>, < / script >) call skip_html and yield a token of type
+   token_type_other.  */
+static void
+x_php_lex (tp)
+     token_ty *tp;
+{
+  static char *buffer;
+  static int bufmax;
+  int bufpos;
+  int c;
+
+  tp->string = NULL;
+
+  for (;;)
+    {
+      tp->line_number = line_number;
+      c = phase3_getc ();
+      switch (c)
+	{
+	case EOF:
+	  tp->type = token_type_eof;
+	  return;
+
+	case '\n':
+	  /* A comment only stays attached to a following keyword if no
+	     other token came after it.  */
+	  if (last_non_comment_line > last_comment_line)
+	    xgettext_comment_reset ();
+	  /* FALLTHROUGH */
+	case ' ':
+	case '\t':
+	case '\r':
+	  /* Ignore whitespace.  */
+	  continue;
+	}
+
+      last_non_comment_line = tp->line_number;
+
+      switch (c)
+	{
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+	case 'V': case 'W': case 'X': case 'Y': case 'Z':
+	case '_':
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+	case 'v': case 'w': case 'x': case 'y': case 'z':
+	  /* Symbol: a letter or underscore followed by letters, digits and
+	     underscores.  */
+	  bufpos = 0;
+	  for (;;)
+	    {
+	      if (bufpos >= bufmax)
+		{
+		  bufmax += 100;
+		  buffer = xrealloc (buffer, bufmax);
+		}
+	      buffer[bufpos++] = c;
+	      c = phase1_getc ();
+	      switch (c)
+		{
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+		case 'Y': case 'Z':
+		case '_':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+		case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		  continue;
+
+		default:
+		  phase1_ungetc (c);
+		  break;
+		}
+	      break;
+	    }
+	  if (bufpos >= bufmax)
+	    {
+	      bufmax += 100;
+	      buffer = xrealloc (buffer, bufmax);
+	    }
+	  buffer[bufpos] = 0;
+	  tp->string = xstrdup (buffer);
+	  tp->type = token_type_symbol;
+	  return;
+
+	case '\'':
+	  /* Single-quoted string literal.  Only \\ and \' are escape
+	     sequences; any other backslash is kept literally.  */
+	  bufpos = 0;
+	  for (;;)
+	    {
+	      c = phase1_getc ();
+	      if (c == EOF || c == '\'')
+		break;
+	      if (c == '\\')
+		{
+		  c = phase1_getc ();
+		  if (c != '\\' && c != '\'')
+		    {
+		      phase1_ungetc (c);
+		      c = '\\';
+		    }
+		}
+	      if (bufpos >= bufmax)
+		{
+		  bufmax += 100;
+		  buffer = xrealloc (buffer, bufmax);
+		}
+	      buffer[bufpos++] = c;
+	    }
+	  if (bufpos >= bufmax)
+	    {
+	      bufmax += 100;
+	      buffer = xrealloc (buffer, bufmax);
+	    }
+	  buffer[bufpos] = 0;
+	  tp->type = token_type_string_literal;
+	  tp->string = xstrdup (buffer);
+	  return;
+
+	case '"':
+	  /* Double-quoted string literal.  It is downgraded to
+	     token_type_other as soon as a variable or expression
+	     substitution is seen, because its value is then not known.  */
+	  tp->type = token_type_string_literal;
+	  bufpos = 0;
+	  for (;;)
+	    {
+	      c = phase1_getc ();
+	      if (c == EOF || c == '"')
+		break;
+	      if (c == '$')
+		{
+		  c = phase1_getc ();
+		  if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+		      || c == '_' || c == '{' || c >= 0x7f)
+		    {
+		      /* String with variables.  */
+		      tp->type = token_type_other;
+		      continue;
+		    }
+		  phase1_ungetc (c);
+		  c = '$';
+		}
+	      if (c == '{')
+		{
+		  c = phase1_getc ();
+		  if (c == '$')
+		    {
+		      /* String with expressions.  */
+		      tp->type = token_type_other;
+		      continue;
+		    }
+		  phase1_ungetc (c);
+		  c = '{';
+		}
+	      if (c == '\\')
+		{
+		  int n, j;
+
+		  c = phase1_getc ();
+		  switch (c)
+		    {
+		    case '"':
+		    case '\\':
+		    case '$':
+		      break;
+
+		    case '0': case '1': case '2': case '3':
+		    case '4': case '5': case '6': case '7':
+		      /* Octal escape: up to three octal digits.  */
+		      n = 0;
+		      for (j = 0; j < 3; ++j)
+			{
+			  n = n * 8 + c - '0';
+			  c = phase1_getc ();
+			  switch (c)
+			    {
+			    default:
+			      break;
+
+			    case '0': case '1': case '2': case '3':
+			    case '4': case '5': case '6': case '7':
+			      continue;
+			    }
+			  break;
+			}
+		      phase1_ungetc (c);
+		      c = n;
+		      break;
+
+		    case 'x':
+		      /* Hexadecimal escape: up to two hex digits.  */
+		      n = 0;
+		      for (j = 0; j < 2; ++j)
+			{
+			  c = phase1_getc ();
+			  switch (c)
+			    {
+			    case '0': case '1': case '2': case '3': case '4':
+			    case '5': case '6': case '7': case '8': case '9':
+			      n = n * 16 + c - '0';
+			      break;
+			    case 'A': case 'B': case 'C': case 'D': case 'E':
+			    case 'F':
+			      n = n * 16 + 10 + c - 'A';
+			      break;
+			    case 'a': case 'b': case 'c': case 'd': case 'e':
+			    case 'f':
+			      n = n * 16 + 10 + c - 'a';
+			      break;
+			    default:
+			      /* c = 0 marks "not a hex digit".  */
+			      phase1_ungetc (c);
+			      c = 0;
+			      break;
+			    }
+			  if (c == 0)
+			    break;
+			}
+		      if (j == 0)
+			{
+			  /* No digit at all: keep "\x" literally.  */
+			  phase1_ungetc ('x');
+			  c = '\\';
+			}
+		      else
+			c = n;
+		      break;
+
+		    case 'n':
+		      c = '\n';
+		      break;
+		    case 't':
+		      c = '\t';
+		      break;
+		    case 'r':
+		      c = '\r';
+		      break;
+
+		    default:
+		      /* Unknown escape: the backslash stays literal.  */
+		      phase1_ungetc (c);
+		      c = '\\';
+		      break;
+		    }
+		}
+	      if (bufpos >= bufmax)
+		{
+		  bufmax += 100;
+		  buffer = xrealloc (buffer, bufmax);
+		}
+	      buffer[bufpos++] = c;
+	    }
+	  if (bufpos >= bufmax)
+	    {
+	      bufmax += 100;
+	      buffer = xrealloc (buffer, bufmax);
+	    }
+	  buffer[bufpos] = 0;
+	  if (tp->type == token_type_string_literal)
+	    tp->string = xstrdup (buffer);
+	  return;
+
+	case '?':
+	case '%':
+	  {
+	    int c2 = phase1_getc ();
+	    if (c2 == '>')
+	      {
+		/* ?> and %> terminate PHP mode and switch back to HTML
+		   mode.  */
+		skip_html ();
+	      }
+	    else
+	      phase1_ungetc (c2);
+	    tp->type = token_type_other;
+	    return;
+	  }
+
+	case '(':
+	  tp->type = token_type_lparen;
+	  return;
+
+	case ')':
+	  tp->type = token_type_rparen;
+	  return;
+
+	case ',':
+	  tp->type = token_type_comma;
+	  return;
+
+	case '<':
+	  {
+	    int c2 = phase1_getc ();
+	    if (c2 == '<')
+	      {
+		int c3 = phase1_getc ();
+		if (c3 == '<')
+		  {
+		    /* Start of here document.
+		       Parse whitespace, then label, then newline.  */
+		    do
+		      c = phase3_getc ();
+		    while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
+
+		    bufpos = 0;
+		    do
+		      {
+			if (bufpos >= bufmax)
+			  {
+			    bufmax += 100;
+			    buffer = xrealloc (buffer, bufmax);
+			  }
+			buffer[bufpos++] = c;
+			c = phase3_getc ();
+		      }
+		    while (c != EOF && c != '\n' && c != '\r');
+		    /* buffer[0..bufpos-1] now contains the label.  */
+
+		    /* Now skip the here document.  It ends at a line that
+		       starts with the label, optionally followed by ';',
+		       and then a line terminator.  */
+		    for (;;)
+		      {
+			c = phase1_getc ();
+			if (c == EOF)
+			  break;
+			if (c == '\n' || c == '\r')
+			  {
+			    int bufidx = 0;
+
+			    /* Compare the beginning of the new line with
+			       the label, one character at a time.  The
+			       cast keeps bytes >= 0x80 comparable with the
+			       int returned by phase1_getc.  */
+			    while (bufidx < bufpos)
+			      {
+				c = phase1_getc ();
+				if (c == EOF)
+				  break;
+				if (c != (unsigned char) buffer[bufidx])
+				  {
+				    /* Mismatch: let the outer loop look at
+				       this character again, since it may
+				       itself be a line terminator.  */
+				    phase1_ungetc (c);
+				    break;
+				  }
+				bufidx++;
+			      }
+			    /* Only a complete label match may end the here
+			       document.  */
+			    if (bufidx == bufpos)
+			      {
+				c = phase1_getc ();
+				if (c != ';')
+				  phase1_ungetc (c);
+				c = phase1_getc ();
+				if (c == '\n' || c == '\r')
+				  break;
+			      }
+			  }
+		      }
+
+		    /* FIXME: Ideally we should turn the here document into a
+		       string literal if it didn't contain $ substitution.  And
+		       we should also respect backslash escape sequences like
+		       in double-quoted strings.  */
+		    tp->type = token_type_other;
+		    return;
+		  }
+		phase1_ungetc (c3);
+	      }
+
+	    /* < / script > terminates PHP mode and switches back to HTML
+	       mode.  */
+	    while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
+	      c2 = phase1_getc ();
+	    if (c2 == '/')
+	      {
+		do
+		  c2 = phase1_getc ();
+		while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
+		if (c2 == 's' || c2 == 'S')
+		  {
+		    c2 = phase1_getc ();
+		    if (c2 == 'c' || c2 == 'C')
+		      {
+			c2 = phase1_getc ();
+			if (c2 == 'r' || c2 == 'R')
+			  {
+			    c2 = phase1_getc ();
+			    if (c2 == 'i' || c2 == 'I')
+			      {
+				c2 = phase1_getc ();
+				if (c2 == 'p' || c2 == 'P')
+				  {
+				    c2 = phase1_getc ();
+				    if (c2 == 't' || c2 == 'T')
+				      {
+					do
+					  c2 = phase1_getc ();
+					while (c2 == ' ' || c2 == '\t'
+					       || c2 == '\n' || c2 == '\r');
+					if (c2 == '>')
+					  {
+					    skip_html ();
+					  }
+					else
+					  phase1_ungetc (c2);
+				      }
+				    else
+				      phase1_ungetc (c2);
+				  }
+				else
+				  phase1_ungetc (c2);
+			      }
+			    else
+			      phase1_ungetc (c2);
+			  }
+			else
+			  phase1_ungetc (c2);
+		      }
+		    else
+		      phase1_ungetc (c2);
+		  }
+		else
+		  phase1_ungetc (c2);
+	      }
+	    else
+	      phase1_ungetc (c2);
+
+	    tp->type = token_type_other;
+	    return;
+	  }
+
+	case '`':
+	  /* Execution operator.  */
+	default:
+	  /* We could carefully recognize each of the 2 and 3 character
+	     operators, but it is not necessary, as we only need to recognize
+	     gettext invocations.  Don't bother.  */
+	  tp->type = token_type_other;
+	  return;
+	}
+    }
+}
+
+/* ========================= Extracting strings.  ========================== */
+
+/* The file is broken into tokens.  Scan the token stream, looking for
+   a keyword, followed by a left paren, followed by a string.  When we
+   see this sequence, we have something to remember.  We assume we are
+   looking at a valid PHP program, and leave the complaints about
+   the grammar to the PHP interpreter.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+
+/* Consume tokens up to the closing parenthesis that balances the current
+   nesting level, adding every extracted message to MLP.
+   COMMAS_TO_SKIP >= 0 selects one particular argument: that many commas
+   are skipped before the msgid is expected.  If a plural form is wanted
+   as well, PLURAL_COMMAS > 0 is the number of commas between msgid and
+   msgid_plural; otherwise PLURAL_COMMAS is 0.
+   COMMAS_TO_SKIP < 0 means that no particular argument is extracted.
+   Returns true if the end of input was reached, false if a closing
+   parenthesis was found.  */
+static bool
+extract_parenthesized (mlp, commas_to_skip, plural_commas)
+     message_list_ty *mlp;
+     int commas_to_skip;
+     int plural_commas;
+{
+  /* The message whose msgid was stored and whose msgid_plural is still
+     awaited, or NULL.  */
+  message_ty *pending_plural = NULL;
+  /* True exactly when the previous token was a keyword symbol.  */
+  bool after_keyword = false;
+  /* Argument selection of that keyword; meaningful only while
+     after_keyword is true.  */
+  int kw_commas_to_skip = -1;
+  int kw_plural_commas = 0;
+
+  for (;;)
+    {
+      token_ty tok;
+
+      x_php_lex (&tok);
+      switch (tok.type)
+	{
+	case token_type_eof:
+	  return true;
+
+	case token_type_other:
+	  after_keyword = false;
+	  continue;
+
+	case token_type_symbol:
+	  /* Keywords are irrelevant when all strings are extracted.  */
+	  if (!extract_all)
+	    {
+	      void *value;
+
+	      after_keyword =
+		(find_entry (&keywords, tok.string, strlen (tok.string),
+			     &value)
+		 == 0);
+	      if (after_keyword)
+		{
+		  /* The low 10 bits hold the msgid argument number, the
+		     remaining bits the msgid_plural argument number.  */
+		  int msgid_arg = (int) (long) value & ((1 << 10) - 1);
+		  int plural_arg = (int) (long) value >> 10;
+
+		  kw_commas_to_skip = msgid_arg - 1;
+		  if (plural_arg > msgid_arg)
+		    kw_plural_commas = plural_arg - msgid_arg;
+		  else
+		    kw_plural_commas = 0;
+		}
+	    }
+	  free (tok.string);
+	  continue;
+
+	case token_type_lparen:
+	  /* Nesting does not matter when all strings are extracted.  */
+	  if (!extract_all)
+	    {
+	      bool hit_eof;
+
+	      if (after_keyword)
+		hit_eof = extract_parenthesized (mlp, kw_commas_to_skip,
+						 kw_plural_commas);
+	      else
+		hit_eof = extract_parenthesized (mlp, -1, 0);
+	      if (hit_eof)
+		return true;
+	      after_keyword = false;
+	    }
+	  continue;
+
+	case token_type_rparen:
+	  /* Keep going when all strings are extracted anyway.  */
+	  if (!extract_all)
+	    return false;
+	  continue;
+
+	case token_type_comma:
+	  if (!extract_all)
+	    {
+	      if (commas_to_skip > 0)
+		commas_to_skip--;
+	      else if (commas_to_skip == 0)
+		{
+		  if (pending_plural != NULL && plural_commas > 0)
+		    {
+		      /* The msgid was seen; now head for msgid_plural.  */
+		      commas_to_skip = plural_commas - 1;
+		      plural_commas = 0;
+		    }
+		  else
+		    commas_to_skip = -1;
+		}
+	      after_keyword = false;
+	    }
+	  continue;
+
+	case token_type_string_literal:
+	  {
+	    lex_pos_ty pos;
+
+	    pos.line_number = tok.line_number;
+	    pos.file_name = logical_file_name;
+
+	    if (extract_all)
+	      {
+		remember_a_message (mlp, tok.string, &pos);
+		continue;
+	      }
+
+	    if (commas_to_skip != 0)
+	      /* Not the argument we are looking for.  */
+	      free (tok.string);
+	    else if (pending_plural != NULL)
+	      {
+		/* Seen an msgid_plural.  */
+		remember_a_message_plural (pending_plural, tok.string, &pos);
+		pending_plural = NULL;
+	      }
+	    else
+	      {
+		/* Seen an msgid.  */
+		message_ty *mp = remember_a_message (mlp, tok.string, &pos);
+
+		if (plural_commas > 0)
+		  pending_plural = mp;
+	      }
+	    after_keyword = false;
+	    continue;
+	  }
+
+	default:
+	  abort ();
+	}
+    }
+}
+
+
+/* Scan the PHP file open on F and add its translatable strings to the
+   message list of the first domain in MDLP.  REAL_FILENAME and
+   LOGICAL_FILENAME are stored in the scanner's file-level state for the
+   duration of the scan.  */
+void
+extract_php (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  bool at_eof;
+
+  /* Open scanner.  */
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  /* No comment and no other token has been seen so far.  */
+  last_non_comment_line = -1;
+  last_comment_line = -1;
+
+  init_keywords ();
+
+  /* The file starts out in HTML mode, so skip ahead to the first piece
+     of PHP code.  */
+  skip_html ();
+
+  /* Consume tokens until the end of input.  A return caused by an
+     unbalanced closing parenthesis simply restarts the extraction.  */
+  do
+    at_eof = extract_parenthesized (mlp, -1, 0);
+  while (!at_eof);
+
+  /* Close scanner.  */
+  line_number = 0;
+  logical_file_name = NULL;
+  real_file_name = NULL;
+  fp = NULL;
+}
diff --git a/src/x-php.h b/src/x-php.h
new file mode 100644
index 000000000..8583356fc
--- /dev/null
+++ b/src/x-php.h
@@ -0,0 +1,34 @@
+/* xgettext PHP backend.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#define EXTENSIONS_PHP \
+  { "php",    "PHP"   },						\
+  { "php3",   "PHP"   },						\
+  { "php4",   "PHP"   },						\
+
+#define SCANNERS_PHP \
+  { "PHP",        extract_php, &formatstring_php },			\
+
+/* Scan a PHP file and add its translatable strings to mdlp.  */
+extern void extract_php PARAMS ((FILE *fp, const char *real_filename,
+				 const char *logical_filename,
+				 msgdomain_list_ty *mdlp));
+
+extern void x_php_keyword PARAMS ((const char *keyword));
+extern void x_php_extract_all PARAMS ((void));
diff --git a/src/xgettext.c b/src/xgettext.c
index 33982848b..92b9dbf50 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -67,6 +67,7 @@
 #include "x-awk.h"
 #include "x-ycp.h"
 #include "x-tcl.h"
+#include "x-php.h"
 #include "x-rst.h"
 #include "x-glade.h"
 
@@ -235,6 +236,7 @@ main (argc, argv)
 	x_java_extract_all ();
 	x_awk_extract_all ();
 	x_tcl_extract_all ();
+	x_php_extract_all ();
 	x_glade_extract_all ();
 	break;
       case 'c':
@@ -293,6 +295,7 @@ main (argc, argv)
 	    x_java_keyword (optarg);
 	    x_awk_keyword (optarg);
 	    x_tcl_keyword (optarg);
+	    x_php_keyword (optarg);
 	    x_glade_keyword (optarg);
 	  }
 	break;
@@ -578,7 +581,7 @@ Choice of input file language:\n\
   -L, --language=NAME            recognise the specified language\n\
                                    (C, C++, ObjectiveC, PO, Python, Lisp,\n\
                                    EmacsLisp, librep, Java, awk, YCP, Tcl,\n\
-                                   RST, Glade)\n\
+                                   PHP, RST, Glade)\n\
   -C, --c++                      shorthand for --language=C++\n\
 By default the language is guessed depending on the input file name extension.\n\
 "));
@@ -1279,6 +1282,7 @@ language_to_extractor (name)
     SCANNERS_AWK
     SCANNERS_YCP
     SCANNERS_TCL
+    SCANNERS_PHP
     SCANNERS_RST
     SCANNERS_GLADE
     /* Here will follow more languages and their scanners: perl, etc...
@@ -1325,6 +1329,7 @@ extension_to_language (extension)
     EXTENSIONS_AWK
     EXTENSIONS_YCP
     EXTENSIONS_TCL
+    EXTENSIONS_PHP
     EXTENSIONS_RST
     EXTENSIONS_GLADE
     /* Here will follow more file extensions: sh, pl ... */
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 4813e3b0a..12a0c5ada 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,14 @@
+2002-08-18  Bruno Haible  <bruno@clisp.org>
+
+	* format-php-1: New file.
+	* format-php-2: New file.
+	* lang-php: New file.
+	* Makefile.am (TESTS): Add them.
+
+2002-08-06  Bruno Haible  <bruno@clisp.org>
+
+	* gettext-0.11.5 released.
+
 2002-08-06  Bruno Haible  <bruno@clisp.org>
 
 	* msgunfmt-2: Fix typo.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 925fe30ff..a7ed94a6a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -52,12 +52,13 @@ TESTS = gettext-1 gettext-2 \
 	format-java-1 format-java-2 \
 	format-librep-1 format-librep-2 \
 	format-lisp-1 format-lisp-2 \
+	format-php-1 format-php-2 \
 	format-python-1 format-python-2 \
 	format-pascal-1 format-pascal-2 \
 	format-tcl-1 format-tcl-2 \
 	format-ycp-1 format-ycp-2 \
 	plural-1 plural-2 \
-	lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-gawk lang-pascal lang-ycp lang-tcl lang-po lang-rst \
+	lang-c lang-c++ lang-objc lang-python lang-clisp lang-elisp lang-librep lang-java lang-gawk lang-pascal lang-ycp lang-tcl lang-php lang-po lang-rst \
 	rpath-1a rpath-1b \
 	rpath-2aaa rpath-2aab rpath-2aac rpath-2aad \
 	rpath-2aba rpath-2abb rpath-2abc rpath-2abd \
diff --git a/tests/format-php-1 b/tests/format-php-1
new file mode 100755
index 000000000..92389aa39
--- /dev/null
+++ b/tests/format-php-1
@@ -0,0 +1,116 @@
+#! /bin/sh
+
+# Test recognition of PHP format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-ph-1.data"
+cat <<\EOF > f-ph-1.data
+# Valid: no argument
+"abc%%"
+# Valid: one string argument
+"abc%s"
+# Valid: one integer argument
+"abc%b"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%u"
+# Valid: one integer argument
+"abc%o"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one floating-point argument
+"abc%e"
+# Valid: one floating-point argument
+"abc%f"
+# Valid: one character argument
+"abc%c"
+# Valid: one argument with flags
+"abc%-f"
+# Valid: one argument with padding flags
+"abc%'=f"
+# Valid: one argument with width
+"abc%2f"
+# Valid: one argument with precision
+"abc%.4f"
+# Valid: one argument with width and precision
+"abc%14.4f"
+# Invalid: unterminated
+"abc%"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: unknown format specifier
+"abc%F"
+# Invalid: flags after width
+"abc%5-f"
+# Invalid: twice precision
+"abc%.4.2f"
+# Valid: three arguments
+"abc%d%x%x"
+# Valid: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%-1$d"
+# Valid: three arguments, two with same number
+"abc%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+"abc%1$4x,%2$c,%1$s"
+# Valid: no conflict
+"abc%1$4x,%2$c,%1$u"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid: numbered argument with constant precision
+"abc%1$.9x"
+# Valid: missing non-final argument
+"abc%2$x%3$s"
+# Valid: permutation
+"abc%2$ddef%1$d"
+# Valid: multiple uses of same argument
+"abc%2$xdef%1$sghi%2$x"
+EOF
+
+# Driver: f-ph-1.data holds pairs of lines, a "# Valid:" or "# Invalid:"
+# comment followed by a quoted format string.  Each string is wrapped in
+# a gettext() call, run through xgettext, and the presence of the
+# php-format flag in the output is compared with the comment.
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  tmpfiles="$tmpfiles f-ph-1-$n.in f-ph-1-$n.po"
+  # Put a backslash before every '$' in the generated PHP source.
+  echo "<?= gettext(${string}) ?>" | sed -e 's/\$/\\\$/g' > f-ph-1-$n.in
+  ${XGETTEXT} -L PHP -o f-ph-1-$n.po f-ph-1-$n.in || exit 1
+  test -f f-ph-1-$n.po || exit 1
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    # A valid format string must be marked php-format.
+    if grep php-format f-ph-1-$n.po > /dev/null; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    # An invalid format string must not be marked php-format.
+    if grep php-format f-ph-1-$n.po > /dev/null; then
+      fail=yes
+    else
+      :
+    fi
+  fi
+  if test -n "$fail"; then
+    # Show the offending input and what xgettext produced.
+    echo "Format string recognition error:" 1>&2
+    cat f-ph-1-$n.in 1>&2
+    echo "Got:" 1>&2
+    cat f-ph-1-$n.po 1>&2
+    exit 1
+  fi
+done < f-ph-1.data
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/tests/format-php-2 b/tests/format-php-2
new file mode 100755
index 000000000..a7b96a12d
--- /dev/null
+++ b/tests/format-php-2
@@ -0,0 +1,123 @@
+#! /bin/sh
+
+# Test checking of PHP format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-ph-2.data"
+cat <<\EOF > f-ph-2.data
+# Valid: %% doesn't count
+msgid  "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid  "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid  "abc%s%gdef"
+msgstr "xyz%s%g"
+# Valid: same arguments, with different widths
+msgid  "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid  "abc%s%gdef"
+msgstr "xyz%1$s%2$g"
+# Valid: permutation
+msgid  "abc%s%g%cdef"
+msgstr "xyz%3$c%2$g%1$s"
+# Invalid: too few arguments
+msgid  "abc%2$udef%1$s"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid  "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid  "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same numbered arguments, with different widths
+msgid  "abc%2$5s%1$4s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing argument
+msgid  "abc%2$sdef%1$u"
+msgstr "xyz%1$u"
+# Invalid: missing argument
+msgid  "abc%1$sdef%2$u"
+msgstr "xyz%2$u"
+# Invalid: added argument
+msgid  "abc%1$udef"
+msgstr "xyz%1$uvw%2$c"
+# Valid: type compatibility
+msgid  "abc%b"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%o"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%x"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%X"
+msgstr "xyz%d"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%d"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%e"
+msgstr "xyz%c"
+EOF
+
+# Driver: f-ph-2.data holds triples of lines, a "# Valid:" or
+# "# Invalid:" comment, a msgid line and a msgstr line.  Each triple is
+# turned into a PO file flagged php-format and checked with
+# "msgfmt --check-format".
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  tmpfiles="$tmpfiles f-ph-2-$n.po f-ph-2-$n.mo"
+  cat <<EOF > f-ph-2-$n.po
+#, php-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    # A valid pair must be accepted.
+    if ${MSGFMT} --check-format -o f-ph-2-$n.mo f-ph-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    # An invalid pair must be rejected with exit status exactly 1; the
+    # diagnostics are discarded.
+    ${MSGFMT} --check-format -o f-ph-2-$n.mo f-ph-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    # Show the PO file that was judged wrongly.
+    echo "Format string checking error:" 1>&2
+    cat f-ph-2-$n.po 1>&2
+    exit 1
+  fi
+done < f-ph-2.data
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/tests/lang-php b/tests/lang-php
new file mode 100755
index 000000000..3fd8fbe48
--- /dev/null
+++ b/tests/lang-php
@@ -0,0 +1,86 @@
+#! /bin/sh
+
+# Test of gettext facilities in the PHP language.
+# Assumes an fr_FR locale is installed.
+# Assumes the following packages are installed: mod_php4-core.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles prog.php"
+cat <<\EOF > prog.php
+<?
+  setlocale (LC_ALL, "");
+  textdomain ("prog");
+  bindtextdomain ("prog", ".");
+  echo _("'Your command, please?', asked the waiter.");
+  echo "\n";
+  echo printf(_("%s is replaced by %s."), "FF", "EUR");
+  echo "\n";
+?>
+EOF
+
+tmpfiles="$tmpfiles prog.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} -o prog.pot --omit-header --no-location prog.php
+
+tmpfiles="$tmpfiles prog.ok"
+cat <<EOF > prog.ok
+msgid "'Your command, please?', asked the waiter."
+msgstr ""
+
+#, php-format
+msgid "%s is replaced by %s."
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} prog.ok prog.pot || exit 1
+
+tmpfiles="$tmpfiles fr.po"
+cat <<\EOF > fr.po
+msgid ""
+msgstr "Content-Type: text/plain; charset=ISO-8859-1\n"
+
+msgid "'Your command, please?', asked the waiter."
+msgstr "«Votre commande, s'il vous plait», dit le garçon."
+
+# Reverse the arguments.
+#, php-format
+msgid "%s is replaced by %s."
+msgstr "%2$s remplace %1$s."
+EOF
+
+tmpfiles="$tmpfiles fr.po.new"
+: ${MSGMERGE=msgmerge}
+${MSGMERGE} -q -o fr.po.new fr.po prog.pot
+
+: ${DIFF=diff}
+${DIFF} fr.po fr.po.new || exit 1
+
+tmpfiles="$tmpfiles fr"
+test -d fr || mkdir fr
+test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES
+
+: ${MSGFMT=msgfmt}
+${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po
+
+# Test for presence of gawk version 4.0 or newer.
+case `(php -v) 2>/dev/null` in
+  [4-9].*) ;;
+  *) echo "SKIP: lang-php"; rm -fr $tmpfiles; exit 77;;
+esac
+
+tmpfiles="$tmpfiles prog.ok prog.out"
+: ${DIFF=diff}
+cat <<\EOF > prog.ok
+«Votre commande, s'il vous plait», dit le garçon.
+EUR remplace FF.
+EOF
+
+LANGUAGE= LC_ALL=fr_FR php -q prog.php > prog.out || exit 1
+${DIFF} prog.ok prog.out || exit 1
+
+rm -fr $tmpfiles
+
+exit 0