git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Support multi-line headers in COPY FROM command.
author Fujii Masao <fujii@postgresql.org>
Thu, 3 Jul 2025 06:27:26 +0000 (15:27 +0900)
committer Fujii Masao <fujii@postgresql.org>
Thu, 3 Jul 2025 06:27:26 +0000 (15:27 +0900)
The COPY FROM command now accepts a non-negative integer for the HEADER option,
allowing multiple header lines to be skipped. This is useful when the input
contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com

doc/src/sgml/ref/copy.sgml
src/backend/commands/copy.c
src/backend/commands/copyfromparse.c
src/backend/commands/copyto.c
src/include/commands/copy.h
src/test/regress/expected/copy.out
src/test/regress/expected/copy2.out
src/test/regress/sql/copy.sql
src/test/regress/sql/copy2.sql
src/tools/pgindent/typedefs.list

index 8433344e5b6f5eb7f6db8145f73d5493c23a2cee..c2d1fbc1fbe942c24881a1679905c375589252c9 100644 (file)
@@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
     NULL '<replaceable class="parameter">null_string</replaceable>'
     DEFAULT '<replaceable class="parameter">default_string</replaceable>'
-    HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
+    HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
     QUOTE '<replaceable class="parameter">quote_character</replaceable>'
     ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><replaceable class="parameter">integer</replaceable></term>
+    <listitem>
+     <para>
+      Specifies a non-negative integer value passed to the selected option.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>FORMAT</literal></term>
     <listitem>
@@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     <term><literal>HEADER</literal></term>
     <listitem>
      <para>
-      Specifies that the file contains a header line with the names of each
-      column in the file.  On output, the first line contains the column
-      names from the table.  On input, the first line is discarded when this
-      option is set to <literal>true</literal> (or equivalent Boolean value).
-      If this option is set to <literal>MATCH</literal>, the number and names
-      of the columns in the header line must match the actual column names of
-      the table, in order;  otherwise an error is raised.
+      On output, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the output will
+      contain the column names from the table.
+      Integer values <literal>0</literal> and <literal>1</literal> are
+      accepted as Boolean values, but other integers are not allowed for
+      <command>COPY TO</command> commands.
+     </para>
+     <para>
+      On input, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the input is
+      discarded.  If set to a non-negative integer, that number of
+      lines are discarded.  If set to <literal>MATCH</literal>, the first line
+      is discarded, and it must contain column names that exactly match the
+      table's columns, in both number and order; otherwise, an error is raised.
+      The <literal>MATCH</literal> value is only valid for
+      <command>COPY FROM</command> commands.
+     </para>
+     <para>
       This option is not allowed when using <literal>binary</literal> format.
-      The <literal>MATCH</literal> option is only valid for <command>COPY
-      FROM</command> commands.
      </para>
     </listitem>
    </varlistentry>
index 74ae42b19a710da948a461b506480d38978c64d0..fae9c41db6565cafe058f2c9a6889c92eae61a8e 100644 (file)
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 }
 
 /*
- * Extract a CopyHeaderChoice value from a DefElem.  This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
  */
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
 {
        /*
         * If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
                return COPY_HEADER_TRUE;
 
        /*
-        * Allow 0, 1, "true", "false", "on", "off", or "match".
+        * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+        * "match".
         */
        switch (nodeTag(def->arg))
        {
                case T_Integer:
-                       switch (intVal(def->arg))
                        {
-                               case 0:
-                                       return COPY_HEADER_FALSE;
-                               case 1:
-                                       return COPY_HEADER_TRUE;
-                               default:
-                                       /* otherwise, error out below */
-                                       break;
+                               int                     ival = intVal(def->arg);
+
+                               if (ival < 0)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                        errmsg("a negative integer value cannot be "
+                                                                       "specified for %s", def->defname)));
+
+                               if (!is_from && ival > 1)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                        errmsg("cannot use multi-line header in COPY TO")));
+
+                               return ival;
                        }
                        break;
                default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
        }
        ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
-                        errmsg("%s requires a Boolean value or \"match\"",
+                        errmsg("%s requires a Boolean value, a non-negative integer, "
+                                       "or the string \"match\"",
                                        def->defname)));
        return COPY_HEADER_FALSE;       /* keep compiler quiet */
 }
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
                        if (header_specified)
                                errorConflictingDefElem(defel, pstate);
                        header_specified = true;
-                       opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+                       opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
                }
                else if (strcmp(defel->defname, "quote") == 0)
                {
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
                                 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
        /* Check header */
-       if (opts_out->binary && opts_out->header_line)
+       if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
                ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
index f52f2477df1292c4a5eea20ea2cd31e64704c535..b1ae97b833dffbd245ae2727975cb68bf5a30412 100644 (file)
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 {
        int                     fldct;
-       bool            done;
+       bool            done = false;
 
        /* only available for text or csv input */
        Assert(!cstate->opts.binary);
 
        /* on input check that the header line is correct if needed */
-       if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+       if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
        {
                ListCell   *cur;
                TupleDesc       tupDesc;
+               int                     lines_to_skip = cstate->opts.header_line;
+
+               /* If set to "match", one header line is skipped */
+               if (cstate->opts.header_line == COPY_HEADER_MATCH)
+                       lines_to_skip = 1;
 
                tupDesc = RelationGetDescr(cstate->rel);
 
-               cstate->cur_lineno++;
-               done = CopyReadLine(cstate, is_csv);
+               for (int i = 0; i < lines_to_skip; i++)
+               {
+                       cstate->cur_lineno++;
+                       if ((done = CopyReadLine(cstate, is_csv)))
+                               break;
+               }
 
                if (cstate->opts.header_line == COPY_HEADER_MATCH)
                {
index ea6f18f2c80085cb4e70533af18ac359c20c2741..67b94b91cae44427b7bb35bb839501f9a732df30 100644 (file)
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
                                                                                                                  cstate->file_encoding);
 
        /* if a header has been requested send the line */
-       if (cstate->opts.header_line)
+       if (cstate->opts.header_line == COPY_HEADER_TRUE)
        {
                ListCell   *cur;
                bool            hdr_delim = false;
index 06dfdfef7210c7ff3ef0f6e8a4d56710ded2f1ae..541176e1980324f3901797643d74663480000811 100644 (file)
 #include "tcop/dest.h"
 
 /*
- * Represents whether a header line should be present, and whether it must
- * match the actual names (which implies "true").
+ * Represents whether a header line must match the actual names
+ * (which implies "true"), and whether it should be present.
  */
-typedef enum CopyHeaderChoice
-{
-       COPY_HEADER_FALSE = 0,
-       COPY_HEADER_TRUE,
-       COPY_HEADER_MATCH,
-} CopyHeaderChoice;
+#define COPY_HEADER_MATCH      -1
+#define COPY_HEADER_FALSE      0
+#define COPY_HEADER_TRUE       1
 
 /*
  * Represents where to save input processing errors.  More values to be added
@@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
        bool            binary;                 /* binary format? */
        bool            freeze;                 /* freeze rows on loading? */
        bool            csv_mode;               /* Comma Separated Value format? */
-       CopyHeaderChoice header_line;   /* header line? */
+       int                     header_line;    /* number of lines to skip or COPY_HEADER_XXX
+                                                                * value (see the above) */
        char       *null_print;         /* NULL marker string (server encoding!) */
        int                     null_print_len; /* length of same */
        char       *null_print_client;  /* same converted to file encoding */
index 8d5a06563c44a677dbcd1b5ef62ae654a7e0019a..ac66eb55aeed4a6a864c0d6f826fde7f0a38c57e 100644 (file)
@@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
 c1     colname with tab: \t
 1      a
 2      b
+-- test multi-line header line feature
+create temp table copytest5 (c1 int);
+copy copytest5 from stdin (format csv, header 2);
+copy copytest5 to stdout (header);
+c1
+1
+2
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
 -- test copy from with a partitioned table
 create table parted_copytest (
        a int,
@@ -224,7 +247,7 @@ alter table header_copytest add column c text;
 copy header_copytest to stdout with (header match);
 ERROR:  cannot use "match" with HEADER in COPY TO
 copy header_copytest from stdin with (header wrong_choice);
-ERROR:  header requires a Boolean value or "match"
+ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
 -- works
 copy header_copytest from stdin with (header match);
 copy header_copytest (c, a, b) from stdin with (header match);
index 64ea33aeae8fdb7fd6ce09deaadc3569921e6238..caa3c44f0d0ca68049d58f3d76ad28d09abff002 100644 (file)
@@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
 ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
+COPY x from stdin with (header -1);
+ERROR:  a negative integer value cannot be specified for header
+COPY x from stdin with (header 2.5);
+ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
+COPY x to stdout with (header 2);
+ERROR:  cannot use multi-line header in COPY TO
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
 ERROR:  column "d" specified more than once
index f0b88a23db853bbd04318976f7563c76070dc663..a1316c73bac6956e5649aeb89674734212876186 100644 (file)
@@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
 
 copy copytest4 to stdout (header);
 
+-- test multi-line header line feature
+
+create temp table copytest5 (c1 int);
+
+copy copytest5 from stdin (format csv, header 2);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+copy copytest5 to stdout (header);
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
 -- test copy from with a partitioned table
 create table parted_copytest (
        a int,
index 45273557ce04000128bafba87a21f03bd249501b..cef45868db511ab686702df9f982649850d8495b 100644 (file)
@@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
+COPY x from stdin with (header -1);
+COPY x from stdin with (header 2.5);
+COPY x to stdout with (header 2);
 
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
index 66c5782688a23978891f59eb6d38d29bb8155684..e7d1c48e1f20b660685f67f945183cf80d7e975c 100644 (file)
@@ -521,7 +521,6 @@ CopyFormatOptions
 CopyFromRoutine
 CopyFromState
 CopyFromStateData
-CopyHeaderChoice
 CopyInsertMethod
 CopyLogVerbosityChoice
 CopyMethod