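Adjust ts_debug's output as per my proposal of yesterday: show the
dictionary that recognized the token and the lexemes it produced as two
separate columns (dictionary, lexemes) instead of one combined
"Lexized token" text column, and use lower-case column names.  The result
columns are now declared with OUT parameters (RETURNS SETOF record) rather
than with a separate ts_debug composite type.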

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index afdda69720554070450682be498e6cc5d5a0d59f..368673c66e6c1e1011a22a95a1773db797ecb07a 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.402 2007/10/21 20:04:37 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.403 2007/10/22 20:13:37 tgl Exp $ -->
  
   <chapter id="functions">
    <title>Functions and Operators</title>
@@ -7857,11 +7857,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
        </thead>
        <tbody>
         <row>
-        <entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>)</literal></entry>
-        <entry><type>setof ts_debug</type></entry>
+        <entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>, OUT <replaceable class="PARAMETER">alias</> <type>text</>, OUT <replaceable class="PARAMETER">description</> <type>text</>, OUT <replaceable class="PARAMETER">token</> <type>text</>, OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>, OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>, OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)</literal></entry>
+        <entry><type>setof record</type></entry>
          <entry>test a configuration</entry>
          <entry><literal>ts_debug('english', 'The Brightest supernovaes')</literal></entry>
-        <entry><literal>(lword,"Latin word",The,{english_stem},"english_stem: {}") ...</literal></entry>
+        <entry><literal>(lword,"Latin word",The,{english_stem},english_stem,{}) ...</literal></entry>
         </row>
         <row>
          <entry><literal><function>ts_lexize</function>(<replaceable class="PARAMETER">dict</replaceable> <type>regdictionary</>, <replaceable class="PARAMETER">token</replaceable> <type>text</>)</literal></entry>
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index 03625b41a5ba42ee1798b8646d88f42fd79ab392..81b54d8e1740075e7c2726873678b7165b39a6d0 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.22 2007/10/22 03:37:04 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.23 2007/10/22 20:13:37 tgl Exp $ -->
  
  <chapter id="textsearch">
   <title id="textsearch-title">Full Text Search</title>
@@ -1699,18 +1699,18 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger();
      <itemizedlist  spacing="compact" mark="bullet">
       <listitem>
        <para>
-       <structname>word</> <type>text</> &mdash; the value of a lexeme
+       <replaceable>word</> <type>text</> &mdash; the value of a lexeme
        </para>
       </listitem>
       <listitem>
        <para>
-       <structname>ndoc</> <type>integer</> &mdash; number of documents
+       <replaceable>ndoc</> <type>integer</> &mdash; number of documents
         (<type>tsvector</>s) the word occurred in
        </para>
       </listitem>
       <listitem>
        <para>
-       <structname>nentry</> <type>integer</> &mdash; total number of
+       <replaceable>nentry</> <type>integer</> &mdash; total number of
         occurrences of the word
        </para>
       </listitem>
@@ -1901,8 +1901,8 @@ LIMIT 10;
     as the entire word and as each component:
  
  <programlisting>
-SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1');
-    Alias    |          Description          |     Token     
+SELECT alias, description, token FROM ts_debug('foo-bar-beta1');
+    alias    |          description          |     token     
  -------------+-------------------------------+---------------
   hword       | Hyphenated word               | foo-bar-beta1
   lpart_hword | Latin part of hyphenated word | foo
@@ -1917,8 +1917,8 @@ SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1');
     instructive example:
  
  <programlisting>
-SELECT "Alias", "Description", "Token" FROM ts_debug('http://foo.com/stuff/index.html');
-  Alias   |  Description  |          Token           
+SELECT alias, description, token FROM ts_debug('http://foo.com/stuff/index.html');
+  alias   |  description  |          token           
  ----------+---------------+--------------------------
   protocol | Protocol head | http://
   url      | URL           | foo.com/stuff/index.html
@@ -2186,25 +2186,23 @@ SELECT ts_lexize('public.simple_dict','The');
      synonym dictionary and put it before the <literal>english_stem</> dictionary:
  
  <programlisting>
-SELECT * FROM ts_debug('english','Paris');
- Alias | Description | Token |  Dictionaries  |    Lexized token
--------+-------------+-------+----------------+----------------------
- lword | Latin word  | Paris | {english_stem} | english_stem: {pari}
-(1 row)
+SELECT * FROM ts_debug('english', 'Paris');
+ alias | description | token |  dictionaries  |  dictionary  | lexemes 
+-------+-------------+-------+----------------+--------------+---------
+ lword | Latin word  | Paris | {english_stem} | english_stem | {pari}
  
-CREATE TEXT SEARCH DICTIONARY synonym (
+CREATE TEXT SEARCH DICTIONARY my_synonym (
      TEMPLATE = synonym,
      SYNONYMS = my_synonyms
  );
  
  ALTER TEXT SEARCH CONFIGURATION english
-    ALTER MAPPING FOR lword WITH synonym, english_stem;
+    ALTER MAPPING FOR lword WITH my_synonym, english_stem;
  
-SELECT * FROM ts_debug('english','Paris');
- Alias | Description | Token |      Dictionaries      |  Lexized token
--------+-------------+-------+------------------------+------------------
- lword | Latin word  | Paris | {synonym,english_stem} | synonym: {paris}
-(1 row)
+SELECT * FROM ts_debug('english', 'Paris');
+ alias | description | token |       dictionaries        | dictionary | lexemes 
+-------+-------------+-------+---------------------------+------------+---------
+ lword | Latin word  | Paris | {my_synonym,english_stem} | my_synonym | {paris}
  </programlisting>
     </para>
  
@@ -2711,7 +2709,14 @@ SHOW default_text_search_config;
    </indexterm>
  
    <synopsis>
-   ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>) returns <type>setof ts_debug</>
+   ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>,
+            OUT <replaceable class="PARAMETER">alias</> <type>text</>,
+            OUT <replaceable class="PARAMETER">description</> <type>text</>,
+            OUT <replaceable class="PARAMETER">token</> <type>text</>,
+            OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>,
+            OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>,
+            OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)
+            returns setof record
    </synopsis>
  
    <para>
@@ -2725,23 +2730,47 @@ SHOW default_text_search_config;
    </para>
  
    <para>
-   <function>ts_debug</>'s result row type is defined as:
+   <function>ts_debug</> returns one row for each token identified in the text
+   by the parser.  The columns returned are
  
-<programlisting>
-CREATE TYPE ts_debug AS (
-    "Alias" text,
-    "Description" text,
-    "Token" text,
-    "Dictionaries" regdictionary[],
-    "Lexized token" text
-);
-</programlisting>
-
-   One row is produced for each token identified by the parser.
-   The first three columns describe the token, and the fourth lists
-   the dictionaries selected by the configuration for that token's type.
-   The last column shows the result of dictionary processing: which
-   dictionary (if any) recognized the token, and what it produced.
+    <itemizedlist  spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       <replaceable>alias</> <type>text</> &mdash; short name of the token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>description</> <type>text</> &mdash; description of the
+       token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>token</> <type>text</> &mdash; text of the token
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>dictionaries</> <type>regdictionary[]</> &mdash; the
+       dictionaries selected by the configuration for this token type
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>dictionary</> <type>regdictionary</> &mdash; the dictionary
+       that recognized the token, or <literal>NULL</> if none did
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <replaceable>lexemes</> <type>text[]</> &mdash; the lexeme(s) produced
+       by the dictionary that recognized the token, or <literal>NULL</> if
+       none did; an empty array (<literal>{}</>) means it was recognized as a
+       stop word
+      </para>
+     </listitem>
+    </itemizedlist>
    </para>
  
    <para>
@@ -2749,33 +2778,32 @@ CREATE TYPE ts_debug AS (
  
  <programlisting>
  SELECT * FROM ts_debug('english','a fat  cat sat on a mat - it ate a fat rats');
- Alias |  Description  | Token | Dictionaries | Lexized token
--------+---------------+-------+--------------+----------------
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | fat   | {english}    | english: {fat}
- blank | Space symbols |       |              |
- lword | Latin word    | cat   | {english}    | english: {cat}
- blank | Space symbols |       |              |
- lword | Latin word    | sat   | {english}    | english: {sat}
- blank | Space symbols |       |              |
- lword | Latin word    | on    | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | mat   | {english}    | english: {mat}
- blank | Space symbols |       |              |
- blank | Space symbols | -     |              |
- lword | Latin word    | it    | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | ate   | {english}    | english: {ate}
- blank | Space symbols |       |              |
- lword | Latin word    | a     | {english}    | english: {}
- blank | Space symbols |       |              |
- lword | Latin word    | fat   | {english}    | english: {fat}
- blank | Space symbols |       |              |
- lword | Latin word    | rats  | {english}    | english: {rat}
-   (24 rows)
+ alias |  description  | token |  dictionaries  |  dictionary  | lexemes 
+-------+---------------+-------+----------------+--------------+---------
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | fat   | {english_stem} | english_stem | {fat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | cat   | {english_stem} | english_stem | {cat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | sat   | {english_stem} | english_stem | {sat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | on    | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | mat   | {english_stem} | english_stem | {mat}
+ blank | Space symbols |       | {}             |              | 
+ blank | Space symbols | -     | {}             |              | 
+ lword | Latin word    | it    | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | ate   | {english_stem} | english_stem | {ate}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | a     | {english_stem} | english_stem | {}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | fat   | {english_stem} | english_stem | {fat}
+ blank | Space symbols |       | {}             |              | 
+ lword | Latin word    | rats  | {english_stem} | english_stem | {rat}
  </programlisting>
    </para>
  
@@ -2801,34 +2829,33 @@ ALTER TEXT SEARCH CONFIGURATION public.english
  
  <programlisting>
  SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
- Alias |  Description  |    Token    |                   Dictionaries                  |          Lexized token
--------+---------------+-------------+-------------------------------------------------+-------------------------------------
- lword | Latin word    | The         | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {}
- blank | Space symbols |             |                                                 |
- lword | Latin word    | Brightest   | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright}
- blank | Space symbols |             |                                                 |
- lword | Latin word    | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova}
-(5 rows)
+ alias |  description  |    token    |         dictionaries          |   dictionary   |   lexemes   
+-------+---------------+-------------+-------------------------------+----------------+-------------
+ lword | Latin word    | The         | {english_ispell,english_stem} | english_ispell | {}
+ blank | Space symbols |             | {}                            |                | 
+ lword | Latin word    | Brightest   | {english_ispell,english_stem} | english_ispell | {bright}
+ blank | Space symbols |             | {}                            |                | 
+ lword | Latin word    | supernovaes | {english_ispell,english_stem} | english_stem   | {supernova}
  </programlisting>
  
    <para>
     In this example, the word <literal>Brightest</> was recognized by the
     parser as a <literal>Latin word</literal> (alias <literal>lword</literal>).
     For this token type the dictionary list is
-   <literal>public.english_ispell</> and
-   <literal>pg_catalog.english_stem</literal>. The word was recognized by
-   <literal>public.english_ispell</literal>, which reduced it to the noun
+   <literal>english_ispell</> and
+   <literal>english_stem</literal>. The word was recognized by
+   <literal>english_ispell</literal>, which reduced it to the noun
     <literal>bright</literal>. The word <literal>supernovaes</literal> is
-   unknown to the <literal>public.english_ispell</literal> dictionary so it
+   unknown to the <literal>english_ispell</literal> dictionary so it
     was passed to the next dictionary, and, fortunately, was recognized (in
-   fact, <literal>public.english_stem</literal> is a Snowball dictionary which
+   fact, <literal>english_stem</literal> is a Snowball dictionary which
     recognizes everything; that is why it was placed at the end of the
     dictionary list).
    </para>
  
    <para>
     The word <literal>The</literal> was recognized by the
-   <literal>public.english_ispell</literal> dictionary as a stop word (<xref
+   <literal>english_ispell</literal> dictionary as a stop word (<xref
     linkend="textsearch-stopwords">) and will not be indexed.
     The spaces are discarded too, since the configuration provides no
     dictionaries at all for them.
@@ -2839,16 +2866,15 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
     you want to see:
  
  <programlisting>
-SELECT "Alias", "Token", "Lexized token"
+SELECT alias, token, dictionary, lexemes
  FROM ts_debug('public.english','The Brightest supernovaes');
- Alias |    Token    |          Lexized token
--------+-------------+--------------------------------------
- lword | The         | public.english_ispell: {}
- blank |             |
- lword | Brightest   | public.english_ispell: {bright}
- blank |             |
- lword | supernovaes | pg_catalog.english_stem: {supernova}
-(5 rows)
+ alias |    token    |   dictionary   |   lexemes   
+-------+-------------+----------------+-------------
+ lword | The         | english_ispell | {}
+ blank |             |                | 
+ lword | Brightest   | english_ispell | {bright}
+ blank |             |                | 
+ lword | supernovaes | english_stem   | {supernova}
  </programlisting>
    </para>
  
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql

index 5e557efef452ce34c7a4f7c76562ddfbcae9ee54..1f1d983573ab6391e307c65f8a6bcbc3083852ae 100644 (file)
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1996-2007, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.46 2007/09/25 20:03:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.47 2007/10/22 20:13:37 tgl Exp $
   */
  
  CREATE VIEW pg_roles AS 
@@ -386,41 +386,39 @@ CREATE VIEW pg_stat_bgwriter AS
          pg_stat_get_buf_written_backend() AS buffers_backend,
          pg_stat_get_buf_alloc() AS buffers_alloc;
  
--- Tsearch debug function. Defined here because it'd be pretty unwieldy
+-- Tsearch debug function.  Defined here because it'd be pretty unwieldy
  -- to put it into pg_proc.h
  
-CREATE TYPE ts_debug AS (
-    "Alias" text,
-    "Description" text,
-    "Token" text,
-    "Dictionaries" regdictionary[],
-    "Lexized token" text
-);
-
-COMMENT ON TYPE ts_debug IS 'type returned from ts_debug() function';
-
-CREATE FUNCTION ts_debug(regconfig, text)
-RETURNS SETOF ts_debug AS
+CREATE FUNCTION ts_debug(IN config regconfig, IN document text,
+    OUT alias text,
+    OUT description text,
+    OUT token text,
+    OUT dictionaries regdictionary[],
+    OUT dictionary regdictionary,
+    OUT lexemes text[])
+RETURNS SETOF record AS
  $$
  SELECT 
-    tt.alias AS "Alias",
-    tt.description AS "Description",
-    parse.token AS "Token",
+    tt.alias AS alias,
+    tt.description AS description,
+    parse.token AS token,
      ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary
              FROM pg_catalog.pg_ts_config_map AS m
              WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
              ORDER BY m.mapseqno )
-    AS "Dictionaries",
-    (     
-        SELECT
-            dl.mapdict::pg_catalog.regdictionary  || ': ' || dl.lex::pg_catalog.text
-        FROM
-            ( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex
-              FROM pg_catalog.pg_ts_config_map AS m
-              WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
-              ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno ) dl
-        LIMIT 1
-    ) AS "Lexized token"
+    AS dictionaries,
+    ( SELECT mapdict::pg_catalog.regdictionary
+      FROM pg_catalog.pg_ts_config_map AS m
+      WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
+      ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
+      LIMIT 1
+    ) AS dictionary,
+    ( SELECT pg_catalog.ts_lexize(mapdict, parse.token)
+      FROM pg_catalog.pg_ts_config_map AS m
+      WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
+      ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
+      LIMIT 1
+    ) AS lexemes
  FROM pg_catalog.ts_parse(
          (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 
      ) AS parse,
@@ -434,8 +432,14 @@ LANGUAGE SQL STRICT STABLE;
  COMMENT ON FUNCTION ts_debug(regconfig,text) IS
      'debug function for text search configuration';
  
-CREATE FUNCTION ts_debug(text)
-RETURNS SETOF ts_debug AS
+CREATE FUNCTION ts_debug(IN document text,
+    OUT alias text,
+    OUT description text,
+    OUT token text,
+    OUT dictionaries regdictionary[],
+    OUT dictionary regdictionary,
+    OUT lexemes text[])
+RETURNS SETOF record AS
  $$
      SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1);
  $$
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 467277d8ad0ac11562c8ffaab355d6b57264bea7..1fa5428a9676a4c67a4ca4cefdbe65852370f8b7 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.434 2007/10/19 22:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.435 2007/10/22 20:13:37 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200710192
+#define CATALOG_VERSION_NO     200710221
  
  #endif
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 22 Oct 2007 20:13:37 +0000 (20:13 +0000)
doc/src/sgml/func.sgml		patch \| blob \| blame \| history
doc/src/sgml/textsearch.sgml		patch \| blob \| blame \| history
src/backend/catalog/system_views.sql		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history