]>
git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/testsuite/20_util/hash/chi2_q_document_words.cc
1 // On some simulators, the workload is simply too large with values big
2 // enough for the test to pass the quality test, so just skip it altogether.
3 // { dg-do run { target { c++11 && { ! simulator } } } }
5 // Copyright (C) 2010-2020 Free Software Foundation, Inc.
7 // This file is part of the GNU ISO C++ Library. This library is free
8 // software; you can redistribute it and/or modify it under the
9 // terms of the GNU General Public License as published by the
10 // Free Software Foundation; either version 3, or (at your option)
11 // any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
18 // You should have received a copy of the GNU General Public License
19 // along with this library; see the file COPYING3. If not see
20 // <http://www.gnu.org/licenses/>.
22 #include "chi2_quality.h"
24 // Tests chi^2 for a set of words taken from a document written in English.
// Statement sequence of the document-words chi^2 check.  The enclosing
// function header is not visible in this excerpt; presumably this is the
// body of test_document_words(), which is invoked further down -- confirm.
//
// Step 1: open the word-list data file by name.
28 const std::string f_name
= "thirty_years_among_the_dead_preproc.txt";
29 std::ifstream
in(f_name
);
// The check cannot proceed without the data file.  VERIFY is defined outside
// this excerpt; presumably it fails the test when its condition is false.
30 VERIFY( in
.is_open() );
// Step 2: read the whole stream into `words`, one element per
// whitespace-delimited token (std::istream_iterator<std::string> extracts
// with operator>>).
31 std::vector
<std::string
> words
;
32 words
.assign(std::istream_iterator
<std::string
>(in
),
33 std::istream_iterator
<std::string
>());
// Sanity check on the input: more than 100000 tokens must have been read.
34 VERIFY( words
.size() > 100000 );
// Step 3: remove duplicate words.  std::unique only collapses adjacent
// equal elements, hence the sort first; erase then drops the leftover tail.
35 std::sort(words
.begin(), words
.end());
36 auto it
= std::unique(words
.begin(), words
.end());
37 words
.erase(it
, words
.end());
// Sanity check on the deduplicated set: more than 5000 distinct words.
38 VERIFY( words
.size() > 5000 );
// Step 4: run the chi^2 quality check.  k is one twentieth of the number of
// distinct words (integer division).  chi2_hash is not defined in this
// excerpt (the visible include is "chi2_quality.h"); presumably k is the
// bucket count used when hashing the words -- TODO confirm.
40 const unsigned long k
= words
.size() / 20;
41 double chi2
= chi2_hash(words
, k
);
// Pass criterion: the returned statistic must be strictly below 1.1 * k
// (k is converted to double for the multiplication).
42 VERIFY( chi2
< k
*1.1 );
48 test_document_words();