# http://rspamd.com/rspamd_statistics/bayes.ham.sqlite
classifier "bayes" {
- tokenizer {
- name = "osb";
- }
- cache {
- path = "${DBDIR}/learn_cache.sqlite";
- }
- min_tokens = 11;
- backend = "sqlite3";
- languages_enabled = true;
- min_learns = 200;
+ tokenizer {
+ name = "osb";
+ }
+ cache {
+ path = "${DBDIR}/learn_cache.sqlite";
+ }
+ min_tokens = 11;
+ backend = "sqlite3";
+ languages_enabled = true;
+ min_learns = 200;
- statfile {
- symbol = "BAYES_HAM";
- path = "${DBDIR}/bayes.ham.sqlite";
- spam = false;
- }
- statfile {
- symbol = "BAYES_SPAM";
- path = "${DBDIR}/bayes.spam.sqlite";
- spam = true;
- }
+ statfile {
+ symbol = "BAYES_HAM";
+ path = "${DBDIR}/bayes.ham.sqlite";
+ spam = false;
+ }
+ statfile {
+ symbol = "BAYES_SPAM";
+ path = "${DBDIR}/bayes.spam.sqlite";
+ spam = true;
+ }
+ learn_condition =<<EOD
+return function(task, is_spam, is_unlearn)
+ local prob = task:get_mempool():get_variable('bayes_prob', 'double')
+
+ if prob then
+ local in_class = false
+ local cl
+ if is_spam then
+ cl = 'spam'
+ in_class = prob >= 0.9
+ else
+ cl = 'ham'
+ in_class = prob <= 0.1
+ end
+
+ if is_unlearn and (not in_class) then
+ return false,string.format('not in class %s; probability %.2f%%',
+ cl, math.abs((prob - 0.5) * 200.0))
+ elseif (not is_unlearn) and in_class then
+ return false,string.format('already in class %s; probability %.2f%%',
+ cl, math.abs((prob - 0.5) * 200.0))
+ end
+ end
+
+ return true
+end
+EOD
}
.include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/statistic.conf"