#
# $Id: filter_nnrpd.pl,v 1.2 2002/12/12 05:01:42 vinocur Exp $
#
# Sample perl filtering code for nnrpd hook.
#

use MIME::Words qw(:all);
use Text::Iconv;

@cmt_regexp = (
	'^In article <?[^ ]*@[^ ]*>? on ..:..:..,? (.*) (writes|wrote|пишет)',
	'^In article <?[^ ]*@[^ ]*>? on ..\...\...,? (.*) (writes|wrote|пишет)',
	'^In article <?[^ ]*@[^ ]*>?,? (.*) (writes|wrote|пишет):$',
	'^In <[^ ]*@[^ ]*>,? (.*) (writes|wrote|пишет):$',
	'^I[nm] Artikel <?[^ ]*@[^ ]*>?,? (.*) (schreibt|schrieb|пишет)',
	'^([a-z0-9_.\-]*@[a-z0-9_.\-]* \(.*\)) (writes|wrote|schreibt|schrieb|пишет)',
	'^(.* <[a-z0-9_.\-]*@[a-z0-9_.\-]*>) (writes|wrote|schreibt|schrieb|пишет)',
	'^(.* \([a-z0-9_.\-]*@[a-z0-9_.\-]*\)) (writes|wrote|schreibt|schrieb|пишет)',
	'^([a-z0-9_.\-]*@[a-z0-9_.\-]*) (writes|wrote|schreibt|schrieb|пишет)',
	'^On ..., .. ... .... ..:..:.. [+\-]...., (.*) (writes|wrote|schreibt|schrieb|пишет):$',
	'^On .. ... .... ..:..:.. [+\-]...., (.*) (writes|wrote|schreibt|schrieb|пишет):$',
	'^On ....-..-.. ..:..:.. ....., (.*) said:',
	'^[ 	]*([^>|].*) (writes|wrote|schreibt|schrieb|пишет):$' );

#
# This file is loaded when nnrpd starts up. If it defines a sub named
# `filter_post', then that function will be called during processing of a
# posting. It has access to the headers of the article via the associative
# array `%hdr'. If it returns a null string then the article is accepted
# for posting. A non-null string rejects it, and the value returned is used
# in the rejection message.
#

#
# Do any initialization steps.
#
my %config = (checkincludedtext => 0,
              includedcutoff => 40,
              includedratio => 0.6,
              quotere => '^[>:]',
              antiquotere => '^[<]',  # so as not to reject dict(1) output
             );


#
# Sample filter
#
sub filter_post {
    my $rval = "" ;             # assume we'll accept.

### Uncomment this next block to reject articles that have 'make money'
### in their subject, or which have a "Re: " subject, but no References:
### header, or which have an invalid From.

##    if ($hdr{"Subject"} =~ /make.*money/i) {
##        $rval = "Spam is not acceptable here..." ;
##    } elsif ($hdr{'Subject'} =~ /^Re: /o and $hdr{'References'} eq "") {
##        $rval = "Followup without References:";
##    } elsif ($hdr{'From'} =~ /^\w*$/o or
##             $hdr{'From'} !~ /^(.+?)\@([-\w\d]+\.)*([-\w\d]+)\.([-\w\d]{2,})$/o) {
##        $rval = "From: is invalid, must be user\@[host.]domain.tld";
##    }

	# Decode RFC-1522 encoded subjects, because fidogate doesn't
	# support MIME headers. Only for fido7.*.

	$modify_headers = 0;

	if ($hdr{"Newsgroups"} =~ /^fido7\./) {

		@decoded = decode_mimewords($hdr{'Subject'});

		$new_subj = "";

		foreach (@decoded) {
			if (ref $_ eq "ARRAY") {
				$data = @{$_}[0];
		    	$charset = @{$_}[1];

				if ($charset && $data) {
					$conv = Text::Iconv->new($charset, "koi8-r");
					if ($conv) {
						$new_subj .= $conv->convert($data);
					}
				}
			}
		}

		if ($new_subj ne "") {
			$hdr{"X-Original-Subject"} = $hdr{"Subject"};
			$hdr{"Subject"} = $new_subj;
			$modify_headers = 1;
		}

		# Try to generate X-Comment-To
	
		if (!$hdr{"X-Comment-To"}) {	
			foreach (@cmt_regexp) {
				if ($body =~ /$_/) {
					$hdr{"X-Comment-To"} = $1;
					$modify_headers = 1;
				}
			}
		}
	}

### The next block rejects articles with too much quoted text, if the
### config hash directs it to.

    if ($config{checkincludedtext}) {
        my ($lines, $quoted, $antiquoted) = analyze($body);
        if ($lines > $config{includedcutoff}
                && $quoted - $antiquoted > $lines * $config{includedratio}) {
            $rval = "Article contains too much quoted text";
        }
    }

    return $rval;
}

sub analyze {
    my ($lines, $quoted, $antiquoted) = (0, 0, 0);
    local $_ = shift;

    do {
        if ( /\G$config{quotere}/mgc ) {
            $quoted++;
        } elsif ( /\G$config{antiquotere}/mgc ) {
            $antiquoted++;
        }
    } while ( /\G(.*)\n/gc && ++$lines );

    return ($lines, $quoted, $antiquoted);
}
