Additional tests
Locate and open files Validate config file input formatting Check that the hash representation of rules read from files is correct
This commit is contained in:
parent
944c3eed5e
commit
cfbcee7c7d
|
@ -1,9 +1,19 @@
|
||||||
#!/usr/bin/perl
|
# <@LICENSE>
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
package Mail::SpamAssassin::KeywordRuleGenerator;
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
use strict;
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
||||||
use warnings;
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at:
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# </@LICENSE>
|
||||||
|
|
||||||
=head1 NAME
|
=head1 NAME
|
||||||
|
|
||||||
|
@ -11,11 +21,7 @@ Mail::SpamAssassin::KeywordRuleGenerator - Generate SA rules for keywords
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
our $VERSION = '0.01';
|
package Mail::SpamAssassin::KeywordRuleGenerator;
|
||||||
|
|
||||||
=head1 VERSION
|
|
||||||
|
|
||||||
Version 0.01
|
|
||||||
|
|
||||||
=head1 SYNOPSIS
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
@ -23,6 +29,31 @@ Generate SpamAssassin compatible configuration files given lists of keywords.
|
||||||
Implemented as a module largely for testing purposes.
|
Implemented as a module largely for testing purposes.
|
||||||
|
|
||||||
use Mail::SpamAssassin::KeywordRuleGenerator;
|
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||||
|
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new($id);
|
||||||
|
$kw->readFile('keywords.cf');
|
||||||
|
$kw->writeFiles();
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
Mail::SpamAssassin::KeywordRuleGenerator does what it says on the tin; it
|
||||||
|
generates SpamAssassin compatible configuration files to catch keywords that you
|
||||||
|
specify. Most simply, it can take in one or more properly formatted input files
|
||||||
|
(see FILES), generate rules for individual files (optional), as well as meta
|
||||||
|
rules for the counts for each set of keywords (ie. 1 of N hit; 2 of N hit...).
|
||||||
|
See RULES for more on how the rules are generated.
|
||||||
|
|
||||||
|
The sets of keywords can be broken up into groups (see GROUPS).
|
||||||
|
|
||||||
|
=head1 PREREQUISITES
|
||||||
|
|
||||||
|
Requires C<spamassassin> executable and the following Perl modules
|
||||||
|
|
||||||
|
To::Be::Determined
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
#use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
=head2 FILES
|
=head2 FILES
|
||||||
|
|
||||||
|
@ -77,6 +108,14 @@ describe ID_FILE_GROUP_1 1 match in keyword group 'GROUP'
|
||||||
meta ID_FILE_GROUP_2 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 1
|
meta ID_FILE_GROUP_2 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 1
|
||||||
describe ID_FILE_GROUP_2 2 matches in keyword group 'GROUP'
|
describe ID_FILE_GROUP_2 2 matches in keyword group 'GROUP'
|
||||||
|
|
||||||
|
=head1 VERSION
|
||||||
|
|
||||||
|
Version 0.01
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
our $VERSION = '0.01';
|
||||||
|
|
||||||
=head1 EXPORT
|
=head1 EXPORT
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
@ -89,86 +128,284 @@ our @EXPORT = qw( new );
|
||||||
our %EXPORT_TAGS = (
|
our %EXPORT_TAGS = (
|
||||||
'all' => [
|
'all' => [
|
||||||
'new',
|
'new',
|
||||||
''
|
'getId',
|
||||||
|
'setId',
|
||||||
|
'getPriority',
|
||||||
|
'setPriority',
|
||||||
|
'getOutfile',
|
||||||
|
'setOutfile',
|
||||||
|
'getScorefile',
|
||||||
|
'setScorefile',
|
||||||
|
'clearFiles',
|
||||||
|
'getFiles',
|
||||||
|
'nextFile',
|
||||||
|
'readFile',
|
||||||
|
'getFile',
|
||||||
|
'setFile',
|
||||||
|
'joinRules',
|
||||||
|
'processMetas',
|
||||||
|
'processWords',
|
||||||
|
'processGroups',
|
||||||
|
'processAll',
|
||||||
|
'writeFile',
|
||||||
|
'writeAll'
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
|
||||||
|
|
||||||
=head1 SUBROUTINES/METHODS
|
=head1 METHODS
|
||||||
|
|
||||||
=head2 C<$kw = new($id)>
|
Except for getter methods, all methods make changes directly within the context
|
||||||
|
object and only return errors. Thus, each call can be verified by checking that
|
||||||
|
it returned undef:
|
||||||
|
|
||||||
|
die "Failed to run 'method'\n" if($kw->method($arg));
|
||||||
|
|
||||||
|
or:
|
||||||
|
|
||||||
|
my $err = $kw->method($arg);
|
||||||
|
die "Received $err\n" if ($err);
|
||||||
|
|
||||||
|
Getter methods will return the attribute or undef if it does not exist.
|
||||||
|
|
||||||
|
=head2 C<$kw = new( %args )>
|
||||||
|
|
||||||
Creates a new C<Mail::SpamAssassin::KeywordRuleGenerator> object and returns it.
|
Creates a new C<Mail::SpamAssassin::KeywordRuleGenerator> object and returns it.
|
||||||
|
|
||||||
C<$id> is an optional identifier which will be used to prefix any rule output.
|
C<$args> is an optional hash to assign any preliminary attributes or flags to
|
||||||
It can be updated at any time via the C<$kw->{id}> value or with the
|
the object. See various 'get' and 'set' functions.
|
||||||
C<$kw->id($id)> setter;
|
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
sub new
|
sub new
|
||||||
{
|
{
|
||||||
my ($class, $id) = @_;
|
my ($class, $args) = @_;
|
||||||
|
my $self = $args;
|
||||||
|
|
||||||
$id = uc($id) || 'KW';
|
=head3 Initial attributes
|
||||||
bless {
|
|
||||||
id => $id || '',
|
|
||||||
keywords => {}
|
|
||||||
} => $class;
|
|
||||||
}
|
|
||||||
|
|
||||||
=head2 C<$kw->id()>
|
While other attributes will exist within the object during processing, those
|
||||||
|
which make sense to define up-front and which will not be overwritten by any of
|
||||||
|
the built-in functions (other than their associated getters) are:
|
||||||
|
|
||||||
Get/Setter for C<$kw->{'id'}>
|
=head3 id
|
||||||
|
|
||||||
|
id scalar
|
||||||
|
A short identifier which will be appending to all output files to allow
|
||||||
|
for easier recognition of the files' sources. Default: 'KW'
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
sub id
|
$self->{'id'} //= 'KW';
|
||||||
|
|
||||||
|
=head3 priority
|
||||||
|
|
||||||
|
priority scalar
|
||||||
|
Typically a number used to define the priority of the rules.
|
||||||
|
SpamAssassin will read configuration files in alphabetical order and the
|
||||||
|
last iteration of a configuration for the same rule will be used. This
|
||||||
|
means that the files read last will overwrite those read earlier. By
|
||||||
|
convention the number is used for easy sorting. Any leading aphabetical
|
||||||
|
character will be ordered after all numbers, and so given high priority.
|
||||||
|
Default: '50'
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
$self->{'priority'} //= 50;
|
||||||
|
|
||||||
|
=head3 debug
|
||||||
|
|
||||||
|
debug boolean
|
||||||
|
Enable (1) or disable (0) debugging output. Default: 0
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
$self->{'debug'} //= 0;
|
||||||
|
|
||||||
|
=head3 singleOutfile
|
||||||
|
|
||||||
|
singleOutfile boolean
|
||||||
|
Indicates whether output rules should all be added to a single file (1),
|
||||||
|
or to one file per input file (0). Note that this still requires
|
||||||
|
'joinScores' to have one file total. Default: 0
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
$self->{'singleOutfile'} //= 0;
|
||||||
|
|
||||||
|
=head3 joinScores
|
||||||
|
|
||||||
|
joinScores bootlean
|
||||||
|
Indicates whether to include the scores in the same file as their
|
||||||
|
associated rule definitions (1) or in a second file on their own (0).
|
||||||
|
The second file will simply append '_SCORES' to the file name (prior to
|
||||||
|
the '.cf'), unless overridden by C<$kw->setScoreFile($path)> or a second
|
||||||
|
path argument in C<$kw->setOutfile($rulePath, $scorePath)>. Default: 1
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
$self->{'joinScores'} //= 1;
|
||||||
|
|
||||||
|
bless $self, $class;
|
||||||
|
return $self;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->getId()>
|
||||||
|
|
||||||
|
Getter for C<$kw->{'id'}>. 'id' is used for top-level rule names.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub getId
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
|
||||||
|
if (defined($self->{'id'})) {
|
||||||
|
return $self->{'id'};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->setId()>
|
||||||
|
|
||||||
|
Setter for C<$kw->{'id'}>
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub setId
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $id = shift;
|
my $id = shift;
|
||||||
|
|
||||||
if (defined($id)) {
|
if (defined($id)) {
|
||||||
$id = uc($id);
|
$id = uc($id);
|
||||||
$self->{'id'} = $id;
|
$self->{'id'} = $id || return "Failed to set $id";
|
||||||
} else {
|
} else {
|
||||||
if (defined($self->{'id'})) {
|
return "No ID provided\n";
|
||||||
return $self->{'id'};
|
|
||||||
} else {
|
|
||||||
return undef;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
=head2 C<$kw->file()>
|
=head2 C<$kw->getPriority()>.
|
||||||
|
|
||||||
Get/Setter for C<$kw->{'file'}>
|
Getter for C<$kw->{'priority'}>. 'priority' value used for the output filepath
|
||||||
|
to indicate the load order. SpamAssassin reads in ascending order with later
|
||||||
|
iterations overriding earlier. The 'score' file will increment so that it comes
|
||||||
|
at the end.
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
sub file
|
sub getPriority
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
|
||||||
|
if (defined($self->{'priority'})) {
|
||||||
|
return $self->{'priority'};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->setPriority()>
|
||||||
|
|
||||||
|
Setter for C<$kw->{'priority'}>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub setPriority
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $priority = shift;
|
||||||
|
|
||||||
|
if (defined($priority)) {
|
||||||
|
$path = uc($priority);
|
||||||
|
$self->{'priority'} = $priority || return "Failed to set priority: $priority";
|
||||||
|
} else {
|
||||||
|
return "No 'priority' provided\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->getFile()>
|
||||||
|
|
||||||
|
Getter for C<$kw->{'file'}>. This is the name of the current output file being
|
||||||
|
processed. Not to be confused with C<readFile> or C<getOutfile>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub getFile
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
|
||||||
|
if (defined($self->{'file'})) {
|
||||||
|
return $self->{'file'};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->setFile()>
|
||||||
|
|
||||||
|
Setter for C<$kw->{'file'}>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub setFile
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $file = shift;
|
my $file = shift;
|
||||||
|
|
||||||
if (defined($file)) {
|
if (defined($file)) {
|
||||||
my $name = $file;
|
$self->{'file'} = $file;
|
||||||
$name =~ s/^([a-zA-Z0-9\-_]*)(\..*)?$/$1/;
|
|
||||||
$name =~ s/-/_/;
|
|
||||||
$name = uc($name);
|
|
||||||
die("invalid filename\n") unless ($name =~ m/^[A-Z_]+$/);
|
|
||||||
$self->{'file'} = $name;
|
|
||||||
} else {
|
} else {
|
||||||
if (defined($self->{'file'})) {
|
return "No File provided\n";
|
||||||
return $self->{'file'};
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->getOutfile()>
|
||||||
|
|
||||||
|
Getter for C<$kw->{'outfile'}>. 'outfile' represents the real filepath of the
|
||||||
|
output file which is currently being processed.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub getOutfile
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
|
||||||
|
if (defined($self->{'outfile'})) {
|
||||||
|
return $self->{'outfile'};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->setOutfile()>
|
||||||
|
|
||||||
|
Setter for C<$kw->{'outfile'}>. Can be defined manually with a scalar argument,
|
||||||
|
otherwise the path is constructed from the existing attributes.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub setOutfile
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $path = shift;
|
||||||
|
|
||||||
|
if (defined($path)) {
|
||||||
|
$self->{'outfile'} = $path || return "Failed to set $path";
|
||||||
} else {
|
} else {
|
||||||
return undef;
|
if ($self->{'singleOutfile'}) {
|
||||||
|
$self->{'outfile'} = $self->{'priority'} .
|
||||||
|
'_' . uc($self->{'id'}) .
|
||||||
|
'.cf';
|
||||||
|
} else {
|
||||||
|
my $file = $self->{'file'};
|
||||||
|
$file =~ s/\//_/g; # Change dir slashes to _
|
||||||
|
$file =~ s/(\.[^\.]*)$//g; # Remove extensions
|
||||||
|
$file = uc($file); # Convert to uppercase for rule names
|
||||||
|
|
||||||
|
$self->{'outfile'} = $self->{'priority'} .
|
||||||
|
'_' . uc($self->{'id'}) .
|
||||||
|
'_' . $file .
|
||||||
|
'.cf';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
=head2 C<$kw->getFile();>
|
=head2 C<$kw->getFiles($regex);>
|
||||||
|
|
||||||
Simple recursive search for files within a directory. Will validate that each
|
Simple recursive search for files within a directory. Will validate that each
|
||||||
file is readable and return an array of file names.
|
file is readable and return an array of file names.
|
||||||
|
@ -184,24 +421,63 @@ The regex will be used as a file filter and will only return files that match.
|
||||||
sub getFiles
|
sub getFiles
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my @args = @_;
|
my $regex = shift;
|
||||||
|
|
||||||
|
my $return = '';
|
||||||
foreach (@args) {
|
foreach (@args) {
|
||||||
die "$_ does not exist\n" unless (-e "$_" || -l "$_");
|
$return .= "$_ does not exist\n" unless (-e "$_" || -l "$_");
|
||||||
if (-l $_) {
|
if (-l $_) {
|
||||||
getFiles(readlink($_));
|
$self->getFiles(readlink($_));
|
||||||
} elsif (-d $_) {
|
} elsif (-d $_) {
|
||||||
my @recursive = glob($_."/*");
|
my @recursive = glob($_."/*");
|
||||||
getFiles(@recursive);
|
$self->getFiles(@recursive);
|
||||||
} else {
|
} else {
|
||||||
die "$_ is not readable\n" unless (-r "$_");
|
if (defined($regex)) {
|
||||||
push(@{$self->{files_ref}}, $_);
|
if ($_ =~ $regex) {
|
||||||
|
push(@{$self->{'files_ref'}}, $_);
|
||||||
|
} else {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$return .= "$_ is not readable\n" unless (-r "$_");
|
||||||
|
push(@{$self->{'files_ref'}}, $_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return @$self->{files_ref};
|
}
|
||||||
|
return $return;
|
||||||
}
|
}
|
||||||
|
|
||||||
=head2 C<$kw->readFile();>
|
=head2 C<$kw->nextFile();>
|
||||||
|
|
||||||
|
Shift next message in files_ref queue to current.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub nextFile
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
if (scalar(@{$self->{'files_ref'}})) {
|
||||||
|
$self->{'path'} = shift(@{$self->{'files_ref'}});
|
||||||
|
} else {
|
||||||
|
return "End of array\n";
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->clearFiles();>
|
||||||
|
|
||||||
|
Clear the current file reference queue.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub clearFiles
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
delete($self->{'rules'}) || return "Failed to delete rules hash\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->readFile(%args);>
|
||||||
|
|
||||||
Read in properly formatted keyword list file. The basic format is one keyword
|
Read in properly formatted keyword list file. The basic format is one keyword
|
||||||
per line, an optional score, and an optional list of 'groups'.
|
per line, an optional score, and an optional list of 'groups'.
|
||||||
|
@ -218,45 +494,314 @@ When keyword groups are omitted, that keyword defaults to just the 'LOCAL' group
|
||||||
|
|
||||||
Examples with other combinations are:
|
Examples with other combinations are:
|
||||||
|
|
||||||
# word 0 Same as previous
|
word 0 # Same as previous
|
||||||
# word 2 Used for LOCAL, not GLOBAL, scores 2 on it's own
|
word 2 # Used for LOCAL, not GLOBAL, scores 2
|
||||||
# word GROUP GROUP group, not LOCAL, not GLOBAL, no score
|
word GROUP # GROUP group, not LOCAL, not GLOBAL, no score
|
||||||
# word 0 GROUP Same as previous
|
word 0 GROUP # Same as previous
|
||||||
# word 1 GROUP GLOBAL LOCAL GROUP group, in GLOBAL, in LOCAL, scores 1
|
word 1 GROUP GLOBAL LOCAL # GROUP group, in GLOBAL, in LOCAL, scores 1
|
||||||
|
|
||||||
|
C<%args> can be used to override the attributes used to index the rules,
|
||||||
|
including the id, file
|
||||||
|
|
||||||
|
This function intentionally does not iterate through the files queue so that you
|
||||||
|
can step through and make modifications as you go.
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
sub readFiles
|
sub readFile
|
||||||
{
|
{
|
||||||
my $key_ref = shift;
|
my $self = shift;
|
||||||
my @files = @_;
|
my $file = shift || return 'No file provided';
|
||||||
|
my %args = @_;
|
||||||
|
$self->{'file'} = $file;
|
||||||
|
$self->setOutfile();
|
||||||
|
my $n = 0;
|
||||||
|
|
||||||
foreach my $file (@files) {
|
|
||||||
my $n = 0; # Track line number for errors
|
|
||||||
my $name = $file;
|
|
||||||
$name =~ s/\//_/g; # Change dir slashes to _
|
|
||||||
$name =~ s/(\.[^\.]*)*$//g; # Remove extensions
|
|
||||||
$name = uc($name); # Convert to uppercase for rule names
|
|
||||||
if (open(my $fh, '<', $file)) {
|
if (open(my $fh, '<', $file)) {
|
||||||
|
my $rules = 0;
|
||||||
while (<$fh>) {
|
while (<$fh>) {
|
||||||
$n++;
|
$n++;
|
||||||
# Ignore blank lines and comments
|
# Ignore blank lines and comments
|
||||||
if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) {
|
if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) {
|
||||||
next;
|
next;
|
||||||
# Verify formatting
|
# Verify formatting
|
||||||
} elsif ($_ =~ m/^([^\s]+)\s+([0-9]+)(?:\s+(.*))?/) {
|
} elsif (my ($word, $score, $comment, @groups) = $self->readLine($_)) {
|
||||||
my ($word, $score, $groups) = ($1, $2, $3);
|
if ($self->{'debug'}) {
|
||||||
print "'$word' '$score' " . (join(',',$groups)) . "\n";
|
print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n";
|
||||||
|
}
|
||||||
|
foreach my $group (@groups) {
|
||||||
|
if ($group eq 'GLOBAL') {
|
||||||
|
push(@{$self->{'rules'}->{'GLOBAL'}}, $word);
|
||||||
} else {
|
} else {
|
||||||
die "Invalid input in $file, line $n: $_\n";
|
push(@{$self->{'rules'}->{$self->{'outfile'}}->{$group}}, $word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
$self->{'rules'}->{$self->{'outfile'}}->{'SCORED'}->{$word} = $score if ($score);
|
||||||
|
$self->{'rules'}->{$self->{'outfile'}}->{'COMMENTS'}->{$word} = $comment if ($comment);
|
||||||
|
} elsif ($self->{debug}) {
|
||||||
|
print STDERR "ERROR: Invalid input in $file, line $n: $_\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#print "$_\n" foreach(@$files_ref);
|
print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}} || !$self->{'debug'});
|
||||||
|
return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}});
|
||||||
|
} else {
|
||||||
|
delete($self->{'file'});
|
||||||
|
return "Failed to read $file";
|
||||||
|
}
|
||||||
|
return undef;
|
||||||
}
|
}
|
||||||
|
|
||||||
=head 2 C<$kw->writeFiles($out_dir)>
|
=head2 C<$kw->readAll();>
|
||||||
|
|
||||||
|
Loops through C<readFile> for all files in the queue.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub readAll
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my @files = ( shift ) || return "No files provided\n";
|
||||||
|
while (scalar(@_)) {
|
||||||
|
push(@files, shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
my @failed;
|
||||||
|
foreach my $file (@files) {
|
||||||
|
if (!-e $file) {
|
||||||
|
push(@failed, $file);
|
||||||
|
print STDERR "$file does not exist\n" if ($self->{'debug'});
|
||||||
|
} elsif (-l $file) {
|
||||||
|
# Accept only links within PWD
|
||||||
|
my $dest = readlink($file);
|
||||||
|
if ($dest =~ m/^\//) {
|
||||||
|
$dest =~ s/$ENV{'PWD'}\/// ;
|
||||||
|
if ($dest eq readlink($file)) {
|
||||||
|
push(@failed, $file);
|
||||||
|
print STDERR "Symlink outside of PWD $file\n" if ($self->{'debug'});
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$self->readAll($dest);
|
||||||
|
} elsif (-f $file) {
|
||||||
|
push(@failed, "Failed to read $file") if $self->readFile($file);
|
||||||
|
} elsif (-d $file) {
|
||||||
|
push(@failed, @_) if $self->readAll(glob($file.'/*'));
|
||||||
|
# Bash pattern?
|
||||||
|
} else {
|
||||||
|
if (my @glob = glob($file)) {
|
||||||
|
push(@failed, ( @_ )) if $self->readAll(@glob);
|
||||||
|
} else {
|
||||||
|
print STDERR "Bad file $file\n" if ($self->{'debug'});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return @failed if (scalar(@failed));
|
||||||
|
return undef;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->readLine($line)>
|
||||||
|
|
||||||
|
Reads a line from a configuration file and returns the relevant values or undef
|
||||||
|
if the line is not properly formatted.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub readLine
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $line = shift;
|
||||||
|
my $invalid = shift;
|
||||||
|
my ($word, $score, $comment, @groups);
|
||||||
|
if (my @sections = $line =~ m/(?:^|\s+)(?:([^\d\s#]\S+)|(\d+(?:\.\d+)?\b)|([^\d\s#]+)|(#.*$))/g) {
|
||||||
|
while (@sections) {
|
||||||
|
next unless my $section = shift(@sections);
|
||||||
|
if (defined($comment)) {
|
||||||
|
$comment .= ' ' if ($comment ne '');
|
||||||
|
$comment .= $section;
|
||||||
|
} elsif (!defined($word) && $section =~ m/^([^\d\s#]\S+)$/) {
|
||||||
|
$word = $section;
|
||||||
|
} elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) {
|
||||||
|
$score = $section;
|
||||||
|
} elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) {
|
||||||
|
push(@groups, $section);
|
||||||
|
} elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) {
|
||||||
|
$comment = $section;
|
||||||
|
$comment =~ s/^#\s*//;
|
||||||
|
} else {
|
||||||
|
push(@invalid, $section);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (scalar(@invalid)) {
|
||||||
|
if ($self->{'debug'}) {
|
||||||
|
print("Invalid clauses: '".join("', ", @invalid)."' in '$line'\n");
|
||||||
|
}
|
||||||
|
return undef;
|
||||||
|
}
|
||||||
|
return undef unless ($word);
|
||||||
|
$word = lc($word);
|
||||||
|
$score //= 0;
|
||||||
|
$comment //= '';
|
||||||
|
@groups = ( 'LOCAL', 'GLOBAL' ) unless (scalar(@groups));
|
||||||
|
return ( $word, $score, $comment, @groups );
|
||||||
|
}
|
||||||
|
return ();
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->joinRules(%rules)>
|
||||||
|
|
||||||
|
Merge rules hash into working output hash. Generally called from C<readFile>,
|
||||||
|
but if you want to add rules manually, it will require the following format:
|
||||||
|
|
||||||
|
{
|
||||||
|
'word' => {
|
||||||
|
'score' => 0,
|
||||||
|
'groups' => ( 'LOCAL' )
|
||||||
|
},
|
||||||
|
'other' => {
|
||||||
|
'score' => 1,
|
||||||
|
'groups' => ( 'GLOBAL', 'group' )
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
The main context object will provide the working 'ID', 'FILE' and other
|
||||||
|
attributes necessary to nest these rules in the existing hash.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub joinRules
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my %rules = @_;
|
||||||
|
|
||||||
|
if ($self->{'unified'}) {
|
||||||
|
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'.cf';
|
||||||
|
} else {
|
||||||
|
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'_'.$self->{'file'}.'.cf';
|
||||||
|
}
|
||||||
|
foreach my $word (keys(%rules)) {
|
||||||
|
my $score = $rules{$word}{'score'} || 0;
|
||||||
|
my @groups = $rules{$word}{'groups'} || ( 'GLOBAL' );
|
||||||
|
if ($score) {
|
||||||
|
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}->{$word} = $score;
|
||||||
|
}
|
||||||
|
foreach my $group (@groups) {
|
||||||
|
if ($group eq 'GLOBAL') {
|
||||||
|
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}))) {
|
||||||
|
push(@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}, $word);
|
||||||
|
} else {
|
||||||
|
$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'} = ( $word );
|
||||||
|
}
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}))) {
|
||||||
|
push(@{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}, $word);
|
||||||
|
} else {
|
||||||
|
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group} = ( $word );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->processMetas($outfile, $file);>
|
||||||
|
|
||||||
|
Create all of the component meta rules for the declared C<$file>. Those that
|
||||||
|
will be used for the standalone and count rules. Output to C<$outfile>. This
|
||||||
|
must be run before the other process methods and must be run for 'GLOBAL' first,
|
||||||
|
otherwise output will be invalid. Meta rules for file-specific words will not be
|
||||||
|
generated if they are also in the 'GLOBAL' group, instead the meta rules from
|
||||||
|
the 'GLOBAL' file will be used for the count rules in all other files. This
|
||||||
|
will prevent duplicates, but also requires that you not rename output files such
|
||||||
|
that the they appear before the 'GLOBAL' file (without '_C<$file>' at the end).
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub processMetas
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $outfile = shift;
|
||||||
|
my $file = shift;
|
||||||
|
my $rules = shift;
|
||||||
|
|
||||||
|
my $prefix = $self->{'id'};
|
||||||
|
my @words;
|
||||||
|
if ($file eq 'GLOBAL') {
|
||||||
|
@words = @{$rules->{'GLOBAL'}};
|
||||||
|
} else {
|
||||||
|
$prefix .= "_".$file;
|
||||||
|
foreach (keys(%{$rules->{$file}->{'groups'}})) {
|
||||||
|
next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
|
||||||
|
}
|
||||||
|
foreach (keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}})) {
|
||||||
|
next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
|
||||||
|
next if (grep {/^$_$/} @words);
|
||||||
|
push (@words, $_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
foreach my $word (@words) {
|
||||||
|
$self->{'output'}->{$outfile} .=
|
||||||
|
"body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" .
|
||||||
|
"header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" .
|
||||||
|
"meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->processWords($outfile,%args);>
|
||||||
|
|
||||||
|
Take a list of words with scores and add them to C<$outfile>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub processWords
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $out = shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->processGroup(%args);>
|
||||||
|
|
||||||
|
Take a single group, including 'GLOBAL' and add it to C<$outfile>.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub processGroup
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my $outfile = shift;
|
||||||
|
|
||||||
|
for (my $i = 0; $i < scalar(@all); $i++) {
|
||||||
|
$files->{$self->{'priority'}.'_'.$self->{'id'}.'cf'} .=
|
||||||
|
"meta\t".$self->{'id'}."_".$i."\t( ".join(' + ',@all)." ) > $i\n" .
|
||||||
|
"describe\tMatched ".($i+1)."of keywords: ".join(', ',@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}})."\n\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->processAll(%args);>
|
||||||
|
|
||||||
|
Process all groups.
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
sub processAll
|
||||||
|
{
|
||||||
|
my $self = shift;
|
||||||
|
my %args = @_;
|
||||||
|
|
||||||
|
$self->{'output'} = {};
|
||||||
|
foreach my $id (keys(%{$self->{'rules'}})) {
|
||||||
|
$self->processMetas($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
$self->processGroups($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
$self->processWords($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
foreach my $file (keys(%{$self->{'rules'}->{$id}})) {
|
||||||
|
$self->processMetas($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
$self->processGroups($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
$self->processWords($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
=head2 C<$kw->writeFiles($out_dir)>
|
||||||
|
|
||||||
Output files will use the name of each input file, stripping any extension, and
|
Output files will use the name of each input file, stripping any extension, and
|
||||||
forcing the name to uppercase. Rules in each file will be called:
|
forcing the name to uppercase. Rules in each file will be called:
|
||||||
|
@ -282,13 +827,60 @@ body for that word.
|
||||||
sub writeFiles
|
sub writeFiles
|
||||||
{
|
{
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $dir = shift;
|
foreach my $out (keys(%{$self->{'rules'}})) {
|
||||||
|
unless ($self->{'joinScores'}) {
|
||||||
|
$file =~ s/(.*)\.cf/$1_SCORES.cf/;
|
||||||
|
print STDERR $self->{'scores'};
|
||||||
|
}
|
||||||
|
if (open(my $fh, '>', $out)) {
|
||||||
|
print $fh $self->{'output'}->{$out};
|
||||||
|
close($fh);
|
||||||
|
} else {
|
||||||
|
print STDERR "Failed to write $out\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
use Data::Dump;
|
|
||||||
print Data::Dump::dump($self->{keywords});
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
=head1 MORE
|
||||||
|
|
||||||
|
For discussion of the module and examples, see:
|
||||||
|
|
||||||
|
E<lt>https://john.me.tz/projects/article.php?topic=Mail-SpamAssassin-KeywordRuleGenerator<gt>
|
||||||
|
|
||||||
|
=head1 SEE ALSO
|
||||||
|
|
||||||
|
Mail::SpamAssassin
|
||||||
|
spamassassin
|
||||||
|
|
||||||
|
=head1 BUGS
|
||||||
|
|
||||||
|
Report issues to:
|
||||||
|
|
||||||
|
E<lt>https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGenerator/issuesE<gt>
|
||||||
|
|
||||||
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
John Mertz <git@john.me.tz>
|
||||||
|
|
||||||
|
=head1 COPYRIGHT
|
||||||
|
|
||||||
|
Mail::SpamAssassin::KeywordRuleGenerator is distributed under the Apache License
|
||||||
|
Version 2.0, as described in this file and the file C<LICENSE> included with the
|
||||||
|
distribution.
|
||||||
|
|
||||||
|
=head1 AVAILABILITY
|
||||||
|
|
||||||
|
If possible, the latest version of this library will be made available from CPAN
|
||||||
|
as well as:
|
||||||
|
|
||||||
|
E<lt>https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGeneratorE<gt>
|
||||||
|
|
||||||
|
=cut
|
||||||
|
|
||||||
|
1;
|
||||||
|
|
||||||
=pod
|
=pod
|
||||||
die "Please provide rules file(s) as an argument\n" unless (defined($ARGV[0]));
|
die "Please provide rules file(s) as an argument\n" unless (defined($ARGV[0]));
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
word
|
|
@ -0,0 +1,3 @@
|
||||||
|
oneone 2 LOCAL
|
||||||
|
onetwo 0 GLOBAL
|
||||||
|
onethree three
|
|
@ -0,0 +1,3 @@
|
||||||
|
twoone 2 LOCAL
|
||||||
|
twotwo 0 GLOBAL
|
||||||
|
twothree three
|
|
@ -0,0 +1 @@
|
||||||
|
/home/jpm/KeywordRuleGenerator/t/02_files.cf
|
|
@ -0,0 +1,47 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Basic test to ensure files can be found and read. Verification will come later
|
||||||
|
|
||||||
|
use Test::More;
|
||||||
|
|
||||||
|
use lib 'lib/';
|
||||||
|
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||||
|
|
||||||
|
my $id = '02';
|
||||||
|
|
||||||
|
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id } );
|
||||||
|
|
||||||
|
# Read a single file
|
||||||
|
my $file = 't/02_files.cf';
|
||||||
|
ok (!$kw->readFile($file), "Run readFile on $file");
|
||||||
|
ok ($kw->getFile($file) == $file, "Get current working file");
|
||||||
|
ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files in rules");
|
||||||
|
ok ($kw->{'rules'}->{'50_02_T_02_FILES.cf'}, "Loaded correct output name");
|
||||||
|
ok (!$kw->clearFiles(), "Clear 'rules' hash");
|
||||||
|
ok (!$kw->{'rules'}, "Fetched none after clearing");
|
||||||
|
|
||||||
|
# Read a directory
|
||||||
|
my $dir = 't/02_files.dir';
|
||||||
|
ok (!$kw->readAll($dir), "Run file on dir");
|
||||||
|
ok (scalar(keys(%{$kw->{'rules'}})) == 3, "Fetched correct number of files from dir");
|
||||||
|
ok (!$kw->clearFiles(), "Clear dir files");
|
||||||
|
|
||||||
|
# Read a symlink
|
||||||
|
my $link = 't/02_files.lnk';
|
||||||
|
ok (!$kw->readAll($link), "Run file on link");
|
||||||
|
ok ($kw->getFile() == $file, "Fetched correct name ($file) from link ($link)");
|
||||||
|
ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files from link");
|
||||||
|
ok (!$kw->clearFiles(), "Clear link files");
|
||||||
|
|
||||||
|
# Read multiple mixed
|
||||||
|
ok (!$kw->readAll($file, $dir, $link), "Run readAll on multiple/mixed");
|
||||||
|
ok (scalar(keys(%{$kw->{'rules'}})) == 4, "Fetched correct number of files from all");
|
||||||
|
ok (!$kw->clearFiles(), "Clear dir files");
|
||||||
|
|
||||||
|
|
||||||
|
ok ($kw->readFile('does_not_exist'), "Correctly failed for non-existent file");
|
||||||
|
ok (!$kw->getFile(), "Current name undefined for non-existent file");
|
||||||
|
ok ($kw->readFile('t/02_unreadable.cf'), "Correctly failed for non-readable file");
|
||||||
|
ok (!$kw->getFile(), "Current name undefined for non-readable file");
|
||||||
|
|
||||||
|
done_testing();
|
|
@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Verify file input line formatting
|
||||||
|
|
||||||
|
use Test::More;
|
||||||
|
|
||||||
|
use lib 'lib/';
|
||||||
|
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||||
|
|
||||||
|
my $id = '03';
|
||||||
|
|
||||||
|
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id, 'debug' => 0 } );
|
||||||
|
|
||||||
|
my @good = (
|
||||||
|
'Lorem 1 GLOBAL # comment',
|
||||||
|
'ipsum 0 GLOBAL # comment',
|
||||||
|
'dolor GLOBAL # comment',
|
||||||
|
'sit 1 # comment',
|
||||||
|
'amet 0 # comment',
|
||||||
|
'consectetur # comment',
|
||||||
|
'adipiscing 1 GLOBAL',
|
||||||
|
'elit 0 GLOBAL',
|
||||||
|
'sed GLOBAL',
|
||||||
|
'do 1',
|
||||||
|
'eiusmod 0',
|
||||||
|
'tempor',
|
||||||
|
);
|
||||||
|
foreach my $score ( '', 0, 1 ) {
|
||||||
|
foreach my $group ( '', 'GLOBAL', 'GLOBAL LOCAL' ) {
|
||||||
|
foreach my $comment ( '', 'TESTING', 'LONGER COMMENT' ) {
|
||||||
|
my $word = "word";
|
||||||
|
my $rule = $word;
|
||||||
|
if ($score ne '') {
|
||||||
|
$rule .= ' '.$score;
|
||||||
|
}
|
||||||
|
if ($group ne '') {
|
||||||
|
$rule .= ' '.$group;
|
||||||
|
}
|
||||||
|
if ($comment ne '') {
|
||||||
|
$rule .= ' # '.$comment;
|
||||||
|
}
|
||||||
|
my ($rword, $rscore, $rcomment, @rgroups) = $kw->readLine($rule);
|
||||||
|
ok($rword eq $word, "Word '$word' found for '$rule'");
|
||||||
|
ok($rscore eq $score, "Score '$score' found for '$rule'") if ($score ne '');
|
||||||
|
ok(join(' ',@rgroups) eq $group, "Groups '$group' found for '$rule'") if ($group ne '');
|
||||||
|
ok(join(' ',@rgroups) eq 'LOCAL GLOBAL', "Inferred groups 'LOCAL GLOBAL' found for '$rule'") if ($group eq '');
|
||||||
|
ok($rcomment eq $comment, "Comment '$comment' found for '$rule'") if ($comment ne '');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for bad formatting
|
||||||
|
|
||||||
|
my %bad = (
|
||||||
|
'' => 'Ignore empty line',
|
||||||
|
'# comment' => "Ignore line starting with comment",
|
||||||
|
'word 1 2' => "Ignore line with multiple scores",
|
||||||
|
'2 bad' => "Ignore line with score first",
|
||||||
|
);
|
||||||
|
foreach my $input (keys(%bad)) {
|
||||||
|
ok(!$kw->readLine($input), "$bad{$input} ($input)");
|
||||||
|
}
|
||||||
|
|
||||||
|
done_testing();
|
|
@ -0,0 +1,283 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Basic test to ensure files can be found and read. Verification will come later
|
||||||
|
|
||||||
|
use Test::More;
|
||||||
|
|
||||||
|
use lib 'lib/';
|
||||||
|
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||||
|
|
||||||
|
my $id = '04';
|
||||||
|
|
||||||
|
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id, 'debug' => 0 } );
|
||||||
|
|
||||||
|
my @files = ( 't/04_rules0.cf', 't/04_rules1.cf' );
|
||||||
|
my @failed = @{$kw->readAll( @files )};
|
||||||
|
ok(!scalar(@failed), "Load 'rules' hash with readAll");
|
||||||
|
|
||||||
|
my $expected = getExpected();
|
||||||
|
my ( $missing, $extra, $incorrect ) = 0;
|
||||||
|
use Data::Dump;
|
||||||
|
foreach my $file (keys(%{$expected})) {
|
||||||
|
if ($file eq 'GLOBAL') {
|
||||||
|
my ($m, $e) = compareGroups(
|
||||||
|
$expected->{$file},
|
||||||
|
$kw->{'rules'}->{$file}
|
||||||
|
);
|
||||||
|
print("Missing $m words\n") if ($m);
|
||||||
|
print("$e extra words\n") if ($e);
|
||||||
|
$missing += $m;
|
||||||
|
$extra += $e;
|
||||||
|
} else {
|
||||||
|
foreach my $group (keys(%{$expected->{$file}})) {
|
||||||
|
if ($group eq 'SCORED') {
|
||||||
|
my ($m, $e, $i) = compareScores(
|
||||||
|
$expected->{$file}->{$group},
|
||||||
|
$kw->{'rules'}->{$file}->{$group}
|
||||||
|
);
|
||||||
|
$missing += $m;
|
||||||
|
$extra += $e;
|
||||||
|
$incorrect += $i;
|
||||||
|
} elsif ($group eq 'COMMENTS') {
|
||||||
|
my ($m, $e, $i) = compareScores(
|
||||||
|
$expected->{$file}->{$group},
|
||||||
|
$kw->{'rules'}->{$file}->{$group}
|
||||||
|
);
|
||||||
|
$missing += $m;
|
||||||
|
$extra += $e;
|
||||||
|
$incorrect += $i;
|
||||||
|
} else {
|
||||||
|
my ($m, $e) = compareGroups(
|
||||||
|
$expected->{$file}->{$group},
|
||||||
|
$kw->{'rules'}->{$file}->{$group}
|
||||||
|
);
|
||||||
|
$missing += $m;
|
||||||
|
$extra += $e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ok ($missing == 0, "No expected rules are missing");
|
||||||
|
ok ($extra == 0, "No extra rules are found");
|
||||||
|
ok ($incorrect == 0, "No incorrect scores are found");
|
||||||
|
|
||||||
|
done_testing();
|
||||||
|
|
||||||
|
sub compareGroups
|
||||||
|
{
|
||||||
|
my $expect = shift;
|
||||||
|
my $loaded = shift;
|
||||||
|
|
||||||
|
my @e = sort(@{$expect});
|
||||||
|
my @l = sort(@{$loaded});
|
||||||
|
my ($missing, $extra) = (0, 0);
|
||||||
|
|
||||||
|
while (scalar(@e)) {
|
||||||
|
unless (scalar(@l)) {
|
||||||
|
print("extra words @l\n");
|
||||||
|
$extra += scalar(@e);
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
if ($e[0] eq $l[0]) {
|
||||||
|
shift(@e);
|
||||||
|
shift(@l);
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
if ($e[0] lt $l[0]) {
|
||||||
|
print("Extra word $e[0]\n");
|
||||||
|
$extra++;
|
||||||
|
shift(@e);
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
if ($e[0] gt $l[0]) {
|
||||||
|
print("Missing word $l[0]\n");
|
||||||
|
$missing++;
|
||||||
|
shift(@l);
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (scalar(@e)) {
|
||||||
|
print("Missing ".scalar(@e)." at the end of parsing\n");
|
||||||
|
$missing += scalar(@e);
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ($missing, $extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
sub compareScores
|
||||||
|
{
|
||||||
|
my $expect = shift;
|
||||||
|
my $loaded = shift;
|
||||||
|
|
||||||
|
my %remaining = %{$loaded};
|
||||||
|
my ($missing, $extra, $incorrect) = (0, 0, 0);
|
||||||
|
foreach my $word (keys(%$expect)) {
|
||||||
|
if (!defined($loaded->{$word})) {
|
||||||
|
print("Missing score assignment for $word\n");
|
||||||
|
$missing++;
|
||||||
|
} elsif ($expect->{$word} != $loaded->{$word}) {
|
||||||
|
print("Incorrect score assignment for $word\n");
|
||||||
|
$incorrect++;
|
||||||
|
} else {
|
||||||
|
delete($remaining{$word});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$extra = scalar(keys(%remaining));
|
||||||
|
print("Extra score assignment for $word\n") foreach (@{$extra});
|
||||||
|
|
||||||
|
return ($missing, $extra, $incorrect);
|
||||||
|
}
|
||||||
|
|
||||||
|
sub getExpected
|
||||||
|
{
|
||||||
|
my %expected = (
|
||||||
|
'GLOBAL' => [
|
||||||
|
'lorem',
|
||||||
|
'ipsum',
|
||||||
|
'dolor',
|
||||||
|
'sit',
|
||||||
|
'amet',
|
||||||
|
'consectetur',
|
||||||
|
'adipiscing',
|
||||||
|
'elit',
|
||||||
|
'sed',
|
||||||
|
'do',
|
||||||
|
'eiusmod',
|
||||||
|
'tempor',
|
||||||
|
'minim',
|
||||||
|
'veniam',
|
||||||
|
'quis',
|
||||||
|
'nostrud',
|
||||||
|
'exercitation',
|
||||||
|
'ullamco',
|
||||||
|
'laboris',
|
||||||
|
'nisi',
|
||||||
|
'ut',
|
||||||
|
'aliquip',
|
||||||
|
'ex',
|
||||||
|
'ea',
|
||||||
|
'commodo',
|
||||||
|
'consequat',
|
||||||
|
'duis',
|
||||||
|
'dolore',
|
||||||
|
'eu',
|
||||||
|
'fugiat',
|
||||||
|
],
|
||||||
|
'50_04_T_04_RULES0.cf' => {
|
||||||
|
'SCORED' => {
|
||||||
|
'lorem' => 1,
|
||||||
|
'sit' => 1,
|
||||||
|
'adipiscing' => 1,
|
||||||
|
'do' => 1,
|
||||||
|
'incididunt' => 1,
|
||||||
|
'et' => 1,
|
||||||
|
'aliqua' => 1,
|
||||||
|
'minim' => 1,
|
||||||
|
},
|
||||||
|
'COMMENTS' => {
|
||||||
|
'lorem' => '1, 1, 1, 1',
|
||||||
|
'ipsum' => '1, 1, 1, 0',
|
||||||
|
'dolor' => '1, 1, 1, undef',
|
||||||
|
'sit' => '1, 1, 0, 1',
|
||||||
|
'amet' => '1, 1, 0, 0',
|
||||||
|
'consectetur' => '1, 1, 0, undef',
|
||||||
|
'adipiscing' => '1, 0, 1, 1',
|
||||||
|
'elit' => '1, 0, 1, 0',
|
||||||
|
'sed' => '1, 0, 1, undef',
|
||||||
|
'do' => '1, 0, 0, 1',
|
||||||
|
'eiusmod' => '1, 0, 0, 0',
|
||||||
|
'tempor' => '1, 0, 0, undef',
|
||||||
|
'incididunt' => '0, 1, 1, 1',
|
||||||
|
'ut' => '0, 1, 1, 0',
|
||||||
|
'labore' => '0, 1, 1, undef',
|
||||||
|
'et' => '0, 1, 0, 1',
|
||||||
|
'dolore' => '0, 1, 0, 0',
|
||||||
|
'magna' => '0, 1, 0, undef',
|
||||||
|
'aliqua' => '0, 0, 1, 1',
|
||||||
|
'enim' => '0, 0, 1, 0',
|
||||||
|
'ad' => '0, 0, 1, undef',
|
||||||
|
'minim' => '0, 0, 0, 1',
|
||||||
|
'veniam' => '0, 0, 0, 0',
|
||||||
|
'quis' => '0, 0, 0, undef',
|
||||||
|
},
|
||||||
|
'LOCAL' => [
|
||||||
|
'lorem',
|
||||||
|
'ipsum',
|
||||||
|
'dolor',
|
||||||
|
'sit',
|
||||||
|
'amet',
|
||||||
|
'consectetur',
|
||||||
|
'incididunt',
|
||||||
|
'ut',
|
||||||
|
'labore',
|
||||||
|
'et',
|
||||||
|
'dolore',
|
||||||
|
'magna',
|
||||||
|
'minim',
|
||||||
|
'veniam',
|
||||||
|
'quis',
|
||||||
|
],
|
||||||
|
'group' => [
|
||||||
|
'lorem',
|
||||||
|
'ipsum',
|
||||||
|
'dolor',
|
||||||
|
'adipiscing',
|
||||||
|
'elit',
|
||||||
|
'sed',
|
||||||
|
'incididunt',
|
||||||
|
'ut',
|
||||||
|
'labore',
|
||||||
|
'aliqua',
|
||||||
|
'enim',
|
||||||
|
'ad',
|
||||||
|
]
|
||||||
|
},
|
||||||
|
'50_04_T_04_RULES1.cf' => {
|
||||||
|
'SCORED' => {
|
||||||
|
'nostrud' => 1,
|
||||||
|
'laboris' => 1,
|
||||||
|
'aliquip' => 1,
|
||||||
|
'consequat' => 1,
|
||||||
|
'aute' => 1,
|
||||||
|
'in' => 1,
|
||||||
|
'velit' => 1,
|
||||||
|
'dolore' => 1,
|
||||||
|
},
|
||||||
|
'LOCAL' => [
|
||||||
|
'nostrud',
|
||||||
|
'exercitation',
|
||||||
|
'ullamco',
|
||||||
|
'laboris',
|
||||||
|
'nisi',
|
||||||
|
'ut',
|
||||||
|
'aute',
|
||||||
|
'irure',
|
||||||
|
'dolor',
|
||||||
|
'in',
|
||||||
|
'reprehenderit',
|
||||||
|
'volupatate',
|
||||||
|
'dolore',
|
||||||
|
'eu',
|
||||||
|
'fugiat',
|
||||||
|
],
|
||||||
|
'group' => [
|
||||||
|
'nostrud',
|
||||||
|
'exercitation',
|
||||||
|
'ullamco',
|
||||||
|
'aliquip',
|
||||||
|
'ex',
|
||||||
|
'ea',
|
||||||
|
'aute',
|
||||||
|
'irure',
|
||||||
|
'dolor',
|
||||||
|
'velit',
|
||||||
|
'esse',
|
||||||
|
'cillum',
|
||||||
|
]
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
return \%expected;
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Comments to be ignored. As with following blank line.
|
||||||
|
# Must test every combination of:
|
||||||
|
# GLOBAL, LOCAL, GROUP, SCORE
|
||||||
|
|
||||||
|
Lorem 1 GLOBAL LOCAL group # 1, 1, 1, 1
|
||||||
|
ipsum 0 GLOBAL LOCAL group # 1, 1, 1, 0
|
||||||
|
dolor GLOBAL LOCAL group # 1, 1, 1, undef
|
||||||
|
sit 1 GLOBAL LOCAL # 1, 1, 0, 1
|
||||||
|
amet 0 GLOBAL LOCAL # 1, 1, 0, 0
|
||||||
|
consectetur GLOBAL LOCAL # 1, 1, 0, undef
|
||||||
|
adipiscing 1 GLOBAL group # 1, 0, 1, 1
|
||||||
|
elit 0 GLOBAL group # 1, 0, 1, 0
|
||||||
|
sed GLOBAL group # 1, 0, 1, undef
|
||||||
|
do 1 GLOBAL # 1, 0, 0, 1
|
||||||
|
eiusmod 0 GLOBAL # 1, 0, 0, 0
|
||||||
|
tempor GLOBAL # 1, 0, 0, undef
|
||||||
|
incididunt 1 LOCAL group # 0, 1, 1, 1
|
||||||
|
ut 0 LOCAL group # 0, 1, 1, 0
|
||||||
|
labore LOCAL group # 0, 1, 1, undef
|
||||||
|
et 1 LOCAL # 0, 1, 0, 1
|
||||||
|
dolore 0 LOCAL # 0, 1, 0, 0
|
||||||
|
magna LOCAL # 0, 1, 0, undef
|
||||||
|
aliqua 1 group # 0, 0, 1, 1
|
||||||
|
enim 0 group # 0, 0, 1, 0
|
||||||
|
ad group # 0, 0, 1, undef
|
||||||
|
minim 1 # 0, 0, 0, 1
|
||||||
|
veniam 0 # 0, 0, 0, 0
|
||||||
|
quis # 0, 0, 0, undef
|
|
@ -0,0 +1,24 @@
|
||||||
|
nostrud 1 GLOBAL LOCAL group
|
||||||
|
exercitation 0 GLOBAL LOCAL group
|
||||||
|
ullamco GLOBAL LOCAL group
|
||||||
|
laboris 1 GLOBAL LOCAL
|
||||||
|
nisi 0 GLOBAL LOCAL
|
||||||
|
ut GLOBAL LOCAL
|
||||||
|
aliquip 1 GLOBAL group
|
||||||
|
ex 0 GLOBAL group
|
||||||
|
ea GLOBAL group
|
||||||
|
commodo 0 GLOBAL
|
||||||
|
consequat 1 GLOBAL
|
||||||
|
Duis GLOBAL
|
||||||
|
aute 1 LOCAL group
|
||||||
|
irure 0 LOCAL group
|
||||||
|
dolor LOCAL group
|
||||||
|
in 1 LOCAL
|
||||||
|
reprehenderit 0 LOCAL
|
||||||
|
volupatate LOCAL
|
||||||
|
velit 1 group
|
||||||
|
esse 0 group
|
||||||
|
cillum group
|
||||||
|
dolore 1
|
||||||
|
eu 0
|
||||||
|
fugiat
|
Loading…
Reference in New Issue