Additional tests
Locate and open files Validate config file input formatting Check that the hash representation of rules read from files is correct
This commit is contained in:
parent
944c3eed5e
commit
cfbcee7c7d
|
@ -1,9 +1,19 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
package Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
# <@LICENSE>
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to you under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# </@LICENSE>
|
||||
|
||||
=head1 NAME
|
||||
|
||||
|
@ -11,11 +21,7 @@ Mail::SpamAssassin::KeywordRuleGenerator - Generate SA rules for keywords
|
|||
|
||||
=cut
|
||||
|
||||
our $VERSION = '0.01';
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
Version 0.01
|
||||
package Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
|
@ -23,6 +29,31 @@ Generate SpamAssassin compatible configuration files given lists of keywords.
|
|||
Implemented as a module largely for testing purposes.
|
||||
|
||||
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new($id);
|
||||
$kw->readFile('keywords.cf');
|
||||
$kw->writeFiles();
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Mail::SpamAssassin::KeywordRuleGenerator does what it says on the tin; it
|
||||
generates SpamAssassin compatible configuration files to catch keywords that you
|
||||
specify. Most simply, it can take in one or more properly formatted input files
|
||||
(see FILES), generate rules for individual files (optional), as well as meta
|
||||
rules for the counts for each set of keywords (ie. 1 of N hit; 2 of N hit...).
|
||||
See RULES for more on how the rules are generated.
|
||||
|
||||
The sets of keywords can be broken up into groups (see GROUPS).
|
||||
|
||||
=head1 PREREQUISITES
|
||||
|
||||
Requires C<spamassassin> executable and the following Perl modules
|
||||
|
||||
To::Be::Determined
|
||||
|
||||
=cut
|
||||
|
||||
#use strict;
|
||||
use warnings;
|
||||
|
||||
=head2 FILES
|
||||
|
||||
|
@ -77,6 +108,14 @@ describe ID_FILE_GROUP_1 1 match in keyword group 'GROUP'
|
|||
meta ID_FILE_GROUP_2 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 1
|
||||
describe ID_FILE_GROUP_2 2 matches in keyword group 'GROUP'
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
Version 0.01
|
||||
|
||||
=cut
|
||||
|
||||
our $VERSION = '0.01';
|
||||
|
||||
=head1 EXPORT
|
||||
|
||||
=cut
|
||||
|
@ -89,86 +128,284 @@ our @EXPORT = qw( new );
|
|||
our %EXPORT_TAGS = (
|
||||
'all' => [
|
||||
'new',
|
||||
''
|
||||
'getId',
|
||||
'setId',
|
||||
'getPriority',
|
||||
'setPriority',
|
||||
'getOutfile',
|
||||
'setOutfile',
|
||||
'getScorefile',
|
||||
'setScorefile',
|
||||
'clearFiles',
|
||||
'getFiles',
|
||||
'nextFile',
|
||||
'readFile',
|
||||
'getFile',
|
||||
'setFile',
|
||||
'joinRules',
|
||||
'processMetas',
|
||||
'processWords',
|
||||
'processGroups',
|
||||
'processAll',
|
||||
'writeFile',
|
||||
'writeAll'
|
||||
]
|
||||
);
|
||||
|
||||
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
|
||||
|
||||
=head1 SUBROUTINES/METHODS
|
||||
=head1 METHODS
|
||||
|
||||
=head2 C<$kw = new($id)>
|
||||
Except for getter methods, all methods make changes directly within the context
|
||||
object and only return errors. Thus, each call can be verified by checking that
|
||||
it returned undef:
|
||||
|
||||
die "Failed to run 'method'\n" if($kw->method($arg));
|
||||
|
||||
or:
|
||||
|
||||
my $err = $kw->method($arg);
|
||||
die "Received $err\n" if ($err);
|
||||
|
||||
Getter methods will return the attribute or undef if it does not exist.
|
||||
|
||||
=head2 C<$kw = new( %args )>
|
||||
|
||||
Creates a new C<Mail::SpamAssassin::KeywordRuleGenerator> object and returns it.
|
||||
|
||||
C<$id> is an optional identifier which will be used to prefix any rule output.
|
||||
It can be updated at any time via the C<$kw->{id}> value or with the
|
||||
C<$kw->id($id)> setter;
|
||||
C<$args> is an optional hash to assign any preliminary attributes or flags to
|
||||
the object. See various 'get' and 'set' functions.
|
||||
|
||||
=cut
|
||||
|
||||
sub new
|
||||
{
|
||||
my ($class, $id) = @_;
|
||||
my ($class, $args) = @_;
|
||||
my $self = $args;
|
||||
|
||||
$id = uc($id) || 'KW';
|
||||
bless {
|
||||
id => $id || '',
|
||||
keywords => {}
|
||||
} => $class;
|
||||
}
|
||||
=head3 Initial attributes
|
||||
|
||||
=head2 C<$kw->id()>
|
||||
While other attributes will exist within the object during processing, those
|
||||
which make sense to define up-front and which will not be overwritten by any of
|
||||
the built-in functions (other than their associated getters) are:
|
||||
|
||||
Get/Setter for C<$kw->{'id'}>
|
||||
=head3 id
|
||||
|
||||
id scalar
|
||||
A short identifier which will be appending to all output files to allow
|
||||
for easier recognition of the files' sources. Default: 'KW'
|
||||
|
||||
=cut
|
||||
|
||||
sub id
|
||||
$self->{'id'} //= 'KW';
|
||||
|
||||
=head3 priority
|
||||
|
||||
priority scalar
|
||||
Typically a number used to define the priority of the rules.
|
||||
SpamAssassin will read configuration files in alphabetical order and the
|
||||
last iteration of a configuration for the same rule will be used. This
|
||||
means that the files read last will overwrite those read earlier. By
|
||||
convention the number is used for easy sorting. Any leading aphabetical
|
||||
character will be ordered after all numbers, and so given high priority.
|
||||
Default: '50'
|
||||
|
||||
=cut
|
||||
|
||||
$self->{'priority'} //= 50;
|
||||
|
||||
=head3 debug
|
||||
|
||||
debug boolean
|
||||
Enable (1) or disable (0) debugging output. Default: 0
|
||||
|
||||
=cut
|
||||
|
||||
$self->{'debug'} //= 0;
|
||||
|
||||
=head3 singleOutfile
|
||||
|
||||
singleOutfile boolean
|
||||
Indicates whether output rules should all be added to a single file (1),
|
||||
or to one file per input file (0). Note that this still requires
|
||||
'joinScores' to have one file total. Default: 0
|
||||
|
||||
=cut
|
||||
|
||||
$self->{'singleOutfile'} //= 0;
|
||||
|
||||
=head3 joinScores
|
||||
|
||||
joinScores bootlean
|
||||
Indicates whether to include the scores in the same file as their
|
||||
associated rule definitions (1) or in a second file on their own (0).
|
||||
The second file will simply append '_SCORES' to the file name (prior to
|
||||
the '.cf'), unless overridden by C<$kw->setScoreFile($path)> or a second
|
||||
path argument in C<$kw->setOutfile($rulePath, $scorePath)>. Default: 1
|
||||
|
||||
=cut
|
||||
|
||||
$self->{'joinScores'} //= 1;
|
||||
|
||||
bless $self, $class;
|
||||
return $self;
|
||||
}
|
||||
|
||||
=head2 C<$kw->getId()>
|
||||
|
||||
Getter for C<$kw->{'id'}>. 'id' is used for top-level rule names.
|
||||
|
||||
=cut
|
||||
|
||||
sub getId
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
if (defined($self->{'id'})) {
|
||||
return $self->{'id'};
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->setId()>
|
||||
|
||||
Setter for C<$kw->{'id'}>
|
||||
|
||||
=cut
|
||||
|
||||
sub setId
|
||||
{
|
||||
my $self = shift;
|
||||
my $id = shift;
|
||||
|
||||
if (defined($id)) {
|
||||
$id = uc($id);
|
||||
$self->{'id'} = $id;
|
||||
$self->{'id'} = $id || return "Failed to set $id";
|
||||
} else {
|
||||
if (defined($self->{'id'})) {
|
||||
return $self->{'id'};
|
||||
} else {
|
||||
return undef;
|
||||
}
|
||||
return "No ID provided\n";
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->file()>
|
||||
=head2 C<$kw->getPriority()>.
|
||||
|
||||
Get/Setter for C<$kw->{'file'}>
|
||||
Getter for C<$kw->{'priority'}>. 'priority' value used for the output filepath
|
||||
to indicate the load order. SpamAssassin reads in ascending order with later
|
||||
iterations overriding earlier. The 'score' file will increment so that it comes
|
||||
at the end.
|
||||
|
||||
=cut
|
||||
|
||||
sub file
|
||||
sub getPriority
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
if (defined($self->{'priority'})) {
|
||||
return $self->{'priority'};
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->setPriority()>
|
||||
|
||||
Setter for C<$kw->{'priority'}>.
|
||||
|
||||
=cut
|
||||
|
||||
sub setPriority
|
||||
{
|
||||
my $self = shift;
|
||||
my $priority = shift;
|
||||
|
||||
if (defined($priority)) {
|
||||
$path = uc($priority);
|
||||
$self->{'priority'} = $priority || return "Failed to set priority: $priority";
|
||||
} else {
|
||||
return "No 'priority' provided\n";
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->getFile()>
|
||||
|
||||
Getter for C<$kw->{'file'}>. This is the name of the current output file being
|
||||
processed. Not to be confused with C<readFile> or C<getOutfile>.
|
||||
|
||||
=cut
|
||||
|
||||
sub getFile
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
if (defined($self->{'file'})) {
|
||||
return $self->{'file'};
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->setFile()>
|
||||
|
||||
Setter for C<$kw->{'file'}>.
|
||||
|
||||
=cut
|
||||
|
||||
sub setFile
|
||||
{
|
||||
my $self = shift;
|
||||
my $file = shift;
|
||||
|
||||
if (defined($file)) {
|
||||
my $name = $file;
|
||||
$name =~ s/^([a-zA-Z0-9\-_]*)(\..*)?$/$1/;
|
||||
$name =~ s/-/_/;
|
||||
$name = uc($name);
|
||||
die("invalid filename\n") unless ($name =~ m/^[A-Z_]+$/);
|
||||
$self->{'file'} = $name;
|
||||
$self->{'file'} = $file;
|
||||
} else {
|
||||
if (defined($self->{'file'})) {
|
||||
return $self->{'file'};
|
||||
return "No File provided\n";
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->getOutfile()>
|
||||
|
||||
Getter for C<$kw->{'outfile'}>. 'outfile' represents the real filepath of the
|
||||
output file which is currently being processed.
|
||||
|
||||
=cut
|
||||
|
||||
sub getOutfile
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
if (defined($self->{'outfile'})) {
|
||||
return $self->{'outfile'};
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->setOutfile()>
|
||||
|
||||
Setter for C<$kw->{'outfile'}>. Can be defined manually with a scalar argument,
|
||||
otherwise the path is constructed from the existing attributes.
|
||||
|
||||
=cut
|
||||
|
||||
sub setOutfile
|
||||
{
|
||||
my $self = shift;
|
||||
my $path = shift;
|
||||
|
||||
if (defined($path)) {
|
||||
$self->{'outfile'} = $path || return "Failed to set $path";
|
||||
} else {
|
||||
if ($self->{'singleOutfile'}) {
|
||||
$self->{'outfile'} = $self->{'priority'} .
|
||||
'_' . uc($self->{'id'}) .
|
||||
'.cf';
|
||||
} else {
|
||||
return undef;
|
||||
my $file = $self->{'file'};
|
||||
$file =~ s/\//_/g; # Change dir slashes to _
|
||||
$file =~ s/(\.[^\.]*)$//g; # Remove extensions
|
||||
$file = uc($file); # Convert to uppercase for rule names
|
||||
|
||||
$self->{'outfile'} = $self->{'priority'} .
|
||||
'_' . uc($self->{'id'}) .
|
||||
'_' . $file .
|
||||
'.cf';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->getFile();>
|
||||
=head2 C<$kw->getFiles($regex);>
|
||||
|
||||
Simple recursive search for files within a directory. Will validate that each
|
||||
file is readable and return an array of file names.
|
||||
|
@ -184,24 +421,63 @@ The regex will be used as a file filter and will only return files that match.
|
|||
sub getFiles
|
||||
{
|
||||
my $self = shift;
|
||||
my @args = @_;
|
||||
my $regex = shift;
|
||||
|
||||
my $return = '';
|
||||
foreach (@args) {
|
||||
die "$_ does not exist\n" unless (-e "$_" || -l "$_");
|
||||
$return .= "$_ does not exist\n" unless (-e "$_" || -l "$_");
|
||||
if (-l $_) {
|
||||
getFiles(readlink($_));
|
||||
$self->getFiles(readlink($_));
|
||||
} elsif (-d $_) {
|
||||
my @recursive = glob($_."/*");
|
||||
getFiles(@recursive);
|
||||
$self->getFiles(@recursive);
|
||||
} else {
|
||||
die "$_ is not readable\n" unless (-r "$_");
|
||||
push(@{$self->{files_ref}}, $_);
|
||||
if (defined($regex)) {
|
||||
if ($_ =~ $regex) {
|
||||
push(@{$self->{'files_ref'}}, $_);
|
||||
} else {
|
||||
next;
|
||||
}
|
||||
} else {
|
||||
$return .= "$_ is not readable\n" unless (-r "$_");
|
||||
push(@{$self->{'files_ref'}}, $_);
|
||||
}
|
||||
}
|
||||
}
|
||||
return @$self->{files_ref};
|
||||
return $return;
|
||||
}
|
||||
|
||||
=head2 C<$kw->readFile();>
|
||||
=head2 C<$kw->nextFile();>
|
||||
|
||||
Shift next message in files_ref queue to current.
|
||||
|
||||
=cut
|
||||
|
||||
sub nextFile
|
||||
{
|
||||
my $self = shift;
|
||||
if (scalar(@{$self->{'files_ref'}})) {
|
||||
$self->{'path'} = shift(@{$self->{'files_ref'}});
|
||||
} else {
|
||||
return "End of array\n";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
=head2 C<$kw->clearFiles();>
|
||||
|
||||
Clear the current file reference queue.
|
||||
|
||||
=cut
|
||||
|
||||
sub clearFiles
|
||||
{
|
||||
my $self = shift;
|
||||
delete($self->{'rules'}) || return "Failed to delete rules hash\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
=head2 C<$kw->readFile(%args);>
|
||||
|
||||
Read in properly formatted keyword list file. The basic format is one keyword
|
||||
per line, an optional score, and an optional list of 'groups'.
|
||||
|
@ -218,45 +494,314 @@ When keyword groups are omitted, that keyword defaults to just the 'LOCAL' group
|
|||
|
||||
Examples with other combinations are:
|
||||
|
||||
# word 0 Same as previous
|
||||
# word 2 Used for LOCAL, not GLOBAL, scores 2 on it's own
|
||||
# word GROUP GROUP group, not LOCAL, not GLOBAL, no score
|
||||
# word 0 GROUP Same as previous
|
||||
# word 1 GROUP GLOBAL LOCAL GROUP group, in GLOBAL, in LOCAL, scores 1
|
||||
word 0 # Same as previous
|
||||
word 2 # Used for LOCAL, not GLOBAL, scores 2
|
||||
word GROUP # GROUP group, not LOCAL, not GLOBAL, no score
|
||||
word 0 GROUP # Same as previous
|
||||
word 1 GROUP GLOBAL LOCAL # GROUP group, in GLOBAL, in LOCAL, scores 1
|
||||
|
||||
C<%args> can be used to override the attributes used to index the rules,
|
||||
including the id, file
|
||||
|
||||
This function intentionally does not iterate through the files queue so that you
|
||||
can step through and make modifications as you go.
|
||||
|
||||
=cut
|
||||
|
||||
sub readFiles
|
||||
sub readFile
|
||||
{
|
||||
my $key_ref = shift;
|
||||
my @files = @_;
|
||||
my $self = shift;
|
||||
my $file = shift || return 'No file provided';
|
||||
my %args = @_;
|
||||
$self->{'file'} = $file;
|
||||
$self->setOutfile();
|
||||
my $n = 0;
|
||||
|
||||
foreach my $file (@files) {
|
||||
my $n = 0; # Track line number for errors
|
||||
my $name = $file;
|
||||
$name =~ s/\//_/g; # Change dir slashes to _
|
||||
$name =~ s/(\.[^\.]*)*$//g; # Remove extensions
|
||||
$name = uc($name); # Convert to uppercase for rule names
|
||||
if (open(my $fh, '<', $file)) {
|
||||
while (<$fh>) {
|
||||
$n++;
|
||||
# Ignore blank lines and comments
|
||||
if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) {
|
||||
next;
|
||||
# Verify formatting
|
||||
} elsif ($_ =~ m/^([^\s]+)\s+([0-9]+)(?:\s+(.*))?/) {
|
||||
my ($word, $score, $groups) = ($1, $2, $3);
|
||||
print "'$word' '$score' " . (join(',',$groups)) . "\n";
|
||||
} else {
|
||||
die "Invalid input in $file, line $n: $_\n";
|
||||
if (open(my $fh, '<', $file)) {
|
||||
my $rules = 0;
|
||||
while (<$fh>) {
|
||||
$n++;
|
||||
# Ignore blank lines and comments
|
||||
if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) {
|
||||
next;
|
||||
# Verify formatting
|
||||
} elsif (my ($word, $score, $comment, @groups) = $self->readLine($_)) {
|
||||
if ($self->{'debug'}) {
|
||||
print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n";
|
||||
}
|
||||
foreach my $group (@groups) {
|
||||
if ($group eq 'GLOBAL') {
|
||||
push(@{$self->{'rules'}->{'GLOBAL'}}, $word);
|
||||
} else {
|
||||
push(@{$self->{'rules'}->{$self->{'outfile'}}->{$group}}, $word);
|
||||
}
|
||||
}
|
||||
$self->{'rules'}->{$self->{'outfile'}}->{'SCORED'}->{$word} = $score if ($score);
|
||||
$self->{'rules'}->{$self->{'outfile'}}->{'COMMENTS'}->{$word} = $comment if ($comment);
|
||||
} elsif ($self->{debug}) {
|
||||
print STDERR "ERROR: Invalid input in $file, line $n: $_\n";
|
||||
}
|
||||
}
|
||||
print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}} || !$self->{'debug'});
|
||||
return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'outfile'}});
|
||||
} else {
|
||||
delete($self->{'file'});
|
||||
return "Failed to read $file";
|
||||
}
|
||||
return undef;
|
||||
}
|
||||
|
||||
=head2 C<$kw->readAll();>
|
||||
|
||||
Loops through C<readFile> for all files in the queue.
|
||||
|
||||
=cut
|
||||
|
||||
sub readAll
|
||||
{
|
||||
my $self = shift;
|
||||
my @files = ( shift ) || return "No files provided\n";
|
||||
while (scalar(@_)) {
|
||||
push(@files, shift);
|
||||
}
|
||||
|
||||
my @failed;
|
||||
foreach my $file (@files) {
|
||||
if (!-e $file) {
|
||||
push(@failed, $file);
|
||||
print STDERR "$file does not exist\n" if ($self->{'debug'});
|
||||
} elsif (-l $file) {
|
||||
# Accept only links within PWD
|
||||
my $dest = readlink($file);
|
||||
if ($dest =~ m/^\//) {
|
||||
$dest =~ s/$ENV{'PWD'}\/// ;
|
||||
if ($dest eq readlink($file)) {
|
||||
push(@failed, $file);
|
||||
print STDERR "Symlink outside of PWD $file\n" if ($self->{'debug'});
|
||||
next;
|
||||
}
|
||||
}
|
||||
$self->readAll($dest);
|
||||
} elsif (-f $file) {
|
||||
push(@failed, "Failed to read $file") if $self->readFile($file);
|
||||
} elsif (-d $file) {
|
||||
push(@failed, @_) if $self->readAll(glob($file.'/*'));
|
||||
# Bash pattern?
|
||||
} else {
|
||||
if (my @glob = glob($file)) {
|
||||
push(@failed, ( @_ )) if $self->readAll(@glob);
|
||||
} else {
|
||||
print STDERR "Bad file $file\n" if ($self->{'debug'});
|
||||
}
|
||||
}
|
||||
}
|
||||
#print "$_\n" foreach(@$files_ref);
|
||||
return @failed if (scalar(@failed));
|
||||
return undef;
|
||||
}
|
||||
|
||||
=head 2 C<$kw->writeFiles($out_dir)>
|
||||
=head2 C<$kw->readLine($line)>
|
||||
|
||||
Reads a line from a configuration file and returns the relevant values or undef
|
||||
if the line is not properly formatted.
|
||||
|
||||
=cut
|
||||
|
||||
sub readLine
|
||||
{
|
||||
my $self = shift;
|
||||
my $line = shift;
|
||||
my $invalid = shift;
|
||||
my ($word, $score, $comment, @groups);
|
||||
if (my @sections = $line =~ m/(?:^|\s+)(?:([^\d\s#]\S+)|(\d+(?:\.\d+)?\b)|([^\d\s#]+)|(#.*$))/g) {
|
||||
while (@sections) {
|
||||
next unless my $section = shift(@sections);
|
||||
if (defined($comment)) {
|
||||
$comment .= ' ' if ($comment ne '');
|
||||
$comment .= $section;
|
||||
} elsif (!defined($word) && $section =~ m/^([^\d\s#]\S+)$/) {
|
||||
$word = $section;
|
||||
} elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) {
|
||||
$score = $section;
|
||||
} elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) {
|
||||
push(@groups, $section);
|
||||
} elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) {
|
||||
$comment = $section;
|
||||
$comment =~ s/^#\s*//;
|
||||
} else {
|
||||
push(@invalid, $section);
|
||||
}
|
||||
}
|
||||
if (scalar(@invalid)) {
|
||||
if ($self->{'debug'}) {
|
||||
print("Invalid clauses: '".join("', ", @invalid)."' in '$line'\n");
|
||||
}
|
||||
return undef;
|
||||
}
|
||||
return undef unless ($word);
|
||||
$word = lc($word);
|
||||
$score //= 0;
|
||||
$comment //= '';
|
||||
@groups = ( 'LOCAL', 'GLOBAL' ) unless (scalar(@groups));
|
||||
return ( $word, $score, $comment, @groups );
|
||||
}
|
||||
return ();
|
||||
}
|
||||
|
||||
=head2 C<$kw->joinRules(%rules)>
|
||||
|
||||
Merge rules hash into working output hash. Generally called from C<readFile>,
|
||||
but if you want to add rules manually, it will require the following format:
|
||||
|
||||
{
|
||||
'word' => {
|
||||
'score' => 0,
|
||||
'groups' => ( 'LOCAL' )
|
||||
},
|
||||
'other' => {
|
||||
'score' => 1,
|
||||
'groups' => ( 'GLOBAL', 'group' )
|
||||
}
|
||||
}
|
||||
|
||||
The main context object will provide the working 'ID', 'FILE' and other
|
||||
attributes necessary to nest these rules in the existing hash.
|
||||
|
||||
=cut
|
||||
|
||||
sub joinRules
|
||||
{
|
||||
my $self = shift;
|
||||
my %rules = @_;
|
||||
|
||||
if ($self->{'unified'}) {
|
||||
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'.cf';
|
||||
} else {
|
||||
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'out'} = $self->{'priority'}.'_'.$self->{'id'}.'_'.$self->{'file'}.'.cf';
|
||||
}
|
||||
foreach my $word (keys(%rules)) {
|
||||
my $score = $rules{$word}{'score'} || 0;
|
||||
my @groups = $rules{$word}{'groups'} || ( 'GLOBAL' );
|
||||
if ($score) {
|
||||
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}->{$word} = $score;
|
||||
}
|
||||
foreach my $group (@groups) {
|
||||
if ($group eq 'GLOBAL') {
|
||||
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}))) {
|
||||
push(@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}}, $word);
|
||||
} else {
|
||||
$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'} = ( $word );
|
||||
}
|
||||
next;
|
||||
}
|
||||
if (scalar(keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}))) {
|
||||
push(@{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group}}, $word);
|
||||
} else {
|
||||
$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'groups'}->{$group} = ( $word );
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
=head2 C<$kw->processMetas($outfile, $file);>
|
||||
|
||||
Create all of the component meta rules for the declared C<$file>. Those that
|
||||
will be used for the standalone and count rules. Output to C<$outfile>. This
|
||||
must be run before the other process methods and must be run for 'GLOBAL' first,
|
||||
otherwise output will be invalid. Meta rules for file-specific words will not be
|
||||
generated if they are also in the 'GLOBAL' group, instead the meta rules from
|
||||
the 'GLOBAL' file will be used for the count rules in all other files. This
|
||||
will prevent duplicates, but also requires that you not rename output files such
|
||||
that the they appear before the 'GLOBAL' file (without '_C<$file>' at the end).
|
||||
|
||||
=cut
|
||||
|
||||
sub processMetas
|
||||
{
|
||||
my $self = shift;
|
||||
my $outfile = shift;
|
||||
my $file = shift;
|
||||
my $rules = shift;
|
||||
|
||||
my $prefix = $self->{'id'};
|
||||
my @words;
|
||||
if ($file eq 'GLOBAL') {
|
||||
@words = @{$rules->{'GLOBAL'}};
|
||||
} else {
|
||||
$prefix .= "_".$file;
|
||||
foreach (keys(%{$rules->{$file}->{'groups'}})) {
|
||||
next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
|
||||
}
|
||||
foreach (keys(%{$self->{'rules'}->{$self->{'id'}}->{$self->{'file'}}->{'words'}})) {
|
||||
next if (grep {/^$_$/} @{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}});
|
||||
next if (grep {/^$_$/} @words);
|
||||
push (@words, $_);
|
||||
}
|
||||
}
|
||||
foreach my $word (@words) {
|
||||
$self->{'output'}->{$outfile} .=
|
||||
"body\t__".$prefix."_".uc($word)."_BODY\t/\\b".$word."\\b/\n" .
|
||||
"header\t__".$prefix."_".uc($word)."_SUBJ\tSubject =~ /\\b".$word."\\b/\n" .
|
||||
"meta\t__".$prefix."_".uc($word)."\t( __".$prefix."_".uc($word)."_BODY || __".$prefix."_".uc($word)."_SUBJ )\n\n";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
=head2 C<$kw->processWords($outfile,%args);>
|
||||
|
||||
Take a list of words with scores and add them to C<$outfile>.
|
||||
|
||||
=cut
|
||||
|
||||
sub processWords
|
||||
{
|
||||
my $self = shift;
|
||||
my $out = shift;
|
||||
}
|
||||
|
||||
=head2 C<$kw->processGroup(%args);>
|
||||
|
||||
Take a single group, including 'GLOBAL' and add it to C<$outfile>.
|
||||
|
||||
=cut
|
||||
|
||||
sub processGroup
|
||||
{
|
||||
my $self = shift;
|
||||
my $outfile = shift;
|
||||
|
||||
for (my $i = 0; $i < scalar(@all); $i++) {
|
||||
$files->{$self->{'priority'}.'_'.$self->{'id'}.'cf'} .=
|
||||
"meta\t".$self->{'id'}."_".$i."\t( ".join(' + ',@all)." ) > $i\n" .
|
||||
"describe\tMatched ".($i+1)."of keywords: ".join(', ',@{$self->{'rules'}->{$self->{'id'}}->{'GLOBAL'}})."\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->processAll(%args);>
|
||||
|
||||
Process all groups.
|
||||
|
||||
=cut
|
||||
|
||||
sub processAll
|
||||
{
|
||||
my $self = shift;
|
||||
my %args = @_;
|
||||
|
||||
$self->{'output'} = {};
|
||||
foreach my $id (keys(%{$self->{'rules'}})) {
|
||||
$self->processMetas($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
$self->processGroups($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
$self->processWords($self->{'priority'}.'_'.$id.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
foreach my $file (keys(%{$self->{'rules'}->{$id}})) {
|
||||
$self->processMetas($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
$self->processGroups($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
$self->processWords($self->{'priority'}.'_'.$id.'_'.$file.'.cf','GLOBAL',$self->{'rules'}->{$id});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
=head2 C<$kw->writeFiles($out_dir)>
|
||||
|
||||
Output files will use the name of each input file, stripping any extension, and
|
||||
forcing the name to uppercase. Rules in each file will be called:
|
||||
|
@ -282,13 +827,60 @@ body for that word.
|
|||
sub writeFiles
|
||||
{
|
||||
my $self = shift;
|
||||
my $dir = shift;
|
||||
foreach my $out (keys(%{$self->{'rules'}})) {
|
||||
unless ($self->{'joinScores'}) {
|
||||
$file =~ s/(.*)\.cf/$1_SCORES.cf/;
|
||||
print STDERR $self->{'scores'};
|
||||
}
|
||||
if (open(my $fh, '>', $out)) {
|
||||
print $fh $self->{'output'}->{$out};
|
||||
close($fh);
|
||||
} else {
|
||||
print STDERR "Failed to write $out\n";
|
||||
}
|
||||
}
|
||||
|
||||
use Data::Dump;
|
||||
print Data::Dump::dump($self->{keywords});
|
||||
return 0;
|
||||
}
|
||||
|
||||
=head1 MORE
|
||||
|
||||
For discussion of the module and examples, see:
|
||||
|
||||
E<lt>https://john.me.tz/projects/article.php?topic=Mail-SpamAssassin-KeywordRuleGenerator<gt>
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
Mail::SpamAssassin
|
||||
spamassassin
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Report issues to:
|
||||
|
||||
E<lt>https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGenerator/issuesE<gt>
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
John Mertz <git@john.me.tz>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Mail::SpamAssassin::KeywordRuleGenerator is distributed under the Apache License
|
||||
Version 2.0, as described in this file and the file C<LICENSE> included with the
|
||||
distribution.
|
||||
|
||||
=head1 AVAILABILITY
|
||||
|
||||
If possible, the latest version of this library will be made available from CPAN
|
||||
as well as:
|
||||
|
||||
E<lt>https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGeneratorE<gt>
|
||||
|
||||
=cut
|
||||
|
||||
1;
|
||||
|
||||
=pod
|
||||
die "Please provide rules file(s) as an argument\n" unless (defined($ARGV[0]));
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
word
|
|
@ -0,0 +1,3 @@
|
|||
oneone 2 LOCAL
|
||||
onetwo 0 GLOBAL
|
||||
onethree three
|
|
@ -0,0 +1,3 @@
|
|||
twoone 2 LOCAL
|
||||
twotwo 0 GLOBAL
|
||||
twothree three
|
|
@ -0,0 +1 @@
|
|||
/home/jpm/KeywordRuleGenerator/t/02_files.cf
|
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Basic test to ensure files can be found and read. Verification will come later
|
||||
|
||||
use Test::More;
|
||||
|
||||
use lib 'lib/';
|
||||
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
|
||||
my $id = '02';
|
||||
|
||||
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id } );
|
||||
|
||||
# Read a single file
|
||||
my $file = 't/02_files.cf';
|
||||
ok (!$kw->readFile($file), "Run readFile on $file");
|
||||
ok ($kw->getFile($file) == $file, "Get current working file");
|
||||
ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files in rules");
|
||||
ok ($kw->{'rules'}->{'50_02_T_02_FILES.cf'}, "Loaded correct output name");
|
||||
ok (!$kw->clearFiles(), "Clear 'rules' hash");
|
||||
ok (!$kw->{'rules'}, "Fetched none after clearing");
|
||||
|
||||
# Read a directory
|
||||
my $dir = 't/02_files.dir';
|
||||
ok (!$kw->readAll($dir), "Run file on dir");
|
||||
ok (scalar(keys(%{$kw->{'rules'}})) == 3, "Fetched correct number of files from dir");
|
||||
ok (!$kw->clearFiles(), "Clear dir files");
|
||||
|
||||
# Read a symlink
|
||||
my $link = 't/02_files.lnk';
|
||||
ok (!$kw->readAll($link), "Run file on link");
|
||||
ok ($kw->getFile() == $file, "Fetched correct name ($file) from link ($link)");
|
||||
ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files from link");
|
||||
ok (!$kw->clearFiles(), "Clear link files");
|
||||
|
||||
# Read multiple mixed
|
||||
ok (!$kw->readAll($file, $dir, $link), "Run readAll on multiple/mixed");
|
||||
ok (scalar(keys(%{$kw->{'rules'}})) == 4, "Fetched correct number of files from all");
|
||||
ok (!$kw->clearFiles(), "Clear dir files");
|
||||
|
||||
|
||||
ok ($kw->readFile('does_not_exist'), "Correctly failed for non-existent file");
|
||||
ok (!$kw->getFile(), "Current name undefined for non-existent file");
|
||||
ok ($kw->readFile('t/02_unreadable.cf'), "Correctly failed for non-readable file");
|
||||
ok (!$kw->getFile(), "Current name undefined for non-readable file");
|
||||
|
||||
done_testing();
|
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Verify file input line formatting
|
||||
|
||||
use Test::More;
|
||||
|
||||
use lib 'lib/';
|
||||
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
|
||||
my $id = '03';
|
||||
|
||||
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id, 'debug' => 0 } );
|
||||
|
||||
my @good = (
|
||||
'Lorem 1 GLOBAL # comment',
|
||||
'ipsum 0 GLOBAL # comment',
|
||||
'dolor GLOBAL # comment',
|
||||
'sit 1 # comment',
|
||||
'amet 0 # comment',
|
||||
'consectetur # comment',
|
||||
'adipiscing 1 GLOBAL',
|
||||
'elit 0 GLOBAL',
|
||||
'sed GLOBAL',
|
||||
'do 1',
|
||||
'eiusmod 0',
|
||||
'tempor',
|
||||
);
|
||||
foreach my $score ( '', 0, 1 ) {
|
||||
foreach my $group ( '', 'GLOBAL', 'GLOBAL LOCAL' ) {
|
||||
foreach my $comment ( '', 'TESTING', 'LONGER COMMENT' ) {
|
||||
my $word = "word";
|
||||
my $rule = $word;
|
||||
if ($score ne '') {
|
||||
$rule .= ' '.$score;
|
||||
}
|
||||
if ($group ne '') {
|
||||
$rule .= ' '.$group;
|
||||
}
|
||||
if ($comment ne '') {
|
||||
$rule .= ' # '.$comment;
|
||||
}
|
||||
my ($rword, $rscore, $rcomment, @rgroups) = $kw->readLine($rule);
|
||||
ok($rword eq $word, "Word '$word' found for '$rule'");
|
||||
ok($rscore eq $score, "Score '$score' found for '$rule'") if ($score ne '');
|
||||
ok(join(' ',@rgroups) eq $group, "Groups '$group' found for '$rule'") if ($group ne '');
|
||||
ok(join(' ',@rgroups) eq 'LOCAL GLOBAL', "Inferred groups 'LOCAL GLOBAL' found for '$rule'") if ($group eq '');
|
||||
ok($rcomment eq $comment, "Comment '$comment' found for '$rule'") if ($comment ne '');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Check for bad formatting
|
||||
|
||||
my %bad = (
|
||||
'' => 'Ignore empty line',
|
||||
'# comment' => "Ignore line starting with comment",
|
||||
'word 1 2' => "Ignore line with multiple scores",
|
||||
'2 bad' => "Ignore line with score first",
|
||||
);
|
||||
foreach my $input (keys(%bad)) {
|
||||
ok(!$kw->readLine($input), "$bad{$input} ($input)");
|
||||
}
|
||||
|
||||
done_testing();
|
|
@ -0,0 +1,283 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Basic test to ensure files can be found and read. Verification will come later
|
||||
|
||||
use Test::More;
|
||||
|
||||
use lib 'lib/';
|
||||
use Mail::SpamAssassin::KeywordRuleGenerator;
|
||||
|
||||
my $id = '04';
|
||||
|
||||
my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' => $id, 'debug' => 0 } );
|
||||
|
||||
my @files = ( 't/04_rules0.cf', 't/04_rules1.cf' );
|
||||
my @failed = @{$kw->readAll( @files )};
|
||||
ok(!scalar(@failed), "Load 'rules' hash with readAll");
|
||||
|
||||
my $expected = getExpected();
|
||||
my ( $missing, $extra, $incorrect ) = 0;
|
||||
use Data::Dump;
|
||||
foreach my $file (keys(%{$expected})) {
|
||||
if ($file eq 'GLOBAL') {
|
||||
my ($m, $e) = compareGroups(
|
||||
$expected->{$file},
|
||||
$kw->{'rules'}->{$file}
|
||||
);
|
||||
print("Missing $m words\n") if ($m);
|
||||
print("$e extra words\n") if ($e);
|
||||
$missing += $m;
|
||||
$extra += $e;
|
||||
} else {
|
||||
foreach my $group (keys(%{$expected->{$file}})) {
|
||||
if ($group eq 'SCORED') {
|
||||
my ($m, $e, $i) = compareScores(
|
||||
$expected->{$file}->{$group},
|
||||
$kw->{'rules'}->{$file}->{$group}
|
||||
);
|
||||
$missing += $m;
|
||||
$extra += $e;
|
||||
$incorrect += $i;
|
||||
} elsif ($group eq 'COMMENTS') {
|
||||
my ($m, $e, $i) = compareScores(
|
||||
$expected->{$file}->{$group},
|
||||
$kw->{'rules'}->{$file}->{$group}
|
||||
);
|
||||
$missing += $m;
|
||||
$extra += $e;
|
||||
$incorrect += $i;
|
||||
} else {
|
||||
my ($m, $e) = compareGroups(
|
||||
$expected->{$file}->{$group},
|
||||
$kw->{'rules'}->{$file}->{$group}
|
||||
);
|
||||
$missing += $m;
|
||||
$extra += $e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ok ($missing == 0, "No expected rules are missing");
|
||||
ok ($extra == 0, "No extra rules are found");
|
||||
ok ($incorrect == 0, "No incorrect scores are found");
|
||||
|
||||
done_testing();
|
||||
|
||||
sub compareGroups
|
||||
{
|
||||
my $expect = shift;
|
||||
my $loaded = shift;
|
||||
|
||||
my @e = sort(@{$expect});
|
||||
my @l = sort(@{$loaded});
|
||||
my ($missing, $extra) = (0, 0);
|
||||
|
||||
while (scalar(@e)) {
|
||||
unless (scalar(@l)) {
|
||||
print("extra words @l\n");
|
||||
$extra += scalar(@e);
|
||||
last;
|
||||
}
|
||||
if ($e[0] eq $l[0]) {
|
||||
shift(@e);
|
||||
shift(@l);
|
||||
next();
|
||||
}
|
||||
if ($e[0] lt $l[0]) {
|
||||
print("Extra word $e[0]\n");
|
||||
$extra++;
|
||||
shift(@e);
|
||||
next;
|
||||
}
|
||||
if ($e[0] gt $l[0]) {
|
||||
print("Missing word $l[0]\n");
|
||||
$missing++;
|
||||
shift(@l);
|
||||
next;
|
||||
}
|
||||
}
|
||||
if (scalar(@e)) {
|
||||
print("Missing ".scalar(@e)." at the end of parsing\n");
|
||||
$missing += scalar(@e);
|
||||
last;
|
||||
}
|
||||
|
||||
return ($missing, $extra);
|
||||
}
|
||||
|
||||
sub compareScores
|
||||
{
|
||||
my $expect = shift;
|
||||
my $loaded = shift;
|
||||
|
||||
my %remaining = %{$loaded};
|
||||
my ($missing, $extra, $incorrect) = (0, 0, 0);
|
||||
foreach my $word (keys(%$expect)) {
|
||||
if (!defined($loaded->{$word})) {
|
||||
print("Missing score assignment for $word\n");
|
||||
$missing++;
|
||||
} elsif ($expect->{$word} != $loaded->{$word}) {
|
||||
print("Incorrect score assignment for $word\n");
|
||||
$incorrect++;
|
||||
} else {
|
||||
delete($remaining{$word});
|
||||
}
|
||||
}
|
||||
$extra = scalar(keys(%remaining));
|
||||
print("Extra score assignment for $word\n") foreach (@{$extra});
|
||||
|
||||
return ($missing, $extra, $incorrect);
|
||||
}
|
||||
|
||||
sub getExpected
|
||||
{
|
||||
my %expected = (
|
||||
'GLOBAL' => [
|
||||
'lorem',
|
||||
'ipsum',
|
||||
'dolor',
|
||||
'sit',
|
||||
'amet',
|
||||
'consectetur',
|
||||
'adipiscing',
|
||||
'elit',
|
||||
'sed',
|
||||
'do',
|
||||
'eiusmod',
|
||||
'tempor',
|
||||
'minim',
|
||||
'veniam',
|
||||
'quis',
|
||||
'nostrud',
|
||||
'exercitation',
|
||||
'ullamco',
|
||||
'laboris',
|
||||
'nisi',
|
||||
'ut',
|
||||
'aliquip',
|
||||
'ex',
|
||||
'ea',
|
||||
'commodo',
|
||||
'consequat',
|
||||
'duis',
|
||||
'dolore',
|
||||
'eu',
|
||||
'fugiat',
|
||||
],
|
||||
'50_04_T_04_RULES0.cf' => {
|
||||
'SCORED' => {
|
||||
'lorem' => 1,
|
||||
'sit' => 1,
|
||||
'adipiscing' => 1,
|
||||
'do' => 1,
|
||||
'incididunt' => 1,
|
||||
'et' => 1,
|
||||
'aliqua' => 1,
|
||||
'minim' => 1,
|
||||
},
|
||||
'COMMENTS' => {
|
||||
'lorem' => '1, 1, 1, 1',
|
||||
'ipsum' => '1, 1, 1, 0',
|
||||
'dolor' => '1, 1, 1, undef',
|
||||
'sit' => '1, 1, 0, 1',
|
||||
'amet' => '1, 1, 0, 0',
|
||||
'consectetur' => '1, 1, 0, undef',
|
||||
'adipiscing' => '1, 0, 1, 1',
|
||||
'elit' => '1, 0, 1, 0',
|
||||
'sed' => '1, 0, 1, undef',
|
||||
'do' => '1, 0, 0, 1',
|
||||
'eiusmod' => '1, 0, 0, 0',
|
||||
'tempor' => '1, 0, 0, undef',
|
||||
'incididunt' => '0, 1, 1, 1',
|
||||
'ut' => '0, 1, 1, 0',
|
||||
'labore' => '0, 1, 1, undef',
|
||||
'et' => '0, 1, 0, 1',
|
||||
'dolore' => '0, 1, 0, 0',
|
||||
'magna' => '0, 1, 0, undef',
|
||||
'aliqua' => '0, 0, 1, 1',
|
||||
'enim' => '0, 0, 1, 0',
|
||||
'ad' => '0, 0, 1, undef',
|
||||
'minim' => '0, 0, 0, 1',
|
||||
'veniam' => '0, 0, 0, 0',
|
||||
'quis' => '0, 0, 0, undef',
|
||||
},
|
||||
'LOCAL' => [
|
||||
'lorem',
|
||||
'ipsum',
|
||||
'dolor',
|
||||
'sit',
|
||||
'amet',
|
||||
'consectetur',
|
||||
'incididunt',
|
||||
'ut',
|
||||
'labore',
|
||||
'et',
|
||||
'dolore',
|
||||
'magna',
|
||||
'minim',
|
||||
'veniam',
|
||||
'quis',
|
||||
],
|
||||
'group' => [
|
||||
'lorem',
|
||||
'ipsum',
|
||||
'dolor',
|
||||
'adipiscing',
|
||||
'elit',
|
||||
'sed',
|
||||
'incididunt',
|
||||
'ut',
|
||||
'labore',
|
||||
'aliqua',
|
||||
'enim',
|
||||
'ad',
|
||||
]
|
||||
},
|
||||
'50_04_T_04_RULES1.cf' => {
|
||||
'SCORED' => {
|
||||
'nostrud' => 1,
|
||||
'laboris' => 1,
|
||||
'aliquip' => 1,
|
||||
'consequat' => 1,
|
||||
'aute' => 1,
|
||||
'in' => 1,
|
||||
'velit' => 1,
|
||||
'dolore' => 1,
|
||||
},
|
||||
'LOCAL' => [
|
||||
'nostrud',
|
||||
'exercitation',
|
||||
'ullamco',
|
||||
'laboris',
|
||||
'nisi',
|
||||
'ut',
|
||||
'aute',
|
||||
'irure',
|
||||
'dolor',
|
||||
'in',
|
||||
'reprehenderit',
|
||||
'volupatate',
|
||||
'dolore',
|
||||
'eu',
|
||||
'fugiat',
|
||||
],
|
||||
'group' => [
|
||||
'nostrud',
|
||||
'exercitation',
|
||||
'ullamco',
|
||||
'aliquip',
|
||||
'ex',
|
||||
'ea',
|
||||
'aute',
|
||||
'irure',
|
||||
'dolor',
|
||||
'velit',
|
||||
'esse',
|
||||
'cillum',
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
return \%expected;
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
# Comments to be ignored. As with following blank line.
|
||||
# Must test every combination of:
|
||||
# GLOBAL, LOCAL, GROUP, SCORE
|
||||
|
||||
Lorem 1 GLOBAL LOCAL group # 1, 1, 1, 1
|
||||
ipsum 0 GLOBAL LOCAL group # 1, 1, 1, 0
|
||||
dolor GLOBAL LOCAL group # 1, 1, 1, undef
|
||||
sit 1 GLOBAL LOCAL # 1, 1, 0, 1
|
||||
amet 0 GLOBAL LOCAL # 1, 1, 0, 0
|
||||
consectetur GLOBAL LOCAL # 1, 1, 0, undef
|
||||
adipiscing 1 GLOBAL group # 1, 0, 1, 1
|
||||
elit 0 GLOBAL group # 1, 0, 1, 0
|
||||
sed GLOBAL group # 1, 0, 1, undef
|
||||
do 1 GLOBAL # 1, 0, 0, 1
|
||||
eiusmod 0 GLOBAL # 1, 0, 0, 0
|
||||
tempor GLOBAL # 1, 0, 0, undef
|
||||
incididunt 1 LOCAL group # 0, 1, 1, 1
|
||||
ut 0 LOCAL group # 0, 1, 1, 0
|
||||
labore LOCAL group # 0, 1, 1, undef
|
||||
et 1 LOCAL # 0, 1, 0, 1
|
||||
dolore 0 LOCAL # 0, 1, 0, 0
|
||||
magna LOCAL # 0, 1, 0, undef
|
||||
aliqua 1 group # 0, 0, 1, 1
|
||||
enim 0 group # 0, 0, 1, 0
|
||||
ad group # 0, 0, 1, undef
|
||||
minim 1 # 0, 0, 0, 1
|
||||
veniam 0 # 0, 0, 0, 0
|
||||
quis # 0, 0, 0, undef
|
|
@ -0,0 +1,24 @@
|
|||
nostrud 1 GLOBAL LOCAL group
|
||||
exercitation 0 GLOBAL LOCAL group
|
||||
ullamco GLOBAL LOCAL group
|
||||
laboris 1 GLOBAL LOCAL
|
||||
nisi 0 GLOBAL LOCAL
|
||||
ut GLOBAL LOCAL
|
||||
aliquip 1 GLOBAL group
|
||||
ex 0 GLOBAL group
|
||||
ea GLOBAL group
|
||||
commodo 0 GLOBAL
|
||||
consequat 1 GLOBAL
|
||||
Duis GLOBAL
|
||||
aute 1 LOCAL group
|
||||
irure 0 LOCAL group
|
||||
dolor LOCAL group
|
||||
in 1 LOCAL
|
||||
reprehenderit 0 LOCAL
|
||||
volupatate LOCAL
|
||||
velit 1 group
|
||||
esse 0 group
|
||||
cillum group
|
||||
dolore 1
|
||||
eu 0
|
||||
fugiat
|
Loading…
Reference in New Issue