diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..b32914e --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,93 @@ +use strict; +use warnings; + +use 5.006; +use ExtUtils::MakeMaker; + +#if ( $^O eq 'MSWin32' ) { + #die "AnyEvent::Sway cannot be used on win32 (unix sockets are missing)"; +#} + +my %meta = ( + name => 'Mail-SpamAssassin-KeywordRuleGenerator', + author => 'John Mertz, C<< >>', + license => ['apache_2_0'], + 'meta-spec' => { version => 2 }, + resources => { + repository => { + url => 'git://git.john.me.tz:233/jpm/Mail-SpamAssassin-KeywordRuleGenerator', + web => 'https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGenerator', + type => 'git', + }, + bugtracker => { + web => 'https://git.john.me.tz/jpm/Mail-SpamAssassin-KeywordRuleGenerator/issues', + }, + homepage => 'https://john.me.tz/projects/article.php?topic=Mail-SpamAssassin-KeywordRuleGenerator', + license => ['https://www.apache.org/licenses/LICENSE-2.0.html'], + }, +); + +my %requirements = ( + configure_requires => { + 'ExtUtils::MakeMaker' => 6.36, + }, + build_requires => { + 'ExtUtils::MakeMaker' => 6.36 + }, + runtime_requires => { + 'Mail::SpamAssassin' => 0, + }, + test_requires => { + 'Test::More' => 0.80, + }, +); + +my %merged_requirements = ( + 'Mail::SpamAssassin' => 0, + 'Test::More' => 0.80, +); + +$meta{prereqs}{configure}{requires} = $requirements{configure_requires}; +$meta{prereqs}{build}{requires} = $requirements{build_requires}; +$meta{prereqs}{runtime}{requires} = $requirements{runtime_requires}; +$meta{prereqs}{test}{requires} = $requirements{test_requires}; + +my %MM_Args = ( + AUTHOR => 'John Mertz', + NAME => 'Mail::SpamAssassin::KeywordRuleGenerator', + DISTNAME => 'Mail-SpamAssassin-KeywordRuleGenerator', + EXE_FILES => [], + MIN_PERL_VERSION => '5.006', + VERSION_FROM => 'lib/Mail/SpamAssassin/KeywordRuleGenerator.pm', + ABSTRACT_FROM => 'lib/Mail/SpamAssassin/KeywordRuleGenerator.pm', + test => { + TESTS => 't/*.t', + }, +); + +sub is_eumm { + eval { ExtUtils::MakeMaker->VERSION( $_[0] ) }; +} + +is_eumm(6.30) and $MM_Args{LICENSE} = $meta{license}[0]; +is_eumm(6.47_01) or delete $MM_Args{MIN_PERL_VERSION}; +is_eumm(6.52) + and $MM_Args{CONFIGURE_REQUIRES} = $requirements{configure_requires}; + +is_eumm(6.57_02) and !is_eumm(6.57_07) and $MM_Args{NO_MYMETA} = 1; + +if ( is_eumm(6.63_03) ) { + %MM_Args = ( + %MM_Args, + TEST_REQUIRES => $requirements{test_requires}, + BUILD_REQUIRES => $requirements{build_requires}, + PREREQ_PM => $requirements{runtime_requires}, + ); +} +else { + $MM_Args{PREREQ_PM} = \%merged_requirements; +} +unless ( -f 'META.yml' ) { + $MM_Args{META_ADD} = \%meta; +} +WriteMakefile(%MM_Args); diff --git a/bin/generate-keyword-rules b/bin/generate-keyword-rules new file mode 100755 index 0000000..6f5e506 --- /dev/null +++ b/bin/generate-keyword-rules @@ -0,0 +1,34 @@ +#!/usr/bin/perl + +# Create a directory called 'output' with generated files for all config files in the $PWD + +use lib '../lib/'; +use Mail::SpamAssassin::KeywordRuleGenerator; + +use strict; +use warnings; + +my $id = 'MC'; +my $dir = 'output'; + +# setup +my $kw = Mail::SpamAssassin::KeywordRuleGenerator->new( { 'id' =>$id, 'debug' => 0, 'joinScores' => 0 , 'dir' => $dir } ); + +# clean +unlink(glob("$dir/*")); +rmdir($dir); + +# get files in PWD +my @files = glob("./*"); +my @clean; + +$kw->createDir($dir); + +my @failed = $kw->readAll( @files ); +die scalar(@failed)." error(s) - ".join(', ', @failed)."\n" if (scalar(@failed)); +$kw->writeAll(); + +use Mail::SpamAssassin; +my $sa = Mail::SpamAssassin->new( { 'site_rules_filename' => "./$dir", 'pre_config_text' => "loadplugin Mail::SpamAssassin::Plugin::Check" } ); +my $fail = $sa->lint_rules(); +die "Failed to verify with SpamAssassin: $fail" if ($fail); diff --git a/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm b/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm index 4144db2..7264403 100644 --- a/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm +++ b/lib/Mail/SpamAssassin/KeywordRuleGenerator.pm @@ -48,20 +48,20 @@ The sets of keywords can be broken up into groups (see GROUPS). Requires C executable and the following Perl modules - To::Be::Determined + Mail::SpamAssassin =cut -#use strict; +use strict; use warnings; =head2 FILES -There are built-in functions to ingest formatted list files. See C +There are built-in functions to ingest formatted list files. See C method. By default, the output file name and the rules therein will use a stripped and capitalized version of those filenames. - $kw->readFiles( 'example.txt' ); + $kw->readAll( 'example.cf', 'example.txt' ); $kw->writeAll(); This will creates rules formatted like: @@ -70,20 +70,44 @@ This will creates rules formatted like: and will output to the file: - 70_id_example.cf + 70_ID_EXAMPLE.cf + +Leading './' or $PWD will be stripped, but other paths will be included with '_' +substituted '/'. For example: + + $kw->readAll ( './example.cf', '/to/pwd/example2.cf', 'dir/example3.cf' ); + +will produce: + + 70_KW_EXAMPLE.cf + 70_KW_EXAMPLE2.cf + 70_KW_DIR_EXAMPLE3.cf + +If there are any rules in the 'GLOBAL' group, or if in C mode, +a file with just the priority and keyword will be produced with the select rules +or all rules, respectively. This will look like: + + 70_KW.cf See the C methods for more information on this formatting. Also see the C method for discussion of the 'id'. -Finally, a like file: +Finally, unless in C mode, each configuration will have a matching +scores file like: - 71_id_scores.cf + 70_KW_EXAMPLE_SCORES.cf + 70_KW_SCORES.cf -Will be created with the scores for all of the rules in the prior file(s). The -C variable is true by default, creating the above file. If made -false, then will determine a unique score file will be created for each file. -Alternatively, C can be set to include the scores directly in the -config file with the rule definitions. +In C mode, the scores will be directly appended in the same config +file with the rule definitions. + +Note that the global files may have an incremented priority since it requires +all meta rules from the other files be defined first. For example, the above +would be left with 70 for the global priority, however if the C were lower +in the alphabet, the global files would be incremented: + + 70_AB_EXAMPLES.cf + 71_AB.cf =head2 RULES @@ -91,22 +115,22 @@ Two types of rules are created. One is a set of standalone keyword rules when a score is provided for those words. This will create a meta rule for a simple match in either the headers or body -header __ID_FILE_WORD_H /\bword\b/i -body __ID_FILE_WORD_B /\bword\b/i -meta __ID_FILE_WORD ( __ID_FILE_WORD_H || __ID_FILE_WORD_B ) -meta ID_FILE_WORD __ID_FILE_WORD -describe ID_FILE_WORD Keyword 'word' found -score ID_FILE_WORD 1 +header __ID_FILE_WORD_H /\bword\b/i +body __ID_FILE_WORD_B /\bword\b/i +meta __ID_FILE_WORD ( __ID_FILE_WORD_H || __ID_FILE_WORD_B ) +meta ID_FILE_WORD __ID_FILE_WORD +describe ID_FILE_WORD Keyword 'word' found +score ID_FILE_WORD 1 The other is a set of counters for each group. These will add the same first three component rules (or co-opt the ones already created for the standalone rules). It will then add a rule for each number of possible matches within that group: -meta ID_FILE_GROUP_1 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 0 -describe ID_FILE_GROUP_1 1 match in keyword group 'GROUP' -meta ID_FILE_GROUP_2 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 1 -describe ID_FILE_GROUP_2 2 matches in keyword group 'GROUP' +meta ID_FILE_GROUP_1 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 0 +describe ID_FILE_GROUP_1 1 match in keyword group 'GROUP' +meta ID_FILE_GROUP_2 ( __ID_FILE_WORD + __ID_FILE_OTHER ) > 1 +describe ID_FILE_GROUP_2 2 matches in keyword group 'GROUP' =head1 VERSION @@ -126,40 +150,40 @@ use base 'Exporter'; our @EXPORT = qw( new ); our %EXPORT_TAGS = ( - 'all' => [ - 'new', - 'getId', - 'setId', - 'getPriority', - 'setPriority', - 'getOutfile', - 'setOutfile', - 'getScoresOutfile', - 'setScoresOutfile', - 'getGlobalOutfile', - 'setGlobalOutfile', - 'getGlobalScoresOutfile', - 'setGlobalScoresOutfile', - 'getOutput', - 'getScoresOutput', - 'getGlobalOutput', - 'getGlobalScoresOutput', - 'clearFile', - 'getFiles', - 'nextFile', - 'readFile', - 'getFile', - 'setFile', - 'createDir', - 'cleanDir', - 'generateMetas', - 'generateScored', - 'generateGroups', - 'generateGlobals', - 'generateAll', - 'writeFile', - 'writeAll' - ] + 'all' => [ + 'new', + 'getId', + 'setId', + 'getPriority', + 'setPriority', + 'getOutfile', + 'setOutfile', + 'getScoresOutfile', + 'setScoresOutfile', + 'getGlobalOutfile', + 'setGlobalOutfile', + 'getGlobalScoresOutfile', + 'setGlobalScoresOutfile', + 'getOutput', + 'getScoresOutput', + 'getGlobalOutput', + 'getGlobalScoresOutput', + 'clearFile', + 'nextFile', + 'readFile', + 'getFile', + 'setFile', + 'createDir', + 'cleanDir', + 'generateMetas', + 'generateScored', + 'generateGroups', + 'generateGlobals', + 'generateAll', + 'writeFile', + 'writeAll', + 'verifyOutput' + ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } ); @@ -190,8 +214,8 @@ the object. See various 'get' and 'set' functions. sub new { - my ($class, $args) = @_; - my $self = $args; + my ($class, $args) = @_; + my $self = $args; =head3 Initial attributes @@ -202,63 +226,63 @@ the built-in functions (other than their associated getters) are: =head3 id id scalar - A short identifier which will be appending to all output files to allow - for easier recognition of the files' sources. Default: 'KW' + A short identifier which will be appending to all output files to allow + for easier recognition of the files' sources. Default: 'KW' =cut - $self->{'id'} //= 'KW'; + $self->{'id'} //= 'KW'; =head3 priority priority scalar - Typically a number used to define the priority of the rules. - SpamAssassin will read configuration files in alphabetical order and the - last iteration of a configuration for the same rule will be used. This - means that the files read last will overwrite those read earlier. By - convention the number is used for easy sorting. Any leading aphabetical - character will be ordered after all numbers, and so given high priority. - Default: '50' + Typically a number used to define the priority of the rules. + SpamAssassin will read configuration files in alphabetical order and the + last iteration of a configuration for the same rule will be used. This + means that the files read last will overwrite those read earlier. By + convention the number is used for easy sorting. Any leading aphabetical + character will be ordered after all numbers, and so given high priority. + Default: '50' =cut - $self->{'priority'} //= 50; + $self->{'priority'} //= 50; =head3 debug debug boolean - Enable (1) or disable (0) debugging output. Default: 0 + Enable (1) or disable (0) debugging output. Default: 0 =cut - $self->{'debug'} //= 0; + $self->{'debug'} //= 0; =head3 singleOutfile singleOutfile boolean - Indicates whether output rules should all be added to a single file (1), - or to one file per input file (0). Note that this still requires - 'joinScores' to have one file total. Default: 0 + Indicates whether output rules should all be added to a single file (1), + or to one file per input file (0). Note that this still requires + 'joinScores' to have one file total. Default: 0 =cut - $self->{'singleOutfile'} //= 0; + $self->{'singleOutfile'} //= 0; =head3 joinScores joinScores bootlean - Indicates whether to include the scores in the same file as their - associated rule definitions (1) or in a second file on their own (0). - The second file will simply append '_SCORES' to the file name (prior to - the '.cf'), unless overridden by C<$kw->setScoreFile($path)> or a second - path argument in C<$kw->setOutfile($rulePath, $scorePath)>. Default: 1 + Indicates whether to include the scores in the same file as their + associated rule definitions (1) or in a second file on their own (0). + The second file will simply append '_SCORES' to the file name (prior to + the '.cf'), unless overridden by C<$kw->setScoreFile($path)> or a second + path argument in C<$kw->setOutfile($rulePath, $scorePath)>. Default: 1 =cut - $self->{'joinScores'} //= 1; + $self->{'joinScores'} //= 1; - bless $self, $class; - return $self; + bless $self, $class; + return $self; } =head2 C<$kw->getId()> @@ -269,11 +293,11 @@ Getter for C<$kw->{'id'}>. 'id' is used for top-level rule names. sub getId { - my $self = shift; + my $self = shift; - if (defined($self->{'id'})) { - return $self->{'id'}; - } + if (defined($self->{'id'})) { + return $self->{'id'}; + } } =head2 C<$kw->setId()> @@ -284,16 +308,16 @@ Setter for C<$kw->{'id'}> sub setId { - my $self = shift; - my $id = shift; + my $self = shift; + my $id = shift; - if (defined($id)) { - $id = uc($id); - return "Must start with a letter" unless ($id =~ m/^[A-Z]/); - $self->{'id'} = $id || return "Failed to set $id"; - } else { - return "No ID provided\n"; - } + if (defined($id)) { + $id = uc($id); + return "Must start with a letter" unless ($id =~ m/^[A-Z]/); + $self->{'id'} = $id || return "Failed to set $id"; + } else { + return "No ID provided\n"; + } } =head2 C<$kw->getPriority()>. @@ -307,11 +331,11 @@ at the end. sub getPriority { - my $self = shift; + my $self = shift; - if (defined($self->{'priority'})) { - return $self->{'priority'}; - } + if (defined($self->{'priority'})) { + return $self->{'priority'}; + } } =head2 C<$kw->setPriority()> @@ -322,15 +346,14 @@ Setter for C<$kw->{'priority'}>. sub setPriority { - my $self = shift; - my $priority = shift; + my $self = shift; + my $priority = shift; - if (defined($priority)) { - $path = uc($priority); - $self->{'priority'} = $priority || return "Failed to set priority: $priority"; - } else { - return "No 'priority' provided\n"; - } + if (defined($priority)) { + $self->{'priority'} = $priority || return "Failed to set priority: $priority"; + } else { + return "No 'priority' provided\n"; + } } =head2 C<$kw->getFile()> @@ -342,12 +365,12 @@ processed. Not to be confused with C or C. sub getFile { - my $self = shift; + my $self = shift; - if (defined($self->{'file'})) { - return $self->{'file'}; - } - return undef; + if (defined($self->{'file'})) { + return $self->{'file'}; + } + return undef; } =head2 C<$kw->setFile()> @@ -358,16 +381,16 @@ Setter for C<$kw->{'file'}>. sub setFile { - my $self = shift; - my $file = shift; + my $self = shift; + my $file = shift; - if (defined($file)) { - $self->{'file'} = $file; - $self->setOutfile(); - $self->setScoresOutfile(); - } else { - delete($self->{'file'}); - } + if (defined($file)) { + $self->{'file'} = $file; + $self->setOutfile(); + $self->setScoresOutfile(); + } else { + delete($self->{'file'}); + } } =head2 C<$kw->getOutfile()> @@ -381,12 +404,12 @@ if available). sub getOutfile { - my $self = shift; - my $file = shift || $self->{'file'}; + my $self = shift; + my $file = shift || $self->{'file'}; - if (defined($self->{'filemap'}->{$file})) { - return $self->{'outfile'}; - } + if (defined($self->{'filemap'}->{$file})) { + return $self->{'outfile'}; + } } =head2 C<$kw->setOutfile()> @@ -398,28 +421,28 @@ otherwise the path is constructed from the existing attributes. sub setOutfile { - my $self = shift; - my $path = shift; + my $self = shift; + my $path = shift; - if (defined($path)) { - $self->{'filemap'}->{$self->{'file'}} = $path || return "Failed to set $path"; + if (defined($path)) { + $self->{'filemap'}->{$self->{'file'}} = $path || return "Failed to set $path"; + } else { + if ($self->{'singleOutfile'}) { + $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} . + '_' . uc($self->{'id'}) . + '.cf'; } else { - if ($self->{'singleOutfile'}) { - $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} . - '_' . uc($self->{'id'}) . - '.cf'; - } else { - my $file = $self->{'file'}; - $file =~ s/\//_/g; # Change dir slashes to _ - $file =~ s/(\.[^\.]*)$//g; # Remove extensions - $file = uc($file); # Convert to uppercase for rule names + my $file = $self->{'file'}; + $file =~ s/\//_/g; # Change dir slashes to _ + $file =~ s/(\.[^\.]*)$//g; # Remove extensions + $file = uc($file); # Convert to uppercase for rule names - $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} . - '_' . uc($self->{'id'}) . - '_' . $file . - '.cf'; - } + $self->{'filemap'}->{$self->{'file'}} = $self->{'dir'}."/".$self->{'priority'} . + '_' . uc($self->{'id'}) . + '_' . $file . + '.cf'; } + } } =head2 C<$kw->getScoresOutfile()> @@ -430,11 +453,11 @@ Getter for the output path for scores for the current C<$kw->{'file'}>. sub getScoresOutfile { - my $self = shift; - my $file = shift || $self->{'file'}; + my $self = shift; + my $file = shift || $self->{'file'}; - $self->setScoresOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); - return $self->{'filemap'}->{$file."_SCORES"}; + $self->setScoresOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); + return $self->{'filemap'}->{$file."_SCORES"}; } =head2 C<$kw->setScoresOutfile()> @@ -447,38 +470,38 @@ from the existing attributes. sub setScoresOutfile { - my $self = shift; - my $path = shift; + my $self = shift; + my $path = shift; - if (defined($path)) { - $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $path || return "Failed to set $path"; - return undef; - } - if ($self->{'joinScores'}) { - if ($self->{'singleOutfile'}) { - $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); - $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL'}; - } else { - $self->setOutfile() unless (defined($self->{'filemap'}->{$self->{'file'}})); - $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{$self->{'file'}}; - } - return undef; + if (defined($path)) { + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $path || return "Failed to set $path"; + return undef; + } + if ($self->{'joinScores'}) { + if ($self->{'singleOutfile'}) { + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL'}; } else { - if ($self->{'singleOutfile'}) { - $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); - $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL_SCORES'}; - return undef; - } + $self->setOutfile() unless (defined($self->{'filemap'}->{$self->{'file'}})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{$self->{'file'}}; } - my $file = $self->{'file'}; - $file =~ s/\//_/g; # Change dir slashes to _ - $file =~ s/(\.[^\.]*)$//g; # Remove extensions - $file = uc($file); # Convert to uppercase for rule names - $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'dir'}."/".$self->{'priority'} . - '_' . uc($self->{'id'}) . - '_' . $file . - '_SCORES' . - '.cf'; + return undef; + } else { + if ($self->{'singleOutfile'}) { + $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'filemap'}->{'GLOBAL_SCORES'}; + return undef; + } + } + my $file = $self->{'file'}; + $file =~ s/\//_/g; # Change dir slashes to _ + $file =~ s/(\.[^\.]*)$//g; # Remove extensions + $file = uc($file); # Convert to uppercase for rule names + $self->{'filemap'}->{$self->{'file'}."_SCORES"} = $self->{'dir'}."/".$self->{'priority'} . + '_' . uc($self->{'id'}) . + '_' . $file . + '_SCORES' . + '.cf'; } =head2 C<$kw->getGlobalOutfile();> @@ -491,10 +514,10 @@ fails, then nothing will be returned. sub getGlobalOutfile { - my $self = shift; - return $self->{'filemap'}->{'GLOBAL'} if (defined($self->{'filemap'}->{'GLOBAL'})); - my $ret = $self->setGlobalOutfile(); - return $self->{'filemap'}->{'GLOBAL'} unless ($ret); + my $self = shift; + return $self->{'filemap'}->{'GLOBAL'} if (defined($self->{'filemap'}->{'GLOBAL'})); + my $ret = $self->setGlobalOutfile(); + return $self->{'filemap'}->{'GLOBAL'} unless ($ret); } =head2 C<$kw->setGlobalOutfile($file);> @@ -504,8 +527,8 @@ alphabetically after the last file with 'meta' rules. C<$file> can be used to bypass that check, but it might lead to rules that do not work. Will try to select a filename as close to the existing output files as possible. -First it will simply duplicate a name if it is in C mode. Then, -it will try to use the base name without the C<$file> portion. This will +First it will simply duplicate a name if it is in C mode. Then, +it will try to use the base name without the C<$file> portion. This will generally not work because the '_' before the file will come after the dot without: @@ -524,36 +547,36 @@ If none of these techniques work, it will return an error. sub setGlobalOutfile { - my $self = shift; - my $path = shift; + my $self = shift; + my $path = shift; - if (defined($path)) { - $self->{'filename'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]}; - return undef; - } - if ($self->{'singleOutfile'}) { - $self->{'filename'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]}; - return undef; - } - my $last; - foreach my $file (keys(%{$self->{'filemap'}})) { - $last = $file if (!defined($last) || $file gt $last); - } - my $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId().".cf"; - if (defined($last) && $file gt $last) { - $self->{'filename'}->{'GLOBAL'} = $file; - return undef; - } - if ($self->getPriority < 99) { - $self->{'filemap'}->{'GLOBAL'} = $self->getDir().'/'.($self->getPriority()+1)."_".$self->getId().".cf"; - return undef; - } - $file =~ s/_/__/ unless ($file gt $last); - if ($file gt $last) { - $self->{'filename'}->{'GLOBAL'} = $file; - return undef; - } - return ("Cannot determine a valid GLOBAL output file\n"); + if (defined($path)) { + $self->{'filemap'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]}; + return undef; + } + if ($self->{'singleOutfile'}) { + $self->{'filemap'}->{'GLOBAL'} = $self->{'filemap'}->{(keys(%{$self->{'filemap'}}))[0]}; + return undef; + } + my $last; + foreach my $file (keys(%{$self->{'filemap'}})) { + $last = $file if (!defined($last) || $file gt $last); + } + my $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId().".cf"; + if (defined($last) && $file gt $last) { + $self->{'filemap'}->{'GLOBAL'} = $file; + return undef; + } + if ($self->getPriority < 99) { + $self->{'filemap'}->{'GLOBAL'} = $self->getDir().'/'.($self->getPriority()+1)."_".$self->getId().".cf"; + return undef; + } + $file =~ s/_/__/ unless ($file gt $last); + if ($file gt $last) { + $self->{'filemap'}->{'GLOBAL'} = $file; + return undef; + } + return ("Cannot determine a valid GLOBAL output file\n"); } =head2 C<$kw->getGlobalScoresOutfile();> @@ -565,10 +588,10 @@ fails, then nothing will be returned. =cut sub getGlobalScoresOutfile { - my $self = shift; - return $self->{'filemap'}->{'GLOBAL_SCORES'} if (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); - my $ret = $self->setGlobalScoresOutfile(); - return $self->{'filemap'}->{'GLOBAL_SCORES'} unless ($ret); + my $self = shift; + return $self->{'filemap'}->{'GLOBAL_SCORES'} if (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + my $ret = $self->setGlobalScoresOutfile(); + return $self->{'filemap'}->{'GLOBAL_SCORES'} unless ($ret); } =head2 C<$kw->setGlobalScoresOutfile($file);> @@ -588,46 +611,46 @@ techniques work, it will return an error. sub setGlobalScoresOutfile { - my $self = shift; - my $path = shift; + my $self = shift; + my $path = shift; - if (defined($path)) { - $self->{'filemap'}->{'GLOBAL_SCORES'} = $path; - return undef; - } - $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); - if ($self->{'joinScores'}) { - $self->{'filemap'}->{'GLOBAL_SCORES'} = $self->{'filemap'}->{'GLOBAL'}; - return undef; - } - if (defined($self->{'filemap'}->{'GLOBAL_SCORES'}) && $self->{'filemap'}->{'GLOBAL_SCORES'} gt $self->{'filemap'}->{'GLOBAL'}) { - return undef; - } - my $file = $self->{'filemap'}->{'GLOBAL'}; - $file =~ s/\.cf$/_SCORES.cf/; - if ($file gt $self->{'filemap'}->{'GLOBAL'}) { - $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; - return undef; - } - $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId()."_SCORES.cf"; - if ($file gt $self->{'filemap'}->{'GLOBAL'}) { - $self->{'filename'}->{'GLOBAL_SCORES'} = $file; - return undef; - } - my $gpriority = $self->{'filename'}->{'GLOBAL'}; - $gpriority =~ s/^(\d\d).*/$1/; - $file = $self->getDir().'/'.($gpriority+1)."_".$self->getId()."_SCORES.cf" if ($gpriority < 99); - if ($file gt $self->{'filemap'}->{'GLOBAL'}) { - $self->{'filename'}->{'GLOBAL_SCORES'} = $file; - return undef; - } - $file = $self->{'filename'}->{'GLOBAL'}; - $file =~ s/_/__/; - if ($file gt $self->{'filemap'}->{'GLOBAL'}) { - $self->{'filename'}->{'GLOBAL_SCORES'} = $file; - return undef; - } - return ("Cannot determine a valid GLOBAL output file\n"); + if (defined($path)) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $path; + return undef; + } + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + if ($self->{'joinScores'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $self->{'filemap'}->{'GLOBAL'}; + return undef; + } + if (defined($self->{'filemap'}->{'GLOBAL_SCORES'}) && $self->{'filemap'}->{'GLOBAL_SCORES'} gt $self->{'filemap'}->{'GLOBAL'}) { + return undef; + } + my $file = $self->getGlobalOutfile() || die "HUH?"; + $file =~ s/\.cf$/_SCORES.cf/; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + $file = $self->getDir().'/'.$self->getPriority()."_".$self->getId()."_SCORES.cf"; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + my $gpriority = $self->{'filemap'}->{'GLOBAL'}; + $gpriority =~ s/^(\d\d).*/$1/; + $file = $self->getDir().'/'.($gpriority+1)."_".$self->getId()."_SCORES.cf" if ($gpriority < 99); + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + $file = $self->{'filemap'}->{'GLOBAL'}; + $file =~ s/_/__/; + if ($file gt $self->{'filemap'}->{'GLOBAL'}) { + $self->{'filemap'}->{'GLOBAL_SCORES'} = $file; + return undef; + } + return ("Cannot determine a valid GLOBAL output file\n"); } =head2 C<$kw->getOutput()> @@ -638,14 +661,14 @@ Returns a reference to the output array buffer for the current C<$file>. sub getOutput { - my $self = shift; - my $file = shift || $self->{'file'}; + my $self = shift; + my $file = shift || $self->{'file'}; - if ($self->{'singleOutfile'}) { - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; - } elsif (defined($self->{'filemap'}->{$file})) { - return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; - } + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; + } } =head2 C<$kw->getScoresOutput()> @@ -656,23 +679,23 @@ Returns a reference to the score output array buffer for the current C<$file>. sub getScoresOutput { - my $self = shift; - my $file = shift || $self->{'file'}; + my $self = shift; + my $file = shift || $self->{'file'}; - $self->setScoreOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); - if ($self->{'joinScores'}) { - if ($self->{'singleOutfile'}) { - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; - } elsif (defined($self->{'filemap'}->{$file})) { - return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; - } - } else { - if ($self->{'singleOutfile'}) { - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; - } elsif (defined($self->{'filemap'}->{$file})) { - return \@{$self->{'output'}->{$self->{'filemap'}->{$file."_SCORES"}}}; - } + $self->setScoreOutfile() unless (defined($self->{'filemap'}->{$file."_SCORES"})); + if ($self->{'joinScores'}) { + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file}}}; } + } else { + if ($self->{'singleOutfile'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; + } elsif (defined($self->{'filemap'}->{$file})) { + return \@{$self->{'output'}->{$self->{'filemap'}->{$file."_SCORES"}}}; + } + } } =head2 C<$kw->getGlobalOutput()> @@ -683,10 +706,10 @@ Returns a reference to the output array buffer for the GLOBAL file. sub getGlobalOutput { - my $self = shift; + my $self = shift; - $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; } =head2 C<$kw->getGlobalScoresOutput()> @@ -697,73 +720,14 @@ Returns a reference to the output array buffer for the GLOBAL scores file. sub getGlobalScoresOutput { - my $self = shift; + my $self = shift; - $self->setGlobalScoresOutfile unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); - if ($self->{'joinScores'}) { - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; - } else { - return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; - } -} - -=head2 C<$kw->getFiles($regex);> - -Simple recursive search for files within a directory. Will validate that each -file is readable and return an array of file names. - -Expects a single file or directory path scalar as first argument and an optional -regex as the secord. If you have multiple entries to fetch, run separately and -append to your array. - -The regex will be used as a file filter and will only return files that match. - -=cut - -sub getFiles -{ - my $self = shift; - my $regex = shift; - - my $return = ''; - foreach (@args) { - $return .= "$_ does not exist\n" unless (-e "$_" || -l "$_"); - if (-l $_) { - $self->getFiles(readlink($_)); - } elsif (-d $_) { - my @recursive = glob($_."/*"); - $self->getFiles(@recursive); - } else { - if (defined($regex)) { - if ($_ =~ $regex) { - push(@{$self->{'files_ref'}}, $_); - } else { - next; - } - } else { - $return .= "$_ is not readable\n" unless (-r "$_"); - push(@{$self->{'files_ref'}}, $_); - } - } - } - return $return; -} - -=head2 C<$kw->nextFile();> - -Shift next message in files_ref queue to current. - -=cut - -sub nextFile -{ - my $self = shift; - if (scalar(@{$self->{'files_ref'}})) { - $self->{'path'} = shift(@{$self->{'files_ref'}}); - } else { - return "End of array\n"; - } - return 0; + $self->setGlobalScoresOutfile unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + if ($self->{'joinScores'}) { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL'}}}; + } else { + return \@{$self->{'output'}->{$self->{'filemap'}->{'GLOBAL_SCORES'}}}; + } } =head2 C<$kw->clearFile();> @@ -774,12 +738,27 @@ Clear the current file reference queue. sub clearFile { - my $self = shift; - my $file = shift || $self->{'file'}; - delete($self->{'rules'}->{$file}) || return "Failed to delete rules hash\n"; - return 0; + my $self = shift; + my $file = shift || $self->{'file'}; + delete($self->{'rules'}->{$file}) || return "Failed to delete rules hash\n"; + return 0; } +=head2 C<$kw->clearAll();> + +Clear all file reference queues. + +=cut + +sub clearAll +{ + my $self = shift; + foreach my $file (keys(%{$self->{'rules'}})) { + %{$self->{'rules'}->{'DEFAULT'}} = () if ($file eq 'DEFAULT'); + $self->clearFile($file); + } + return 0; +} =head2 C<$kw->readFile(%args);> Read in properly formatted keyword list file. The basic format is one keyword @@ -797,10 +776,10 @@ When keyword groups are omitted, that keyword defaults to just the 'LOCAL' group Examples with other combinations are: - word 0 # Same as previous - word 2 # Used for LOCAL, not GLOBAL, scores 2 - word GROUP # GROUP group, not LOCAL, not GLOBAL, no score - word 0 GROUP # Same as previous + word 0 # Same as previous + word 2 # Used for LOCAL, not GLOBAL, scores 2 + word GROUP # GROUP group, not LOCAL, not GLOBAL, no score + word 0 GROUP # Same as previous word 1 GROUP GLOBAL LOCAL # GROUP group, in GLOBAL, in LOCAL, scores 1 C<%args> can be used to override the attributes used to index the rules, @@ -813,48 +792,52 @@ can step through and make modifications as you go. sub readFile { - my $self = shift; - my $file = shift || return 'No file provided'; - my %args = @_; - $self->getDir(); - $self->{'file'} = $file; - $self->setOutfile(); + my $self = shift; + my $file = shift || return 'No file provided'; + my %args = @_; + # Clean to relative path + $file =~ s/^\.\///; + $file =~ s/^$ENV{'PWD'}\///; - my $n = 0; - if (open(my $fh, '<', $file)) { - my $rules = 0; - while (<$fh>) { - $n++; - # Ignore blank lines and comments - if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) { - next; - # Verify formatting - } elsif (my ($word, $score, $comment, @groups) = $self->readLine($_)) { - if ($self->{'debug'}) { - print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n"; - } - foreach my $group (@groups) { - # Global rules must references the file where the component rule is located - if ($group eq 'GLOBAL') { - $self->{'rules'}->{'GLOBAL'}->{$word} = $self->{'file'}; - # Local rules have enough information from context to determine the component rule - } else { - push(@{$self->{'rules'}->{$self->{'file'}}->{$group}}, $word); - } - } - $self->{'rules'}->{$self->{'file'}}->{'SCORED'}->{$word} = $score if ($score); - $self->{'rules'}->{$self->{'file'}}->{'COMMENTS'}->{$word} = $comment if ($comment); - } elsif ($self->{debug}) { - print STDERR "ERROR: Invalid input in $file, line $n: $_\n"; - } + $self->getDir(); + $self->{'file'} = $file; + $self->setOutfile(); + + my $n = 0; + if (open(my $fh, '<', $file)) { + my $rules = 0; + while (<$fh>) { + $n++; + # Ignore blank lines and comments + if ($_ =~ m/^\s*$/ || $_ =~ m/^#/) { + next; + # Verify formatting + } elsif (my ($word, $score, $comment, @groups) = $self->readLine($_)) { + if ($self->{'debug'}) { + print "FOUND: '$word' '$score' " . (join(',',@groups)) . "\n"; } - print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}} || !$self->{'debug'}); - return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}}); - } else { - delete($self->{'file'}); - return "Failed to read $file"; + foreach my $group (@groups) { + # Global rules must references the file where the component rule is located + if ($group eq 'GLOBAL') { + $self->{'rules'}->{'GLOBAL'}->{$word} = $self->{'file'}; + # Local rules have enough information from context to determine the component rule + } else { + push(@{$self->{'rules'}->{$self->{'file'}}->{$group}}, $word); + } + } + $self->{'rules'}->{$self->{'file'}}->{'SCORED'}->{$word} = $score if ($score); + $self->{'rules'}->{$self->{'file'}}->{'COMMENTS'}->{$word} = $comment if ($comment); + } elsif ($self->{debug}) { + print STDERR "ERROR: Invalid input in $file, line $n: $_\n"; + } } - return undef; + print STDERR "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}} || !$self->{'debug'}); + return "No rules found in $file\n" unless ($self->{'rules'}->{$self->{'file'}}); + } else { + delete($self->{'file'}); + return "Failed to read $file"; + } + return undef; } =head2 C<$kw->readAll();> @@ -865,44 +848,40 @@ Loops through C for all files in the queue. sub readAll { - my $self = shift; - my @files = ( shift ) || return "No files provided\n"; - while (scalar(@_)) { - push(@files, shift); - } + my $self = shift; + my @files = @_; - my @failed; - foreach my $file (@files) { - if (!-e $file) { - push(@failed, $file); - print STDERR "$file does not exist\n" if ($self->{'debug'}); - } elsif (-l $file) { - # Accept only links within PWD - my $dest = readlink($file); - if ($dest =~ m/^\//) { - $dest =~ s/$ENV{'PWD'}\/// ; - if ($dest eq readlink($file)) { - push(@failed, $file); - print STDERR "Symlink outside of PWD $file\n" if ($self->{'debug'}); - next; - } - } - $self->readAll($dest); - } elsif (-f $file) { - push(@failed, "Failed to read $file") if $self->readFile($file); - } elsif (-d $file) { - push(@failed, @_) if $self->readAll(glob($file.'/*')); - # Bash pattern? - } else { - if (my @glob = glob($file)) { - push(@failed, ( @_ )) if $self->readAll(@glob); - } else { - print STDERR "Bad file $file\n" if ($self->{'debug'}); - } + my @failed; + foreach my $file (@files) { + if (!-e $file) { + push(@failed, "$file does not exist\n"); + print STDERR "$file does not exist\n" if ($self->{'debug'}); + } elsif (-l $file) { + # Accept only links within PWD + my $dest = readlink($file); + if ($dest =~ m/^\//) { + $dest =~ s/$ENV{'PWD'}\/// ; + if ($dest eq readlink($file)) { + push(@failed, "Symlink outside of PWD $file\n"); + print STDERR "Symlink outside of PWD $file\n" if ($self->{'debug'}); + next; } + } + $self->readAll($dest); + } elsif (-f $file) { + push(@failed, "Failed to read $file") if $self->readFile($file); + } elsif (-d $file) { + push(@failed, $self->readAll(glob($file.'/*')) ); + # Bash pattern? + } else { + if (my @glob = glob($file)) { + push(@failed, ( @_ )) if $self->readAll(@glob); + } else { + print STDERR "Bad file $file\n" if ($self->{'debug'}); + } } - return @failed if (scalar(@failed)); - return undef; + } + return @failed; } =head2 C<$kw->readLine($line)> @@ -914,43 +893,43 @@ if the line is not properly formatted. sub readLine { - my $self = shift; - my $line = shift; - my $invalid = shift; - my ($word, $score, $comment, @groups); - if (my @sections = $line =~ m/(?:^|\s+)(?:([^\d\s#]\S+)|(\d+(?:\.\d+)?\b)|([^\d\s#]+)|(#.*$))/g) { - while (@sections) { - next unless my $section = shift(@sections); - if (defined($comment)) { - $comment .= ' ' if ($comment ne ''); - $comment .= $section; - } elsif (!defined($word) && $section =~ m/^([^\d\s#]\S+)$/) { - $word = $section; - } elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) { - $score = $section; - } elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) { - push(@groups, uc($section)); - } elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) { - $comment = $section; - $comment =~ s/^#\s*//; - } else { - push(@invalid, $section); - } - } - if (scalar(@invalid)) { - if ($self->{'debug'}) { - print("Invalid clauses: '".join("', ", @invalid)."' in '$line'\n"); - } - return undef; - } - return undef unless ($word); - $word = lc($word); - $score //= 0; - $comment //= ''; - @groups = ( 'LOCAL', 'GLOBAL' ) unless (scalar(@groups)); - return ( $word, $score, $comment, @groups ); + my $self = shift; + my $line = shift; + my @invalid; + my ($word, $score, $comment, @groups); + if (my @sections = $line =~ m/(?:^|\s+)(?:([^\d\s#]\S+)|(\d+(?:\.\d+)?\b)|([^\d\s#]+)|(#.*$))/g) { + while (@sections) { + next unless my $section = shift(@sections); + if (defined($comment)) { + $comment .= ' ' if ($comment ne ''); + $comment .= $section; + } elsif (!defined($word) && $section =~ m/^([^\d\s#]\S+)$/) { + $word = $section; + } elsif (defined($word) && $section =~ m/^(\d+(?:\.\d+)?)$/ && !defined($score)) { + $score = $section; + } elsif (defined($word) && $section =~ m/^([^\d\s#]+)$/) { + push(@groups, uc($section)); + } elsif (defined($word) && $section =~ m/^#.*$/ && !defined($comment)) { + $comment = $section; + $comment =~ s/^#\s*//; + } else { + push(@invalid, $section); + } } - return (); + if (scalar(@invalid)) { + if ($self->{'debug'}) { + print("Invalid clauses: '".join("', ", @invalid)."' in '$line'\n"); + } + return undef; + } + return undef unless ($word); + $word = lc($word); + $score //= 0; + $comment //= ''; + @groups = ( 'LOCAL' ) unless (scalar(@groups)); + return ( $word, $score, $comment, @groups ); + } + return (); } @@ -962,11 +941,11 @@ Return a standardized rule prefix using C and C. sub getPrefix { - my $self = shift; - my $file = shift || $self->getFile(); - my $id = $self->getId(); - $file =~ s/(?:.*\/)*(.*)\.cf/$1/ if (defined($file)); - return uc($id.(defined($file) ? "_$file" : '')); + my $self = shift; + my $file = shift || $self->getFile(); + my $id = $self->getId(); + $file =~ s/(?:.*\/)*(.*)\.cf/$1/ if (defined($file)); + return uc($id.(defined($file) ? "_$file" : '')); } =head2 C<$kw->generateMetas($file);> @@ -977,44 +956,44 @@ make them available to all other rule types. These are a match for each word across all groups and 'SCORED' in the body and subject header, then a 'meta' rule to connect them. - body __KW_FILE_WORD_BODY /\bword\b/ - header __KW_FILE_WORD_SUBJ Subject =~ /\bword\b/ - meta __KW_FILE_WORD ( __KW_FILE_WORD_BODY || __KW_FILE_WORD_SUBJ ) + body __KW_FILE_WORD_BODY /\bword\b/ + header __KW_FILE_WORD_SUBJ Subject =~ /\bword\b/ + meta __KW_FILE_WORD ( __KW_FILE_WORD_BODY || __KW_FILE_WORD_SUBJ ) =cut sub generateMetas { - my $self = shift; - my $words = shift; - my $file = $self->getFile(); - my $prefix = $self->getPrefix(); + my $self = shift; + my $words = shift; + my $file = $self->getFile(); + my $prefix = $self->getPrefix(); - my $output = $self->getOutput(); - my $scoresoutput = $self->getScoresOutput(); + my $output = $self->getOutput(); + my $scoresoutput = $self->getScoresOutput(); - if ($self->{'debug'}) { - print STDERR "Writing Metas for $file\n"; - } - if ($self->{'singleOutfile'}) { - push(@{$output}, - "############".('#'*length($prefix))."#\n". - "# Metas for $prefix\n". - "############".('#'*length($prefix))."#\n\n" - ); - } - foreach my $word (keys(%{$words})) { - push(@{$output}, "# ".$words->{$word}."\n") if ($words->{$word}); - push(@{$output}, - "body __${prefix}_".uc($word). - "_BODY /\\b${word}\\b/\n", - "header __${prefix}_".uc($word). - "_SUBJ Subject =~ /\\b${word}\\b/\n", - "meta __${prefix}_".uc($word). - " ( __${prefix}_".uc($word)."_BODY || __${prefix}_". - uc($word)."_SUBJ )\n\n" - ); - } + if ($self->{'debug'}) { + print STDERR "Writing Metas for $file\n"; + } + if ($self->{'singleOutfile'}) { + push(@{$output}, + "############".('#'*length($prefix))."#\n". + "# Metas for $prefix\n". + "############".('#'*length($prefix))."#\n\n" + ); + } + foreach my $word (keys(%{$words})) { + push(@{$output}, "# ".$words->{$word}."\n") if ($words->{$word}); + push(@{$output}, + "body __${prefix}_".uc($word). + "_BODY /\\b${word}\\b/\n", + "header __${prefix}_".uc($word). + "_SUBJ Subject =~ /\\b${word}\\b/\n", + "meta __${prefix}_".uc($word). + " ( __${prefix}_".uc($word)."_BODY || __${prefix}_". + uc($word)."_SUBJ )\n\n" + ); + } } =head2 C<$kw->generateScored($file);> @@ -1024,41 +1003,41 @@ Write 'SCORED' word rules for file in the current file, or one set by C<$file>. These are simply a 'meta' rule for only the existing component rule (the same rule with a '__' prefix). - meta KW_FILE_WORD ( __KW_FILE_WORD ) + meta KW_FILE_WORD ( __KW_FILE_WORD ) score KW_FILE_WORD 1.0 =cut sub generateScored { - my $self = shift; - my $file = shift || $self->getFile(); - my $prefix = $self->getPrefix(); + my $self = shift; + my $file = shift || $self->getFile(); + my $prefix = $self->getPrefix(); - my $output = $self->getOutput(); - my $scoreoutput = $self->getScoresOutput(); - if ($self->{'singleOutfile'}) { - push(@{$output}, - "############".('#'*length($prefix))."#\n". - "# Scored words for $prefix\n". - "############".('#'*length($prefix))."#\n\n" - ); - } - foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}}) ) { - push(@{$output}, "meta ${prefix}_".uc($_). - " ( __${prefix}_".uc($_)." )\n" - ); - if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) { - push(@{$scoreoutput}, "describe ${prefix}_".uc($_)." ". - $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}. - "\n" - ); - } - push(@{$scoreoutput}, - "score ${prefix}_".uc($_)." ". - $self->{'rules'}->{$file}->{'SCORED'}->{$_}."\n\n" - ); + my $output = $self->getOutput(); + my $scoreoutput = $self->getScoresOutput(); + if ($self->{'singleOutfile'}) { + push(@{$output}, + "############".('#'*length($prefix))."#\n". + "# Scored words for $prefix\n". + "############".('#'*length($prefix))."#\n\n" + ); + } + foreach ( keys(%{$self->{'rules'}->{$file}->{'SCORED'}}) ) { + push(@{$output}, "meta ${prefix}_".uc($_). + " ( __${prefix}_".uc($_)." )\n" + ); + if (defined($self->{'rules'}->{$file}->{'COMMENTS'}->{$_})) { + push(@{$scoreoutput}, "describe ${prefix}_".uc($_)." ". + $self->{'rules'}->{$file}->{'COMMENTS'}->{$_}. + "\n" + ); } + push(@{$scoreoutput}, + "score ${prefix}_".uc($_)." ". + $self->{'rules'}->{$file}->{'SCORED'}->{$_}."\n\n" + ); + } } =head2 C<$kw->generateGroups($file);> @@ -1067,11 +1046,11 @@ Write group rules for file in the current file, or one set by C<$file>. These are a list of 'meta' rules where the component rules are all of the 'meta's in the group for each of the available match counts. - meta KW_FILE_GROUP_1 ( __KW_WORD1 + __KW_WORD2 ) >= 1 + meta KW_FILE_GROUP_1 ( __KW_WORD1 + __KW_WORD2 ) >= 1 describe KW_FILE_GROUP_1 Found 1 word(s) from GROUP score KW_FILE_GROUP_1 0.01 - meta KW_FILE_GROUP_2 ( __KW_WORD1 + __KW_WORD2 ) >= 2 + meta KW_FILE_GROUP_2 ( __KW_WORD1 + __KW_WORD2 ) >= 2 describe KW_FILE_GROUP_2 Found 2 word(s) from GROUP score KW_FILE_GROUP_2 0.01 @@ -1079,41 +1058,41 @@ the group for each of the available match counts. sub generateGroups { - my $self = shift; - my $file = shift || $self->getFile(); - my $prefix = $self->getPrefix(); + my $self = shift; + my $file = shift || $self->getFile(); + my $prefix = $self->getPrefix(); - my $output = $self->getOutput(); - my $scoreoutput = $self->getScoresOutput(); - if ($self->{'singleOutfile'}) { - push(@{$output}, - "#############".('#'*length($prefix))."#\n". - "# Groups for $prefix\n". - "#############".('#'*length($prefix))."#\n\n" - ); + my $output = $self->getOutput(); + my $scoreoutput = $self->getScoresOutput(); + if ($self->{'singleOutfile'}) { + push(@{$output}, + "#############".('#'*length($prefix))."#\n". + "# Groups for $prefix\n". + "#############".('#'*length($prefix))."#\n\n" + ); + } + foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) { + next if ( $group eq 'COMMENTS' || $group eq 'SCORED' ); + push(@{$output}, "# $group\n"); + my $gprefix = $prefix; + unless ($group eq 'LOCAL') { + $gprefix .= '_'.$group; } - foreach my $group ( keys(%{$self->{'rules'}->{$file}}) ) { - next if ( $group eq 'COMMENTS' || $group eq 'SCORED' ); - push(@{$output}, "# $group\n"); - my $gprefix = $prefix; - unless ($group eq 'LOCAL') { - $gprefix .= '_'.$group; - } - my $start = "meta ${gprefix}_"; - my $words = " ( "; - foreach my $word ( @{$self->{'rules'}->{$file}->{$group}} ) { - $words .= "__${prefix}_".uc($word)." + "; - } - $words =~ s/\+ $/\) >= /; - for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{$file}->{$group}}); $i++) { - push(@{$output}, $start.$i.$words.$i."\n\n"); - push(@{$scoreoutput}, - "describe ${gprefix}_$i Found $i $group word" - . ($i>1 ? 's' : '')." from $prefix\n", - "score ${gprefix}_$i 0.01\n\n" - ); - } + my $start = "meta ${gprefix}_"; + my $words = " ( "; + foreach my $word ( @{$self->{'rules'}->{$file}->{$group}} ) { + $words .= "__${prefix}_".uc($word)." + "; } + $words =~ s/\+ $/\) >= /; + for (my $i = 1; $i <= scalar(@{$self->{'rules'}->{$file}->{$group}}); $i++) { + push(@{$output}, $start.$i.$words.$i."\n\n"); + push(@{$scoreoutput}, + "describe ${gprefix}_$i Found $i $group word" + . ($i>1 ? 's' : '')." from $prefix\n", + "score ${gprefix}_$i 0.01\n\n" + ); + } + } } =head2 C<$kw->generateGlobals();> @@ -1121,11 +1100,11 @@ sub generateGroups Write 'GLOBAL' group rules. Similar to C except that component rules must be included from external files. - meta KW_1 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 1 ) + meta KW_1 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 1 ) describe KW_1 Found 1 GLOBAL word(s) from KW score KW_1 0.01 - meta KW_2 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 2 ) + meta KW_2 ( ( __KW_FILE1_WORD + __KW_FILE2_WORD ) >= 2 ) describe KW_2 Found 2 GLOBAL word(s) from KW score KW_2 0.01 @@ -1133,84 +1112,84 @@ rules must be included from external files. sub generateGlobals { - my $self = shift; + my $self = shift; - $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); - $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); - my $output = $self->getGlobalOutput(); - my $scoreoutput = $self->getGlobalScoresOutput(); - if ($self->{'singleOutfile'}) { - push(@{$output}, - "##########\n". - "# Globals\n". - "##########\n\n" - ); - } - $self->setFile(undef); - my $gprefix = $self->getPrefix(); - my $start = "meta ${gprefix}_"; - my $words = " ( "; - my $prefix; - foreach my $word ( keys(%{$self->{'rules'}->{'GLOBAL'}}) ) { - $prefix = $self->getPrefix($self->{'rules'}->{'GLOBAL'}->{$word}); - $words .= "__${prefix}_" . uc($word) . " + "; - } - $words =~ s/\+ $/\) >= /; - for (my $i = 1; $i <= scalar(keys(%{$self->{'rules'}->{'GLOBAL'}})); $i++) { - my $line = $start.$i.$words.$i."\n"; - push(@{$output}, $line.($self->{'joinScores'} ? '' : "\n")); - push(@{$scoreoutput}, "describe ${gprefix}_${i} Found $i ". - "GLOBAL word(s) from ${gprefix}\n", - "score ${gprefix}_${i} 0.01\n\n" - ); - } + $self->setGlobalOutfile() unless (defined($self->{'filemap'}->{'GLOBAL'})); + $self->setGlobalScoresOutfile() unless (defined($self->{'filemap'}->{'GLOBAL_SCORES'})); + my $output = $self->getGlobalOutput(); + my $scoreoutput = $self->getGlobalScoresOutput(); + if ($self->{'singleOutfile'}) { + push(@{$output}, + "##########\n". + "# Globals\n". + "##########\n\n" + ); + } + $self->setFile(undef); + my $gprefix = $self->getPrefix(); + my $start = "meta ${gprefix}_"; + my $words = " ( "; + my $prefix; + foreach my $word ( keys(%{$self->{'rules'}->{'GLOBAL'}}) ) { + $prefix = $self->getPrefix($self->{'rules'}->{'GLOBAL'}->{$word}); + $words .= "__${prefix}_" . uc($word) . " + "; + } + $words =~ s/\+ $/\) >= /; + for (my $i = 1; $i <= scalar(keys(%{$self->{'rules'}->{'GLOBAL'}})); $i++) { + my $line = $start.$i.$words.$i."\n"; + push(@{$output}, $line.($self->{'joinScores'} ? '' : "\n")); + push(@{$scoreoutput}, "describe ${gprefix}_${i} Found $i ". + "GLOBAL word(s) from ${gprefix}\n", + "score ${gprefix}_${i} 0.01\n\n" + ); + } } sub getDir { - my $self = shift; - my $dir = $self->{'dir'} || $self->setDir(); - return $dir; + my $self = shift; + my $dir = $self->{'dir'} || $self->setDir(); + return $dir; } sub setDir { - my $self = shift; - my $dir = shift || ("$ENV{'PWD'}/$self->{'id'}"); - $dir =~ m/([^\0]+)/; - $self->{'dir'} = $1; - return undef; + my $self = shift; + my $dir = shift || ("$ENV{'PWD'}/$self->{'id'}"); + $dir =~ m/([^\0]+)/; + $self->{'dir'} = $1; + return undef; } sub createDir { - my $self = shift; - my $dir = shift || $self->getDir(); - unless (-d $dir) { - mkdir($dir) || return "Failed to mkdir '$dir'"; - } - return undef; + my $self = shift; + my $dir = shift || $self->getDir(); + unless (-d $dir) { + mkdir($dir) || return "Failed to mkdir '$dir'"; + } + return undef; } sub cleanDir { - my $self = shift; - my @files = @_; - unless (scalar(@files)) { - my %uniq; - foreach (keys(%{$self->{'filemap'}})) { - $uniq{$_} = 1; - } - @files = keys(%uniq); + my $self = shift; + my @files = @_; + unless (scalar(@files)) { + my %uniq; + foreach (keys(%{$self->{'filemap'}})) { + $uniq{$_} = 1; } - foreach my $file (@files) { - if (-e $self->{'filemap'}->{$file}) { - if ($self->{'debug'}) { - print STDERR "Removing old file ".$self->{'filemap'}->{$file}."\n"; - } - unlink($self->{'filemap'}->{$file}) || die "Output file '".$self->{'filemap'}->{$file}."' exists and could not be deleted\n"; - } + @files = keys(%uniq); + } + foreach my $file (@files) { + if (-e $self->{'filemap'}->{$file}) { + if ($self->{'debug'}) { + print STDERR "Removing old file ".$self->{'filemap'}->{$file}."\n"; + } + unlink($self->{'filemap'}->{$file}) || die "Output file '".$self->{'filemap'}->{$file}."' exists and could not be deleted\n"; } + } } =head2 C<$kw->generateAll($dir)> @@ -1225,7 +1204,7 @@ where ID - Meaningful identifier. From C<$kw->new($id)>, or with C<$kw->id($id)>. FILENAME- Trimmed input file name. Override with C<$kw->file($file)>. Absent - for GLOBAL. + for GLOBAL. WORD - The individual keyword. Used only if it has a independent score. GROUP - The group name. Absent for 'LOCAL'. 1 - The count for hits in that group. @@ -1238,32 +1217,32 @@ body for that word. sub generateAll { - my $self = shift; - my @written; + my $self = shift; + my @written; - $self->cleanDir() if (-d $self->getDir()); - $self->createDir() unless (-d $self->getDir()); + $self->cleanDir() if (-d $self->getDir()); + $self->createDir() unless (-d $self->getDir()); - foreach my $file (keys(%{$self->{'rules'}})) { - # Reserve GLOBAL for last - next if ($file eq 'GLOBAL'); - $self->setFile($file); - my %all; - foreach my $group (keys(%{$self->{'rules'}->{$file}})) { - if ($group eq 'SCORED' || $group eq 'COMMENTS') { - next; - } else { - foreach my $word (@{$self->{'rules'}->{$file}->{$group}}) { - $all{$word} = $self->{'rules'}->{$file}->{'COMMENTS'}->{$word} || ''; - } - } + foreach my $file (keys(%{$self->{'rules'}})) { + # Reserve GLOBAL for last + next if ($file eq 'GLOBAL'); + $self->setFile($file); + my %all; + foreach my $group (keys(%{$self->{'rules'}->{$file}})) { + if ($group eq 'SCORED' || $group eq 'COMMENTS') { + next; + } else { + foreach my $word (@{$self->{'rules'}->{$file}->{$group}}) { + $all{$word} = $self->{'rules'}->{$file}->{'COMMENTS'}->{$word} || ''; } - $self->generateMetas(\%all); - $self->generateGroups(); - $self->generateScored() unless ($self->{joinScores}); + } } - $self->generateGlobals(); - return 0; + $self->generateMetas(\%all); + $self->generateGroups(); + $self->generateScored() unless ($self->{joinScores}); + } + $self->generateGlobals(); + return 0; } =head2 C<$kw->writeFile($file)> @@ -1272,18 +1251,19 @@ sub generateAll sub writeFile { - my $self = shift; - my $file = shift || return "Requires filename"; - return "Nothing generated for $file" unless (defined($self->{'output'}->{$file})); + my $self = shift; + my $file = shift || return "Requires filename"; + return "Nothing generated for $file" unless (defined($self->{'output'}->{$file})); - if (open(my $fh, ">", $file)) { - foreach my $line (@{$self->{'output'}->{$file}}) { - print $fh $line; - } - close($fh); - } else { - return "Could not open $file for writing"; + return unless (scalar(@{$self->{'output'}->{$file}})); + if (open(my $fh, ">", $file)) { + foreach my $line (@{$self->{'output'}->{$file}}) { + print $fh $line; } + close($fh); + } else { + return "Could not open $file for writing"; + } } =head2 C<$kw->writeAll()> @@ -1292,14 +1272,33 @@ sub writeFile sub writeAll { - my $self = shift; - $self->generateAll() unless (defined($self->{'output'})); - my @errors; - foreach my $file (keys(%{$self->{'output'}})) { - my $ret = $self->writeFile($file); - push(@errors, $ret) if (defined($ret)); - } - return ("Errors:\n ".join("\n ", @errors)) if (scalar(@errors)); + my $self = shift; + $self->generateAll() unless (defined($self->{'output'})); + my @errors; + foreach my $file (keys(%{$self->{'output'}})) { + my $ret = $self->writeFile($file); + push(@errors, $ret) if (defined($ret)); + } + return ("Errors:\n ".join("\n ", @errors)) if (scalar(@errors)); +} + +=head2 C<$kw->verifyOutput()> + +Process output directory with SpamAssassin linter. Return errors or +undef if successful. An optional C<$path> can be provided to test a +different file or directory. + +=cut + +sub verifyOutput +{ + my $self = shift; + my $path = shift || $self->{'dir'}; + + use Mail::SpamAssassin; + my $sa = Mail::SpamAssassin->new( { 'site_rules_filename' => $path } ); + + return $sa->lint_rules(); } =head1 MORE diff --git a/t/01_attributes.t b/t/01_attributes.t index 32d8e37..b620741 100644 --- a/t/01_attributes.t +++ b/t/01_attributes.t @@ -16,7 +16,7 @@ my $defaults = { }; my $args = { - 'id' => '01', + 'id' => 'TEST', 'priority' => 10, 'debug' => 1, 'singleOutfile' => 1, diff --git a/t/02_files.lnk b/t/02_files.lnk deleted file mode 120000 index d2114e9..0000000 --- a/t/02_files.lnk +++ /dev/null @@ -1 +0,0 @@ -/home/jpm/KeywordRuleGenerator/t/02_files.cf \ No newline at end of file diff --git a/t/02_files.lnk b/t/02_files.lnk new file mode 100644 index 0000000..4f5b278 --- /dev/null +++ b/t/02_files.lnk @@ -0,0 +1 @@ +word diff --git a/t/02_files.t b/t/02_files.t index f03f4b0..b8d682c 100644 --- a/t/02_files.t +++ b/t/02_files.t @@ -18,26 +18,26 @@ ok ($kw->getFile($file) == $file, "Get current working file"); ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files in rules"); ok ($kw->{'rules'}->{$file}, "Loaded correct hash key"); ok ($kw->{'filemap'}->{$file} =~ m/50_02_T_02_FILES.cf$/, "Loaded correct output name"); -ok (!$kw->clearFiles(), "Clear 'rules' hash"); -ok (!$kw->{'rules'}, "Fetched none after clearing"); +ok (!$kw->clearAll(), "Clear 'rules' hash"); +ok (!scalar(keys(%{$kw->{'rules'}})), "Fetched none after clearing"); # Read a directory my $dir = 't/02_files.dir'; ok (!$kw->readAll($dir), "Run file on dir"); ok (scalar(keys(%{$kw->{'rules'}})) == 3, "Fetched correct number of files from dir"); -ok (!$kw->clearFiles(), "Clear dir files"); +ok (!$kw->clearAll(), "Clear dir files"); # Read a symlink my $link = 't/02_files.lnk'; ok (!$kw->readAll($link), "Run file on link"); ok ($kw->getFile() == $file, "Fetched correct name ($file) from link ($link)"); ok (scalar(keys(%{$kw->{'rules'}})) == 2, "Fetched correct number of files from link"); -ok (!$kw->clearFiles(), "Clear link files"); +ok (!$kw->clearAll(), "Clear link files"); # Read multiple mixed ok (!$kw->readAll($file, $dir, $link), "Run readAll on multiple/mixed"); -ok (scalar(keys(%{$kw->{'rules'}})) == 4, "Fetched correct number of files from all"); -ok (!$kw->clearFiles(), "Clear dir files"); +ok (scalar(keys(%{$kw->{'rules'}})) == 5, "Fetched correct number of files from all"); +ok (!$kw->clearAll(), "Clear dir files"); ok ($kw->readFile('does_not_exist'), "Correctly failed for non-existent file"); diff --git a/t/04_rules.t b/t/04_rules.t index 8035ff2..644e36e 100644 --- a/t/04_rules.t +++ b/t/04_rules.t @@ -17,7 +17,6 @@ ok(!scalar(@failed), "Load 'rules' hash with readAll"); my $expected = getExpected(); my ( $missing, $extra, $incorrect ) = 0; -use Data::Dump; foreach my $file (keys(%{$expected})) { if ($file eq 'GLOBAL') { my ($m, $e) = compareValues( diff --git a/t/05_write.t b/t/05_write.t index 094e463..5b08075 100644 --- a/t/05_write.t +++ b/t/05_write.t @@ -46,10 +46,8 @@ foreach (@files) { } ok (!scalar(keys(%remaining)), "All expected output files found"); -use Mail::SpamAssassin; -my $sa = Mail::SpamAssassin->new( { 'site_rules_filename' => $testdir } ); -$failed = $sa->lint_rules(); -ok (!$failed, "Verified by spamassassin".($res ? "\n$failed" : "")); +$failed = $kw->verifyOutput(); +ok (!$failed, "Verified by spamassassin".($failed ? "\n$failed" : "")); $kw->cleanDir();